summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-01-04 15:38:37 +0000
committerTom Stellard <thomas.stellard@amd.com>2013-01-04 21:05:09 +0000
commitaed37cbee8efb59b2f1a6bc69adcbaecd9e4fa13 (patch)
tree5748d373dc01011b860049208135c3d942882e29
parent05c143cc049a87c515ecdc5695e5912da60cf5cb (diff)
radeon/llvm: Remove backend code from Mesa
This code now lives in an external tree. For the next Mesa release fetch the code from the master branch of this LLVM repo: http://cgit.freedesktop.org/~tstellar/llvm/ For all subsequent Mesa releases, fetch the code from the official LLVM project: www.llvm.org
-rw-r--r--src/gallium/drivers/radeon/.gitignore18
-rw-r--r--src/gallium/drivers/radeon/AMDGPU.h46
-rw-r--r--src/gallium/drivers/radeon/AMDGPU.td38
-rw-r--r--src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp134
-rw-r--r--src/gallium/drivers/radeon/AMDGPUAsmPrinter.h43
-rw-r--r--src/gallium/drivers/radeon/AMDGPUCodeEmitter.h48
-rw-r--r--src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp62
-rw-r--r--src/gallium/drivers/radeon/AMDGPUISelLowering.cpp351
-rw-r--r--src/gallium/drivers/radeon/AMDGPUISelLowering.h142
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp258
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.h148
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.td71
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstructions.td183
-rw-r--r--src/gallium/drivers/radeon/AMDGPUIntrinsics.td63
-rw-r--r--src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp82
-rw-r--r--src/gallium/drivers/radeon/AMDGPUMCInstLower.h30
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp50
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.h62
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.td22
-rw-r--r--src/gallium/drivers/radeon/AMDGPUSubtarget.cpp94
-rw-r--r--src/gallium/drivers/radeon/AMDGPUSubtarget.h66
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp143
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.h70
-rw-r--r--src/gallium/drivers/radeon/AMDIL.h106
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.cpp129
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.h70
-rw-r--r--src/gallium/drivers/radeon/AMDILBase.td85
-rw-r--r--src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp3274
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.cpp137
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.h115
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.cpp94
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.h90
-rw-r--r--src/gallium/drivers/radeon/AMDILDevices.h19
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp169
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.h87
-rw-r--r--src/gallium/drivers/radeon/AMDILFrameLowering.cpp53
-rw-r--r--src/gallium/drivers/radeon/AMDILFrameLowering.h46
-rw-r--r--src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp395
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp677
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.td270
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp93
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsicInfo.h47
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsics.td242
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.cpp71
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.h59
-rw-r--r--src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp1275
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.td110
-rw-r--r--src/gallium/drivers/radeon/AMDILSIDevice.cpp49
-rw-r--r--src/gallium/drivers/radeon/AMDILSIDevice.h45
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.h75
-rw-r--r--src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp34
-rw-r--r--src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h34
-rw-r--r--src/gallium/drivers/radeon/LICENSE.TXT43
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp80
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp96
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h30
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h59
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp111
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h51
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp727
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp296
-rw-r--r--src/gallium/drivers/radeon/Makefile66
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources83
-rw-r--r--src/gallium/drivers/radeon/Processors.td29
-rw-r--r--src/gallium/drivers/radeon/R600Defines.h35
-rw-r--r--src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp292
-rw-r--r--src/gallium/drivers/radeon/R600GenRegisterInfo.pl206
-rw-r--r--src/gallium/drivers/radeon/R600ISelLowering.cpp740
-rw-r--r--src/gallium/drivers/radeon/R600ISelLowering.h69
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.cpp512
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.h132
-rw-r--r--src/gallium/drivers/radeon/R600Instructions.td1458
-rw-r--r--src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td50
-rw-r--r--src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td26
-rw-r--r--src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp33
-rw-r--r--src/gallium/drivers/radeon/R600MachineFunctionInfo.h38
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.cpp128
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.h63
-rw-r--r--src/gallium/drivers/radeon/R600Schedule.td36
-rw-r--r--src/gallium/drivers/radeon/SIAssignInterpRegs.cpp151
-rw-r--r--src/gallium/drivers/radeon/SIGenRegisterInfo.pl291
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.cpp466
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.h63
-rw-r--r--src/gallium/drivers/radeon/SIInstrFormats.td131
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.cpp76
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.h62
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.td506
-rw-r--r--src/gallium/drivers/radeon/SIInstructions.td1180
-rw-r--r--src/gallium/drivers/radeon/SIIntrinsics.td40
-rw-r--r--src/gallium/drivers/radeon/SILowerFlowControl.cpp161
-rw-r--r--src/gallium/drivers/radeon/SILowerLiteralConstants.cpp105
-rw-r--r--src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp19
-rw-r--r--src/gallium/drivers/radeon/SIMachineFunctionInfo.h38
-rw-r--r--src/gallium/drivers/radeon/SIRegisterInfo.cpp60
-rw-r--r--src/gallium/drivers/radeon/SIRegisterInfo.h54
-rw-r--r--src/gallium/drivers/radeon/SISchedule.td15
-rw-r--r--src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp26
-rw-r--r--src/gallium/drivers/radeon/loader.cpp35
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.cpp26
99 files changed, 0 insertions, 19168 deletions
diff --git a/src/gallium/drivers/radeon/.gitignore b/src/gallium/drivers/radeon/.gitignore
deleted file mode 100644
index b723d73ff40..00000000000
--- a/src/gallium/drivers/radeon/.gitignore
+++ /dev/null
@@ -1,18 +0,0 @@
1AMDGPUInstrEnums.h.include
2AMDGPUInstrEnums.include
3AMDGPUInstrEnums.td
4AMDILGenAsmWriter.inc
5AMDILGenCallingConv.inc
6AMDILGenCodeEmitter.inc
7AMDILGenDAGISel.inc
8AMDILGenEDInfo.inc
9AMDILGenInstrInfo.inc
10AMDILGenIntrinsics.inc
11AMDILGenRegisterInfo.inc
12AMDILGenSubtargetInfo.inc
13R600HwRegInfo.include
14R600Intrinsics.td
15R600RegisterInfo.td
16SIRegisterGetHWRegNum.inc
17SIRegisterInfo.td
18loader
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h
deleted file mode 100644
index f484caa9a3e..00000000000
--- a/src/gallium/drivers/radeon/AMDGPU.h
+++ /dev/null
@@ -1,46 +0,0 @@
1//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef AMDGPU_H
11#define AMDGPU_H
12
13#include "AMDGPUTargetMachine.h"
14#include "llvm/Support/TargetRegistry.h"
15#include "llvm/Target/TargetMachine.h"
16
17namespace llvm {
18
19class FunctionPass;
20class AMDGPUTargetMachine;
21
22// R600 Passes
23FunctionPass* createR600KernelParametersPass(const TargetData* TD);
24FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
25
26// SI Passes
27FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
28FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
29FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
30FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
31
32// Passes common to R600 and SI
33FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
34
35} // End namespace llvm
36
37namespace ShaderType {
38 enum Type {
39 PIXEL = 0,
40 VERTEX = 1,
41 GEOMETRY = 2,
42 COMPUTE = 3
43 };
44}
45
46#endif // AMDGPU_H
diff --git a/src/gallium/drivers/radeon/AMDGPU.td b/src/gallium/drivers/radeon/AMDGPU.td
deleted file mode 100644
index 5086f63d79c..00000000000
--- a/src/gallium/drivers/radeon/AMDGPU.td
+++ /dev/null
@@ -1,38 +0,0 @@
1//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9
10// Include AMDIL TD files
11include "AMDILBase.td"
12
13
14def AMDGPUInstrInfo : InstrInfo {}
15
16//===----------------------------------------------------------------------===//
17// Declare the target which we are implementing
18//===----------------------------------------------------------------------===//
19def AMDGPUAsmWriter : AsmWriter {
20 string AsmWriterClassName = "InstPrinter";
21 int Variant = 0;
22 bit isMCAsmWriter = 1;
23}
24
25def AMDGPU : Target {
26 // Pull in Instruction Info:
27 let InstructionSet = AMDGPUInstrInfo;
28 let AssemblyWriters = [AMDGPUAsmWriter];
29}
30
31// Include AMDGPU TD files
32include "R600Schedule.td"
33include "SISchedule.td"
34include "Processors.td"
35include "AMDGPUInstrInfo.td"
36include "AMDGPUIntrinsics.td"
37include "AMDGPURegisterInfo.td"
38include "AMDGPUInstructions.td"
diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp
deleted file mode 100644
index 392791cd49d..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
1//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The AMDGPUAsmPrinter is used to print both assembly string and also binary
11// code. When passed an MCAsmStreamer it prints assembly and when passed
12// an MCObjectStreamer it outputs binary code.
13//
14//===----------------------------------------------------------------------===//
15//
16
17
18#include "AMDGPUAsmPrinter.h"
19#include "AMDGPU.h"
20#include "SIMachineFunctionInfo.h"
21#include "SIRegisterInfo.h"
22#include "llvm/MC/MCStreamer.h"
23#include "llvm/Target/TargetLoweringObjectFile.h"
24#include "llvm/Support/TargetRegistry.h"
25
26using namespace llvm;
27
28
29static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
30 MCStreamer &Streamer) {
31 return new AMDGPUAsmPrinter(tm, Streamer);
32}
33
34extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
35 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
36}
37
38/// runOnMachineFunction - We need to override this function so we can avoid
39/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
40bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
41 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
42 if (STM.dumpCode()) {
43 MF.dump();
44 }
45 SetupMachineFunction(MF);
46 if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
47 EmitProgramInfo(MF);
48 }
49 OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
50 EmitFunctionBody();
51 return false;
52}
53
54void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
55 unsigned MaxSGPR = 0;
56 unsigned MaxVGPR = 0;
57 bool VCCUsed = false;
58 const SIRegisterInfo * RI =
59 static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
60
61 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
62 BB != BB_E; ++BB) {
63 MachineBasicBlock &MBB = *BB;
64 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
65 I != E; ++I) {
66 MachineInstr &MI = *I;
67
68 unsigned numOperands = MI.getNumOperands();
69 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
70 MachineOperand & MO = MI.getOperand(op_idx);
71 unsigned maxUsed;
72 unsigned width = 0;
73 bool isSGPR = false;
74 unsigned reg;
75 unsigned hwReg;
76 if (!MO.isReg()) {
77 continue;
78 }
79 reg = MO.getReg();
80 if (reg == AMDGPU::VCC) {
81 VCCUsed = true;
82 continue;
83 }
84 switch (reg) {
85 default: break;
86 case AMDGPU::EXEC:
87 case AMDGPU::SI_LITERAL_CONSTANT:
88 case AMDGPU::SREG_LIT_0:
89 case AMDGPU::M0:
90 continue;
91 }
92
93 if (AMDGPU::SReg_32RegClass.contains(reg)) {
94 isSGPR = true;
95 width = 1;
96 } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
97 isSGPR = false;
98 width = 1;
99 } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
100 isSGPR = true;
101 width = 2;
102 } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
103 isSGPR = false;
104 width = 2;
105 } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
106 isSGPR = true;
107 width = 4;
108 } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
109 isSGPR = false;
110 width = 4;
111 } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
112 isSGPR = true;
113 width = 8;
114 } else {
115 assert(!"Unknown register class");
116 }
117 hwReg = RI->getHWRegNum(reg);
118 maxUsed = hwReg + width - 1;
119 if (isSGPR) {
120 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
121 } else {
122 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
123 }
124 }
125 }
126 }
127 if (VCCUsed) {
128 MaxSGPR += 2;
129 }
130 SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
131 OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
132 OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
133 OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
134}
diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h
deleted file mode 100644
index b35d2e9b2ca..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h
+++ /dev/null
@@ -1,43 +0,0 @@
1//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// AMDGPU Assembly printer class.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef AMDGPU_ASMPRINTER_H
15#define AMDGPU_ASMPRINTER_H
16
17#include "llvm/CodeGen/AsmPrinter.h"
18
19namespace llvm {
20
21class AMDGPUAsmPrinter : public AsmPrinter {
22
23public:
24 explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
25 : AsmPrinter(TM, Streamer) { }
26
27 virtual bool runOnMachineFunction(MachineFunction &MF);
28
29 virtual const char *getPassName() const {
30 return "AMDGPU Assembly Printer";
31 }
32
33 /// EmitProgramInfo - Emit register usage information so that the GPU driver
34 /// can correctly setup the GPU state.
35 void EmitProgramInfo(MachineFunction &MF);
36
37 /// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp
38 virtual void EmitInstruction(const MachineInstr *MI);
39};
40
41} // End anonymous llvm
42
43#endif //AMDGPU_ASMPRINTER_H
diff --git a/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h b/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h
deleted file mode 100644
index f1daec19d54..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h
+++ /dev/null
@@ -1,48 +0,0 @@
1//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// CodeEmitter interface for R600 and SI codegen.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef AMDGPUCODEEMITTER_H
15#define AMDGPUCODEEMITTER_H
16
17namespace llvm {
18
19 class AMDGPUCodeEmitter {
20 public:
21 uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
22 virtual uint64_t getMachineOpValue(const MachineInstr &MI,
23 const MachineOperand &MO) const { return 0; }
24 virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
25 unsigned OpNo) const {
26 return 0;
27 }
28 virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
29 unsigned OpNo) const {
30 return 0;
31 }
32 virtual uint64_t VOPPostEncode(const MachineInstr &MI,
33 uint64_t Value) const {
34 return Value;
35 }
36 virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
37 unsigned OpNo) const {
38 return 0;
39 }
40 virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
41 const {
42 return 0;
43 }
44 };
45
46} // End namespace llvm
47
48#endif // AMDGPUCODEEMITTER_H
diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
deleted file mode 100644
index fbca0a7b832..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
1//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass lowers AMDIL machine instructions to the appropriate hardware
11// instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "AMDGPUInstrInfo.h"
17#include "llvm/CodeGen/MachineFunctionPass.h"
18
19using namespace llvm;
20
21namespace {
22
23class AMDGPUConvertToISAPass : public MachineFunctionPass {
24
25private:
26 static char ID;
27 TargetMachine &TM;
28
29public:
30 AMDGPUConvertToISAPass(TargetMachine &tm) :
31 MachineFunctionPass(ID), TM(tm) { }
32
33 virtual bool runOnMachineFunction(MachineFunction &MF);
34
35 virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
36
37};
38
39} // End anonymous namespace
40
41char AMDGPUConvertToISAPass::ID = 0;
42
43FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
44 return new AMDGPUConvertToISAPass(tm);
45}
46
47bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
48{
49 const AMDGPUInstrInfo * TII =
50 static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
51
52 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
53 BB != BB_E; ++BB) {
54 MachineBasicBlock &MBB = *BB;
55 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
56 I != E; ++I) {
57 MachineInstr &MI = *I;
58 TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
59 }
60 }
61 return false;
62}
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
deleted file mode 100644
index d37df6b986a..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
+++ /dev/null
@@ -1,351 +0,0 @@
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This is the parent TargetLowering class for hardware code gen targets.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUISelLowering.h"
15#include "AMDILIntrinsicInfo.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineRegisterInfo.h"
18#include "llvm/CodeGen/SelectionDAG.h"
19#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
20
21using namespace llvm;
22
23AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
24 TargetLowering(TM, new TargetLoweringObjectFileELF())
25{
26
27 // Initialize target lowering borrowed from AMDIL
28 InitAMDILLowering();
29
30 // We need to custom lower some of the intrinsics
31 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
32
33 // Library functions. These default to Expand, but we have instructions
34 // for them.
35 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
36 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
37 setOperationAction(ISD::FPOW, MVT::f32, Legal);
38 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
39 setOperationAction(ISD::FABS, MVT::f32, Legal);
40 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
41 setOperationAction(ISD::FRINT, MVT::f32, Legal);
42
43 setOperationAction(ISD::UDIV, MVT::i32, Expand);
44 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
45 setOperationAction(ISD::UREM, MVT::i32, Expand);
46}
47
48//===---------------------------------------------------------------------===//
49// TargetLowering Callbacks
50//===---------------------------------------------------------------------===//
51
52SDValue AMDGPUTargetLowering::LowerFormalArguments(
53 SDValue Chain,
54 CallingConv::ID CallConv,
55 bool isVarArg,
56 const SmallVectorImpl<ISD::InputArg> &Ins,
57 DebugLoc DL, SelectionDAG &DAG,
58 SmallVectorImpl<SDValue> &InVals) const
59{
60 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
61 InVals.push_back(SDValue());
62 }
63 return Chain;
64}
65
66SDValue AMDGPUTargetLowering::LowerReturn(
67 SDValue Chain,
68 CallingConv::ID CallConv,
69 bool isVarArg,
70 const SmallVectorImpl<ISD::OutputArg> &Outs,
71 const SmallVectorImpl<SDValue> &OutVals,
72 DebugLoc DL, SelectionDAG &DAG) const
73{
74 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
75}
76
77//===---------------------------------------------------------------------===//
78// Target specific lowering
79//===---------------------------------------------------------------------===//
80
81SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
82 const
83{
84 switch (Op.getOpcode()) {
85 default:
86 Op.getNode()->dump();
87 assert(0 && "Custom lowering code for this"
88 "instruction is not implemented yet!");
89 break;
90 // AMDIL DAG lowering
91 case ISD::SDIV: return LowerSDIV(Op, DAG);
92 case ISD::SREM: return LowerSREM(Op, DAG);
93 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
94 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
95 // AMDGPU DAG lowering
96 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
97 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
98 }
99 return Op;
100}
101
102SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
103 SelectionDAG &DAG) const
104{
105 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
106 DebugLoc DL = Op.getDebugLoc();
107 EVT VT = Op.getValueType();
108
109 switch (IntrinsicID) {
110 default: return Op;
111 case AMDGPUIntrinsic::AMDIL_abs:
112 return LowerIntrinsicIABS(Op, DAG);
113 case AMDGPUIntrinsic::AMDIL_exp:
114 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
115 case AMDGPUIntrinsic::AMDGPU_lrp:
116 return LowerIntrinsicLRP(Op, DAG);
117 case AMDGPUIntrinsic::AMDIL_fraction:
118 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
119 case AMDGPUIntrinsic::AMDIL_mad:
120 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
121 Op.getOperand(2), Op.getOperand(3));
122 case AMDGPUIntrinsic::AMDIL_max:
123 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
124 Op.getOperand(2));
125 case AMDGPUIntrinsic::AMDGPU_imax:
126 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
127 Op.getOperand(2));
128 case AMDGPUIntrinsic::AMDGPU_umax:
129 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
130 Op.getOperand(2));
131 case AMDGPUIntrinsic::AMDIL_min:
132 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
133 Op.getOperand(2));
134 case AMDGPUIntrinsic::AMDGPU_imin:
135 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
136 Op.getOperand(2));
137 case AMDGPUIntrinsic::AMDGPU_umin:
138 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
139 Op.getOperand(2));
140 case AMDGPUIntrinsic::AMDIL_round_nearest:
141 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
142 }
143}
144
145///IABS(a) = SMAX(sub(0, a), a)
146SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
147 SelectionDAG &DAG) const
148{
149
150 DebugLoc DL = Op.getDebugLoc();
151 EVT VT = Op.getValueType();
152 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
153 Op.getOperand(1));
154
155 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
156}
157
158/// Linear Interpolation
159/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
160SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
161 SelectionDAG &DAG) const
162{
163 DebugLoc DL = Op.getDebugLoc();
164 EVT VT = Op.getValueType();
165 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
166 DAG.getConstantFP(1.0f, MVT::f32),
167 Op.getOperand(1));
168 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
169 Op.getOperand(3));
170 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
171 Op.getOperand(2),
172 OneSubAC);
173}
174
175
176
177SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
178 SelectionDAG &DAG) const
179{
180 DebugLoc DL = Op.getDebugLoc();
181 EVT VT = Op.getValueType();
182
183 SDValue Num = Op.getOperand(0);
184 SDValue Den = Op.getOperand(1);
185
186 SmallVector<SDValue, 8> Results;
187
188 // RCP = URECIP(Den) = 2^32 / Den + e
189 // e is rounding error.
190 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
191
192 // RCP_LO = umulo(RCP, Den) */
193 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
194
195 // RCP_HI = mulhu (RCP, Den) */
196 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
197
198 // NEG_RCP_LO = -RCP_LO
199 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
200 RCP_LO);
201
202 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
203 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
204 NEG_RCP_LO, RCP_LO,
205 ISD::SETEQ);
206 // Calculate the rounding error from the URECIP instruction
207 // E = mulhu(ABS_RCP_LO, RCP)
208 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
209
210 // RCP_A_E = RCP + E
211 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
212
213 // RCP_S_E = RCP - E
214 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
215
216 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
217 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
218 RCP_A_E, RCP_S_E,
219 ISD::SETEQ);
220 // Quotient = mulhu(Tmp0, Num)
221 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
222
223 // Num_S_Remainder = Quotient * Den
224 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
225
226 // Remainder = Num - Num_S_Remainder
227 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
228
229 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
230 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
231 DAG.getConstant(-1, VT),
232 DAG.getConstant(0, VT),
233 ISD::SETGE);
234 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
235 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
236 DAG.getConstant(0, VT),
237 DAG.getConstant(-1, VT),
238 DAG.getConstant(0, VT),
239 ISD::SETGE);
240 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
241 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
242 Remainder_GE_Zero);
243
244 // Calculate Division result:
245
246 // Quotient_A_One = Quotient + 1
247 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
248 DAG.getConstant(1, VT));
249
250 // Quotient_S_One = Quotient - 1
251 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
252 DAG.getConstant(1, VT));
253
254 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
255 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
256 Quotient, Quotient_A_One, ISD::SETEQ);
257
258 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
259 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
260 Quotient_S_One, Div, ISD::SETEQ);
261
262 // Calculate Rem result:
263
264 // Remainder_S_Den = Remainder - Den
265 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
266
267 // Remainder_A_Den = Remainder + Den
268 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
269
270 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
271 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
272 Remainder, Remainder_S_Den, ISD::SETEQ);
273
274 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
275 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
276 Remainder_A_Den, Rem, ISD::SETEQ);
277
278 DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
279 DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
280
281 return Op;
282}
283
284//===----------------------------------------------------------------------===//
285// Helper functions
286//===----------------------------------------------------------------------===//
287
288bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
289{
290 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
291 return CFP->isExactlyValue(1.0);
292 }
293 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
294 return C->isAllOnesValue();
295 }
296 return false;
297}
298
299bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
300{
301 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
302 return CFP->getValueAPF().isZero();
303 }
304 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
305 return C->isNullValue();
306 }
307 return false;
308}
309
310SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
311 const TargetRegisterClass *RC,
312 unsigned Reg, EVT VT) const {
313 MachineFunction &MF = DAG.getMachineFunction();
314 MachineRegisterInfo &MRI = MF.getRegInfo();
315 unsigned VirtualRegister;
316 if (!MRI.isLiveIn(Reg)) {
317 VirtualRegister = MRI.createVirtualRegister(RC);
318 MRI.addLiveIn(Reg, VirtualRegister);
319 } else {
320 VirtualRegister = MRI.getLiveInVirtReg(Reg);
321 }
322 return DAG.getRegister(VirtualRegister, VT);
323}
324
325#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
326
327const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
328{
329 switch (Opcode) {
330 default: return 0;
331 // AMDIL DAG nodes
332 NODE_NAME_CASE(MAD);
333 NODE_NAME_CASE(CALL);
334 NODE_NAME_CASE(UMUL);
335 NODE_NAME_CASE(DIV_INF);
336 NODE_NAME_CASE(RET_FLAG);
337 NODE_NAME_CASE(BRANCH_COND);
338
339 // AMDGPU DAG nodes
340 NODE_NAME_CASE(FRACT)
341 NODE_NAME_CASE(FMAX)
342 NODE_NAME_CASE(SMAX)
343 NODE_NAME_CASE(UMAX)
344 NODE_NAME_CASE(FMIN)
345 NODE_NAME_CASE(SMIN)
346 NODE_NAME_CASE(UMIN)
347 NODE_NAME_CASE(URECIP)
348 NODE_NAME_CASE(INTERP)
349 NODE_NAME_CASE(INTERP_P0)
350 }
351}
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
deleted file mode 100644
index 2d8ed82c117..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
+++ /dev/null
@@ -1,142 +0,0 @@
1//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the interface definition of the TargetLowering class
11// that is common to all AMD GPUs.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef AMDGPUISELLOWERING_H
16#define AMDGPUISELLOWERING_H
17
18#include "llvm/Target/TargetLowering.h"
19
20namespace llvm {
21
22class MachineRegisterInfo;
23
/// AMDGPUTargetLowering - TargetLowering implementation common to all AMD
/// GPU backends.
class AMDGPUTargetLowering : public TargetLowering
{
private:
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;

protected:

  /// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
  /// of the DAG's MachineFunction. This returns a Register SDNode representing
  /// Reg.
  SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
                               unsigned Reg, EVT VT) const;

  /// isHWTrueValue - Return true if Op is the constant the hardware treats
  /// as true (float 1.0 or an all-ones integer).
  bool isHWTrueValue(SDValue Op) const;
  /// isHWFalseValue - Return true if Op is the constant the hardware treats
  /// as false (float 0.0 or integer zero).
  bool isHWFalseValue(SDValue Op) const;

public:
  AMDGPUTargetLowering(TargetMachine &TM);

  virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                       bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg> &Ins,
                                       DebugLoc DL, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const;

  virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool isVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              DebugLoc DL, SelectionDAG &DAG) const;

  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
  /// getTargetNodeName - Debug name for a target-specific DAG node opcode.
  virtual const char* getTargetNodeName(unsigned Opcode) const;

// Functions defined in AMDILISelLowering.cpp
public:

  /// computeMaskedBitsForTargetNode - Determine which of the bits specified
  /// in Mask are known to be either zero or one and return them in the
  /// KnownZero/KnownOne bitsets.
  virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                              APInt &KnownZero,
                                              APInt &KnownOne,
                                              const SelectionDAG &DAG,
                                              unsigned Depth = 0) const;

  virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                                  const CallInst &I, unsigned Intrinsic) const;

  /// isFPImmLegal - We want to mark f32/f64 floating point values as legal.
  bool isFPImmLegal(const APFloat &Imm, EVT VT) const;

  /// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants.
  bool ShouldShrinkFPConstant(EVT VT) const;

private:
  // AMDIL-inherited lowering helpers, implemented in AMDILISelLowering.cpp.
  void InitAMDILLowering();
  SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
  EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
};
98
namespace AMDGPUISD
{

// Target-specific SelectionDAG node opcodes shared by the AMD GPU backends.
// The semantics comments below mirror the SDNode definitions in
// AMDGPUInstrInfo.td.
enum
{
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  MAD,          // 32bit Fused Multiply Add instruction
  CALL,         // Function call based on a single integer
  UMUL,         // 32bit unsigned multiplication
  DIV_INF,      // Divide with infinity returned on zero divisor
  RET_FLAG,
  BRANCH_COND,
  // End AMDIL ISD Opcodes
  BITALIGN,     // out = ((a << 32) | b) >> c
  FRACT,        // out = a - floor(a)
  FMAX,         // Floating point max
  SMAX,         // Signed integer max
  UMAX,         // Unsigned integer max
  FMIN,         // Floating point min
  SMIN,         // Signed integer min
  UMIN,         // Unsigned integer min
  URECIP,       // Unsigned fractional reciprocal: (2^32 / a) + rounding error
  INTERP,
  INTERP_P0,
  LAST_AMDGPU_ISD_NUMBER  // Sentinel; SIISD numbering starts here.
};


} // End namespace AMDGPUISD
129
namespace SIISD {

// SI-specific node opcodes, numbered after the shared AMDGPU opcodes so the
// two enums never collide.
enum {
  SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
  VCC_AND,
  VCC_BITCAST
};

} // End namespace SIISD
139
140} // End namespace llvm
141
142#endif // AMDGPUISELLOWERING_H
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
deleted file mode 100644
index 9aae09a4a15..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
+++ /dev/null
@@ -1,258 +0,0 @@
1//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the implementation of the TargetInstrInfo class that is
11// common to all AMD GPUs.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterInfo.h"
17#include "AMDGPUTargetMachine.h"
18#include "AMDIL.h"
19#include "AMDILUtilityFunctions.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23
24#define GET_INSTRINFO_CTOR
25#include "AMDGPUGenInstrInfo.inc"
26
27using namespace llvm;
28
// AMDGPUInstrInfo constructor.
// NOTE(review): the (0,0) arguments are presumably the call-frame
// setup/destroy opcodes (none for this target) -- confirm against the
// generated AMDGPUGenInstrInfo constructor.
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
  : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }

// Return the register info object owned (by value) by this class.
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
  return RI;
}
35
// The TargetInstrInfo query hooks below are unimplemented stubs: each
// conservatively reports "no" / "not a stack-slot access" (0 or false).
bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                            unsigned &SrcReg, unsigned &DstReg,
                                            unsigned &SubIdx) const {
// TODO: Implement this function
  return false;
}

unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}

unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}

bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                           const MachineMemOperand *&MMO,
                                           int &FrameIndex) const {
// TODO: Implement this function
  return false;
}
unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}
unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
                                                     int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}
bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
                                            const MachineMemOperand *&MMO,
                                            int &FrameIndex) const {
// TODO: Implement this function
  return false;
}
77
// Three-address conversion is not implemented; returning NULL tells the
// caller no conversion was performed.
MachineInstr *
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                       MachineBasicBlock::iterator &MBBI,
                                       LiveVariables *LV) const {
// TODO: Implement this function
  return NULL;
}
// getNextBranchInstr - Advance iter to the next branch instruction (BRANCH
// or a scalar-typed BRANCH_COND variant) in MBB.  Returns true with iter on
// the branch if one is found, false when the end of the block is reached.
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
                                         MachineBasicBlock &MBB) const {
  while (iter != MBB.end()) {
    switch (iter->getOpcode()) {
    default:
      break;
    // Expands to case labels for every scalar-type variant of BRANCH_COND;
    // presumably defined in AMDILUtilityFunctions.h (included above).
    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
    case AMDGPU::BRANCH:
      return true;
    };
    ++iter;
  }
  return false;
}
99
// skipFlowControl - Walk backwards from the end of MBB over any trailing
// control-flow markers (ENDLOOP/ENDIF/ELSE) and return the iterator at
// which new instructions should be inserted so they land before those
// markers.  Returns MBB->end() for an empty block or when the walk falls
// through without finding a non-marker instruction.
// NOTE(review): `while (--tmp)` relies on the iterator's implicit
// pointer-like boolean conversion to terminate; confirm behavior when the
// walk reaches the front of the block.
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator tmp = MBB->end();
  if (!MBB->size()) {
    return MBB->end();
  }
  while (--tmp) {
    if (tmp->getOpcode() == AMDGPU::ENDLOOP
        || tmp->getOpcode() == AMDGPU::ENDIF
        || tmp->getOpcode() == AMDGPU::ELSE) {
      // Still inside the trailing marker run; stop if we hit the front.
      if (tmp == MBB->begin()) {
        return tmp;
      } else {
        continue;
      }
    } else {
      // First non-marker instruction from the back: insert just after it.
      return ++tmp;
    }
  }
  return MBB->end();
}
120
// Register spilling to stack slots is not supported; reaching this hook is
// a bug, so it asserts unconditionally.
void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned SrcReg, bool isKill,
                                     int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const {
  assert(!"Not Implemented");
}

// Register reloading from stack slots is likewise unsupported.
void
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned DestReg, int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  assert(!"Not Implemented");
}
139
// Memory-operand folding/unfolding is not implemented for this target; every
// hook below conservatively reports that no folding is possible (0 / false).
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       int FrameIndex) const {
// TODO: Implement this function
  return 0;
}
MachineInstr*
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       MachineInstr *LoadMI) const {
  // TODO: Implement this function
  return 0;
}
bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops) const
{
  // TODO: Implement this function
  return false;
}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                     unsigned Reg, bool UnfoldLoad,
                                     bool UnfoldStore,
                                     SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // TODO: Implement this function
  return false;
}

bool
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                     SmallVectorImpl<SDNode*> &NewNodes) const {
  // TODO: Implement this function
  return false;
}

unsigned
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                            bool UnfoldLoad, bool UnfoldStore,
                                            unsigned *LoadRegIndex) const {
  // TODO: Implement this function
  return 0;
}
186
187bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
188 int64_t Offset1, int64_t Offset2,
189 unsigned NumLoads) const {
190 assert(Offset2 > Offset1
191 && "Second offset should be larger than first offset!");
192 // If we have less than 16 loads in a row, and the offsets are within 16,
193 // then schedule together.
194 // TODO: Make the loads schedule near if it fits in a cacheline
195 return (NumLoads < 16 && (Offset2 - Offset1) < 16);
196}
197
// NOTE(review): per the TargetInstrInfo convention, returning true here
// reports that the branch condition could NOT be reversed -- confirm this
// conservative default is intended.
bool
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
  const {
  // TODO: Implement this function
  return true;
}
// No-op: noop insertion is not implemented.
void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const {
  // TODO: Implement this function
}

// Predication queries: nothing is currently treated as predicated.
bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
  // TODO: Implement this function
  return false;
}
bool
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                   const SmallVectorImpl<MachineOperand> &Pred2)
  const {
  // TODO: Implement this function
  return false;
}

bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
                                       std::vector<MachineOperand> &Pred) const {
  // TODO: Implement this function
  return false;
}

// Defers to the instruction description's predicable flag.
bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
  // TODO: Implement this function
  return MI->getDesc().isPredicable();
}

bool
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // TODO: Implement this function
  return true;
}
237
238void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
239 DebugLoc DL) const
240{
241 MachineRegisterInfo &MRI = MF.getRegInfo();
242 const AMDGPURegisterInfo & RI = getRegisterInfo();
243
244 for (unsigned i = 0; i < MI.getNumOperands(); i++) {
245 MachineOperand &MO = MI.getOperand(i);
246 // Convert dst regclass to one that is supported by the ISA
247 if (MO.isReg() && MO.isDef()) {
248 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
249 const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
250 const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
251
252 assert(newRegClass);
253
254 MRI.setRegClass(MO.getReg(), newRegClass);
255 }
256 }
257 }
258}
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
deleted file mode 100644
index a3080767883..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ /dev/null
@@ -1,148 +0,0 @@
1//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the definition of a TargetInstrInfo class that is common
11// to all AMD GPUs.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef AMDGPUINSTRUCTIONINFO_H_
16#define AMDGPUINSTRUCTIONINFO_H_
17
18#include "AMDGPURegisterInfo.h"
19#include "AMDGPUInstrInfo.h"
20#include "llvm/Target/TargetInstrInfo.h"
21
22#include <map>
23
24#define GET_INSTRINFO_HEADER
25#define GET_INSTRINFO_ENUM
26#include "AMDGPUGenInstrInfo.inc"
27
28#define OPCODE_IS_ZERO_INT 0x00000042
29#define OPCODE_IS_NOT_ZERO_INT 0x00000045
30#define OPCODE_IS_ZERO 0x00000020
31#define OPCODE_IS_NOT_ZERO 0x00000023
32
33namespace llvm {
34
35class AMDGPUTargetMachine;
36class MachineFunction;
37class MachineInstr;
38class MachineInstrBuilder;
39
/// AMDGPUInstrInfo - TargetInstrInfo implementation that is common to all
/// AMD GPUs.  Most hooks are conservative stubs; see AMDGPUInstrInfo.cpp.
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
  const AMDGPURegisterInfo RI;
  TargetMachine &TM;
  // Advance iter to the next BRANCH/BRANCH_COND instruction in MBB.
  bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
                          MachineBasicBlock &MBB) const;
public:
  explicit AMDGPUInstrInfo(TargetMachine &tm);

  // NOTE(review): declared pure virtual, yet AMDGPUInstrInfo.cpp also
  // provides a definition returning RI; confirm subclasses are meant to
  // override and may call the base definition.
  virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;

  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
                             unsigned &DstReg, unsigned &SubIdx) const;

  // Stack-slot queries (currently conservative stubs).
  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
  unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                     int &FrameIndex) const;
  bool hasLoadFromStackSlot(const MachineInstr *MI,
                            const MachineMemOperand *&MMO,
                            int &FrameIndex) const;
  unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
  unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
                                      int &FrameIndex) const;
  bool hasStoreFromStackSlot(const MachineInstr *MI,
                             const MachineMemOperand *&MMO,
                             int &FrameIndex) const;

  MachineInstr *
  convertToThreeAddress(MachineFunction::iterator &MFI,
                        MachineBasicBlock::iterator &MBBI,
                        LiveVariables *LV) const;


  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const = 0;

  // Spill/reload hooks (assert: spilling is unsupported).
  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
                           unsigned SrcReg, bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI) const;
  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            unsigned DestReg, int FrameIndex,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const;

protected:
  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      int FrameIndex) const;
  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      MachineInstr *LoadMI) const;
public:
  bool canFoldMemoryOperand(const MachineInstr *MI,
                            const SmallVectorImpl<unsigned> &Ops) const;
  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                           unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                           SmallVectorImpl<MachineInstr *> &NewMIs) const;
  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                           SmallVectorImpl<SDNode *> &NewNodes) const;
  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore,
                                      unsigned *LoadRegIndex = 0) const;
  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                               int64_t Offset1, int64_t Offset2,
                               unsigned NumLoads) const;

  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const;
  bool isPredicated(const MachineInstr *MI) const;
  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                         const SmallVectorImpl<MachineOperand> &Pred2) const;
  bool DefinesPredicate(MachineInstr *MI,
                        std::vector<MachineOperand> &Pred) const;
  bool isPredicable(MachineInstr *MI) const;
  bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;

  // Helper functions that check the opcode for status information
  bool isLoadInst(llvm::MachineInstr *MI) const;
  bool isExtLoadInst(llvm::MachineInstr *MI) const;
  bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
  bool isSExtLoadInst(llvm::MachineInstr *MI) const;
  bool isZExtLoadInst(llvm::MachineInstr *MI) const;
  bool isAExtLoadInst(llvm::MachineInstr *MI) const;
  bool isStoreInst(llvm::MachineInstr *MI) const;
  bool isTruncStoreInst(llvm::MachineInstr *MI) const;

  // Target-specific hooks each subtarget must provide.
  virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                       int64_t Imm) const = 0;
  virtual unsigned getIEQOpcode() const = 0;
  virtual bool isMov(unsigned opcode) const = 0;

  /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
  /// MachineInstr
  virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
                            DebugLoc DL) const;

};
145
146} // End llvm namespace
147
148#endif // AMDGPUINSTRINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td b/src/gallium/drivers/radeon/AMDGPUInstrInfo.td
deleted file mode 100644
index 23ca35aadc2..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td
+++ /dev/null
@@ -1,71 +0,0 @@
1//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains DAG node definitions for the AMDGPU target.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// AMDGPU DAG Profiles
16//===----------------------------------------------------------------------===//
17
// Type profile for ternary integer nodes: the result and the first two
// operands share one integer type; the third operand is an integer that may
// differ.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//

// out = ((a << 32) | b) >> c
//
// Can be used to optimize rotl:
// rotl(a, b) = bitalign(a, a, 32 - b)
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;

// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = max(a, b) a and b are floats
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = max(a, b) a and b are unsigned ints
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are floats
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are unsigned ints
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
deleted file mode 100644
index 9dbdc615e2d..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ /dev/null
@@ -1,183 +0,0 @@
1//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains instruction defs that are common to all hw codegen
11// targets.
12//
13//===----------------------------------------------------------------------===//
14
// Base class for all AMD GPU instructions.  TSFlags carries the hardware
// generation in bits 42-40 and the matching AMDIL opcode in bits 63-48.
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
  field bits<16> AMDILOp = 0;
  field bits<3> Gen = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;
  let TSFlags{42-40} = Gen;
  let TSFlags{63-48} = AMDILOp;
}

// AMDGPUInst with a placeholder all-ones 32-bit encoding.
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
    : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;

}
35
// Condition-code pattern leaves: each matches a CondCode node testing the
// named relation, accepting the ordered, unordered, and don't-care forms.
def COND_EQ : PatLeaf <
  (cond),
  [{switch(N->get()){{default: return false;
                     case ISD::SETOEQ: case ISD::SETUEQ:
                     case ISD::SETEQ: return true;}}}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{switch(N->get()){{default: return false;
                     case ISD::SETONE: case ISD::SETUNE:
                     case ISD::SETNE: return true;}}}]
>;
def COND_GT : PatLeaf <
  (cond),
  [{switch(N->get()){{default: return false;
                     case ISD::SETOGT: case ISD::SETUGT:
                     case ISD::SETGT: return true;}}}]
>;

def COND_GE : PatLeaf <
  (cond),
  [{switch(N->get()){{default: return false;
                     case ISD::SETOGE: case ISD::SETUGE:
                     case ISD::SETGE: return true;}}}]
>;

def COND_LT : PatLeaf <
  (cond),
  [{switch(N->get()){{default: return false;
                     case ISD::SETOLT: case ISD::SETULT:
                     case ISD::SETLT: return true;}}}]
>;

def COND_LE : PatLeaf <
  (cond),
  [{switch(N->get()){{default: return false;
                     case ISD::SETOLE: case ISD::SETULE:
                     case ISD::SETLE: return true;}}}]
>;
76
77//===----------------------------------------------------------------------===//
78// Load/Store Pattern Fragments
79//===----------------------------------------------------------------------===//
80
// 8-bit zero-extending load restricted (by the isGlobalLoad predicate) to
// the global address space.
def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

// Math constants encoded as IEEE-754 single-precision bit patterns.
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
}
def CONST : Constants;

// Matches a floating point immediate equal to zero (either sign).
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

// Matches the floating point immediate 1.0 exactly.
def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;
101
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {

// Pseudo instruction: clamp $src0 into [0.0, 1.0] via int_AMDIL_clamp.
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

// Pseudo instruction: floating point absolute value.
class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set rc:$dst, (fabs rc:$src0))]
>;

// Pseudo instruction: floating point negation.
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set rc:$dst, (fneg rc:$src0))]
>;

// Pseudo instruction carrying the shader-type immediate from the
// int_AMDGPU_shader_type intrinsic.
def SHADER_TYPE : AMDGPUShaderInst <
  (outs),
  (ins i32imm:$type),
  "SHADER_TYPE $type",
  [(int_AMDGPU_shader_type imm:$type)]
>;

} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
133
134/* Generic helper patterns for intrinsics */
135/* -------------------------------------- */
136
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

// pow(a, b) lowered as exp(b * log(a)) using the target's ieee log/exp/mul
// instructions.
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
                  RegisterClass rc> : Pat <
  (fpow rc:$src0, rc:$src1),
  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern: vector extract at a constant index becomes a
   subregister read. */
class Extract_Element <ValueType sub_type, ValueType vec_type,
                       RegisterClass vec_class, int sub_idx,
                       SubRegIndex sub_reg>: Pat<
  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
  (EXTRACT_SUBREG vec_class:$src, sub_reg)
>;

/* Insert element pattern: vector insert at a constant index becomes a
   subregister write. */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      RegisterClass elem_class, RegisterClass vec_class,
                      int sub_idx, SubRegIndex sub_reg> : Pat <

  (vec_type (vector_insert (vec_type vec_class:$vec),
                           (elem_type elem_class:$elem), sub_idx)),
  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
>;

// Vector Build pattern: build a 4-element vector with four subregister
// inserts into an IMPLICIT_DEF.
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
                    ValueType elemType, RegisterClass elemClass> : Pat <
  (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
                         (elemType elemClass:$z), (elemType elemClass:$w))),
  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
  (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
                            elemClass:$z, sel_z), elemClass:$w, sel_w)
>;

// bitconvert pattern: a bitcast within one register class is a no-op copy.
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;
179
180include "R600Instructions.td"
181
182include "SIInstrInfo.td"
183
diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
deleted file mode 100644
index eaca4cf9856..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
+++ /dev/null
@@ -1,63 +0,0 @@
1//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines intrinsics that are used by all hw codegen targets.
11//
12//===----------------------------------------------------------------------===//
13
// Intrinsics shared by all hw codegen targets.  Most are pure
// ([IntrNoMem]); store/kill-style intrinsics declare no attributes so they
// are treated as having side effects.
let TargetPrefix = "AMDGPU", isTarget = 1 in {

  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
  def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;

  def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
  def int_AMDGPU_kilp : Intrinsic<[], [], []>;
  def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;

  def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
}

let TargetPrefix = "TGSI", isTarget = 1 in {

  def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
}
62
63include "SIIntrinsics.td"
diff --git a/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp b/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp
deleted file mode 100644
index f3d80a39c3c..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
1//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains code to lower AMDGPU MachineInstrs to their corresponding
11// MCInst.
12//
13//===----------------------------------------------------------------------===//
14//
15
16#include "AMDGPUMCInstLower.h"
17#include "AMDGPUAsmPrinter.h"
18#include "R600InstrInfo.h"
19#include "llvm/CodeGen/MachineBasicBlock.h"
20#include "llvm/CodeGen/MachineInstr.h"
21#include "llvm/Constants.h"
22#include "llvm/MC/MCInst.h"
23#include "llvm/MC/MCStreamer.h"
24#include "llvm/Support/ErrorHandling.h"
25
26using namespace llvm;
27
28AMDGPUMCInstLower::AMDGPUMCInstLower() { }
29
30void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
31 OutMI.setOpcode(MI->getOpcode());
32
33 for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
34 const MachineOperand &MO = MI->getOperand(i);
35
36 MCOperand MCOp;
37 switch (MO.getType()) {
38 default:
39 llvm_unreachable("unknown operand type");
40 case MachineOperand::MO_FPImmediate: {
41 const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
42 assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
43 "Only floating point immediates are supported at the moment.");
44 MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
45 break;
46 }
47 case MachineOperand::MO_Immediate:
48 MCOp = MCOperand::CreateImm(MO.getImm());
49 break;
50 case MachineOperand::MO_Register:
51 MCOp = MCOperand::CreateReg(MO.getReg());
52 break;
53 }
54 OutMI.addOperand(MCOp);
55 }
56}
57
58void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
59 AMDGPUMCInstLower MCInstLowering;
60
61 // Ignore placeholder instructions:
62 if (MI->getOpcode() == AMDGPU::MASK_WRITE) {
63 return;
64 }
65
66 if (MI->isBundle()) {
67 const MachineBasicBlock *MBB = MI->getParent();
68 MachineBasicBlock::const_instr_iterator I = MI;
69 ++I;
70 while (I != MBB->end() && I->isInsideBundle()) {
71 MCInst MCBundleInst;
72 const MachineInstr *BundledInst = I;
73 MCInstLowering.lower(BundledInst, MCBundleInst);
74 OutStreamer.EmitInstruction(MCBundleInst);
75 ++I;
76 }
77 } else {
78 MCInst TmpInst;
79 MCInstLowering.lower(MI, TmpInst);
80 OutStreamer.EmitInstruction(TmpInst);
81 }
82}
diff --git a/src/gallium/drivers/radeon/AMDGPUMCInstLower.h b/src/gallium/drivers/radeon/AMDGPUMCInstLower.h
deleted file mode 100644
index 3f68ff0874e..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUMCInstLower.h
+++ /dev/null
@@ -1,30 +0,0 @@
1//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef AMDGPU_MCINSTLOWER_H
11#define AMDGPU_MCINSTLOWER_H
12
13namespace llvm {
14
15class MCInst;
16class MachineInstr;
17
18class AMDGPUMCInstLower {
19
20public:
21 AMDGPUMCInstLower();
22
23 /// lower - Lower a MachineInstr to an MCInst
24 void lower(const MachineInstr *MI, MCInst &OutMI) const;
25
26};
27
28} // End namespace llvm
29
30#endif //AMDGPU_MCINSTLOWER_H
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
deleted file mode 100644
index 69bda631738..00000000000
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
1//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Parent TargetRegisterInfo class common to all hw codegen targets.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPURegisterInfo.h"
15#include "AMDGPUTargetMachine.h"
16
17using namespace llvm;
18
19AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
20 const TargetInstrInfo &tii)
21: AMDGPUGenRegisterInfo(0),
22 TM(tm),
23 TII(tii)
24 { }
25
26//===----------------------------------------------------------------------===//
27// Function handling callbacks - Functions are a seldom used feature of GPUS, so
28// they are not supported at this time.
29//===----------------------------------------------------------------------===//
30
31const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
32
33const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
34 const {
35 return &CalleeSavedReg;
36}
37
38void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
39 int SPAdj,
40 RegScavenger *RS) const {
41 assert(!"Subroutines not supported yet");
42}
43
44unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
45 assert(!"Subroutines not supported yet");
46 return 0;
47}
48
49#define GET_REGINFO_TARGET_DESC
50#include "AMDGPUGenRegisterInfo.inc"
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
deleted file mode 100644
index 326610d333e..00000000000
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
+++ /dev/null
@@ -1,62 +0,0 @@
1//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the TargetRegisterInfo interface that is implemented
11// by all hw codegen targets.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef AMDGPUREGISTERINFO_H_
16#define AMDGPUREGISTERINFO_H_
17
18#include "llvm/ADT/BitVector.h"
19#include "llvm/Target/TargetRegisterInfo.h"
20
21#define GET_REGINFO_HEADER
22#define GET_REGINFO_ENUM
23#include "AMDGPUGenRegisterInfo.inc"
24
25namespace llvm {
26
27class AMDGPUTargetMachine;
28class TargetInstrInfo;
29
30struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo
31{
32 TargetMachine &TM;
33 const TargetInstrInfo &TII;
34 static const uint16_t CalleeSavedReg;
35
36 AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
37
38 virtual BitVector getReservedRegs(const MachineFunction &MF) const {
39 assert(!"Unimplemented"); return BitVector();
40 }
41
42 /// getISARegClass - rc is an AMDIL reg class. This function returns the
43 /// ISA reg class that is equivalent to the given AMDIL reg class.
44 virtual const TargetRegisterClass * getISARegClass(
45 const TargetRegisterClass * rc) const {
46 assert(!"Unimplemented"); return NULL;
47 }
48
49 virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
50 assert(!"Unimplemented"); return NULL;
51 }
52
53 const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
54 void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
55 RegScavenger *RS) const;
56 unsigned getFrameRegister(const MachineFunction &MF) const;
57
58};
59
60} // End namespace llvm
61
62#endif // AMDIDSAREGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
deleted file mode 100644
index 8181e023aa3..00000000000
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
+++ /dev/null
@@ -1,22 +0,0 @@
1//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Tablegen register definitions common to all hw codegen targets.
11//
12//===----------------------------------------------------------------------===//
13
14let Namespace = "AMDGPU" in {
15 def sel_x : SubRegIndex;
16 def sel_y : SubRegIndex;
17 def sel_z : SubRegIndex;
18 def sel_w : SubRegIndex;
19}
20
21include "R600RegisterInfo.td"
22include "SIRegisterInfo.td"
diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp b/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp
deleted file mode 100644
index d4a70b6c62f..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUSubtarget.h"
15
16using namespace llvm;
17
18#define GET_SUBTARGETINFO_ENUM
19#define GET_SUBTARGETINFO_TARGET_DESC
20#define GET_SUBTARGETINFO_CTOR
21#include "AMDGPUGenSubtargetInfo.inc"
22
23AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
24 AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) {
25 InstrItins = getInstrItineraryForCPU(CPU);
26
27 memset(CapsOverride, 0, sizeof(*CapsOverride)
28 * AMDGPUDeviceInfo::MaxNumberCapabilities);
29 // Default card
30 StringRef GPU = CPU;
31 mIs64bit = false;
32 mDefaultSize[0] = 64;
33 mDefaultSize[1] = 1;
34 mDefaultSize[2] = 1;
35 ParseSubtargetFeatures(GPU, FS);
36 mDevName = GPU;
37 mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
38}
39
40AMDGPUSubtarget::~AMDGPUSubtarget()
41{
42 delete mDevice;
43}
44
45bool
46AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const
47{
48 assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
49 "Caps index is out of bounds!");
50 return CapsOverride[caps];
51}
52bool
53AMDGPUSubtarget::is64bit() const
54{
55 return mIs64bit;
56}
57bool
58AMDGPUSubtarget::isTargetELF() const
59{
60 return false;
61}
62size_t
63AMDGPUSubtarget::getDefaultSize(uint32_t dim) const
64{
65 if (dim > 3) {
66 return 1;
67 } else {
68 return mDefaultSize[dim];
69 }
70}
71
72std::string
73AMDGPUSubtarget::getDataLayout() const
74{
75 if (!mDevice) {
76 return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
77 "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
78 "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
79 "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
80 "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
81 }
82 return mDevice->getDataLayout();
83}
84
85std::string
86AMDGPUSubtarget::getDeviceName() const
87{
88 return mDevName;
89}
90const AMDGPUDevice *
91AMDGPUSubtarget::device() const
92{
93 return mDevice;
94}
diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.h b/src/gallium/drivers/radeon/AMDGPUSubtarget.h
deleted file mode 100644
index 30bda83a205..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUSubtarget.h
+++ /dev/null
@@ -1,66 +0,0 @@
1//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file declares the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef _AMDGPUSUBTARGET_H_
15#define _AMDGPUSUBTARGET_H_
16#include "AMDILDevice.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Target/TargetSubtargetInfo.h"
20
21#define GET_SUBTARGETINFO_HEADER
22#include "AMDGPUGenSubtargetInfo.inc"
23
24#define MAX_CB_SIZE (1 << 16)
25
26namespace llvm {
27
28class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo
29{
30private:
31 bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
32 const AMDGPUDevice *mDevice;
33 size_t mDefaultSize[3];
34 size_t mMinimumSize[3];
35 std::string mDevName;
36 bool mIs64bit;
37 bool mIs32on64bit;
38 bool mDumpCode;
39 bool mR600ALUInst;
40
41 InstrItineraryData InstrItins;
42
43public:
44 AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
45 virtual ~AMDGPUSubtarget();
46
47 const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
48 virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
49
50 bool isOverride(AMDGPUDeviceInfo::Caps) const;
51 bool is64bit() const;
52
53 // Helper functions to simplify if statements
54 bool isTargetELF() const;
55 const AMDGPUDevice* device() const;
56 std::string getDataLayout() const;
57 std::string getDeviceName() const;
58 virtual size_t getDefaultSize(uint32_t dim) const;
59 bool dumpCode() const { return mDumpCode; }
60 bool r600ALUEncoding() const { return mR600ALUInst; }
61
62};
63
64} // End namespace llvm
65
66#endif // AMDGPUSUBTARGET_H_
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
deleted file mode 100644
index bfe9d81303b..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
1//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The AMDGPU target machine contains all of the hardware specific information
11// needed to emit code for R600 and SI GPUs.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPU.h"
17#include "R600ISelLowering.h"
18#include "R600InstrInfo.h"
19#include "SIISelLowering.h"
20#include "SIInstrInfo.h"
21#include "llvm/Analysis/Passes.h"
22#include "llvm/Analysis/Verifier.h"
23#include "llvm/CodeGen/MachineFunctionAnalysis.h"
24#include "llvm/CodeGen/MachineModuleInfo.h"
25#include "llvm/CodeGen/Passes.h"
26#include "llvm/MC/MCAsmInfo.h"
27#include "llvm/PassManager.h"
28#include "llvm/Support/TargetRegistry.h"
29#include "llvm/Support/raw_os_ostream.h"
30#include "llvm/Transforms/IPO.h"
31#include "llvm/Transforms/Scalar.h"
32#include <llvm/CodeGen/Passes.h>
33
34using namespace llvm;
35
36extern "C" void LLVMInitializeAMDGPUTarget() {
37 // Register the target
38 RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
39}
40
41AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
42 StringRef CPU, StringRef FS,
43 TargetOptions Options,
44 Reloc::Model RM, CodeModel::Model CM,
45 CodeGenOpt::Level OptLevel
46)
47:
48 LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
49 Subtarget(TT, CPU, FS),
50 DataLayout(Subtarget.getDataLayout()),
51 FrameLowering(TargetFrameLowering::StackGrowsUp,
52 Subtarget.device()->getStackAlignment(), 0),
53 IntrinsicInfo(this),
54 InstrItins(&Subtarget.getInstrItineraryData()),
55 mDump(false)
56
57{
58 // TLInfo uses InstrInfo so it must be initialized after.
59 if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
60 InstrInfo = new R600InstrInfo(*this);
61 TLInfo = new R600TargetLowering(*this);
62 } else {
63 InstrInfo = new SIInstrInfo(*this);
64 TLInfo = new SITargetLowering(*this);
65 }
66}
67
68AMDGPUTargetMachine::~AMDGPUTargetMachine()
69{
70}
71
72namespace {
73class AMDGPUPassConfig : public TargetPassConfig {
74public:
75 AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
76 : TargetPassConfig(TM, PM) {}
77
78 AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
79 return getTM<AMDGPUTargetMachine>();
80 }
81
82 virtual bool addPreISel();
83 virtual bool addInstSelector();
84 virtual bool addPreRegAlloc();
85 virtual bool addPostRegAlloc();
86 virtual bool addPreSched2();
87 virtual bool addPreEmitPass();
88};
89} // End of anonymous namespace
90
91TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
92 return new AMDGPUPassConfig(this, PM);
93}
94
95bool
96AMDGPUPassConfig::addPreISel()
97{
98 return false;
99}
100
101bool AMDGPUPassConfig::addInstSelector() {
102 PM->add(createAMDGPUPeepholeOpt(*TM));
103 PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine()));
104 return false;
105}
106
107bool AMDGPUPassConfig::addPreRegAlloc() {
108 const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
109
110 if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
111 PM->add(createSIAssignInterpRegsPass(*TM));
112 }
113 PM->add(createAMDGPUConvertToISAPass(*TM));
114 return false;
115}
116
117bool AMDGPUPassConfig::addPostRegAlloc() {
118 return false;
119}
120
121bool AMDGPUPassConfig::addPreSched2() {
122
123 addPass(IfConverterID);
124 return false;
125}
126
127bool AMDGPUPassConfig::addPreEmitPass() {
128 PM->add(createAMDGPUCFGPreparationPass(*TM));
129 PM->add(createAMDGPUCFGStructurizerPass(*TM));
130
131 const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
132 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
133 PM->add(createR600ExpandSpecialInstrsPass(*TM));
134 addPass(FinalizeMachineBundlesID);
135 } else {
136 PM->add(createSILowerLiteralConstantsPass(*TM));
137 // piglit is unreliable (VM protection faults, GPU lockups) with this pass:
138 //PM->add(createSILowerFlowControlPass(*TM));
139 }
140
141 return false;
142}
143
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
deleted file mode 100644
index 8b405a882cc..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
+++ /dev/null
@@ -1,70 +0,0 @@
1//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The AMDGPU TargetMachine interface definition for hw codgen targets.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef AMDGPU_TARGET_MACHINE_H
15#define AMDGPU_TARGET_MACHINE_H
16
17#include "AMDGPUInstrInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "AMDILFrameLowering.h"
20#include "AMDILIntrinsicInfo.h"
21#include "R600ISelLowering.h"
22#include "llvm/ADT/OwningPtr.h"
23#include "llvm/Target/TargetData.h"
24
25namespace llvm {
26
27MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
28
29class AMDGPUTargetMachine : public LLVMTargetMachine {
30
31 AMDGPUSubtarget Subtarget;
32 const TargetData DataLayout;
33 AMDGPUFrameLowering FrameLowering;
34 AMDGPUIntrinsicInfo IntrinsicInfo;
35 const AMDGPUInstrInfo * InstrInfo;
36 AMDGPUTargetLowering * TLInfo;
37 const InstrItineraryData* InstrItins;
38 bool mDump;
39
40public:
41 AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
42 StringRef CPU,
43 TargetOptions Options,
44 Reloc::Model RM, CodeModel::Model CM,
45 CodeGenOpt::Level OL);
46 ~AMDGPUTargetMachine();
47 virtual const AMDGPUFrameLowering* getFrameLowering() const {
48 return &FrameLowering;
49 }
50 virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
51 return &IntrinsicInfo;
52 }
53 virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
54 virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
55 virtual const AMDGPURegisterInfo *getRegisterInfo() const {
56 return &InstrInfo->getRegisterInfo();
57 }
58 virtual AMDGPUTargetLowering * getTargetLowering() const {
59 return TLInfo;
60 }
61 virtual const InstrItineraryData* getInstrItineraryData() const {
62 return InstrItins;
63 }
64 virtual const TargetData* getTargetData() const { return &DataLayout; }
65 virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
66};
67
68} // End namespace llvm
69
70#endif // AMDGPU_TARGET_MACHINE_H
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
deleted file mode 100644
index e96b123bb7d..00000000000
--- a/src/gallium/drivers/radeon/AMDIL.h
+++ /dev/null
@@ -1,106 +0,0 @@
1//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file contains the entry points for global functions defined in the LLVM
11// AMDGPU back-end.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef AMDIL_H_
16#define AMDIL_H_
17
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/Target/TargetMachine.h"
20
21#define ARENA_SEGMENT_RESERVED_UAVS 12
22#define DEFAULT_ARENA_UAV_ID 8
23#define DEFAULT_RAW_UAV_ID 7
24#define GLOBAL_RETURN_RAW_UAV_ID 11
25#define HW_MAX_NUM_CB 8
26#define MAX_NUM_UNIQUE_UAVS 8
27#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
28#define OPENCL_MAX_READ_IMAGES 128
29#define OPENCL_MAX_WRITE_IMAGES 8
30#define OPENCL_MAX_SAMPLERS 16
31
32// The next two values can never be zero, as zero is the ID that is
33// used to assert against.
34#define DEFAULT_LDS_ID 1
35#define DEFAULT_GDS_ID 1
36#define DEFAULT_SCRATCH_ID 1
37#define DEFAULT_VEC_SLOTS 8
38
39#define OCL_DEVICE_RV710 0x0001
40#define OCL_DEVICE_RV730 0x0002
41#define OCL_DEVICE_RV770 0x0004
42#define OCL_DEVICE_CEDAR 0x0008
43#define OCL_DEVICE_REDWOOD 0x0010
44#define OCL_DEVICE_JUNIPER 0x0020
45#define OCL_DEVICE_CYPRESS 0x0040
46#define OCL_DEVICE_CAICOS 0x0080
47#define OCL_DEVICE_TURKS 0x0100
48#define OCL_DEVICE_BARTS 0x0200
49#define OCL_DEVICE_CAYMAN 0x0400
50#define OCL_DEVICE_ALL 0x3FFF
51
52/// The number of function ID's that are reserved for
53/// internal compiler usage.
54const unsigned int RESERVED_FUNCS = 1024;
55
56namespace llvm {
57class AMDGPUInstrPrinter;
58class FunctionPass;
59class MCAsmInfo;
60class raw_ostream;
61class Target;
62class TargetMachine;
63
64/// Instruction selection passes.
65FunctionPass*
66 createAMDGPUISelDag(TargetMachine &TM);
67FunctionPass*
68 createAMDGPUPeepholeOpt(TargetMachine &TM);
69
70/// Pre emit passes.
71FunctionPass*
72 createAMDGPUCFGPreparationPass(TargetMachine &TM);
73FunctionPass*
74 createAMDGPUCFGStructurizerPass(TargetMachine &TM);
75
76extern Target TheAMDGPUTarget;
77} // end namespace llvm;
78
79/// Include device information enumerations
80#include "AMDILDeviceInfo.h"
81
82namespace llvm {
83/// OpenCL uses address spaces to differentiate between
84/// various memory regions on the hardware. On the CPU
85/// all of the address spaces point to the same memory,
86/// however on the GPU, each address space points to
87/// a seperate piece of memory that is unique from other
88/// memory locations.
89namespace AMDGPUAS {
90enum AddressSpaces {
91 PRIVATE_ADDRESS = 0, // Address space for private memory.
92 GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
93 CONSTANT_ADDRESS = 2, // Address space for constant memory.
94 LOCAL_ADDRESS = 3, // Address space for local memory.
95 REGION_ADDRESS = 4, // Address space for region memory.
96 ADDRESS_NONE = 5, // Address space for unknown memory.
97 PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
98 PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
99 USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
100 LAST_ADDRESS = 9
101};
102
103} // namespace AMDGPUAS
104
105} // end namespace llvm
106#endif // AMDIL_H_
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
deleted file mode 100644
index 8561f0b3175..00000000000
--- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
1//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#include "AMDIL7XXDevice.h"
10#include "AMDGPUSubtarget.h"
11#include "AMDILDevice.h"
12
13using namespace llvm;
14
15AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST)
16{
17 setCaps();
18 std::string name = mSTM->getDeviceName();
19 if (name == "rv710") {
20 mDeviceFlag = OCL_DEVICE_RV710;
21 } else if (name == "rv730") {
22 mDeviceFlag = OCL_DEVICE_RV730;
23 } else {
24 mDeviceFlag = OCL_DEVICE_RV770;
25 }
26}
27
28AMDGPU7XXDevice::~AMDGPU7XXDevice()
29{
30}
31
32void AMDGPU7XXDevice::setCaps()
33{
34 mSWBits.set(AMDGPUDeviceInfo::LocalMem);
35}
36
37size_t AMDGPU7XXDevice::getMaxLDSSize() const
38{
39 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
40 return MAX_LDS_SIZE_700;
41 }
42 return 0;
43}
44
45size_t AMDGPU7XXDevice::getWavefrontSize() const
46{
47 return AMDGPUDevice::HalfWavefrontSize;
48}
49
50uint32_t AMDGPU7XXDevice::getGeneration() const
51{
52 return AMDGPUDeviceInfo::HD4XXX;
53}
54
55uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const
56{
57 switch (DeviceID) {
58 default:
59 assert(0 && "ID type passed in is unknown!");
60 break;
61 case GLOBAL_ID:
62 case CONSTANT_ID:
63 case RAW_UAV_ID:
64 case ARENA_UAV_ID:
65 break;
66 case LDS_ID:
67 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
68 return DEFAULT_LDS_ID;
69 }
70 break;
71 case SCRATCH_ID:
72 if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
73 return DEFAULT_SCRATCH_ID;
74 }
75 break;
76 case GDS_ID:
77 assert(0 && "GDS UAV ID is not supported on this chip");
78 if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
79 return DEFAULT_GDS_ID;
80 }
81 break;
82 };
83
84 return 0;
85}
86
87uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const
88{
89 return 1;
90}
91
92AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST)
93{
94 setCaps();
95}
96
97AMDGPU770Device::~AMDGPU770Device()
98{
99}
100
101void AMDGPU770Device::setCaps()
102{
103 if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
104 mSWBits.set(AMDGPUDeviceInfo::FMA);
105 mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
106 }
107 mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
108 mHWBits.reset(AMDGPUDeviceInfo::LongOps);
109 mSWBits.set(AMDGPUDeviceInfo::LongOps);
110 mSWBits.set(AMDGPUDeviceInfo::LocalMem);
111}
112
113size_t AMDGPU770Device::getWavefrontSize() const
114{
115 return AMDGPUDevice::WavefrontSize;
116}
117
118AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST)
119{
120}
121
122AMDGPU710Device::~AMDGPU710Device()
123{
124}
125
126size_t AMDGPU710Device::getWavefrontSize() const
127{
128 return AMDGPUDevice::QuarterWavefrontSize;
129}
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
deleted file mode 100644
index e848e2e0f2c..00000000000
--- a/src/gallium/drivers/radeon/AMDIL7XXDevice.h
+++ /dev/null
@@ -1,70 +0,0 @@
1//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Interface for the subtarget data classes.
11//
12//===----------------------------------------------------------------------===//
13// This file will define the interface that each generation needs to
14// implement in order to correctly answer queries on the capabilities of the
15// specific hardware.
16//===----------------------------------------------------------------------===//
17#ifndef _AMDIL7XXDEVICEIMPL_H_
18#define _AMDIL7XXDEVICEIMPL_H_
19#include "AMDILDevice.h"
20
21namespace llvm {
22class AMDGPUSubtarget;
23
24//===----------------------------------------------------------------------===//
25// 7XX generation of devices and their respective sub classes
26//===----------------------------------------------------------------------===//
27
28// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX
29// devices are derived from this class. The AMDGPU7XX device will only
30// support the minimal features that are required to be considered OpenCL 1.0
31// compliant and nothing more.
32class AMDGPU7XXDevice : public AMDGPUDevice {
33public:
34 AMDGPU7XXDevice(AMDGPUSubtarget *ST);
35 virtual ~AMDGPU7XXDevice();
36 virtual size_t getMaxLDSSize() const;
37 virtual size_t getWavefrontSize() const;
38 virtual uint32_t getGeneration() const;
39 virtual uint32_t getResourceID(uint32_t DeviceID) const;
40 virtual uint32_t getMaxNumUAVs() const;
41
42protected:
43 virtual void setCaps();
44}; // AMDGPU7XXDevice
45
46// The AMDGPU770Device class represents the RV770 chip and it's
47// derivative cards. The difference between this device and the base
48// class is this device device adds support for double precision
49// and has a larger wavefront size.
50class AMDGPU770Device : public AMDGPU7XXDevice {
51public:
52 AMDGPU770Device(AMDGPUSubtarget *ST);
53 virtual ~AMDGPU770Device();
54 virtual size_t getWavefrontSize() const;
55private:
56 virtual void setCaps();
57}; // AMDGPU770Device
58
59// The AMDGPU710Device class derives from the 7XX base class, but this
60// class is a smaller derivative, so we need to overload some of the
61// functions in order to correctly specify this information.
62class AMDGPU710Device : public AMDGPU7XXDevice {
63public:
64 AMDGPU710Device(AMDGPUSubtarget *ST);
65 virtual ~AMDGPU710Device();
66 virtual size_t getWavefrontSize() const;
67}; // AMDGPU710Device
68
69} // namespace llvm
70#endif // _AMDILDEVICEIMPL_H_
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td
deleted file mode 100644
index ffe9ce2c532..00000000000
--- a/src/gallium/drivers/radeon/AMDILBase.td
+++ /dev/null
@@ -1,85 +0,0 @@
1//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Target-independent interfaces which we are implementing
10//===----------------------------------------------------------------------===//
11
12include "llvm/Target/Target.td"
13
14// Dummy Instruction itineraries for pseudo instructions
15def ALU_NULL : FuncUnit;
16def NullALU : InstrItinClass;
17
18//===----------------------------------------------------------------------===//
19// AMDIL Subtarget features.
20//===----------------------------------------------------------------------===//
21def FeatureFP64 : SubtargetFeature<"fp64",
22 "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
23 "true",
24 "Enable 64bit double precision operations">;
25def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
26 "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
27 "true",
28 "Enable byte addressable stores">;
29def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
30 "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
31 "true",
32 "Enable duplicate barrier detection(HD5XXX or later).">;
33def FeatureImages : SubtargetFeature<"images",
34 "CapsOverride[AMDGPUDeviceInfo::Images]",
35 "true",
36 "Enable image functions">;
37def FeatureMultiUAV : SubtargetFeature<"multi_uav",
38 "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
39 "true",
40 "Generate multiple UAV code(HD5XXX family or later)">;
41def FeatureMacroDB : SubtargetFeature<"macrodb",
42 "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
43 "true",
44 "Use internal macrodb, instead of macrodb in driver">;
45def FeatureNoAlias : SubtargetFeature<"noalias",
46 "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
47 "true",
48 "assert that all kernel argument pointers are not aliased">;
49def FeatureNoInline : SubtargetFeature<"no-inline",
50 "CapsOverride[AMDGPUDeviceInfo::NoInline]",
51 "true",
52 "specify whether to not inline functions">;
53
54def Feature64BitPtr : SubtargetFeature<"64BitPtr",
55 "mIs64bit",
56 "false",
57 "Specify if 64bit addressing should be used.">;
58
59def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
60 "mIs32on64bit",
61 "false",
62 "Specify if 64bit sized pointers with 32bit addressing should be used.">;
63def FeatureDebug : SubtargetFeature<"debug",
64 "CapsOverride[AMDGPUDeviceInfo::Debug]",
65 "true",
66 "Debug mode is enabled, so disable hardware accelerated address spaces.">;
67def FeatureDumpCode : SubtargetFeature <"DumpCode",
68 "mDumpCode",
69 "true",
70 "Dump MachineInstrs in the CodeEmitter">;
71
72def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
73 "mR600ALUInst",
74 "false",
75 "Older version of ALU instructions encoding.">;
76
77
78//===----------------------------------------------------------------------===//
79// Register File, Calling Conv, Instruction Descriptions
80//===----------------------------------------------------------------------===//
81
82
83include "AMDILRegisterInfo.td"
84include "AMDILInstrInfo.td"
85
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
deleted file mode 100644
index 20e27ef1132..00000000000
--- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
+++ /dev/null
@@ -1,3274 +0,0 @@
1//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9
10#define DEBUGME 0
11#define DEBUG_TYPE "structcfg"
12
13#include "AMDGPUInstrInfo.h"
14#include "AMDIL.h"
15#include "AMDILUtilityFunctions.h"
16#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/DominatorInternals.h"
20#include "llvm/Analysis/Dominators.h"
21#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineFunctionAnalysis.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineJumpTableInfo.h"
29#include "llvm/CodeGen/MachineLoopInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Target/TargetInstrInfo.h"
32
33#define FirstNonDebugInstr(A) A->begin()
34using namespace llvm;
35
36// TODO: move-begin.
37
38//===----------------------------------------------------------------------===//
39//
40// Statistics for CFGStructurizer.
41//
42//===----------------------------------------------------------------------===//
43
44STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
45 "matched");
46STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
47 "matched");
48STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
49 "pattern matched");
50STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
51 "pattern matched");
52STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
53 "matched");
54STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
55STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
56
57//===----------------------------------------------------------------------===//
58//
59// Miscellaneous utility for CFGStructurizer.
60//
61//===----------------------------------------------------------------------===//
62namespace llvmCFGStruct
63{
64#define SHOWNEWINSTR(i) \
65 if (DEBUGME) errs() << "New instr: " << *i << "\n"
66
67#define SHOWNEWBLK(b, msg) \
68if (DEBUGME) { \
69 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70 errs() << "\n"; \
71}
72
73#define SHOWBLK_DETAIL(b, msg) \
74if (DEBUGME) { \
75 if (b) { \
76 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77 b->print(errs()); \
78 errs() << "\n"; \
79 } \
80}
81
82#define INVALIDSCCNUM -1
83#define INVALIDREGNUM 0
84
85template<class LoopinfoT>
86void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88 iterEnd = LoopInfo.end();
89 iter != iterEnd; ++iter) {
90 (*iter)->print(OS, 0);
91 }
92}
93
94template<class NodeT>
95void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
96 size_t sz = Src.size();
97 for (size_t i = 0; i < sz/2; ++i) {
98 NodeT *t = Src[i];
99 Src[i] = Src[sz - i - 1];
100 Src[sz - i - 1] = t;
101 }
102}
103
104} //end namespace llvmCFGStruct
105
106
107//===----------------------------------------------------------------------===//
108//
109// MachinePostDominatorTree
110//
111//===----------------------------------------------------------------------===//
112
113namespace llvm {
114
115/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116/// to compute the a post-dominator tree.
117///
118struct MachinePostDominatorTree : public MachineFunctionPass {
119 static char ID; // Pass identification, replacement for typeid
120 DominatorTreeBase<MachineBasicBlock> *DT;
121 MachinePostDominatorTree() : MachineFunctionPass(ID)
122 {
123 DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
124 // postdominator
125 }
126
127 ~MachinePostDominatorTree();
128
129 virtual bool runOnMachineFunction(MachineFunction &MF);
130
131 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132 AU.setPreservesAll();
133 MachineFunctionPass::getAnalysisUsage(AU);
134 }
135
136 inline const std::vector<MachineBasicBlock *> &getRoots() const {
137 return DT->getRoots();
138 }
139
140 inline MachineDomTreeNode *getRootNode() const {
141 return DT->getRootNode();
142 }
143
144 inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
145 return DT->getNode(BB);
146 }
147
148 inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
149 return DT->getNode(BB);
150 }
151
152 inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
153 return DT->dominates(A, B);
154 }
155
156 inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
157 return DT->dominates(A, B);
158 }
159
160 inline bool
161 properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
162 return DT->properlyDominates(A, B);
163 }
164
165 inline bool
166 properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
167 return DT->properlyDominates(A, B);
168 }
169
170 inline MachineBasicBlock *
171 findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
172 return DT->findNearestCommonDominator(A, B);
173 }
174
175 virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
176 DT->print(OS);
177 }
178};
179} //end of namespace llvm
180
181char MachinePostDominatorTree::ID = 0;
182static RegisterPass<MachinePostDominatorTree>
183machinePostDominatorTreePass("machinepostdomtree",
184 "MachinePostDominator Tree Construction",
185 true, true);
186
187//const PassInfo *const llvm::MachinePostDominatorsID
188//= &machinePostDominatorTreePass;
189
190bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
191 DT->recalculate(F);
192 //DEBUG(DT->dump());
193 return false;
194}
195
196MachinePostDominatorTree::~MachinePostDominatorTree() {
197 delete DT;
198}
199
200//===----------------------------------------------------------------------===//
201//
202// supporting data structure for CFGStructurizer
203//
204//===----------------------------------------------------------------------===//
205
206namespace llvmCFGStruct
207{
208template<class PassT>
209struct CFGStructTraits {
210};
211
212template <class InstrT>
213class BlockInformation {
214public:
215 bool isRetired;
216 int sccNum;
217 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
218 //Instructions defining the corresponding successor.
219 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
220};
221
222template <class BlockT, class InstrT, class RegiT>
223class LandInformation {
224public:
225 BlockT *landBlk;
226 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
227 //WHILELOOP(thisloop) init before entering
228 //thisloop.
229 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
230 //WHILELOOP(thisloop) init after entering
231 //thisloop.
232 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
233 //land block, branch cond on this reg.
234 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
235 //endif" after ENDLOOP(thisloop) break
236 //outerLoopOf(thisLoop).
237 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
238 //endif" after ENDLOOP(thisloop) continue on
239 //outerLoopOf(thisLoop).
240 LandInformation() : landBlk(NULL) {}
241};
242
243} //end of namespace llvmCFGStruct
244
245//===----------------------------------------------------------------------===//
246//
247// CFGStructurizer
248//
249//===----------------------------------------------------------------------===//
250
251namespace llvmCFGStruct
252{
253// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254template<class PassT>
255class CFGStructurizer
256{
257public:
258 typedef enum {
259 Not_SinglePath = 0,
260 SinglePath_InPath = 1,
261 SinglePath_NotInPath = 2
262 } PathToKind;
263
264public:
265 typedef typename PassT::InstructionType InstrT;
266 typedef typename PassT::FunctionType FuncT;
267 typedef typename PassT::DominatortreeType DomTreeT;
268 typedef typename PassT::PostDominatortreeType PostDomTreeT;
269 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
270 typedef typename PassT::LoopinfoType LoopInfoT;
271
272 typedef GraphTraits<FuncT *> FuncGTraits;
273 //typedef FuncGTraits::nodes_iterator BlockIterator;
274 typedef typename FuncT::iterator BlockIterator;
275
276 typedef typename FuncGTraits::NodeType BlockT;
277 typedef GraphTraits<BlockT *> BlockGTraits;
278 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
279 //typedef BlockGTraits::succ_iterator InstructionIterator;
280 typedef typename BlockT::iterator InstrIterator;
281
282 typedef CFGStructTraits<PassT> CFGTraits;
283 typedef BlockInformation<InstrT> BlockInfo;
284 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
285
286 typedef int RegiT;
287 typedef typename PassT::LoopType LoopT;
288 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
289 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
290 //landing info for loop break
291 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
292
293public:
294 CFGStructurizer();
295 ~CFGStructurizer();
296
297 /// Perform the CFG structurization
298 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
299
300 /// Perform the CFG preparation
301 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
302
303private:
304 void reversePredicateSetter(typename BlockT::iterator);
305 void orderBlocks();
306 void printOrderedBlocks(llvm::raw_ostream &OS);
307 int patternMatch(BlockT *CurBlock);
308 int patternMatchGroup(BlockT *CurBlock);
309
310 int serialPatternMatch(BlockT *CurBlock);
311 int ifPatternMatch(BlockT *CurBlock);
312 int switchPatternMatch(BlockT *CurBlock);
313 int loopendPatternMatch(BlockT *CurBlock);
314 int loopPatternMatch(BlockT *CurBlock);
315
316 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
318 //int loopWithoutBreak(BlockT *);
319
320 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
321 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
322 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
323 BlockT *ContBlock, LoopT *contLoop);
324 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
325 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
326 BlockT *FalseBlock);
327 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
328 BlockT *FalseBlock);
329 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
330 BlockT *FalseBlock, BlockT **LandBlockPtr);
331 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
332 BlockT *FalseBlock, BlockT *LandBlock,
333 bool Detail = false);
334 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
335 bool AllowSideEntry = true);
336 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
337 bool AllowSideEntry = true);
338 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
339 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
340
341 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
342 BlockT *TrueBlock, BlockT *FalseBlock,
343 BlockT *LandBlock);
344 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
345 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
346 BlockT *ExitLandBlock, RegiT SetReg);
347 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
348 RegiT SetReg);
349 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
350 std::set<BlockT*> &ExitBlockSet,
351 BlockT *ExitLandBlk);
352 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
353 BlockTSmallerVector &ExitingBlocks,
354 BlockTSmallerVector &ExitBlocks);
355 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
356 void removeUnconditionalBranch(BlockT *SrcBlock);
357 void removeRedundantConditionalBranch(BlockT *SrcBlock);
358 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
359
360 void removeSuccessor(BlockT *SrcBlock);
361 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
362 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
363
364 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
365 InstrIterator InsertPos);
366
367 void recordSccnum(BlockT *SrcBlock, int SCCNum);
368 int getSCCNum(BlockT *srcBlk);
369
370 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
371 bool isRetiredBlock(BlockT *SrcBlock);
372 bool isActiveLoophead(BlockT *CurBlock);
373 bool needMigrateBlock(BlockT *Block);
374
375 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
376 BlockTSmallerVector &exitBlocks,
377 std::set<BlockT*> &ExitBlockSet);
378 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
379 BlockT *getLoopLandBlock(LoopT *LoopRep);
380 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
381
382 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
383 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
384 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
385 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
386 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
387
388 bool hasBackEdge(BlockT *curBlock);
389 unsigned getLoopDepth (LoopT *LoopRep);
390 int countActiveBlock(
391 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
392 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
393 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
394 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
395
396private:
397 DomTreeT *domTree;
398 PostDomTreeT *postDomTree;
399 LoopInfoT *loopInfo;
400 PassT *passRep;
401 FuncT *funcRep;
402
403 BlockInfoMap blockInfoMap;
404 LoopLandInfoMap loopLandInfoMap;
405 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
406 const AMDGPURegisterInfo *TRI;
407
408}; //template class CFGStructurizer
409
410template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
411 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
412}
413
414template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
415 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
416 E = blockInfoMap.end(); I != E; ++I) {
417 delete I->second;
418 }
419}
420
421template<class PassT>
422bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
423 const AMDGPURegisterInfo * tri) {
424 passRep = &pass;
425 funcRep = &func;
426 TRI = tri;
427
428 bool changed = false;
429 //func.RenumberBlocks();
430
431 //to do, if not reducible flow graph, make it so ???
432
433 if (DEBUGME) {
434 errs() << "AMDGPUCFGStructurizer::prepare\n";
435 //func.viewCFG();
436 //func.viewCFGOnly();
437 //func.dump();
438 }
439
440 //FIXME: gcc complains on this.
441 //domTree = &pass.getAnalysis<DomTreeT>();
442 //domTree = CFGTraits::getDominatorTree(pass);
443 //if (DEBUGME) {
444 // domTree->print(errs());
445 //}
446
447 //FIXME: gcc complains on this.
448 //domTree = &pass.getAnalysis<DomTreeT>();
449 //postDomTree = CFGTraits::getPostDominatorTree(pass);
450 //if (DEBUGME) {
451 // postDomTree->print(errs());
452 //}
453
454 //FIXME: gcc complains on this.
455 //loopInfo = &pass.getAnalysis<LoopInfoT>();
456 loopInfo = CFGTraits::getLoopInfo(pass);
457 if (DEBUGME) {
458 errs() << "LoopInfo:\n";
459 PrintLoopinfo(*loopInfo, errs());
460 }
461
462 orderBlocks();
463 if (DEBUGME) {
464 errs() << "Ordered blocks:\n";
465 printOrderedBlocks(errs());
466 }
467
468 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
469
470 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
471 iterEnd = loopInfo->end();
472 iter != iterEnd; ++iter) {
473 LoopT* loopRep = (*iter);
474 BlockTSmallerVector exitingBlks;
475 loopRep->getExitingBlocks(exitingBlks);
476
477 if (exitingBlks.size() == 0) {
478 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
479 if (dummyExitBlk != NULL)
480 retBlks.push_back(dummyExitBlk);
481 }
482 }
483
484 // Remove unconditional branch instr.
485 // Add dummy exit block iff there are multiple returns.
486
487 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
488 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
489 iterBlk != iterEndBlk;
490 ++iterBlk) {
491 BlockT *curBlk = *iterBlk;
492 removeUnconditionalBranch(curBlk);
493 removeRedundantConditionalBranch(curBlk);
494 if (CFGTraits::isReturnBlock(curBlk)) {
495 retBlks.push_back(curBlk);
496 }
497 assert(curBlk->succ_size() <= 2);
498 //assert(curBlk->size() > 0);
499 //removeEmptyBlock(curBlk) ??
500 } //for
501
502 if (retBlks.size() >= 2) {
503 addDummyExitBlock(retBlks);
504 changed = true;
505 }
506
507 return changed;
508} //CFGStructurizer::prepare
509
510template<class PassT>
511bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
512 const AMDGPURegisterInfo * tri) {
513 passRep = &pass;
514 funcRep = &func;
515 TRI = tri;
516
517 //func.RenumberBlocks();
518
519 //Assume reducible CFG...
520 if (DEBUGME) {
521 errs() << "AMDGPUCFGStructurizer::run\n";
522 //errs() << func.getFunction()->getNameStr() << "\n";
523 func.viewCFG();
524 //func.viewCFGOnly();
525 //func.dump();
526 }
527
528#if 1
529 //FIXME: gcc complains on this.
530 //domTree = &pass.getAnalysis<DomTreeT>();
531 domTree = CFGTraits::getDominatorTree(pass);
532 if (DEBUGME) {
533 domTree->print(errs(), (const llvm::Module*)0);
534 }
535#endif
536
537 //FIXME: gcc complains on this.
538 //domTree = &pass.getAnalysis<DomTreeT>();
539 postDomTree = CFGTraits::getPostDominatorTree(pass);
540 if (DEBUGME) {
541 postDomTree->print(errs());
542 }
543
544 //FIXME: gcc complains on this.
545 //loopInfo = &pass.getAnalysis<LoopInfoT>();
546 loopInfo = CFGTraits::getLoopInfo(pass);
547 if (DEBUGME) {
548 errs() << "LoopInfo:\n";
549 PrintLoopinfo(*loopInfo, errs());
550 }
551
552 orderBlocks();
553//#define STRESSTEST
554#ifdef STRESSTEST
555 //Use the worse block ordering to test the algorithm.
556 ReverseVector(orderedBlks);
557#endif
558
559 if (DEBUGME) {
560 errs() << "Ordered blocks:\n";
561 printOrderedBlocks(errs());
562 }
563 int numIter = 0;
564 bool finish = false;
565 BlockT *curBlk;
566 bool makeProgress = false;
567 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
568 orderedBlks.end());
569
570 do {
571 ++numIter;
572 if (DEBUGME) {
573 errs() << "numIter = " << numIter
574 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
575 }
576
577 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
578 iterBlk = orderedBlks.begin();
579 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
580 iterBlkEnd = orderedBlks.end();
581
582 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
583 sccBeginIter = iterBlk;
584 BlockT *sccBeginBlk = NULL;
585 int sccNumBlk = 0; // The number of active blocks, init to a
586 // maximum possible number.
587 int sccNumIter; // Number of iteration in this SCC.
588
589 while (iterBlk != iterBlkEnd) {
590 curBlk = *iterBlk;
591
592 if (sccBeginBlk == NULL) {
593 sccBeginIter = iterBlk;
594 sccBeginBlk = curBlk;
595 sccNumIter = 0;
596 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
597 if (DEBUGME) {
598 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
599 errs() << "\n";
600 }
601 }
602
603 if (!isRetiredBlock(curBlk)) {
604 patternMatch(curBlk);
605 }
606
607 ++iterBlk;
608
609 bool contNextScc = true;
610 if (iterBlk == iterBlkEnd
611 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
612 // Just finish one scc.
613 ++sccNumIter;
614 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
615 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
616 if (DEBUGME) {
617 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
618 << ", sccNumIter = " << sccNumIter;
619 errs() << "doesn't make any progress\n";
620 }
621 contNextScc = true;
622 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
623 sccNumBlk = sccRemainedNumBlk;
624 iterBlk = sccBeginIter;
625 contNextScc = false;
626 if (DEBUGME) {
627 errs() << "repeat processing SCC" << getSCCNum(curBlk)
628 << "sccNumIter = " << sccNumIter << "\n";
629 func.viewCFG();
630 //func.viewCFGOnly();
631 }
632 } else {
633 // Finish the current scc.
634 contNextScc = true;
635 }
636 } else {
637 // Continue on next component in the current scc.
638 contNextScc = false;
639 }
640
641 if (contNextScc) {
642 sccBeginBlk = NULL;
643 }
644 } //while, "one iteration" over the function.
645
646 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
647 if (entryBlk->succ_size() == 0) {
648 finish = true;
649 if (DEBUGME) {
650 errs() << "Reduce to one block\n";
651 }
652 } else {
653 int newnumRemainedBlk
654 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
655 // consider cloned blocks ??
656 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
657 makeProgress = true;
658 numRemainedBlk = newnumRemainedBlk;
659 } else {
660 makeProgress = false;
661 if (DEBUGME) {
662 errs() << "No progress\n";
663 }
664 }
665 }
666 } while (!finish && makeProgress);
667
668 // Misc wrap up to maintain the consistency of the Function representation.
669 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
670
671 // Detach retired Block, release memory.
672 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
673 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
674 if ((*iterMap).second && (*iterMap).second->isRetired) {
675 assert(((*iterMap).first)->getNumber() != -1);
676 if (DEBUGME) {
677 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
678 }
679 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
680 }
681 delete (*iterMap).second;
682 }
683 blockInfoMap.clear();
684
685 // clear loopLandInfoMap
686 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
687 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
688 delete (*iterMap).second;
689 }
690 loopLandInfoMap.clear();
691
692 if (DEBUGME) {
693 func.viewCFG();
694 //func.dump();
695 }
696
697 if (!finish) {
698 assert(!"IRREDUCIBL_CF");
699 }
700
701 return true;
702} //CFGStructurizer::run
703
704/// Print the ordered Blocks.
705///
706template<class PassT>
707void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
708 size_t i = 0;
709 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
710 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
711 iterBlk != iterBlkEnd;
712 ++iterBlk, ++i) {
713 os << "BB" << (*iterBlk)->getNumber();
714 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
715 if (i != 0 && i % 10 == 0) {
716 os << "\n";
717 } else {
718 os << " ";
719 }
720 }
721} //printOrderedBlocks
722
723/// Compute the reversed DFS post order of Blocks
724///
725template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
726 int sccNum = 0;
727 BlockT *bb;
728 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
729 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
730 std::vector<BlockT *> &sccNext = *sccIter;
731 for (typename std::vector<BlockT *>::const_iterator
732 blockIter = sccNext.begin(), blockEnd = sccNext.end();
733 blockIter != blockEnd; ++blockIter) {
734 bb = *blockIter;
735 orderedBlks.push_back(bb);
736 recordSccnum(bb, sccNum);
737 }
738 }
739
740 //walk through all the block in func to check for unreachable
741 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
742 blockEnd1 = FuncGTraits::nodes_end(funcRep);
743 blockIter1 != blockEnd1; ++blockIter1) {
744 BlockT *bb = &(*blockIter1);
745 sccNum = getSCCNum(bb);
746 if (sccNum == INVALIDSCCNUM) {
747 errs() << "unreachable block BB" << bb->getNumber() << "\n";
748 }
749 } //end of for
750} //orderBlocks
751
752template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
753 int numMatch = 0;
754 int curMatch;
755
756 if (DEBUGME) {
757 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
758 }
759
760 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
761 numMatch += curMatch;
762 }
763
764 if (DEBUGME) {
765 errs() << "End patternMatch BB" << curBlk->getNumber()
766 << ", numMatch = " << numMatch << "\n";
767 }
768
769 return numMatch;
770} //patternMatch
771
772template<class PassT>
773int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
774 int numMatch = 0;
775 numMatch += serialPatternMatch(curBlk);
776 numMatch += ifPatternMatch(curBlk);
777 //numMatch += switchPatternMatch(curBlk);
778 numMatch += loopendPatternMatch(curBlk);
779 numMatch += loopPatternMatch(curBlk);
780 return numMatch;
781}//patternMatchGroup
782
783template<class PassT>
784int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
785 if (curBlk->succ_size() != 1) {
786 return 0;
787 }
788
789 BlockT *childBlk = *curBlk->succ_begin();
790 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
791 return 0;
792 }
793
794 mergeSerialBlock(curBlk, childBlk);
795 ++numSerialPatternMatch;
796 return 1;
797} //serialPatternMatch
798
799template<class PassT>
800int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
801 //two edges
802 if (curBlk->succ_size() != 2) {
803 return 0;
804 }
805
806 if (hasBackEdge(curBlk)) {
807 return 0;
808 }
809
810 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
811 if (branchInstr == NULL) {
812 return 0;
813 }
814
815 assert(CFGTraits::isCondBranch(branchInstr));
816
817 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
818 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
819 BlockT *landBlk;
820 int cloned = 0;
821
822 // TODO: Simplify
823 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
824 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
825 landBlk = *trueBlk->succ_begin();
826 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
827 landBlk = NULL;
828 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
829 landBlk = falseBlk;
830 falseBlk = NULL;
831 } else if (falseBlk->succ_size() == 1
832 && *falseBlk->succ_begin() == trueBlk) {
833 landBlk = trueBlk;
834 trueBlk = NULL;
835 } else if (falseBlk->succ_size() == 1
836 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
837 landBlk = *falseBlk->succ_begin();
838 } else if (trueBlk->succ_size() == 1
839 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
840 landBlk = *trueBlk->succ_begin();
841 } else {
842 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
843 }
844
845 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
846 // new BB created for landBlk==NULL may introduce new challenge to the
847 // reduction process.
848 if (landBlk != NULL &&
849 ((trueBlk && trueBlk->pred_size() > 1)
850 || (falseBlk && falseBlk->pred_size() > 1))) {
851 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
852 }
853
854 if (trueBlk && trueBlk->pred_size() > 1) {
855 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
856 ++cloned;
857 }
858
859 if (falseBlk && falseBlk->pred_size() > 1) {
860 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
861 ++cloned;
862 }
863
864 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
865
866 ++numIfPatternMatch;
867
868 numClonedBlock += cloned;
869
870 return 1 + cloned;
871} //ifPatternMatch
872
873template<class PassT>
874int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
875 return 0;
876} //switchPatternMatch
877
878template<class PassT>
879int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
880 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
881 typename std::vector<LoopT *> nestedLoops;
882 while (loopRep) {
883 nestedLoops.push_back(loopRep);
884 loopRep = loopRep->getParentLoop();
885 }
886
887 if (nestedLoops.size() == 0) {
888 return 0;
889 }
890
891 // Process nested loop outside->inside, so "continue" to a outside loop won't
892 // be mistaken as "break" of the current loop.
893 int num = 0;
894 for (typename std::vector<LoopT *>::reverse_iterator
895 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
896 iter != iterEnd; ++iter) {
897 loopRep = *iter;
898
899 if (getLoopLandBlock(loopRep) != NULL) {
900 continue;
901 }
902
903 BlockT *loopHeader = loopRep->getHeader();
904
905 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
906
907 if (numBreak == -1) {
908 break;
909 }
910
911 int numCont = loopcontPatternMatch(loopRep, loopHeader);
912 num += numBreak + numCont;
913 }
914
915 return num;
916} //loopendPatternMatch
917
918template<class PassT>
919int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
920 if (curBlk->succ_size() != 0) {
921 return 0;
922 }
923
924 int numLoop = 0;
925 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
926 while (loopRep && loopRep->getHeader() == curBlk) {
927 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
928 if (loopLand) {
929 BlockT *landBlk = loopLand->landBlk;
930 assert(landBlk);
931 if (!isRetiredBlock(landBlk)) {
932 mergeLooplandBlock(curBlk, loopLand);
933 ++numLoop;
934 }
935 }
936 loopRep = loopRep->getParentLoop();
937 }
938
939 numLoopPatternMatch += numLoop;
940
941 return numLoop;
942} //loopPatternMatch
943
template<class PassT>
int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
                                                  BlockT *loopHeader) {
  // Structurize all break-style exits of loopRep.  Returns the number of
  // patterns handled (breaks + serial merges + cloned blocks), or -1 when
  // the exit structure cannot be reduced yet and the caller must give up on
  // this loop for now.
  BlockTSmallerVector exitingBlks;
  loopRep->getExitingBlocks(exitingBlks);

  if (DEBUGME) {
    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
  }

  // No exiting block: record an (empty) land block and report no breaks.
  if (exitingBlks.size() == 0) {
    setLoopLandBlock(loopRep);
    return 0;
  }

  // Compute the corresponding exitBlks and exit block set.
  BlockTSmallerVector exitBlks;
  std::set<BlockT *> exitBlkSet;
  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
    BlockT *exitingBlk = *iter;
    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
    exitBlks.push_back(exitBlk);
    exitBlkSet.insert(exitBlk); //non-duplicate insert
  }

  assert(exitBlkSet.size() > 0);
  assert(exitBlks.size() == exitingBlks.size());

  if (DEBUGME) {
    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
  }

  // Find exitLandBlk.
  BlockT *exitLandBlk = NULL;
  int numCloned = 0;
  int numSerial = 0;

  if (exitBlkSet.size() == 1)
  {
    exitLandBlk = *exitBlkSet.begin();
  } else {
    // Multiple distinct exits: the land block is the nearest common
    // post-dominator of all of them.
    exitLandBlk = findNearestCommonPostDom(exitBlkSet);

    if (exitLandBlk == NULL) {
      return -1;
    }

    // Classify every exit block: either all of them lie on a single path to
    // the land block ("in path") or none do; a mixture is not reducible.
    bool allInPath = true;
    bool allNotInPath = true;
    for (typename std::set<BlockT*>::const_iterator
         iter = exitBlkSet.begin(),
         iterEnd = exitBlkSet.end();
         iter != iterEnd; ++iter) {
      BlockT *exitBlk = *iter;

      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
      if (DEBUGME) {
        errs() << "BB" << exitBlk->getNumber()
               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
               << pathKind << "\n";
      }

      allInPath = allInPath && (pathKind == SinglePath_InPath);
      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);

      if (!allInPath && !allNotInPath) {
        if (DEBUGME) {
          errs() << "singlePath check fail\n";
        }
        return -1;
      }
    } // check all exit blocks

    if (allNotInPath) {
#if 1

      // TODO: Simplify, maybe separate function?
      //funcRep->viewCFG();
      LoopT *parentLoopRep = loopRep->getParentLoop();
      BlockT *parentLoopHeader = NULL;
      if (parentLoopRep)
        parentLoopHeader = parentLoopRep->getHeader();

      // If the common post-dominator is the parent loop's header the exits
      // are really "continue"s of the parent loop, so relocate them; failing
      // that, try to funnel the exits through a new endbranch block.
      if (exitLandBlk == parentLoopHeader &&
          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
                                               loopRep,
                                               exitBlkSet,
                                               exitLandBlk)) != NULL) {
        if (DEBUGME) {
          errs() << "relocateLoopcontBlock success\n";
        }
      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
                                                      exitingBlks,
                                                      exitBlks)) != NULL) {
        if (DEBUGME) {
          errs() << "insertEndbranchBlock success\n";
        }
      } else {
        if (DEBUGME) {
          errs() << "loop exit fail\n";
        }
        return -1;
      }
#else
      return -1;
#endif
    }

    // Handle side entry to exit path.
    exitBlks.clear();
    exitBlkSet.clear();
    for (typename BlockTSmallerVector::iterator iterExiting =
           exitingBlks.begin(),
         iterExitingEnd = exitingBlks.end();
         iterExiting != iterExitingEnd; ++iterExiting) {
      BlockT *exitingBlk = *iterExiting;
      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
      BlockT *newExitBlk = exitBlk;

      // A shared exit block that is not the land block must be privatized
      // for this exiting block before side entries can be cloned away.
      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
        ++numCloned;
      }

      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);

      exitBlks.push_back(newExitBlk);
      exitBlkSet.insert(newExitBlk);
    }

    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
         iterExitEnd = exitBlks.end();
         iterExit != iterExitEnd; ++iterExit) {
      BlockT *exitBlk = *iterExit;
      numSerial += serialPatternMatch(exitBlk);
    }

    // After serial merging every exit block must either be the land block
    // itself or fall straight through to it; otherwise the loop shape is
    // still not reducible.
    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
         iterExitEnd = exitBlks.end();
         iterExit != iterExitEnd; ++iterExit) {
      BlockT *exitBlk = *iterExit;
      if (exitBlk->pred_size() > 1) {
        if (exitBlk != exitLandBlk) {
          return -1;
        }
      } else {
        if (exitBlk != exitLandBlk &&
            (exitBlk->succ_size() != 1 ||
             *exitBlk->succ_begin() != exitLandBlk)) {
          return -1;
        }
      }
    }
  } // else

  // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);

  // Fold break into the breaking block. Leverage across level breaks.
  assert(exitingBlks.size() == exitBlks.size());
  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
    BlockT *exitBlk = *iterExit;
    BlockT *exitingBlk = *iterExiting;
    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
  }

  int numBreak = static_cast<int>(exitingBlks.size());
  numLoopbreakPatternMatch += numBreak;
  numClonedBlock += numCloned;
  return numBreak + numSerial + numCloned;
} //loopbreakPatternMatch
1120
1121template<class PassT>
1122int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
1123 BlockT *loopHeader) {
1124 int numCont = 0;
1125 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
1126 for (typename InvBlockGTraits::ChildIteratorType iter =
1127 InvBlockGTraits::child_begin(loopHeader),
1128 iterEnd = InvBlockGTraits::child_end(loopHeader);
1129 iter != iterEnd; ++iter) {
1130 BlockT *curBlk = *iter;
1131 if (loopRep->contains(curBlk)) {
1132 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1133 loopHeader, loopRep);
1134 contBlk.push_back(curBlk);
1135 ++numCont;
1136 }
1137 }
1138
1139 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
1140 iter = contBlk.begin(), iterEnd = contBlk.end();
1141 iter != iterEnd; ++iter) {
1142 (*iter)->removeSuccessor(loopHeader);
1143 }
1144
1145 numLoopcontPatternMatch += numCont;
1146
1147 return numCont;
1148} //loopcontPatternMatch
1149
1150
1151template<class PassT>
1152bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1153 BlockT *src2Blk) {
1154 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1155 // same loop with LoopLandInfo without explicitly keeping track of
1156 // loopContBlks and loopBreakBlks, this is a method to get the information.
1157 //
1158 if (src1Blk->succ_size() == 0) {
1159 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1160 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1161 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1162 if (theEntry != NULL) {
1163 if (DEBUGME) {
1164 errs() << "isLoopContBreakBlock yes src1 = BB"
1165 << src1Blk->getNumber()
1166 << " src2 = BB" << src2Blk->getNumber() << "\n";
1167 }
1168 return true;
1169 }
1170 }
1171 }
1172 return false;
1173} //isSameloopDetachedContbreak
1174
1175template<class PassT>
1176int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1177 BlockT *trueBlk,
1178 BlockT *falseBlk) {
1179 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1180 if (num == 0) {
1181 if (DEBUGME) {
1182 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1183 }
1184 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1185 }
1186 return num;
1187}
1188
template<class PassT>
int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
                                                BlockT *trueBlk,
                                                BlockT *falseBlk) {
  // Walk down from trueBlk looking for a block that falseBlk reaches along a
  // single path; when found, clone away the side entries so the region
  // becomes a proper if-then-else and re-run the pattern matchers on it.
  // Returns the number of patterns handled (0 when no such block exists).
  int num = 0;
  BlockT *downBlk;

  //trueBlk could be the common post dominator
  downBlk = trueBlk;

  if (DEBUGME) {
    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
           << " true = BB" << trueBlk->getNumber()
           << ", numSucc=" << trueBlk->succ_size()
           << " false = BB" << falseBlk->getNumber() << "\n";
  }

  while (downBlk) {
    if (DEBUGME) {
      errs() << "check down = BB" << downBlk->getNumber();
    }

    if (//postDomTree->dominates(downBlk, falseBlk) &&
        singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
      if (DEBUGME) {
        errs() << " working\n";
      }

      // Privatize the paths from headBlk through both arms to downBlk, then
      // re-run the serial and if matchers on the cleaned-up region.
      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);

      numClonedBlock += num;
      num += serialPatternMatch(*headBlk->succ_begin());
      num += serialPatternMatch(*(++headBlk->succ_begin()));
      num += ifPatternMatch(headBlk);
      assert(num > 0); //

      break;
    }
    if (DEBUGME) {
      errs() << " not working\n";
    }
    // Follow the unique successor chain; stop when it branches or ends.
    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
  } // walk down the postDomTree

  return num;
} //handleJumpintoIfImp
1236
1237template<class PassT>
1238void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1239 BlockT *trueBlk,
1240 BlockT *falseBlk,
1241 BlockT *landBlk,
1242 bool detail) {
1243 errs() << "head = BB" << headBlk->getNumber()
1244 << " size = " << headBlk->size();
1245 if (detail) {
1246 errs() << "\n";
1247 headBlk->print(errs());
1248 errs() << "\n";
1249 }
1250
1251 if (trueBlk) {
1252 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1253 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1254 if (detail) {
1255 errs() << "\n";
1256 trueBlk->print(errs());
1257 errs() << "\n";
1258 }
1259 }
1260 if (falseBlk) {
1261 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1262 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1263 if (detail) {
1264 errs() << "\n";
1265 falseBlk->print(errs());
1266 errs() << "\n";
1267 }
1268 }
1269 if (landBlk) {
1270 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1271 << landBlk->size() << " numPred = " << landBlk->pred_size();
1272 if (detail) {
1273 errs() << "\n";
1274 landBlk->print(errs());
1275 errs() << "\n";
1276 }
1277 }
1278
1279 errs() << "\n";
1280} //showImproveSimpleJumpintoIf
1281
template<class PassT>
int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
                                                    BlockT *trueBlk,
                                                    BlockT *falseBlk,
                                                    BlockT **plandBlk) {
  // Make a jump-into-if region reducible by "migrating" the offending arm(s)
  // below the join point, selected at runtime by a fresh flag register
  // (initReg).  Returns the number of new blocks created (0 or 1) and
  // updates *plandBlk with the (possibly newly created) landing block.
  bool migrateTrue = false;
  bool migrateFalse = false;

  BlockT *landBlk = *plandBlk;

  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
         && (falseBlk == NULL || falseBlk->succ_size() <= 1));

  if (trueBlk == falseBlk) {
    return 0;
  }

#if 0
  if (DEBUGME) {
    errs() << "improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
  }
#endif

  // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
  // May consider the # landBlk->pred_size() as it represents the number of
  // assignment initReg = .. needed to insert.
  migrateTrue = needMigrateBlock(trueBlk);
  migrateFalse = needMigrateBlock(falseBlk);

  if (!migrateTrue && !migrateFalse) {
    return 0;
  }

  // If we need to migrate either trueBlk or falseBlk, also migrate the other
  // one when it has more than one predecessor; without doing this, a path
  // through that other predecessor (rather than headBlk) would see an
  // undefined value in initReg.
  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
    migrateTrue = true;
  }
  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
    migrateFalse = true;
  }

  if (DEBUGME) {
    errs() << "before improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
  }

  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
  //
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
  //      {initReg = 0; org falseBlk branch }
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
  //      => org landBlk
  //      if landBlk->pred_size() > 2, put the above if-else inside
  //      if (initReg !=2) {...}
  //
  // add initReg = initVal to headBlk

  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  unsigned initReg =
    funcRep->getRegInfo().createVirtualRegister(I32RC);
  // When only one arm migrates, seed the flag in headBlk with the value that
  // selects the non-migrated path at the join.
  if (!migrateTrue || !migrateFalse) {
    int initVal = migrateTrue ? 0 : 1;
    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
  }

  int numNewBlk = 0;

  if (landBlk == NULL) {
    // No join block yet: create one and wire each surviving arm (or the head
    // itself when an arm is absent) to it.
    landBlk = funcRep->CreateMachineBasicBlock();
    funcRep->push_back(landBlk); //insert to function

    if (trueBlk) {
      trueBlk->addSuccessor(landBlk);
    } else {
      headBlk->addSuccessor(landBlk);
    }

    if (falseBlk) {
      falseBlk->addSuccessor(landBlk);
    } else {
      headBlk->addSuccessor(landBlk);
    }

    numNewBlk ++;
  }

  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);

  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
  typename BlockT::iterator insertPos =
    CFGTraits::getInstrPos
    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));

  if (landBlkHasOtherPred) {
    // Guard the selector if-else with "if (initReg != 2)" so paths entering
    // the land block from elsewhere skip it (those paths assign initReg = 2
    // below).
    unsigned immReg =
      funcRep->getRegInfo().createVirtualRegister(I32RC);
    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
    unsigned cmpResReg =
      funcRep->getRegInfo().createVirtualRegister(I32RC);

    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
                                        initReg, immReg);
    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
                                      AMDGPU::IF_LOGICALZ_i32, passRep,
                                      cmpResReg, DebugLoc());
  }

  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
                                    passRep, initReg, DebugLoc());

  if (migrateTrue) {
    migrateInstruction(trueBlk, landBlk, insertPos);
    // need to unconditionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 1).
    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
  }
  CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);

  if (migrateFalse) {
    migrateInstruction(falseBlk, landBlk, insertPos);
    // need to unconditionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 0)
    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
  }
  //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);

  if (landBlkHasOtherPred) {
    // add endif
    CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);

    // put initReg = 2 to other predecessors of landBlk
    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
         ++predIter) {
      BlockT *curBlk = *predIter;
      if (curBlk != trueBlk && curBlk != falseBlk) {
        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
      }
    } //for
  }
  if (DEBUGME) {
    errs() << "result from improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
  }

  // update landBlk
  *plandBlk = landBlk;

  return numNewBlk;
} //improveSimpleJumpintoIf
1439
1440template<class PassT>
1441void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1442 LoopT *exitingLoop,
1443 BlockT *exitBlk,
1444 LoopT *exitLoop,
1445 BlockT *landBlk) {
1446 if (DEBUGME) {
1447 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1448 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1449 }
1450 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1451
1452 RegiT initReg = INVALIDREGNUM;
1453 if (exitingLoop != exitLoop) {
1454 initReg = static_cast<int>
1455 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1456 assert(initReg != INVALIDREGNUM);
1457 addLoopBreakInitReg(exitLoop, initReg);
1458 while (exitingLoop != exitLoop && exitingLoop) {
1459 addLoopBreakOnReg(exitingLoop, initReg);
1460 exitingLoop = exitingLoop->getParentLoop();
1461 }
1462 assert(exitingLoop == exitLoop);
1463 }
1464
1465 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1466
1467} //handleLoopbreak
1468
1469template<class PassT>
1470void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1471 LoopT *contingLoop,
1472 BlockT *contBlk,
1473 LoopT *contLoop) {
1474 if (DEBUGME) {
1475 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1476 << " header = BB" << contBlk->getNumber() << "\n";
1477
1478 errs() << "Trying to continue loop-depth = "
1479 << getLoopDepth(contLoop)
1480 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1481 }
1482
1483 RegiT initReg = INVALIDREGNUM;
1484 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1485 if (contingLoop != contLoop) {
1486 initReg = static_cast<int>
1487 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1488 assert(initReg != INVALIDREGNUM);
1489 addLoopContInitReg(contLoop, initReg);
1490 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1491 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1492 contingLoop = contingLoop->getParentLoop();
1493 }
1494 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1495 addLoopContOnReg(contingLoop, initReg);
1496 }
1497
1498 settleLoopcontBlock(contingBlk, contBlk, initReg);
1499 //contingBlk->removeSuccessor(loopHeader);
1500} //handleLoopcontBlock
1501
template<class PassT>
void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
  // Fold srcBlk into dstBlk: move its (non-debug-prefix) instructions to the
  // end of dstBlk, take over its successor edges, then retire it.
  if (DEBUGME) {
    errs() << "serialPattern BB" << dstBlk->getNumber()
           << " <= BB" << srcBlk->getNumber() << "\n";
  }
  //removeUnconditionalBranch(dstBlk);
  dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());

  dstBlk->removeSuccessor(srcBlk);
  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);

  removeSuccessor(srcBlk);
  retireBlock(dstBlk, srcBlk);
} //mergeSerialBlock
1517
template<class PassT>
void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
                                                  BlockT *curBlk,
                                                  BlockT *trueBlk,
                                                  BlockT *falseBlk,
                                                  BlockT *landBlk) {
  // Fold a matched if-then-else region into curBlk by inlining both arms
  // between structured markers, transforming the region to:
  //
  //   if cond
  //     trueBlk
  //   else
  //     falseBlk
  //   endif
  //   landBlk
  //
  // Any of trueBlk/falseBlk/landBlk may be NULL when that part is absent.
  if (DEBUGME) {
    errs() << "ifPattern BB" << curBlk->getNumber();
    errs() << "{ ";
    if (trueBlk) {
      errs() << "BB" << trueBlk->getNumber();
    }
    errs() << " } else ";
    errs() << "{ ";
    if (falseBlk) {
      errs() << "BB" << falseBlk->getNumber();
    }
    errs() << " }\n ";
    errs() << "landBlock: ";
    if (landBlk == NULL) {
      errs() << "NULL";
    } else {
      errs() << "BB" << landBlk->getNumber();
    }
    errs() << "\n";
  }

  int oldOpcode = branchInstr->getOpcode();
  DebugLoc branchDL = branchInstr->getDebugLoc();

  // Replace the conditional branch with the structured branch-on-nonzero
  // form, then splice each arm in behind it.
  typename BlockT::iterator branchInstrPos =
    CFGTraits::getInstrPos(curBlk, branchInstr);
  CFGTraits::insertCondBranchBefore(branchInstrPos,
                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
                                    passRep,
                                    branchDL);

  if (trueBlk) {
    curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
    curBlk->removeSuccessor(trueBlk);
    // The absorbed arm must no longer appear to flow into the land block.
    if (landBlk && trueBlk->succ_size()!=0) {
      trueBlk->removeSuccessor(landBlk);
    }
    retireBlock(curBlk, trueBlk);
  }
  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);

  if (falseBlk) {
    curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
                   falseBlk->end());
    curBlk->removeSuccessor(falseBlk);
    if (landBlk && falseBlk->succ_size() != 0) {
      falseBlk->removeSuccessor(landBlk);
    }
    retireBlock(curBlk, falseBlk);
  }
  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);

  //curBlk->remove(branchInstrPos);
  branchInstr->eraseFromParent();

  // curBlk now flows directly into the land block.
  if (landBlk && trueBlk && falseBlk) {
    curBlk->addSuccessor(landBlk);
  }

} //mergeIfthenelseBlock
1592
1593template<class PassT>
1594void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1595 LoopLandInfo *loopLand) {
1596 BlockT *landBlk = loopLand->landBlk;
1597
1598 if (DEBUGME) {
1599 errs() << "loopPattern header = BB" << dstBlk->getNumber()
1600 << " land = BB" << landBlk->getNumber() << "\n";
1601 }
1602
1603 // Loop contInitRegs are init at the beginning of the loop.
1604 for (typename std::set<RegiT>::const_iterator iter =
1605 loopLand->contInitRegs.begin(),
1606 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1607 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1608 }
1609
1610 /* we last inserterd the DebugLoc in the
1611 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1612 * search for the DebugLoc in the that statement.
1613 * if not found, we have to insert the empty/default DebugLoc */
1614 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1615 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1616
1617 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1618 // Loop breakInitRegs are init before entering the loop.
1619 for (typename std::set<RegiT>::const_iterator iter =
1620 loopLand->breakInitRegs.begin(),
1621 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
1622 {
1623 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1624 }
1625 // Loop endbranchInitRegs are init before entering the loop.
1626 for (typename std::set<RegiT>::const_iterator iter =
1627 loopLand->endbranchInitRegs.begin(),
1628 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1629 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1630 }
1631
1632 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1633 * search for the DebugLoc in the continue statement.
1634 * if not found, we have to insert the empty/default DebugLoc */
1635 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1636 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1637
1638 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1639 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1640 // loop.
1641 for (typename std::set<RegiT>::const_iterator iter =
1642 loopLand->breakOnRegs.begin(),
1643 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1644 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
1645 *iter);
1646 }
1647
1648 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1649 // loop.
1650 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1651 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1652 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1653 passRep, *iter);
1654 }
1655
1656 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1657
1658 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1659 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1660 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1661 }
1662
1663 removeSuccessor(landBlk);
1664 retireBlock(dstBlk, landBlk);
1665} //mergeLooplandBlock
1666
template<class PassT>
void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
{
  // Walk backwards from I to the nearest preceding PRED_X instruction and
  // invert its predicate condition (immediate operand 2), then return.
  // NOTE(review): if no PRED_X precedes I, the loop keeps decrementing past
  // the block's first instruction; callers appear to guarantee a predicate
  // setter exists — confirm before reusing this elsewhere.
  while (I--) {
    if (I->getOpcode() == AMDGPU::PRED_X) {
      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
      case OPCODE_IS_ZERO_INT:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
        return;
      case OPCODE_IS_NOT_ZERO_INT:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
        return;
      case OPCODE_IS_ZERO:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
        return;
      case OPCODE_IS_NOT_ZERO:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
        return;
      default:
        assert(0 && "PRED_X Opcode invalid!");
      }
    }
  }
}
1691
1692template<class PassT>
1693void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1694 BlockT *exitBlk,
1695 BlockT *exitLandBlk,
1696 RegiT setReg) {
1697 if (DEBUGME) {
1698 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1699 << " exit = BB" << exitBlk->getNumber()
1700 << " land = BB" << exitLandBlk->getNumber() << "\n";
1701 }
1702
1703 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1704 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1705
1706 DebugLoc DL = branchInstr->getDebugLoc();
1707
1708 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1709 int oldOpcode = branchInstr->getOpcode();
1710
1711 // transform exitingBlk to
1712 // if ( ) {
1713 // exitBlk (if exitBlk != exitLandBlk)
1714 // setReg = 1
1715 // break
1716 // }endif
1717 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1718
1719 typename BlockT::iterator branchInstrPos =
1720 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1721
1722 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1723 //break_logical
1724
1725 if (trueBranch != exitBlk) {
1726 reversePredicateSetter(branchInstrPos);
1727 }
1728 int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1729 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1730 } else {
1731 if (trueBranch != exitBlk) {
1732 reversePredicateSetter(branchInstr);
1733 }
1734 int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1735 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1736 if (exitBlk != exitLandBlk) {
1737 //splice is insert-before ...
1738 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1739 exitBlk->end());
1740 }
1741 if (setReg != INVALIDREGNUM) {
1742 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1743 }
1744 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1745 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1746 } //if_logical
1747
1748 //now branchInst can be erase safely
1749 //exitingBlk->eraseFromParent(branchInstr);
1750 branchInstr->eraseFromParent();
1751
1752 //now take care of successors, retire blocks
1753 exitingBlk->removeSuccessor(exitBlk);
1754 if (exitBlk != exitLandBlk) {
1755 //splice is insert-before ...
1756 exitBlk->removeSuccessor(exitLandBlk);
1757 retireBlock(exitingBlk, exitBlk);
1758 }
1759
1760} //mergeLoopbreakBlock
1761
template<class PassT>
void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
                                                 BlockT *contBlk,
                                                 RegiT setReg) {
  // Rewrite the back edge from contingBlk to the loop header contBlk into
  // structured form:
  //
  //   if () {
  //     move instr after branchInstr
  //     continue
  //       or
  //     setReg = 1
  //     break
  //   } endif
  //   successor = {orgSuccessor(contingBlk) - loopHeader}
  //
  if (DEBUGME) {
    errs() << "settleLoopcontBlock conting = BB"
           << contingBlk->getNumber()
           << ", cont = BB" << contBlk->getNumber() << "\n";
  }

  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
  if (branchInstr) {
    assert(CFGTraits::isCondBranch(branchInstr));
    typename BlockT::iterator branchInstrPos =
      CFGTraits::getInstrPos(contingBlk, branchInstr);
    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
    int oldOpcode = branchInstr->getOpcode();
    DebugLoc DL = branchInstr->getDebugLoc();

    // A single CONTINUE_LOGICAL* can replace the branch only when no flag
    // register is involved and the branch is the block's last instruction.
    bool useContinueLogical =
      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);

    if (useContinueLogical == false)
    {
      // Guard the continue/break sequence with an if/endif whose polarity
      // matches the original branch.
      int branchOpcode =
        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
                              : CFGTraits::getBranchZeroOpcode(oldOpcode);

      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);

      if (setReg != INVALIDREGNUM) {
        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
      } else {
        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
      }

      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
    } else {
      int branchOpcode =
        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
                              : CFGTraits::getContinueZeroOpcode(oldOpcode);

      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
    }

    //contingBlk->eraseFromParent(branchInstr);
    branchInstr->eraseFromParent();
  } else {
    /* if we've arrived here then we've already erased the branch instruction
     * travel back up the basic block to see the last reference of our debug location
     * we've just inserted that reference here so it should be representative */
    if (setReg != INVALIDREGNUM) {
      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
    } else {
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
    }
  } //else

} //settleLoopcontBlock
1837
// The BBs in exitBlkSet have been determined to lie on the break path of
// loopRep. Before their code can be placed inside the loop body of loopRep,
// check whether any of them were earlier determined to be continue-BBs for
// parentLoopRep.
// If so, generate a new BB newBlk and:
// (1) make newBlk the common successor of the BBs in exitBlkSet,
// (2) change the continue-instr in the BBs in exitBlkSet to a break-instr,
// (3) generate a continue-instr in newBlk.
//
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
                                              LoopT *loopRep,
                                              std::set<BlockT *> &exitBlkSet,
                                              BlockT *exitLandBlk) {
  // Blocks that terminate the single path from an exit block toward
  // exitLandBlk and carry a continue-instr; they get a shared continue block.
  std::set<BlockT *> endBlkSet;

//  BlockT *parentLoopHead = parentLoopRep->getHeader();


  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
       iterEnd = exitBlkSet.end();
       iter != iterEnd; ++iter) {
    BlockT *exitBlk = *iter;
    // Last block on the single path exitBlk -> exitLandBlk (NULL if the path
    // is not single).
    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);

    // Relocation is only legal when every exit path ends in a block with a
    // continue-instr; otherwise report failure to the caller.
    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
      return NULL;

    endBlkSet.insert(endBlk);
  }

  // Create one new block holding a single CONTINUE that replaces the
  // individual continue-instrs found above.
  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
  funcRep->push_back(newBlk);  //insert to function
  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
  SHOWNEWBLK(newBlk, "New continue block: ");

  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
       iterEnd = endBlkSet.end();
       iter != iterEnd; ++iter) {
    BlockT *endBlk = *iter;
    // Remove the per-block continue-instr; newBlk now provides it.
    InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
    if (contInstr) {
      contInstr->eraseFromParent();
    }
    endBlk->addSuccessor(newBlk);
    if (DEBUGME) {
      errs() << "Add new continue Block to BB"
             << endBlk->getNumber() << " successors\n";
    }
  }

  return newBlk;
} //relocateLoopcontBlock
1892
1893
1894// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1895// LoopLandBlock. This BB branch on the loop endBranchInit register to the
1896// pathes corresponding to the loop exiting branches.
1897
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
                                              BlockTSmallerVector &exitingBlks,
                                              BlockTSmallerVector &exitBlks) {
  const AMDGPUInstrInfo *tii =
             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);

  // Virtual register that records which exiting path was taken; each exiting
  // block writes its own index into it below.
  RegiT endBranchReg = static_cast<int>
    (funcRep->getRegInfo().createVirtualRegister(I32RC));
  assert(endBranchReg >= 0);

  // reg = 0 before entering the loop
  addLoopEndbranchInitReg(loopRep, endBranchReg);

  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
  // Only meaningful with at least two exiting paths, each paired with its
  // exit block.
  assert(numBlks >=2 && numBlks == exitBlks.size());

  // Path 0: reroute exitingBlks[0] to the first dispatch block; no
  // endBranchReg assignment needed since 0 is the initialized value.
  BlockT *preExitingBlk = exitingBlks[0];
  BlockT *preExitBlk = exitBlks[0];
  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
  funcRep->push_back(preBranchBlk);  //insert to function
  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");

  // The first dispatch block doubles as the loop's new landing block.
  BlockT *newLandBlk = preBranchBlk;

  CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
                                        newLandBlk);
  preExitingBlk->removeSuccessor(preExitBlk);
  preExitingBlk->addSuccessor(newLandBlk);

  //it is redundant to add reg = 0 to exitingBlks[0]

  // For 1..n th exiting path (the last iteration handles two pathes) create the
  // branch to the previous path and the current path.
  for (uint32_t i = 1; i < numBlks; ++i) {
    BlockT *curExitingBlk = exitingBlks[i];
    BlockT *curExitBlk = exitBlks[i];
    BlockT *curBranchBlk;

    // The final dispatch target is the last exit block itself; intermediate
    // iterations chain through freshly created dispatch blocks.
    if (i == numBlks - 1) {
      curBranchBlk = curExitBlk;
    } else {
      curBranchBlk = funcRep->CreateMachineBasicBlock();
      funcRep->push_back(curBranchBlk);  //insert to function
      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
    }

    // Add reg = i to exitingBlks[i].
    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
                                       endBranchReg, i);

    // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
    // (exitingBlks[i], newLandBlk).
    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
                                          newLandBlk);
    curExitingBlk->removeSuccessor(curExitBlk);
    curExitingBlk->addSuccessor(newLandBlk);

    // add to preBranchBlk the branch instruction:
    // if (endBranchReg == preVal)
    //    preExitBlk
    // else
    //    curBranchBlk
    //
    // preValReg = i - 1

    DebugLoc DL;
    RegiT preValReg = static_cast<int>
      (funcRep->getRegInfo().createVirtualRegister(I32RC));

    preBranchBlk->insert(preBranchBlk->begin(),
                         tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
                                             i - 1));

    // condResReg = (endBranchReg == preValReg)
    RegiT condResReg = static_cast<int>
      (funcRep->getRegInfo().createVirtualRegister(I32RC));
    BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
      .addReg(endBranchReg).addReg(preValReg);

    BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
      .addMBB(preExitBlk).addReg(condResReg);

    preBranchBlk->addSuccessor(preExitBlk);
    preBranchBlk->addSuccessor(curBranchBlk);

    // Update preExitingBlk, preExitBlk, preBranchBlk.
    preExitingBlk = curExitingBlk;
    preExitBlk = curExitBlk;
    preBranchBlk = curBranchBlk;

  } //end for 1 .. n blocks

  return newLandBlk;
} //addLoopEndbranchBlock
1995
1996template<class PassT>
1997typename CFGStructurizer<PassT>::PathToKind
1998CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1999 bool allowSideEntry) {
2000 assert(dstBlk);
2001
2002 if (srcBlk == dstBlk) {
2003 return SinglePath_InPath;
2004 }
2005
2006 while (srcBlk && srcBlk->succ_size() == 1) {
2007 srcBlk = *srcBlk->succ_begin();
2008 if (srcBlk == dstBlk) {
2009 return SinglePath_InPath;
2010 }
2011
2012 if (!allowSideEntry && srcBlk->pred_size() > 1) {
2013 return Not_SinglePath;
2014 }
2015 }
2016
2017 if (srcBlk && srcBlk->succ_size()==0) {
2018 return SinglePath_NotInPath;
2019 }
2020
2021 return Not_SinglePath;
2022} //singlePathTo
2023
2024// If there is a single path from srcBlk to dstBlk, return the last block before
2025// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
2026// last block in the path Otherwise, return NULL
2027template<class PassT>
2028typename CFGStructurizer<PassT>::BlockT *
2029CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
2030 bool allowSideEntry) {
2031 assert(dstBlk);
2032
2033 if (srcBlk == dstBlk) {
2034 return srcBlk;
2035 }
2036
2037 if (srcBlk->succ_size() == 0) {
2038 return srcBlk;
2039 }
2040
2041 while (srcBlk && srcBlk->succ_size() == 1) {
2042 BlockT *preBlk = srcBlk;
2043
2044 srcBlk = *srcBlk->succ_begin();
2045 if (srcBlk == NULL) {
2046 return preBlk;
2047 }
2048
2049 if (!allowSideEntry && srcBlk->pred_size() > 1) {
2050 return NULL;
2051 }
2052 }
2053
2054 if (srcBlk && srcBlk->succ_size()==0) {
2055 return srcBlk;
2056 }
2057
2058 return NULL;
2059
2060} //singlePathEnd
2061
2062template<class PassT>
2063int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
2064 BlockT *dstBlk) {
2065 int cloned = 0;
2066 assert(preBlk->isSuccessor(srcBlk));
2067 while (srcBlk && srcBlk != dstBlk) {
2068 assert(srcBlk->succ_size() == 1);
2069 if (srcBlk->pred_size() > 1) {
2070 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
2071 ++cloned;
2072 }
2073
2074 preBlk = srcBlk;
2075 srcBlk = *srcBlk->succ_begin();
2076 }
2077
2078 return cloned;
2079} //cloneOnSideEntryTo
2080
2081template<class PassT>
2082typename CFGStructurizer<PassT>::BlockT *
2083CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
2084 BlockT *predBlk) {
2085 assert(predBlk->isSuccessor(curBlk) &&
2086 "succBlk is not a prececessor of curBlk");
2087
2088 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
2089 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
2090 //srcBlk, oldBlk, newBlk
2091
2092 predBlk->removeSuccessor(curBlk);
2093 predBlk->addSuccessor(cloneBlk);
2094
2095 // add all successor to cloneBlk
2096 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
2097
2098 numClonedInstr += curBlk->size();
2099
2100 if (DEBUGME) {
2101 errs() << "Cloned block: " << "BB"
2102 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
2103 }
2104
2105 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
2106
2107 return cloneBlk;
2108} //cloneBlockForPredecessor
2109
2110template<class PassT>
2111typename CFGStructurizer<PassT>::BlockT *
2112CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
2113 BlockT *exitingBlk) {
2114 BlockT *exitBlk = NULL;
2115
2116 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
2117 iterSuccEnd = exitingBlk->succ_end();
2118 iterSucc != iterSuccEnd; ++iterSucc) {
2119 BlockT *curBlk = *iterSucc;
2120 if (!loopRep->contains(curBlk)) {
2121 assert(exitBlk == NULL);
2122 exitBlk = curBlk;
2123 }
2124 }
2125
2126 assert(exitBlk != NULL);
2127
2128 return exitBlk;
2129} //exitingBlock2ExitBlock
2130
2131template<class PassT>
2132void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
2133 BlockT *dstBlk,
2134 InstrIterator insertPos) {
2135 InstrIterator spliceEnd;
2136 //look for the input branchinstr, not the AMDGPU branchinstr
2137 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2138 if (branchInstr == NULL) {
2139 if (DEBUGME) {
2140 errs() << "migrateInstruction don't see branch instr\n" ;
2141 }
2142 spliceEnd = srcBlk->end();
2143 } else {
2144 if (DEBUGME) {
2145 errs() << "migrateInstruction see branch instr\n" ;
2146 branchInstr->dump();
2147 }
2148 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
2149 }
2150 if (DEBUGME) {
2151 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
2152 << "srcSize = " << srcBlk->size() << "\n";
2153 }
2154
2155 //splice insert before insertPos
2156 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
2157
2158 if (DEBUGME) {
2159 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
2160 << "srcSize = " << srcBlk->size() << "\n";
2161 }
2162} //migrateInstruction
2163
2164// normalizeInfiniteLoopExit change
2165// B1:
2166// uncond_br LoopHeader
2167//
2168// to
2169// B1:
2170// cond_br 1 LoopHeader dummyExit
2171// and return the newly added dummy exit block
2172//
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
  BlockT *loopHeader;
  BlockT *loopLatch;
  loopHeader = LoopRep->getHeader();
  loopLatch = LoopRep->getLoopLatch();
  // Returned block; stays NULL if no rewrite was performed.
  BlockT *dummyExitBlk = NULL;
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (loopHeader!=NULL && loopLatch!=NULL) {
    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
    // Only rewrite when the latch ends in an unconditional back-branch,
    // i.e. the loop has no exit at all.
    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
      dummyExitBlk = funcRep->CreateMachineBasicBlock();
      funcRep->push_back(dummyExitBlk);  //insert to function
      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");

      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";

      typename BlockT::iterator insertPos =
        CFGTraits::getInstrPos(loopLatch, branchInstr);
      // Materialize the constant 1 so the new branch is trivially taken:
      // "cond_br 1 loopHeader" replaces "uncond_br loopHeader".
      unsigned immReg =
        funcRep->getRegInfo().createVirtualRegister(I32RC);
      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
      InstrT *newInstr =
        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
      MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);

      SHOWNEWINSTR(newInstr);

      branchInstr->eraseFromParent();
      // The dummy block becomes the (never taken) false target.
      loopLatch->addSuccessor(dummyExitBlk);
    }
  }

  return dummyExitBlk;
} //normalizeInfiniteLoopExit
2209
2210template<class PassT>
2211void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2212 InstrT *branchInstr;
2213
2214 // I saw two unconditional branch in one basic block in example
2215 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2216 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2217 && CFGTraits::isUncondBranch(branchInstr)) {
2218 if (DEBUGME) {
2219 errs() << "Removing unconditional branch instruction" ;
2220 branchInstr->dump();
2221 }
2222 branchInstr->eraseFromParent();
2223 }
2224} //removeUnconditionalBranch
2225
2226template<class PassT>
2227void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2228 if (srcBlk->succ_size() == 2) {
2229 BlockT *blk1 = *srcBlk->succ_begin();
2230 BlockT *blk2 = *(++srcBlk->succ_begin());
2231
2232 if (blk1 == blk2) {
2233 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2234 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2235 if (DEBUGME) {
2236 errs() << "Removing unneeded conditional branch instruction" ;
2237 branchInstr->dump();
2238 }
2239 branchInstr->eraseFromParent();
2240 SHOWNEWBLK(blk1, "Removing redundant successor");
2241 srcBlk->removeSuccessor(blk1);
2242 }
2243 }
2244} //removeRedundantConditionalBranch
2245
template<class PassT>
void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
                                               DEFAULT_VEC_SLOTS> &retBlks) {
  // Create one RETURN-only block and funnel every returning block into it,
  // so the function has a single exit for structurization.
  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
  funcRep->push_back(dummyExitBlk);  //insert to function
  CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);

  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
         retBlks.begin(),
       iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
    BlockT *curBlk = *iter;
    // Each block's own return is removed; the dummy block returns instead.
    InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
    if (curInstr) {
      curInstr->eraseFromParent();
    }
#if 0
    if (curBlk->size()==0 && curBlk->pred_size() == 1) {
      if (DEBUGME) {
        errs() << "Replace empty block BB" <<  curBlk->getNumber()
               << " with dummyExitBlock\n";
      }
      BlockT *predb = *curBlk->pred_begin();
      predb->removeSuccessor(curBlk);
      curBlk = predb;
    } //handle empty curBlk
#endif
    curBlk->addSuccessor(dummyExitBlk);
    if (DEBUGME) {
      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
             << " successors\n";
    }
  } //for

  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
} //addDummyExitBlock
2281
2282template<class PassT>
2283void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2284 while (srcBlk->succ_size()) {
2285 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2286 }
2287}
2288
2289template<class PassT>
2290void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2291 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2292
2293 if (srcBlkInfo == NULL) {
2294 srcBlkInfo = new BlockInfo();
2295 }
2296
2297 srcBlkInfo->sccNum = sccNum;
2298}
2299
2300template<class PassT>
2301int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2302 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2303 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2304}
2305
2306template<class PassT>
2307void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2308 if (DEBUGME) {
2309 errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2310 }
2311
2312 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2313
2314 if (srcBlkInfo == NULL) {
2315 srcBlkInfo = new BlockInfo();
2316 }
2317
2318 srcBlkInfo->isRetired = true;
2319 //int i = srcBlk->succ_size();
2320 //int j = srcBlk->pred_size();
2321 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2322 && "can't retire block yet");
2323}
2324
2325template<class PassT>
2326bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2327 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2328 return (srcBlkInfo && srcBlkInfo->isRetired);
2329}
2330
2331template<class PassT>
2332bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2333 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2334 while (loopRep && loopRep->getHeader() == curBlk) {
2335 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2336
2337 if(loopLand == NULL)
2338 return true;
2339
2340 BlockT *landBlk = loopLand->landBlk;
2341 assert(landBlk);
2342 if (!isRetiredBlock(landBlk)) {
2343 return true;
2344 }
2345
2346 loopRep = loopRep->getParentLoop();
2347 }
2348
2349 return false;
2350} //isActiveLoophead
2351
2352template<class PassT>
2353bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2354 const unsigned blockSizeThreshold = 30;
2355 const unsigned cloneInstrThreshold = 100;
2356
2357 bool multiplePreds = blk && (blk->pred_size() > 1);
2358
2359 if(!multiplePreds)
2360 return false;
2361
2362 unsigned blkSize = blk->size();
2363 return ((blkSize > blockSizeThreshold)
2364 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2365} //needMigrateBlock
2366
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
                                            BlockTSmallerVector &exitBlks,
                                            std::set<BlockT *> &exitBlkSet) {
  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks

  // Collect landBlk's predecessors that belong to the loop or its exit set.
  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
       predIterEnd = landBlk->pred_end();
       predIter != predIterEnd; ++predIter) {
    BlockT *curBlk = *predIter;
    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
      inpathBlks.push_back(curBlk);
    }
  } //for

  //if landBlk has predecessors that are not in the given loop,
  //create a new block
  BlockT *newLandBlk = landBlk;
  if (inpathBlks.size() != landBlk->pred_size()) {
    newLandBlk = funcRep->CreateMachineBasicBlock();
    funcRep->push_back(newLandBlk);  //insert to function
    newLandBlk->addSuccessor(landBlk);
    // Redirect only the in-loop/exit-path predecessors to the new block;
    // outside predecessors keep their edge to the original landBlk.
    for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
         inpathBlks.begin(),
         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
      BlockT *curBlk = *iter;
      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
      //srcBlk, oldBlk, newBlk
      curBlk->removeSuccessor(landBlk);
      curBlk->addSuccessor(newLandBlk);
    }
    // Keep the exit-block list consistent with the rewired CFG.
    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
      if (exitBlks[i] == landBlk) {
        exitBlks[i] = newLandBlk;
      }
    }
    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
  }

  setLoopLandBlock(loopRep, newLandBlk);

  return newLandBlk;
} // recordLoopbreakLand
2411
2412template<class PassT>
2413void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2414 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2415
2416 if (theEntry == NULL) {
2417 theEntry = new LoopLandInfo();
2418 }
2419 assert(theEntry->landBlk == NULL);
2420
2421 if (blk == NULL) {
2422 blk = funcRep->CreateMachineBasicBlock();
2423 funcRep->push_back(blk); //insert to function
2424 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2425 }
2426
2427 theEntry->landBlk = blk;
2428
2429 if (DEBUGME) {
2430 errs() << "setLoopLandBlock loop-header = BB"
2431 << loopRep->getHeader()->getNumber()
2432 << " landing-block = BB" << blk->getNumber() << "\n";
2433 }
2434} // setLoopLandBlock
2435
2436template<class PassT>
2437void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2438 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2439
2440 if (theEntry == NULL) {
2441 theEntry = new LoopLandInfo();
2442 }
2443
2444 theEntry->breakOnRegs.insert(regNum);
2445
2446 if (DEBUGME) {
2447 errs() << "addLoopBreakOnReg loop-header = BB"
2448 << loopRep->getHeader()->getNumber()
2449 << " regNum = " << regNum << "\n";
2450 }
2451} // addLoopBreakOnReg
2452
2453template<class PassT>
2454void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2455 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2456
2457 if (theEntry == NULL) {
2458 theEntry = new LoopLandInfo();
2459 }
2460 theEntry->contOnRegs.insert(regNum);
2461
2462 if (DEBUGME) {
2463 errs() << "addLoopContOnReg loop-header = BB"
2464 << loopRep->getHeader()->getNumber()
2465 << " regNum = " << regNum << "\n";
2466 }
2467} // addLoopContOnReg
2468
2469template<class PassT>
2470void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2471 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2472
2473 if (theEntry == NULL) {
2474 theEntry = new LoopLandInfo();
2475 }
2476 theEntry->breakInitRegs.insert(regNum);
2477
2478 if (DEBUGME) {
2479 errs() << "addLoopBreakInitReg loop-header = BB"
2480 << loopRep->getHeader()->getNumber()
2481 << " regNum = " << regNum << "\n";
2482 }
2483} // addLoopBreakInitReg
2484
2485template<class PassT>
2486void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2487 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2488
2489 if (theEntry == NULL) {
2490 theEntry = new LoopLandInfo();
2491 }
2492 theEntry->contInitRegs.insert(regNum);
2493
2494 if (DEBUGME) {
2495 errs() << "addLoopContInitReg loop-header = BB"
2496 << loopRep->getHeader()->getNumber()
2497 << " regNum = " << regNum << "\n";
2498 }
2499} // addLoopContInitReg
2500
2501template<class PassT>
2502void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2503 RegiT regNum) {
2504 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2505
2506 if (theEntry == NULL) {
2507 theEntry = new LoopLandInfo();
2508 }
2509 theEntry->endbranchInitRegs.insert(regNum);
2510
2511 if (DEBUGME)
2512 {
2513 errs() << "addLoopEndbranchInitReg loop-header = BB"
2514 << loopRep->getHeader()->getNumber()
2515 << " regNum = " << regNum << "\n";
2516 }
2517} // addLoopEndbranchInitReg
2518
2519template<class PassT>
2520typename CFGStructurizer<PassT>::LoopLandInfo *
2521CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2522 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2523
2524 return theEntry;
2525} // getLoopLandInfo
2526
2527template<class PassT>
2528typename CFGStructurizer<PassT>::BlockT *
2529CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2530 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2531
2532 return theEntry ? theEntry->landBlk : NULL;
2533} // getLoopLandBlock
2534
2535
2536template<class PassT>
2537bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2538 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2539 if (loopRep == NULL)
2540 return false;
2541
2542 BlockT *loopHeader = loopRep->getHeader();
2543
2544 return curBlk->isSuccessor(loopHeader);
2545
2546} //hasBackEdge
2547
2548template<class PassT>
2549unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2550 return loopRep ? loopRep->getLoopDepth() : 0;
2551} //getLoopDepth
2552
2553template<class PassT>
2554int CFGStructurizer<PassT>::countActiveBlock
2555(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2556 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2557 int count = 0;
2558 while (iterStart != iterEnd) {
2559 if (!isRetiredBlock(*iterStart)) {
2560 ++count;
2561 }
2562 ++iterStart;
2563 }
2564
2565 return count;
2566} //countActiveBlock
2567
2568// This is work around solution for findNearestCommonDominator not avaiable to
2569// post dom a proper fix should go to Dominators.h.
2570
template<class PassT>
typename CFGStructurizer<PassT>::BlockT*
CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {

  // Fast path: one block already post-dominates the other.
  if (postDomTree->dominates(blk1, blk2)) {
    return blk1;
  }
  if (postDomTree->dominates(blk2, blk1)) {
    return blk2;
  }

  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
  DomTreeNodeT *node2 = postDomTree->getNode(blk2);

  // Handle newly cloned node: a block created after the post-dom tree was
  // built has no node; fall through to its sole successor instead.
  if (node1 == NULL && blk1->succ_size() == 1) {
    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
  }
  if (node2 == NULL && blk2->succ_size() == 1) {
    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
  }

  if (node1 == NULL || node2 == NULL) {
    return NULL;
  }

  // Walk up blk1's immediate post-dominators until one also post-dominates
  // blk2; that ancestor is the nearest common post-dominator.
  node1 = node1->getIDom();
  while (node1) {
    if (postDomTree->dominates(node1, node2)) {
      return node1->getBlock();
    }
    node1 = node1->getIDom();
  }

  return NULL;
}
2607
2608template<class PassT>
2609typename CFGStructurizer<PassT>::BlockT *
2610CFGStructurizer<PassT>::findNearestCommonPostDom
2611(typename std::set<BlockT *> &blks) {
2612 BlockT *commonDom;
2613 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2614 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2615 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2616 BlockT *curBlk = *iter;
2617 if (curBlk != commonDom) {
2618 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2619 }
2620 }
2621
2622 if (DEBUGME) {
2623 errs() << "Common post dominator for exit blocks is ";
2624 if (commonDom) {
2625 errs() << "BB" << commonDom->getNumber() << "\n";
2626 } else {
2627 errs() << "NULL\n";
2628 }
2629 }
2630
2631 return commonDom;
2632} //findNearestCommonPostDom
2633
2634} //end namespace llvm
2635
2636//todo: move-end
2637
2638
2639//===----------------------------------------------------------------------===//
2640//
2641// CFGStructurizer for AMDGPU
2642//
2643//===----------------------------------------------------------------------===//
2644
2645
2646using namespace llvmCFGStruct;
2647
2648namespace llvm
2649{
// Common base for the CFG prepare/perform passes: binds the CFGStructurizer
// template's type parameters to the Machine* IR classes and caches the
// target instruction/register info.
class AMDGPUCFGStructurizer : public MachineFunctionPass
{
public:
  typedef MachineInstr              InstructionType;
  typedef MachineFunction           FunctionType;
  typedef MachineBasicBlock         BlockType;
  typedef MachineLoopInfo           LoopinfoType;
  typedef MachineDominatorTree      DominatortreeType;
  typedef MachinePostDominatorTree  PostDominatortreeType;
  typedef MachineDomTreeNode        DomTreeNodeType;
  typedef MachineLoop               LoopType;

protected:
  TargetMachine &TM;               // owning target machine
  const TargetInstrInfo *TII;      // cached from TM in the constructor
  const AMDGPURegisterInfo *TRI;   // cached from TM in the constructor

public:
  AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
  const TargetInstrInfo *getTargetInstrInfo() const;
  //bool runOnMachineFunction(MachineFunction &F);

private:

}; //end of class AMDGPUCFGStructurizer
2675
2676//char AMDGPUCFGStructurizer::ID = 0;
2677} //end of namespace llvm
2678AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm
2679 )
2680: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2681 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
2682 ) {
2683}
2684
2685const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2686 return TII;
2687}
2688//===----------------------------------------------------------------------===//
2689//
2690// CFGPrepare
2691//
2692//===----------------------------------------------------------------------===//
2693
2694
2695using namespace llvmCFGStruct;
2696
2697namespace llvm
2698{
// First of the two structurizer passes; runs the CFG preparation phase.
class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
{
public:
  static char ID;   // pass identification

public:
  AMDGPUCFGPrepare(TargetMachine &tm);

  virtual const char *getPassName() const;
  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  bool runOnMachineFunction(MachineFunction &F);

private:

}; //end of class AMDGPUCFGPrepare
2715
2716char AMDGPUCFGPrepare::ID = 0;
2717} //end of namespace llvm
2718
2719AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2720 : AMDGPUCFGStructurizer(ID, tm )
2721{
2722}
2723const char *AMDGPUCFGPrepare::getPassName() const {
2724 return "AMD IL Control Flow Graph Preparation Pass";
2725}
2726
2727void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2728 AU.addPreserved<MachineFunctionAnalysis>();
2729 AU.addRequired<MachineFunctionAnalysis>();
2730 AU.addRequired<MachineDominatorTree>();
2731 AU.addRequired<MachinePostDominatorTree>();
2732 AU.addRequired<MachineLoopInfo>();
2733}
2734
2735//===----------------------------------------------------------------------===//
2736//
2737// CFGPerform
2738//
2739//===----------------------------------------------------------------------===//
2740
2741
2742using namespace llvmCFGStruct;
2743
2744namespace llvm
2745{
// Second of the two structurizer passes; performs the actual CFG
// structurization after AMDGPUCFGPrepare has run.
class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
{
public:
  static char ID;   // pass identification

public:
  AMDGPUCFGPerform(TargetMachine &tm);
  virtual const char *getPassName() const;
  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  bool runOnMachineFunction(MachineFunction &F);

private:

}; //end of class AMDGPUCFGPerform
2760
2761char AMDGPUCFGPerform::ID = 0;
2762} //end of namespace llvm
2763
2764 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2765: AMDGPUCFGStructurizer(ID, tm)
2766{
2767}
2768
2769const char *AMDGPUCFGPerform::getPassName() const {
2770 return "AMD IL Control Flow Graph structurizer Pass";
2771}
2772
2773void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2774 AU.addPreserved<MachineFunctionAnalysis>();
2775 AU.addRequired<MachineFunctionAnalysis>();
2776 AU.addRequired<MachineDominatorTree>();
2777 AU.addRequired<MachinePostDominatorTree>();
2778 AU.addRequired<MachineLoopInfo>();
2779}
2780
2781//===----------------------------------------------------------------------===//
2782//
2783// CFGStructTraits<AMDGPUCFGStructurizer>
2784//
2785//===----------------------------------------------------------------------===//
2786
2787namespace llvmCFGStruct
2788{
2789// this class is tailor to the AMDGPU backend
2790template<>
2791struct CFGStructTraits<AMDGPUCFGStructurizer>
2792{
2793 typedef int RegiT;
2794
2795 static int getBreakNzeroOpcode(int oldOpcode) {
2796 switch(oldOpcode) {
2797 case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
2798 default:
2799 assert(0 && "internal error");
2800 };
2801 return -1;
2802 }
2803
2804 static int getBreakZeroOpcode(int oldOpcode) {
2805 switch(oldOpcode) {
2806 case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
2807 default:
2808 assert(0 && "internal error");
2809 };
2810 return -1;
2811 }
2812
  // Map a branch opcode to its structured "if non-zero" counterpart.
  // ExpandCaseToAllScalarReturn expands to case labels for every scalar
  // BRANCH_COND variant, each returning the matching IF_LOGICALNZ opcode.
  static int getBranchNzeroOpcode(int oldOpcode) {
    switch(oldOpcode) {
    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
    ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
    case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
    default:
      assert(0 && "internal error");
    };
    return -1;
  }
2823
  // Map a branch opcode to its structured "if zero" counterpart.
  // ExpandCaseToAllScalarReturn expands to case labels for every scalar
  // BRANCH_COND variant, each returning the matching IF_LOGICALZ opcode.
  static int getBranchZeroOpcode(int oldOpcode) {
    switch(oldOpcode) {
    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
    ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
    case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
    default:
      assert(0 && "internal error");
    };
    return -1;
  }
2834
2835 static int getContinueNzeroOpcode(int oldOpcode)
2836 {
2837 switch(oldOpcode) {
2838 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2839 default:
2840 assert(0 && "internal error");
2841 };
2842 return -1;
2843 }
2844
2845 static int getContinueZeroOpcode(int oldOpcode) {
2846 switch(oldOpcode) {
2847 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2848 default:
2849 assert(0 && "internal error");
2850 };
2851 return -1;
2852 }
2853
2854// the explicitly represented branch target is the true branch target
2855#define getExplicitBranch getTrueBranch
2856#define setExplicitBranch setTrueBranch
2857
2858 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2859 return instr->getOperand(0).getMBB();
2860 }
2861
2862 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2863 instr->getOperand(0).setMBB(blk);
2864 }
2865
2866 static MachineBasicBlock *
2867 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2868 assert(blk->succ_size() == 2);
2869 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2870 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2871 MachineBasicBlock::succ_iterator iterNext = iter;
2872 ++iterNext;
2873
2874 return (*iter == trueBranch) ? *iterNext : *iter;
2875 }
2876
  // True for conditional branches: a predicated JUMP (non-zero predicate
  // register), any scalar BRANCH_COND variant (macro expands to their case
  // labels), or the SI if-opcodes.
  static bool isCondBranch(MachineInstr *instr) {
    switch (instr->getOpcode()) {
      case AMDGPU::JUMP:
        // JUMP is conditional only when its predicate operand is set.
        return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
      ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
      case AMDGPU::SI_IF_NZ:
      case AMDGPU::SI_IF_Z:
      break;
    default:
      return false;
    }
    return true;
  }
2890
2891 static bool isUncondBranch(MachineInstr *instr) {
2892 switch (instr->getOpcode()) {
2893 case AMDGPU::JUMP:
2894 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
2895 case AMDGPU::BRANCH:
2896 return true;
2897 default:
2898 return false;
2899 }
2900 return true;
2901 }
2902
// Return the DebugLoc of the LAST instruction in the block that carries
// debug info (each match overwrites DL, so the final assignment wins);
// the returned DebugLoc stays unknown when no instruction has one.
// NOTE(review): the old comment claimed "first", contradicting both the
// loop and the function name — corrected here.
static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
  DebugLoc DL;
  for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
    MachineInstr *instr = &(*iter);
    if (instr->getDebugLoc().isUnknown() == false) {
      DL = instr->getDebugLoc();
    }
  }
  return DL;
}
2914
2915 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2916 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2917 MachineInstr *instr = &*iter;
2918 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2919 return instr;
2920 }
2921 return NULL;
2922 }
2923
// The correct naming for this is getPossibleLoopendBlockBranchInstr.
//
// BB with backward-edge could have move instructions after the branch
// instruction. Such move instruction "belong to" the loop backward-edge.
//
// Scan the block backwards and return the first branch (conditional or
// unconditional) encountered; stop at the first instruction that is
// neither a branch nor a move, since only moves may legitimately follow
// a loop-end branch. Returns NULL when no such branch exists.
static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
  const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
                                  blk->getParent()->getTarget().getInstrInfo());

  for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
       iterEnd = blk->rend(); iter != iterEnd; ++iter) {
    // FIXME: Simplify
    MachineInstr *instr = &*iter;
    if (instr) {
      if (isCondBranch(instr) || isUncondBranch(instr)) {
        return instr;
      } else if (!TII->isMov(instr->getOpcode())) {
        break;  // non-move blocks any earlier branch from being the loop end
      }
    }
  }
  return NULL;
}
2947
2948 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2949 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2950 if (iter != blk->rend()) {
2951 MachineInstr *instr = &(*iter);
2952 if (instr->getOpcode() == AMDGPU::RETURN) {
2953 return instr;
2954 }
2955 }
2956 return NULL;
2957 }
2958
2959 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2960 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2961 if (iter != blk->rend()) {
2962 MachineInstr *instr = &(*iter);
2963 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2964 return instr;
2965 }
2966 }
2967 return NULL;
2968 }
2969
2970 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2971 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2972 MachineInstr *instr = &(*iter);
2973 if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
2974 return instr;
2975 }
2976 }
2977 return NULL;
2978 }
2979
2980 static bool isReturnBlock(MachineBasicBlock *blk) {
2981 MachineInstr *instr = getReturnInstr(blk);
2982 bool isReturn = (blk->succ_size() == 0);
2983 if (instr) {
2984 assert(isReturn);
2985 } else if (isReturn) {
2986 if (DEBUGME) {
2987 errs() << "BB" << blk->getNumber()
2988 <<" is return block without RETURN instr\n";
2989 }
2990 }
2991
2992 return isReturn;
2993 }
2994
2995 static MachineBasicBlock::iterator
2996 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2997 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2998 MachineBasicBlock::iterator iter = blk->begin();
2999 MachineBasicBlock::iterator iterEnd = blk->end();
3000 while (&(*iter) != instr && iter != iterEnd) {
3001 ++iter;
3002 }
3003
3004 assert(iter != iterEnd);
3005 return iter;
3006 }//getInstrPos
3007
3008 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3009 AMDGPUCFGStructurizer *passRep) {
3010 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
3011 } //insertInstrBefore
3012
3013 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3014 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3015 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3016 MachineInstr *newInstr =
3017 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3018
3019 MachineBasicBlock::iterator res;
3020 if (blk->begin() != blk->end()) {
3021 blk->insert(blk->begin(), newInstr);
3022 } else {
3023 blk->push_back(newInstr);
3024 }
3025
3026 SHOWNEWINSTR(newInstr);
3027
3028 return newInstr;
3029 } //insertInstrBefore
3030
3031 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3032 AMDGPUCFGStructurizer *passRep) {
3033 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
3034 } //insertInstrEnd
3035
3036 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3037 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3038 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3039 MachineInstr *newInstr = blk->getParent()
3040 ->CreateMachineInstr(tii->get(newOpcode), DL);
3041
3042 blk->push_back(newInstr);
3043 //assume the instruction doesn't take any reg operand ...
3044
3045 SHOWNEWINSTR(newInstr);
3046 } //insertInstrEnd
3047
3048 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
3049 int newOpcode,
3050 AMDGPUCFGStructurizer *passRep) {
3051 MachineInstr *oldInstr = &(*instrPos);
3052 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3053 MachineBasicBlock *blk = oldInstr->getParent();
3054 MachineInstr *newInstr =
3055 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3056 DebugLoc());
3057
3058 blk->insert(instrPos, newInstr);
3059 //assume the instruction doesn't take any reg operand ...
3060
3061 SHOWNEWINSTR(newInstr);
3062 return newInstr;
3063 } //insertInstrBefore
3064
// Insert a conditional branch of newOpcode ahead of instrPos, copying
// the condition register from oldInstr's operand 1. oldInstr is left in
// place; the caller erases it afterwards (see trailing note).
static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
                                   int newOpcode,
                                   AMDGPUCFGStructurizer *passRep,
                                   DebugLoc DL) {
  MachineInstr *oldInstr = &(*instrPos);
  const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
  MachineBasicBlock *blk = oldInstr->getParent();
  MachineInstr *newInstr =
    blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
                                         DL);

  blk->insert(instrPos, newInstr);
  // Condition register: operand 1 of the instruction being replaced.
  MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
                                      false);

  SHOWNEWINSTR(newInstr);
  //erase later oldInstr->eraseFromParent();
} //insertCondBranchBefore
3083
3084 static void insertCondBranchBefore(MachineBasicBlock *blk,
3085 MachineBasicBlock::iterator insertPos,
3086 int newOpcode,
3087 AMDGPUCFGStructurizer *passRep,
3088 RegiT regNum,
3089 DebugLoc DL) {
3090 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3091
3092 MachineInstr *newInstr =
3093 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3094
3095 //insert before
3096 blk->insert(insertPos, newInstr);
3097 MachineInstrBuilder(newInstr).addReg(regNum, false);
3098
3099 SHOWNEWINSTR(newInstr);
3100 } //insertCondBranchBefore
3101
3102 static void insertCondBranchEnd(MachineBasicBlock *blk,
3103 int newOpcode,
3104 AMDGPUCFGStructurizer *passRep,
3105 RegiT regNum) {
3106 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3107 MachineInstr *newInstr =
3108 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
3109
3110 blk->push_back(newInstr);
3111 MachineInstrBuilder(newInstr).addReg(regNum, false);
3112
3113 SHOWNEWINSTR(newInstr);
3114 } //insertCondBranchEnd
3115
3116
3117 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
3118 AMDGPUCFGStructurizer *passRep,
3119 RegiT regNum, int regVal) {
3120 MachineInstr *oldInstr = &(*instrPos);
3121 const AMDGPUInstrInfo *tii =
3122 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3123 MachineBasicBlock *blk = oldInstr->getParent();
3124 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3125 regVal);
3126 blk->insert(instrPos, newInstr);
3127
3128 SHOWNEWINSTR(newInstr);
3129 } //insertAssignInstrBefore
3130
3131 static void insertAssignInstrBefore(MachineBasicBlock *blk,
3132 AMDGPUCFGStructurizer *passRep,
3133 RegiT regNum, int regVal) {
3134 const AMDGPUInstrInfo *tii =
3135 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3136
3137 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3138 regVal);
3139 if (blk->begin() != blk->end()) {
3140 blk->insert(blk->begin(), newInstr);
3141 } else {
3142 blk->push_back(newInstr);
3143 }
3144
3145 SHOWNEWINSTR(newInstr);
3146
3147 } //insertInstrBefore
3148
// Emit "dstReg = (src1Reg == src2Reg)" — the target's integer-equality
// compare — directly ahead of instrPos.
static void insertCompareInstrBefore(MachineBasicBlock *blk,
                                     MachineBasicBlock::iterator instrPos,
                                     AMDGPUCFGStructurizer *passRep,
                                     RegiT dstReg, RegiT src1Reg,
                                     RegiT src2Reg) {
  const AMDGPUInstrInfo *tii =
    static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
  MachineInstr *newInstr =
    blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());

  MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
  MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
  MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value

  blk->insert(instrPos, newInstr);
  SHOWNEWINSTR(newInstr);

} //insertCompareInstrBefore
3167
3168 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
3169 MachineBasicBlock *srcBlk) {
3170 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
3171 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
3172 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
3173 }
3174 } //cloneSuccessorList
3175
// Deep-copy srcBlk: create a fresh block in the same function and clone
// every instruction into it. CFG edges are NOT copied — callers pair
// this with cloneSuccessorList when needed.
static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
  MachineFunction *func = srcBlk->getParent();
  MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
  func->push_back(newBlk);  //insert to function
  //newBlk->setNumber(srcBlk->getNumber());
  for (MachineBasicBlock::iterator iter = srcBlk->begin(),
       iterEnd = srcBlk->end();
       iter != iterEnd; ++iter) {
    // Relies on the iterator's implicit conversion to MachineInstr*.
    MachineInstr *instr = func->CloneMachineInstr(iter);
    newBlk->push_back(instr);
  }
  return newBlk;
}
3189
//MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
//the AMDGPU instruction is not recognized as terminator fix this and retire
//this routine
//
// If srcBlk's trailing conditional branch explicitly targets oldBlk,
// redirect it to newBlk. Fallthrough (implicit) edges are untouched.
static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
                                       MachineBasicBlock *oldBlk,
                                       MachineBasicBlock *newBlk) {
  MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
  if (branchInstr && isCondBranch(branchInstr) &&
      getExplicitBranch(branchInstr) == oldBlk) {
    setExplicitBranch(branchInstr, newBlk);
  }
}
3202
// Final post-structurization cleanup on the (merged) entry block:
//  * jump tables are unsupported — assert none exist;
//  * delete every CONTINUE immediately preceding an ENDLOOP, since
//    control falling through to ENDLOOP continues the loop implicitly.
static void wrapup(MachineBasicBlock *entryBlk) {
  assert((!entryBlk->getParent()->getJumpTableInfo()
          || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
         && "found a jump table");

  //collect continue right before endloop
  SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
  MachineBasicBlock::iterator pre = entryBlk->begin();
  MachineBasicBlock::iterator iterEnd = entryBlk->end();
  // `pre` trails `iter` by exactly one instruction (they start equal, so
  // the first iteration can never match both tests at once).
  MachineBasicBlock::iterator iter = pre;
  while (iter != iterEnd) {
    if (pre->getOpcode() == AMDGPU::CONTINUE
        && iter->getOpcode() == AMDGPU::ENDLOOP) {
      contInstr.push_back(pre);
    }
    pre = iter;
    ++iter;
  } //end while

  //delete continue right before endloop
  for (unsigned i = 0; i < contInstr.size(); ++i) {
    contInstr[i]->eraseFromParent();
  }

  // TODO to fix up jump table so later phase won't be confused. if
  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
  // there isn't such an interface yet. alternatively, replace all the other
  // blocks in the jump table with the entryBlk //}

} //wrapup
3233
// Fetch the dominator-tree analysis cached on the pass.
static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
  return &pass.getAnalysis<MachineDominatorTree>();
}

// Fetch the post-dominator-tree analysis cached on the pass.
static MachinePostDominatorTree*
getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
  return &pass.getAnalysis<MachinePostDominatorTree>();
}

// Fetch the loop-info analysis cached on the pass.
static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
  return &pass.getAnalysis<MachineLoopInfo>();
}
3246}; // template class CFGStructTraits
3247} //end of namespace llvm
3248
3249// createAMDGPUCFGPreparationPass- Returns a pass
3250FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3251 ) {
3252 return new AMDGPUCFGPrepare(tm );
3253}
3254
3255bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3256 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3257 *this,
3258 TRI);
3259}
3260
3261// createAMDGPUCFGStructurizerPass- Returns a pass
3262FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3263 ) {
3264 return new AMDGPUCFGPerform(tm );
3265}
3266
3267bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3268 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
3269 *this,
3270 TRI);
3271}
3272
3273//end of file newline goes below
3274
diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp
deleted file mode 100644
index 3955828ec31..00000000000
--- a/src/gallium/drivers/radeon/AMDILDevice.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
1//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#include "AMDILDevice.h"
10#include "AMDGPUSubtarget.h"
11
12using namespace llvm;
// Default implementation for all of the classes.
//
// Size both capability bit-vectors, then populate defaults via setCaps().
// NOTE: virtual dispatch inside a constructor resolves to THIS class's
// setCaps(); derived generations (e.g. AMDGPUEvergreenDevice) call their
// own setCaps() again in their constructors and overwrite mDeviceFlag.
AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST)
{
  mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
  mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
  setCaps();
  mDeviceFlag = OCL_DEVICE_ALL;
}

AMDGPUDevice::~AMDGPUDevice()
{
  // Explicitly releasing BitVector storage is redundant but harmless.
  mHWBits.clear();
  mSWBits.clear();
}
27
28size_t AMDGPUDevice::getMaxGDSSize() const
29{
30 return 0;
31}
32
33uint32_t
34AMDGPUDevice::getDeviceFlag() const
35{
36 return mDeviceFlag;
37}
38
39size_t AMDGPUDevice::getMaxNumCBs() const
40{
41 if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
42 return HW_MAX_NUM_CB;
43 }
44
45 return 0;
46}
47
48size_t AMDGPUDevice::getMaxCBSize() const
49{
50 if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
51 return MAX_CB_SIZE;
52 }
53
54 return 0;
55}
56
57size_t AMDGPUDevice::getMaxScratchSize() const
58{
59 return 65536;
60}
61
62uint32_t AMDGPUDevice::getStackAlignment() const
63{
64 return 16;
65}
66
// Populate the default capability bits. Software emulation is the
// baseline for most scalar-type and LDS features; constant and private
// memory default to hardware unless the Debug override forces them onto
// the software path. Subtarget overrides enable the remaining flags.
void AMDGPUDevice::setCaps()
{
  mSWBits.set(AMDGPUDeviceInfo::HalfOps);
  mSWBits.set(AMDGPUDeviceInfo::ByteOps);
  mSWBits.set(AMDGPUDeviceInfo::ShortOps);
  mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
  if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
    mSWBits.set(AMDGPUDeviceInfo::NoInline);
  }
  if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
    mSWBits.set(AMDGPUDeviceInfo::MacroDB);
  }
  // Debug mode routes constant memory through software.
  if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
    mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
  } else {
    mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
  }
  // ... and private memory as well.
  if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
    mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
  } else {
    mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
  }
  if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
    mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
  }
  mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
  mSWBits.set(AMDGPUDeviceInfo::LongOps);
}
95
96AMDGPUDeviceInfo::ExecutionMode
97AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const
98{
99 if (mHWBits[Caps]) {
100 assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
101 return AMDGPUDeviceInfo::Hardware;
102 }
103
104 if (mSWBits[Caps]) {
105 assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
106 return AMDGPUDeviceInfo::Software;
107 }
108
109 return AMDGPUDeviceInfo::Unsupported;
110
111}
112
// A capability is supported when it runs in either hardware or software.
bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const
{
  return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
}

// True when the capability is implemented natively by the hardware.
bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const
{
  return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
}

// True when the capability is emulated in software.
bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const
{
  return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
}
127
// LLVM data-layout string shared by all AMDGPU devices: little-endian,
// 32-bit pointers, natural alignment for scalar and vector types, and
// native integer widths of 8/16/32/64 bits.
std::string
AMDGPUDevice::getDataLayout() const
{
    return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
        "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
        "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
        "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
        "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
        "-n8:16:32:64");
}
diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h
deleted file mode 100644
index 864fa0a3455..00000000000
--- a/src/gallium/drivers/radeon/AMDILDevice.h
+++ /dev/null
@@ -1,115 +0,0 @@
1//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Interface for the subtarget data classes.
11//
12//===----------------------------------------------------------------------===//
13// This file will define the interface that each generation needs to
14// implement in order to correctly answer queries on the capabilities of the
15// specific hardware.
16//===----------------------------------------------------------------------===//
17#ifndef _AMDILDEVICEIMPL_H_
18#define _AMDILDEVICEIMPL_H_
19#include "AMDIL.h"
20#include "llvm/ADT/BitVector.h"
21
22namespace llvm {
23 class AMDGPUSubtarget;
24 class MCStreamer;
25//===----------------------------------------------------------------------===//
26// Interface for data that is specific to a single device
27//===----------------------------------------------------------------------===//
// Base class describing one GPU device: memory-size limits, resource
// IDs, and the hardware/software capability matrix queried by the
// backend. Concrete generations subclass this and override the pure
// virtual queries.
class AMDGPUDevice {
public:
  AMDGPUDevice(AMDGPUSubtarget *ST);
  virtual ~AMDGPUDevice();

  // Enum values for the various memory types.
  enum {
    RAW_UAV_ID = 0,
    ARENA_UAV_ID = 1,
    LDS_ID = 2,
    GDS_ID = 3,
    SCRATCH_ID = 4,
    CONSTANT_ID = 5,
    GLOBAL_ID = 6,
    MAX_IDS = 7
  } IO_TYPE_IDS;

  // Returns the max LDS size that the hardware supports. Size is in
  // bytes.
  virtual size_t getMaxLDSSize() const = 0;

  // Returns the max GDS size that the hardware supports if the GDS is
  // supported by the hardware. Size is in bytes.
  virtual size_t getMaxGDSSize() const;

  // Returns the max number of hardware constant address spaces that
  // are supported by this device.
  virtual size_t getMaxNumCBs() const;

  // Returns the max number of bytes a single hardware constant buffer
  // can support. Size is in bytes.
  virtual size_t getMaxCBSize() const;

  // Returns the max number of bytes allowed by the hardware scratch
  // buffer. Size is in bytes.
  virtual size_t getMaxScratchSize() const;

  // Get the flag that corresponds to the device.
  virtual uint32_t getDeviceFlag() const;

  // Returns the number of work-items that exist in a single hardware
  // wavefront.
  virtual size_t getWavefrontSize() const = 0;

  // Get the generational name of this specific device.
  virtual uint32_t getGeneration() const = 0;

  // Get the stack alignment of this specific device.
  virtual uint32_t getStackAlignment() const;

  // Get the resource ID for this specific device.
  virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;

  // Get the max number of UAV's for this device.
  virtual uint32_t getMaxNumUAVs() const = 0;

  // API utilizing more detailed capabilities of each family of
  // cards. If a capability is supported, then either usesHardware or
  // usesSoftware returned true. If usesHardware returned true, then
  // usesSoftware must return false for the same capability. Hardware
  // execution means that the feature is done natively by the hardware
  // and is not emulated by the software. Software execution means
  // that the feature could be done in the hardware, but there is
  // software that emulates it with possibly using the hardware for
  // support since the hardware does not fully comply with OpenCL
  // specs.
  bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
  bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
  bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
  virtual std::string getDataLayout() const;
  static const unsigned int MAX_LDS_SIZE_700 = 16384;
  static const unsigned int MAX_LDS_SIZE_800 = 32768;
  static const unsigned int WavefrontSize = 64;
  static const unsigned int HalfWavefrontSize = 32;
  static const unsigned int QuarterWavefrontSize = 16;
protected:
  virtual void setCaps();
  llvm::BitVector mHWBits;  // capabilities done natively in hardware
  llvm::BitVector mSWBits;  // capabilities emulated in software
  AMDGPUSubtarget *mSTM;    // owning subtarget (not owned here)
  uint32_t mDeviceFlag;     // OCL_DEVICE_* identity flag
private:
  AMDGPUDeviceInfo::ExecutionMode
  getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
}; // AMDGPUDevice
113
114} // namespace llvm
115#endif // _AMDILDEVICEIMPL_H_
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
deleted file mode 100644
index b2f7cfb3092..00000000000
--- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
1//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Function that creates DeviceInfo from a device name and other information.
11//
12//==-----------------------------------------------------------------------===//
13#include "AMDILDevices.h"
14#include "AMDGPUSubtarget.h"
15
16using namespace llvm;
17namespace llvm {
18namespace AMDGPUDeviceInfo {
19 AMDGPUDevice*
20getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr,
21 bool is64bit, bool is64on32bit)
22{
23 if (deviceName.c_str()[2] == '7') {
24 switch (deviceName.c_str()[3]) {
25 case '1':
26 return new AMDGPU710Device(ptr);
27 case '7':
28 return new AMDGPU770Device(ptr);
29 default:
30 return new AMDGPU7XXDevice(ptr);
31 };
32 } else if (deviceName == "cypress") {
33#if DEBUG
34 assert(!is64bit && "This device does not support 64bit pointers!");
35 assert(!is64on32bit && "This device does not support 64bit"
36 " on 32bit pointers!");
37#endif
38 return new AMDGPUCypressDevice(ptr);
39 } else if (deviceName == "juniper") {
40#if DEBUG
41 assert(!is64bit && "This device does not support 64bit pointers!");
42 assert(!is64on32bit && "This device does not support 64bit"
43 " on 32bit pointers!");
44#endif
45 return new AMDGPUEvergreenDevice(ptr);
46 } else if (deviceName == "redwood") {
47#if DEBUG
48 assert(!is64bit && "This device does not support 64bit pointers!");
49 assert(!is64on32bit && "This device does not support 64bit"
50 " on 32bit pointers!");
51#endif
52 return new AMDGPURedwoodDevice(ptr);
53 } else if (deviceName == "cedar") {
54#if DEBUG
55 assert(!is64bit && "This device does not support 64bit pointers!");
56 assert(!is64on32bit && "This device does not support 64bit"
57 " on 32bit pointers!");
58#endif
59 return new AMDGPUCedarDevice(ptr);
60 } else if (deviceName == "barts"
61 || deviceName == "turks") {
62#if DEBUG
63 assert(!is64bit && "This device does not support 64bit pointers!");
64 assert(!is64on32bit && "This device does not support 64bit"
65 " on 32bit pointers!");
66#endif
67 return new AMDGPUNIDevice(ptr);
68 } else if (deviceName == "cayman") {
69#if DEBUG
70 assert(!is64bit && "This device does not support 64bit pointers!");
71 assert(!is64on32bit && "This device does not support 64bit"
72 " on 32bit pointers!");
73#endif
74 return new AMDGPUCaymanDevice(ptr);
75 } else if (deviceName == "caicos") {
76#if DEBUG
77 assert(!is64bit && "This device does not support 64bit pointers!");
78 assert(!is64on32bit && "This device does not support 64bit"
79 " on 32bit pointers!");
80#endif
81 return new AMDGPUNIDevice(ptr);
82 } else if (deviceName == "SI") {
83 return new AMDGPUSIDevice(ptr);
84 } else {
85#if DEBUG
86 assert(!is64bit && "This device does not support 64bit pointers!");
87 assert(!is64on32bit && "This device does not support 64bit"
88 " on 32bit pointers!");
89#endif
90 return new AMDGPU7XXDevice(ptr);
91 }
92}
93} // End namespace AMDGPUDeviceInfo
94} // End namespace llvm
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
deleted file mode 100644
index 4fa021e3599..00000000000
--- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h
+++ /dev/null
@@ -1,90 +0,0 @@
1//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#ifndef _AMDILDEVICEINFO_H_
10#define _AMDILDEVICEINFO_H_
11
12
13#include <string>
14
namespace llvm
{
  class AMDGPUDevice;
  class AMDGPUSubtarget;
  namespace AMDGPUDeviceInfo
  {
    // Each Capabilities can be executed using a hardware instruction,
    // emulated with a sequence of software instructions, or not
    // supported at all.
    enum ExecutionMode {
      Unsupported = 0, // Unsupported feature on the card(Default value)
      Software, // This is the execution mode that is set if the
                // feature is emulated in software
      Hardware  // This execution mode is set if the feature exists
                // natively in hardware
    };

    // Any changes to this needs to have a corresponding update to the
    // twiki page GPUMetadataABI
    enum Caps {
      HalfOps          = 0x1,  // Half float is supported or not.
      DoubleOps        = 0x2,  // Double is supported or not.
      ByteOps          = 0x3,  // Byte(char) is supported or not.
      ShortOps         = 0x4,  // Short is supported or not.
      LongOps          = 0x5,  // Long is supported or not.
      Images           = 0x6,  // Images are supported or not.
      ByteStores       = 0x7,  // ByteStores available(!HD4XXX).
      ConstantMem      = 0x8,  // Constant/CB memory.
      LocalMem         = 0x9,  // Local/LDS memory.
      PrivateMem       = 0xA,  // Scratch/Private/Stack memory.
      RegionMem        = 0xB,  // OCL GDS Memory Extension.
      FMA              = 0xC,  // Use HW FMA or SW FMA.
      ArenaSegment     = 0xD,  // Use for Arena UAV per pointer 12-1023.
      MultiUAV         = 0xE,  // Use for UAV per Pointer 0-7.
      Reserved0        = 0xF,  // ReservedFlag
      NoAlias          = 0x10, // Cached loads.
      Signed24BitOps   = 0x11, // Peephole Optimization.
      // Debug mode implies that no hardware features or optimizations
      // are performed and that all memory accesses go through a single
      // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
      Debug            = 0x12, // Debug mode is enabled.
      CachedMem        = 0x13, // Cached mem is available or not.
      BarrierDetect    = 0x14, // Detect duplicate barriers.
      Reserved1        = 0x15, // Reserved flag
      ByteLDSOps       = 0x16, // Flag to specify if byte LDS ops are available.
      ArenaVectors     = 0x17, // Flag to specify if vector loads from arena work.
      TmrReg           = 0x18, // Flag to specify if Tmr register is supported.
      NoInline         = 0x19, // Flag to specify that no inlining should occur.
      MacroDB          = 0x1A, // Flag to specify that backend handles macrodb.
      HW64BitDivMod    = 0x1B, // Flag for backend to generate 64bit div/mod.
      ArenaUAV         = 0x1C, // Flag to specify that arena uav is supported.
      PrivateUAV       = 0x1D, // Flag to specify that private memory uses uav's.
      // If more capabilities are required, then
      // this number needs to be increased.
      // All capabilities must come before this
      // number.
      MaxNumberCapabilities = 0x20
    };
    // These have to be in order with the older generations
    // having the lower number enumerations.
    enum Generation {
      HD4XXX = 0, // 7XX based devices.
      HD5XXX, // Evergreen based devices.
      HD6XXX, // NI/Evergreen+ based devices.
      HD7XXX,
      HDTEST, // Experimental feature testing device.
      HDNUMGEN
    };


    // Factory: build the AMDGPUDevice subclass matching a device name
    // (see AMDILDeviceInfo.cpp for the name-matching rules).
    AMDGPUDevice*
    getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
                      bool is64bit = false, bool is64on32bit = false);
  } // namespace AMDGPUDeviceInfo
} // namespace llvm
90#endif // _AMDILDEVICEINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h
deleted file mode 100644
index cfcc3304b4b..00000000000
--- a/src/gallium/drivers/radeon/AMDILDevices.h
+++ /dev/null
@@ -1,19 +0,0 @@
1//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#ifndef __AMDIL_DEVICES_H_
10#define __AMDIL_DEVICES_H_
11// Include all of the device specific header files
12// This file is for Internal use only!
13#include "AMDIL7XXDevice.h"
14#include "AMDILDevice.h"
15#include "AMDILEvergreenDevice.h"
16#include "AMDILNIDevice.h"
17#include "AMDILSIDevice.h"
18
19#endif // _AMDIL_DEVICES_H_
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
deleted file mode 100644
index 3532a28fe0f..00000000000
--- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
1//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#include "AMDILEvergreenDevice.h"
10
11using namespace llvm;
12
13AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
14: AMDGPUDevice(ST) {
15 setCaps();
16 std::string name = ST->getDeviceName();
17 if (name == "cedar") {
18 mDeviceFlag = OCL_DEVICE_CEDAR;
19 } else if (name == "redwood") {
20 mDeviceFlag = OCL_DEVICE_REDWOOD;
21 } else if (name == "cypress") {
22 mDeviceFlag = OCL_DEVICE_CYPRESS;
23 } else {
24 mDeviceFlag = OCL_DEVICE_JUNIPER;
25 }
26}
27
28AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
29}
30
31size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
32 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
33 return MAX_LDS_SIZE_800;
34 } else {
35 return 0;
36 }
37}
38size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
39 if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
40 return MAX_LDS_SIZE_800;
41 } else {
42 return 0;
43 }
44}
45uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
46 return 12;
47}
48
// Map an abstract resource-ID kind to the concrete hardware resource number
// used by the Evergreen family.  Memory kinds that lack hardware backing
// fall back to the default arena UAV.
uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
  switch(id) {
  default:
    assert(0 && "ID type passed in is unknown!");
    break; // In release builds this falls through to the final "return 0".
  case CONSTANT_ID:
  case RAW_UAV_ID:
    return GLOBAL_RETURN_RAW_UAV_ID;
  case GLOBAL_ID:
  case ARENA_UAV_ID:
    return DEFAULT_ARENA_UAV_ID;
  case LDS_ID:
    // Local memory lives in the LDS only when hardware supports it.
    if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
      return DEFAULT_LDS_ID;
    } else {
      return DEFAULT_ARENA_UAV_ID;
    }
  case GDS_ID:
    // Region memory maps to the GDS when available.
    if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
      return DEFAULT_GDS_ID;
    } else {
      return DEFAULT_ARENA_UAV_ID;
    }
  case SCRATCH_ID:
    // Private memory maps to the scratch buffer when available.
    if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
      return DEFAULT_SCRATCH_ID;
    } else {
      return DEFAULT_ARENA_UAV_ID;
    }
  };
  return 0;
}
81
// Evergreen parts run full-size wavefronts.
size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
  return AMDGPUDevice::WavefrontSize;
}

// All Evergreen devices report the HD5XXX generation.
uint32_t AMDGPUEvergreenDevice::getGeneration() const {
  return AMDGPUDeviceInfo::HD5XXX;
}
89
// Populate the hardware-capability (mHWBits) and software-emulation
// (mSWBits) masks for the Evergreen family.  Several features clear their
// software fallback explicitly after enabling the hardware bit.
void AMDGPUEvergreenDevice::setCaps() {
  mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
  mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
  // 64-bit div/mod is done in hardware; drop the software default.
  mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
  mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
  mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
  if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
    mHWBits.set(AMDGPUDeviceInfo::ByteStores);
  }
  if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
    // Debug override: emulate local and region memory in software.
    mSWBits.set(AMDGPUDeviceInfo::LocalMem);
    mSWBits.set(AMDGPUDeviceInfo::RegionMem);
  } else {
    mHWBits.set(AMDGPUDeviceInfo::LocalMem);
    mHWBits.set(AMDGPUDeviceInfo::RegionMem);
  }
  mHWBits.set(AMDGPUDeviceInfo::Images);
  if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
    mHWBits.set(AMDGPUDeviceInfo::NoAlias);
  }
  mHWBits.set(AMDGPUDeviceInfo::CachedMem);
  if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
    mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
  }
  // Byte-sized LDS ops and LongOps are native here; drop their fallbacks.
  mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
  mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
  mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
  mHWBits.set(AMDGPUDeviceInfo::LongOps);
  mSWBits.reset(AMDGPUDeviceInfo::LongOps);
  mHWBits.set(AMDGPUDeviceInfo::TmrReg);
}
121
AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
  : AMDGPUEvergreenDevice(ST) {
  // NOTE: the base-class constructor has already run Evergreen's setCaps()
  // (virtual dispatch inside a constructor stays in that class); this call
  // dispatches to AMDGPUCypressDevice::setCaps() and layers the
  // Cypress-only capabilities on top.
  setCaps();
}

AMDGPUCypressDevice::~AMDGPUCypressDevice() {
}

// Cypress adds double-precision support: when the DoubleOps override is
// active, both FP64 and hardware FMA are enabled together.
void AMDGPUCypressDevice::setCaps() {
  if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
    mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
    mHWBits.set(AMDGPUDeviceInfo::FMA);
  }
}
136
137
AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
  : AMDGPUEvergreenDevice(ST) {
  // Layer Cedar-specific capabilities on top of the Evergreen defaults.
  setCaps();
}

AMDGPUCedarDevice::~AMDGPUCedarDevice() {
}

// FMA is provided in software on Cedar.
void AMDGPUCedarDevice::setCaps() {
  mSWBits.set(AMDGPUDeviceInfo::FMA);
}

// Cedar runs quarter-size wavefronts.
size_t AMDGPUCedarDevice::getWavefrontSize() const {
  return AMDGPUDevice::QuarterWavefrontSize;
}
153
AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
  : AMDGPUEvergreenDevice(ST) {
  // Layer Redwood-specific capabilities on top of the Evergreen defaults.
  setCaps();
}

AMDGPURedwoodDevice::~AMDGPURedwoodDevice()
{
}

// FMA is provided in software on Redwood.
void AMDGPURedwoodDevice::setCaps() {
  mSWBits.set(AMDGPUDeviceInfo::FMA);
}

// Redwood runs half-size wavefronts.
size_t AMDGPURedwoodDevice::getWavefrontSize() const {
  return AMDGPUDevice::HalfWavefrontSize;
}
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
deleted file mode 100644
index cde1472f934..00000000000
--- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
+++ /dev/null
@@ -1,87 +0,0 @@
1//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Interface for the subtarget data classes.
11//
12//===----------------------------------------------------------------------===//
13// This file will define the interface that each generation needs to
14// implement in order to correctly answer queries on the capabilities of the
15// specific hardware.
16//===----------------------------------------------------------------------===//
17#ifndef _AMDILEVERGREENDEVICE_H_
18#define _AMDILEVERGREENDEVICE_H_
19#include "AMDILDevice.h"
20#include "AMDGPUSubtarget.h"
21
22namespace llvm {
23 class AMDGPUSubtarget;
24//===----------------------------------------------------------------------===//
25// Evergreen generation of devices and their respective sub classes
26//===----------------------------------------------------------------------===//
27
28
// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
// series of cards. This class contains information required to differentiate
// the Evergreen device from the generic AMDGPUDevice. This device represents
// the capabilities of the 'Juniper' cards, also known as the HD57XX.
class AMDGPUEvergreenDevice : public AMDGPUDevice {
public:
  AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
  virtual ~AMDGPUEvergreenDevice();
  virtual size_t getMaxLDSSize() const;       // LDS bytes (0 if no HW local mem).
  virtual size_t getMaxGDSSize() const;       // GDS bytes (0 if no HW region mem).
  virtual size_t getWavefrontSize() const;    // Full wavefront on Evergreen.
  virtual uint32_t getGeneration() const;     // Always AMDGPUDeviceInfo::HD5XXX.
  virtual uint32_t getMaxNumUAVs() const;
  virtual uint32_t getResourceID(uint32_t) const;
protected:
  // Populates the HW/SW capability bit masks for this generation.
  virtual void setCaps();
}; // AMDGPUEvergreenDevice
46
// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it
// has support for double precision operations. This device is used to
// represent both the Cypress and Hemlock cards, which are commercially known
// as HD58XX and HD59XX cards.
class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
public:
  AMDGPUCypressDevice(AMDGPUSubtarget *ST);
  virtual ~AMDGPUCypressDevice();
private:
  // Adds the double-precision capability bits on top of Evergreen's.
  virtual void setCaps();
}; // AMDGPUCypressDevice
58
59
// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
// devices. This class differs from the base AMDGPUEvergreenDevice in that the
// device is a ~quarter of the 'Juniper'. These are commercially known as the
// HD54XX and HD53XX series of cards.
class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
public:
  AMDGPUCedarDevice(AMDGPUSubtarget *ST);
  virtual ~AMDGPUCedarDevice();
  // Cedar runs quarter-size wavefronts.
  virtual size_t getWavefrontSize() const;
private:
  virtual void setCaps();
}; // AMDGPUCedarDevice
72
// The AMDGPURedwoodDevice is the class that represents all of the 'Redwood'
// based devices. This class differs from the base class, in that these devices
// are considered about half of a 'Juniper' device. These are commercially
// known as the HD55XX and HD56XX series of cards.
class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
public:
  AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
  virtual ~AMDGPURedwoodDevice();
  // Redwood runs half-size wavefronts.
  virtual size_t getWavefrontSize() const;
private:
  virtual void setCaps();
}; // AMDGPURedwoodDevice
85
86} // namespace llvm
#endif // _AMDILEVERGREENDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp
deleted file mode 100644
index f2a0fe5a39c..00000000000
--- a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
// Interface to describe the layout of a stack frame on an AMDIL target machine
11//
12//===----------------------------------------------------------------------===//
13#include "AMDILFrameLowering.h"
14#include "llvm/CodeGen/MachineFrameInfo.h"
15
16using namespace llvm;
17AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
18 int LAO, unsigned TransAl)
19 : TargetFrameLowering(D, StackAl, LAO, TransAl)
20{
21}
22
23AMDGPUFrameLowering::~AMDGPUFrameLowering()
24{
25}
26
27/// getFrameIndexOffset - Returns the displacement from the frame register to
28/// the stack frame of the specified index.
29int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
30 int FI) const {
31 const MachineFrameInfo *MFI = MF.getFrameInfo();
32 return MFI->getObjectOffset(FI);
33}
34
35const TargetFrameLowering::SpillSlot *
36AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
37{
38 NumEntries = 0;
39 return 0;
40}
41void
42AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const
43{
44}
45void
46AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
47{
48}
49bool
50AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const
51{
52 return false;
53}
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.h b/src/gallium/drivers/radeon/AMDILFrameLowering.h
deleted file mode 100644
index 934ee46821d..00000000000
--- a/src/gallium/drivers/radeon/AMDILFrameLowering.h
+++ /dev/null
@@ -1,46 +0,0 @@
1//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
//
// Interface to describe the layout of a stack frame on an AMDIL target machine
//
//===----------------------------------------------------------------------===//
20#ifndef _AMDILFRAME_LOWERING_H_
21#define _AMDILFRAME_LOWERING_H_
22
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/Target/TargetFrameLowering.h"
25
26/// Information about the stack frame layout on the AMDGPU targets. It holds
27/// the direction of the stack growth, the known stack alignment on entry to
28/// each function, and the offset to the locals area.
29/// See TargetFrameInfo for more comments.
30
31namespace llvm {
  class AMDGPUFrameLowering : public TargetFrameLowering {
  public:
    // D: stack growth direction; StackAl: known stack alignment on entry;
    // LAO: offset to the locals area; TransAl: transient stack alignment.
    AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
        TransAl = 1);
    virtual ~AMDGPUFrameLowering();
    // Displacement from the frame register to the given frame index.
    virtual int getFrameIndexOffset(const MachineFunction &MF,
                                    int FI) const;
    // Returns no fixed spill slots (sets NumEntries to 0).
    virtual const SpillSlot *
      getCalleeSavedSpillSlots(unsigned &NumEntries) const;
    // Prologue/epilogue emission is a no-op on this target.
    virtual void emitPrologue(MachineFunction &MF) const;
    virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
    // Always false: no frame pointer is used.
    virtual bool hasFP(const MachineFunction &MF) const;
  }; // class AMDGPUFrameLowering
45} // namespace llvm
46#endif // _AMDILFRAME_LOWERING_H_
diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
deleted file mode 100644
index 807113134d2..00000000000
--- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
+++ /dev/null
@@ -1,395 +0,0 @@
1//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the AMDIL target.
11//
12//===----------------------------------------------------------------------===//
13#include "AMDGPUInstrInfo.h"
14#include "AMDGPUISelLowering.h" // For AMDGPUISD
15#include "AMDGPURegisterInfo.h"
16#include "AMDILDevices.h"
17#include "AMDILUtilityFunctions.h"
18#include "llvm/ADT/ValueMap.h"
19#include "llvm/CodeGen/PseudoSourceValue.h"
20#include "llvm/CodeGen/SelectionDAGISel.h"
21#include "llvm/Support/Compiler.h"
22#include <list>
23#include <queue>
24
25using namespace llvm;
26
27//===----------------------------------------------------------------------===//
28// Instruction Selector Implementation
29//===----------------------------------------------------------------------===//
30
31//===----------------------------------------------------------------------===//
32// AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine instructions
33// //for SelectionDAG operations.
34//
35namespace {
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  // Main entry point: select a machine instruction for N, or return NULL
  // when the node already carries a machine opcode.
  SDNode *Select(SDNode *N);
  virtual const char *getPassName() const;

private:
  inline SDValue getSmallIPtrImm(unsigned Imm);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // True if |ptr| is a pointer into the given target address space.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  // Walks constant-exprs and instructions to find the underlying argument,
  // global, or alloca a pointer value derives from.
  static const Value *getBasePointerValue(const Value *V);

  // Address-space classification helpers used by the selection patterns.
  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  static bool isCPLoad(const LoadSDNode *N);
  static bool isConstantLoad(const LoadSDNode *N, int cbID);
  static bool isGlobalLoad(const LoadSDNode *N);
  static bool isPrivateLoad(const LoadSDNode *N);
  static bool isLocalLoad(const LoadSDNode *N);
  static bool isRegionLoad(const LoadSDNode *N);

  bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
77} // end anonymous namespace
78
79// createAMDGPUISelDag - This pass converts a legalized DAG into a AMDGPU-specific
80// DAG, ready for instruction scheduling.
81//
82FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
83 ) {
84 return new AMDGPUDAGToDAGISel(TM);
85}
86
87AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
88 )
89 : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>())
90{
91}
92
93AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
94}
95
96SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
97 return CurDAG->getTargetConstant(Imm, MVT::i32);
98}
99
100bool AMDGPUDAGToDAGISel::SelectADDRParam(
101 SDValue Addr, SDValue& R1, SDValue& R2) {
102
103 if (Addr.getOpcode() == ISD::FrameIndex) {
104 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
105 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
106 R2 = CurDAG->getTargetConstant(0, MVT::i32);
107 } else {
108 R1 = Addr;
109 R2 = CurDAG->getTargetConstant(0, MVT::i32);
110 }
111 } else if (Addr.getOpcode() == ISD::ADD) {
112 R1 = Addr.getOperand(0);
113 R2 = Addr.getOperand(1);
114 } else {
115 R1 = Addr;
116 R2 = CurDAG->getTargetConstant(0, MVT::i32);
117 }
118 return true;
119}
120
121bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
122 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
123 Addr.getOpcode() == ISD::TargetGlobalAddress) {
124 return false;
125 }
126 return SelectADDRParam(Addr, R1, R2);
127}
128
129
130bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
131 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
132 Addr.getOpcode() == ISD::TargetGlobalAddress) {
133 return false;
134 }
135
136 if (Addr.getOpcode() == ISD::FrameIndex) {
137 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
138 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
139 R2 = CurDAG->getTargetConstant(0, MVT::i64);
140 } else {
141 R1 = Addr;
142 R2 = CurDAG->getTargetConstant(0, MVT::i64);
143 }
144 } else if (Addr.getOpcode() == ISD::ADD) {
145 R1 = Addr.getOperand(0);
146 R2 = Addr.getOperand(1);
147 } else {
148 R1 = Addr;
149 R2 = CurDAG->getTargetConstant(0, MVT::i64);
150 }
151 return true;
152}
153
154SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
155 unsigned int Opc = N->getOpcode();
156 if (N->isMachineOpcode()) {
157 return NULL; // Already selected.
158 }
159 switch (Opc) {
160 default: break;
161 case ISD::FrameIndex:
162 {
163 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
164 unsigned int FI = FIN->getIndex();
165 EVT OpVT = N->getValueType(0);
166 unsigned int NewOpc = AMDGPU::COPY;
167 SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
168 return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
169 }
170 }
171 break;
172 }
173 return SelectCode(N);
174}
175
176bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
177 if (!ptr) {
178 return false;
179 }
180 Type *ptrType = ptr->getType();
181 return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
182}
183
184const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V)
185{
186 if (!V) {
187 return NULL;
188 }
189 const Value *ret = NULL;
190 ValueMap<const Value *, bool> ValueBitMap;
191 std::queue<const Value *, std::list<const Value *> > ValueQueue;
192 ValueQueue.push(V);
193 while (!ValueQueue.empty()) {
194 V = ValueQueue.front();
195 if (ValueBitMap.find(V) == ValueBitMap.end()) {
196 ValueBitMap[V] = true;
197 if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
198 ret = V;
199 break;
200 } else if (dyn_cast<GlobalVariable>(V)) {
201 ret = V;
202 break;
203 } else if (dyn_cast<Constant>(V)) {
204 const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
205 if (CE) {
206 ValueQueue.push(CE->getOperand(0));
207 }
208 } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
209 ret = AI;
210 break;
211 } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
212 uint32_t numOps = I->getNumOperands();
213 for (uint32_t x = 0; x < numOps; ++x) {
214 ValueQueue.push(I->getOperand(x));
215 }
216 } else {
217 // assert(0 && "Found a Value that we didn't know how to handle!");
218 }
219 }
220 ValueQueue.pop();
221 }
222 return ret;
223}
224
225bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
226 return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
227}
228
229bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
230 return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
231 && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
232 && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
233}
234
235bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
236 return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
237}
238
239bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
240 return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
241}
242
243bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
244 if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
245 return true;
246 }
247 MachineMemOperand *MMO = N->getMemOperand();
248 const Value *V = MMO->getValue();
249 const Value *BV = getBasePointerValue(V);
250 if (MMO
251 && MMO->getValue()
252 && ((V && dyn_cast<GlobalValue>(V))
253 || (BV && dyn_cast<GlobalValue>(
254 getBasePointerValue(MMO->getValue()))))) {
255 return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
256 } else {
257 return false;
258 }
259}
260
// Load classification by the source value's address space.
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
  return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) {
  return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) {
  return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
}
272
273bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
274 MachineMemOperand *MMO = N->getMemOperand();
275 if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
276 if (MMO) {
277 const Value *V = MMO->getValue();
278 const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
279 if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
280 return true;
281 }
282 }
283 }
284 return false;
285}
286
287bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
288 if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
289 // Check to make sure we are not a constant pool load or a constant load
290 // that is marked as a private load
291 if (isCPLoad(N) || isConstantLoad(N, -1)) {
292 return false;
293 }
294 }
295 if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
296 && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
297 && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
298 && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
299 && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
300 && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS))
301 {
302 return true;
303 }
304 return false;
305}
306
// Human-readable pass name reported by the pass manager.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
310
311#ifdef DEBUGTMP
312#undef INT64_C
313#endif
314#undef DEBUGTMP
315
316///==== AMDGPU Functions ====///
317
// Match an (add base, imm) address whose immediate, converted from bytes to
// dwords, fits in 8 bits.  Non-ADD addresses match trivially with a zero
// offset; symbolic addresses never match.
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                             SDValue& Offset) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }


  if (Addr.getOpcode() == ISD::ADD) {
    bool Match = false;

    // Find the base ptr and the offset
    for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
      SDValue Arg = Addr.getOperand(i);
      ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
      // This arg isn't a constant so it must be the base PTR.
      if (!OffsetNode) {
        Base = Addr.getOperand(i);
        continue;
      }
      // Check if the constant argument fits in 8-bits.  The offset is in bytes
      // so we need to convert it to dwords.
      if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
        Match = true;
        Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
                                           MVT::i32);
      }
    }
    // NOTE(review): if both operands were constants, Base is never assigned
    // even though Match may be true -- presumably constant folding collapses
    // such adds before selection; confirm.
    return Match;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
354
// Match an address for a VTX_READ: either (add base, simm16), or a bare
// 16-bit constant address folded entirely into the offset with register
// ZERO as the base; anything else matches as (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                           SDValue &Offset)
{
  ConstantSDNode * IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    // Base becomes a copy of the hardware zero register.
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  CurDAG->getEntryNode().getDebugLoc(),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
382
383bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
384 SDValue& Offset) {
385 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
386 Addr.getOpcode() == ISD::TargetGlobalAddress ||
387 Addr.getOpcode() != ISD::ADD) {
388 return false;
389 }
390
391 Base = Addr.getOperand(0);
392 Offset = Addr.getOperand(1);
393
394 return true;
395}
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
deleted file mode 100644
index 993025c3d47..00000000000
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ /dev/null
@@ -1,677 +0,0 @@
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file contains TargetLowering functions borrowed from AMDIL.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUISelLowering.h"
15#include "AMDGPURegisterInfo.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "AMDILUtilityFunctions.h"
20#include "llvm/CallingConv.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/PseudoSourceValue.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SelectionDAGNodes.h"
26#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
27#include "llvm/DerivedTypes.h"
28#include "llvm/Instructions.h"
29#include "llvm/Intrinsics.h"
30#include "llvm/Support/raw_ostream.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetOptions.h"
33
34using namespace llvm;
35//===----------------------------------------------------------------------===//
36// Calling Convention Implementation
37//===----------------------------------------------------------------------===//
38#include "AMDGPUGenCallingConv.inc"
39
40//===----------------------------------------------------------------------===//
41// TargetLowering Implementation Help Functions End
42//===----------------------------------------------------------------------===//
43
44//===----------------------------------------------------------------------===//
45// TargetLowering Class Implementation Begins
46//===----------------------------------------------------------------------===//
47void AMDGPUTargetLowering::InitAMDILLowering()
48{
49 int types[] =
50 {
51 (int)MVT::i8,
52 (int)MVT::i16,
53 (int)MVT::i32,
54 (int)MVT::f32,
55 (int)MVT::f64,
56 (int)MVT::i64,
57 (int)MVT::v2i8,
58 (int)MVT::v4i8,
59 (int)MVT::v2i16,
60 (int)MVT::v4i16,
61 (int)MVT::v4f32,
62 (int)MVT::v4i32,
63 (int)MVT::v2f32,
64 (int)MVT::v2i32,
65 (int)MVT::v2f64,
66 (int)MVT::v2i64
67 };
68
69 int IntTypes[] =
70 {
71 (int)MVT::i8,
72 (int)MVT::i16,
73 (int)MVT::i32,
74 (int)MVT::i64
75 };
76
77 int FloatTypes[] =
78 {
79 (int)MVT::f32,
80 (int)MVT::f64
81 };
82
83 int VectorTypes[] =
84 {
85 (int)MVT::v2i8,
86 (int)MVT::v4i8,
87 (int)MVT::v2i16,
88 (int)MVT::v4i16,
89 (int)MVT::v4f32,
90 (int)MVT::v4i32,
91 (int)MVT::v2f32,
92 (int)MVT::v2i32,
93 (int)MVT::v2f64,
94 (int)MVT::v2i64
95 };
96 size_t numTypes = sizeof(types) / sizeof(*types);
97 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
98 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
99 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
100
101 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
102 // These are the current register classes that are
103 // supported
104
105 for (unsigned int x = 0; x < numTypes; ++x) {
106 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
107
108 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
109 // We cannot sextinreg, expand to shifts
110 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
111 setOperationAction(ISD::SUBE, VT, Expand);
112 setOperationAction(ISD::SUBC, VT, Expand);
113 setOperationAction(ISD::ADDE, VT, Expand);
114 setOperationAction(ISD::ADDC, VT, Expand);
115 setOperationAction(ISD::BRCOND, VT, Custom);
116 setOperationAction(ISD::BR_JT, VT, Expand);
117 setOperationAction(ISD::BRIND, VT, Expand);
118 // TODO: Implement custom UREM/SREM routines
119 setOperationAction(ISD::SREM, VT, Expand);
120 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
121 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
122 if (VT != MVT::i64 && VT != MVT::v2i64) {
123 setOperationAction(ISD::SDIV, VT, Custom);
124 }
125 }
126 for (unsigned int x = 0; x < numFloatTypes; ++x) {
127 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
128
129 // IL does not have these operations for floating point types
130 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
131 setOperationAction(ISD::SETOLT, VT, Expand);
132 setOperationAction(ISD::SETOGE, VT, Expand);
133 setOperationAction(ISD::SETOGT, VT, Expand);
134 setOperationAction(ISD::SETOLE, VT, Expand);
135 setOperationAction(ISD::SETULT, VT, Expand);
136 setOperationAction(ISD::SETUGE, VT, Expand);
137 setOperationAction(ISD::SETUGT, VT, Expand);
138 setOperationAction(ISD::SETULE, VT, Expand);
139 }
140
141 for (unsigned int x = 0; x < numIntTypes; ++x) {
142 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
143
144 // GPU also does not have divrem function for signed or unsigned
145 setOperationAction(ISD::SDIVREM, VT, Expand);
146
147 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
148 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
149 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
150
151 // GPU doesn't have a rotl, rotr, or byteswap instruction
152 setOperationAction(ISD::ROTR, VT, Expand);
153 setOperationAction(ISD::BSWAP, VT, Expand);
154
155 // GPU doesn't have any counting operators
156 setOperationAction(ISD::CTPOP, VT, Expand);
157 setOperationAction(ISD::CTTZ, VT, Expand);
158 setOperationAction(ISD::CTLZ, VT, Expand);
159 }
160
161 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
162 {
163 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
164
165 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
166 setOperationAction(ISD::SDIVREM, VT, Expand);
167 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
168 // setOperationAction(ISD::VSETCC, VT, Expand);
169 setOperationAction(ISD::SELECT_CC, VT, Expand);
170
171 }
172 if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
173 setOperationAction(ISD::MULHU, MVT::i64, Expand);
174 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
175 setOperationAction(ISD::MULHS, MVT::i64, Expand);
176 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
177 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
178 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
179 setOperationAction(ISD::Constant , MVT::i64 , Legal);
180 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
181 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
182 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
183 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
184 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
185 }
186 if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
187 // we support loading/storing v2f64 but not operations on the type
188 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
189 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
190 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
191 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
192 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
193 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
194 // We want to expand vector conversions into their scalar
195 // counterparts.
196 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
197 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
198 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
199 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
200 setOperationAction(ISD::FABS, MVT::f64, Expand);
201 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
202 }
203 // TODO: Fix the UDIV24 algorithm so it works for these
204 // types correctly. This needs vector comparisons
205 // for this to work correctly.
206 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
207 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
208 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
209 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
210 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
211 setOperationAction(ISD::SUBC, MVT::Other, Expand);
212 setOperationAction(ISD::ADDE, MVT::Other, Expand);
213 setOperationAction(ISD::ADDC, MVT::Other, Expand);
214 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
215 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
216 setOperationAction(ISD::BRIND, MVT::Other, Expand);
217 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
218
219
220 // Use the default implementation.
221 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
222 setOperationAction(ISD::Constant , MVT::i32 , Legal);
223
224 setSchedulingPreference(Sched::RegPressure);
225 setPow2DivIsCheap(false);
226 setPrefLoopAlignment(16);
227 setSelectIsExpensive(true);
228 setJumpIsExpensive(true);
229
230 maxStoresPerMemcpy = 4096;
231 maxStoresPerMemmove = 4096;
232 maxStoresPerMemset = 4096;
233
234#undef numTypes
235#undef numIntTypes
236#undef numVectorTypes
237#undef numFloatTypes
238}
239
240bool
241AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
242 const CallInst &I, unsigned Intrinsic) const
243{
244 return false;
245}
246// The backend supports 32 and 64 bit floating point immediates
247bool
248AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
249{
250 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
251 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
252 return true;
253 } else {
254 return false;
255 }
256}
257
258bool
259AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
260{
261 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
262 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
263 return false;
264 } else {
265 return true;
266 }
267}
268
269
270// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
271// be zero. Op is expected to be a target specific node. Used by DAG
272// combiner.
273
274void
275AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
276 const SDValue Op,
277 APInt &KnownZero,
278 APInt &KnownOne,
279 const SelectionDAG &DAG,
280 unsigned Depth) const
281{
282 APInt KnownZero2;
283 APInt KnownOne2;
284 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
285 switch (Op.getOpcode()) {
286 default: break;
287 case ISD::SELECT_CC:
288 DAG.ComputeMaskedBits(
289 Op.getOperand(1),
290 KnownZero,
291 KnownOne,
292 Depth + 1
293 );
294 DAG.ComputeMaskedBits(
295 Op.getOperand(0),
296 KnownZero2,
297 KnownOne2
298 );
299 assert((KnownZero & KnownOne) == 0
300 && "Bits known to be one AND zero?");
301 assert((KnownZero2 & KnownOne2) == 0
302 && "Bits known to be one AND zero?");
303 // Only known if known in both the LHS and RHS
304 KnownOne &= KnownOne2;
305 KnownZero &= KnownZero2;
306 break;
307 };
308}
309
310//===----------------------------------------------------------------------===//
311// Other Lowering Hooks
312//===----------------------------------------------------------------------===//
313
314SDValue
315AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
316{
317 EVT OVT = Op.getValueType();
318 SDValue DST;
319 if (OVT.getScalarType() == MVT::i64) {
320 DST = LowerSDIV64(Op, DAG);
321 } else if (OVT.getScalarType() == MVT::i32) {
322 DST = LowerSDIV32(Op, DAG);
323 } else if (OVT.getScalarType() == MVT::i16
324 || OVT.getScalarType() == MVT::i8) {
325 DST = LowerSDIV24(Op, DAG);
326 } else {
327 DST = SDValue(Op.getNode(), 0);
328 }
329 return DST;
330}
331
332SDValue
333AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
334{
335 EVT OVT = Op.getValueType();
336 SDValue DST;
337 if (OVT.getScalarType() == MVT::i64) {
338 DST = LowerSREM64(Op, DAG);
339 } else if (OVT.getScalarType() == MVT::i32) {
340 DST = LowerSREM32(Op, DAG);
341 } else if (OVT.getScalarType() == MVT::i16) {
342 DST = LowerSREM16(Op, DAG);
343 } else if (OVT.getScalarType() == MVT::i8) {
344 DST = LowerSREM8(Op, DAG);
345 } else {
346 DST = SDValue(Op.getNode(), 0);
347 }
348 return DST;
349}
350
351SDValue
352AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
353{
354 SDValue Data = Op.getOperand(0);
355 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
356 DebugLoc DL = Op.getDebugLoc();
357 EVT DVT = Data.getValueType();
358 EVT BVT = BaseType->getVT();
359 unsigned baseBits = BVT.getScalarType().getSizeInBits();
360 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
361 unsigned shiftBits = srcBits - baseBits;
362 if (srcBits < 32) {
363 // If the op is less than 32 bits, then it needs to extend to 32bits
364 // so it can properly keep the upper bits valid.
365 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
366 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
367 shiftBits = 32 - baseBits;
368 DVT = IVT;
369 }
370 SDValue Shift = DAG.getConstant(shiftBits, DVT);
371 // Shift left by 'Shift' bits.
372 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
373 // Signed shift Right by 'Shift' bits.
374 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
375 if (srcBits < 32) {
376 // Once the sign extension is done, the op needs to be converted to
377 // its original type.
378 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
379 }
380 return Data;
381}
382EVT
383AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
384{
385 int iSize = (size * numEle);
386 int vEle = (iSize >> ((size == 64) ? 6 : 5));
387 if (!vEle) {
388 vEle = 1;
389 }
390 if (size == 64) {
391 if (vEle == 1) {
392 return EVT(MVT::i64);
393 } else {
394 return EVT(MVT::getVectorVT(MVT::i64, vEle));
395 }
396 } else {
397 if (vEle == 1) {
398 return EVT(MVT::i32);
399 } else {
400 return EVT(MVT::getVectorVT(MVT::i32, vEle));
401 }
402 }
403}
404
405SDValue
406AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
407{
408 SDValue Chain = Op.getOperand(0);
409 SDValue Cond = Op.getOperand(1);
410 SDValue Jump = Op.getOperand(2);
411 SDValue Result;
412 Result = DAG.getNode(
413 AMDGPUISD::BRANCH_COND,
414 Op.getDebugLoc(),
415 Op.getValueType(),
416 Chain, Jump, Cond);
417 return Result;
418}
419
420SDValue
421AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
422{
423 DebugLoc DL = Op.getDebugLoc();
424 EVT OVT = Op.getValueType();
425 SDValue LHS = Op.getOperand(0);
426 SDValue RHS = Op.getOperand(1);
427 MVT INTTY;
428 MVT FLTTY;
429 if (!OVT.isVector()) {
430 INTTY = MVT::i32;
431 FLTTY = MVT::f32;
432 } else if (OVT.getVectorNumElements() == 2) {
433 INTTY = MVT::v2i32;
434 FLTTY = MVT::v2f32;
435 } else if (OVT.getVectorNumElements() == 4) {
436 INTTY = MVT::v4i32;
437 FLTTY = MVT::v4f32;
438 }
439 unsigned bitsize = OVT.getScalarType().getSizeInBits();
440 // char|short jq = ia ^ ib;
441 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
442
443 // jq = jq >> (bitsize - 2)
444 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
445
446 // jq = jq | 0x1
447 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
448
449 // jq = (int)jq
450 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
451
452 // int ia = (int)LHS;
453 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
454
455  // int ib = (int)RHS;
456 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
457
458 // float fa = (float)ia;
459 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
460
461 // float fb = (float)ib;
462 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
463
464 // float fq = native_divide(fa, fb);
465 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
466
467 // fq = trunc(fq);
468 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
469
470 // float fqneg = -fq;
471 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
472
473 // float fr = mad(fqneg, fb, fa);
474 SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
475
476 // int iq = (int)fq;
477 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
478
479 // fr = fabs(fr);
480 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
481
482 // fb = fabs(fb);
483 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
484
485 // int cv = fr >= fb;
486 SDValue cv;
487 if (INTTY == MVT::i32) {
488 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
489 } else {
490 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
491 }
492 // jq = (cv ? jq : 0);
493 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
494 DAG.getConstant(0, OVT));
495 // dst = iq + jq;
496 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
497 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
498 return iq;
499}
500
501SDValue
502AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
503{
504 DebugLoc DL = Op.getDebugLoc();
505 EVT OVT = Op.getValueType();
506 SDValue LHS = Op.getOperand(0);
507 SDValue RHS = Op.getOperand(1);
508 // The LowerSDIV32 function generates equivalent to the following IL.
509 // mov r0, LHS
510 // mov r1, RHS
511 // ilt r10, r0, 0
512 // ilt r11, r1, 0
513 // iadd r0, r0, r10
514 // iadd r1, r1, r11
515 // ixor r0, r0, r10
516 // ixor r1, r1, r11
517 // udiv r0, r0, r1
518 // ixor r10, r10, r11
519 // iadd r0, r0, r10
520 // ixor DST, r0, r10
521
522 // mov r0, LHS
523 SDValue r0 = LHS;
524
525 // mov r1, RHS
526 SDValue r1 = RHS;
527
528 // ilt r10, r0, 0
529 SDValue r10 = DAG.getSelectCC(DL,
530 r0, DAG.getConstant(0, OVT),
531 DAG.getConstant(-1, MVT::i32),
532 DAG.getConstant(0, MVT::i32),
533 ISD::SETLT);
534
535 // ilt r11, r1, 0
536 SDValue r11 = DAG.getSelectCC(DL,
537 r1, DAG.getConstant(0, OVT),
538 DAG.getConstant(-1, MVT::i32),
539 DAG.getConstant(0, MVT::i32),
540 ISD::SETLT);
541
542 // iadd r0, r0, r10
543 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
544
545 // iadd r1, r1, r11
546 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
547
548 // ixor r0, r0, r10
549 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
550
551 // ixor r1, r1, r11
552 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
553
554 // udiv r0, r0, r1
555 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
556
557 // ixor r10, r10, r11
558 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
559
560 // iadd r0, r0, r10
561 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
562
563 // ixor DST, r0, r10
564 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
565 return DST;
566}
567
568SDValue
569AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
570{
571 return SDValue(Op.getNode(), 0);
572}
573
574SDValue
575AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
576{
577 DebugLoc DL = Op.getDebugLoc();
578 EVT OVT = Op.getValueType();
579 MVT INTTY = MVT::i32;
580 if (OVT == MVT::v2i8) {
581 INTTY = MVT::v2i32;
582 } else if (OVT == MVT::v4i8) {
583 INTTY = MVT::v4i32;
584 }
585 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
586 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
587 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
588 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
589 return LHS;
590}
591
592SDValue
593AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
594{
595 DebugLoc DL = Op.getDebugLoc();
596 EVT OVT = Op.getValueType();
597 MVT INTTY = MVT::i32;
598 if (OVT == MVT::v2i16) {
599 INTTY = MVT::v2i32;
600 } else if (OVT == MVT::v4i16) {
601 INTTY = MVT::v4i32;
602 }
603 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
604 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
605 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
606 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
607 return LHS;
608}
609
610SDValue
611AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
612{
613 DebugLoc DL = Op.getDebugLoc();
614 EVT OVT = Op.getValueType();
615 SDValue LHS = Op.getOperand(0);
616 SDValue RHS = Op.getOperand(1);
617 // The LowerSREM32 function generates equivalent to the following IL.
618 // mov r0, LHS
619 // mov r1, RHS
620 // ilt r10, r0, 0
621 // ilt r11, r1, 0
622 // iadd r0, r0, r10
623 // iadd r1, r1, r11
624 // ixor r0, r0, r10
625 // ixor r1, r1, r11
626 // udiv r20, r0, r1
627 // umul r20, r20, r1
628 // sub r0, r0, r20
629 // iadd r0, r0, r10
630 // ixor DST, r0, r10
631
632 // mov r0, LHS
633 SDValue r0 = LHS;
634
635 // mov r1, RHS
636 SDValue r1 = RHS;
637
638 // ilt r10, r0, 0
639 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
640
641 // ilt r11, r1, 0
642 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
643
644 // iadd r0, r0, r10
645 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
646
647 // iadd r1, r1, r11
648 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
649
650 // ixor r0, r0, r10
651 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
652
653 // ixor r1, r1, r11
654 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
655
656 // udiv r20, r0, r1
657 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
658
659 // umul r20, r20, r1
660 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
661
662 // sub r0, r0, r20
663 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
664
665 // iadd r0, r0, r10
666 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
667
668 // ixor DST, r0, r10
669 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
670 return DST;
671}
672
673SDValue
674AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
675{
676 return SDValue(Op.getNode(), 0);
677}
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td
deleted file mode 100644
index 050a5bd874f..00000000000
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.td
+++ /dev/null
@@ -1,270 +0,0 @@
1//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file describes the AMDIL instructions in TableGen format.
11//
12//===----------------------------------------------------------------------===//
13// AMDIL Instruction Predicate Definitions
14// Predicate that is set to true if the hardware supports double precision
15// divide
16def HasHWDDiv : Predicate<"Subtarget.device()"
17 "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
18 "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
19
20// Predicate that is set to true if the hardware supports double, but not double
21// precision divide in hardware
22def HasSWDDiv : Predicate<"Subtarget.device()"
23 "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
24 "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
25
26// Predicate that is set to true if the hardware supports 24bit signed
27// math ops. Otherwise a software expansion to 32bit math ops is used instead.
28def HasHWSign24Bit : Predicate<"Subtarget.device()"
29 "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
30
31// Predicate that is set to true if 64bit operations are supported or not
32def HasHW64Bit : Predicate<"Subtarget.device()"
33 "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
34def HasSW64Bit : Predicate<"Subtarget.device()"
35 "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
36
37// Predicate that is set to true if the timer register is supported
38def HasTmrRegister : Predicate<"Subtarget.device()"
39 "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
40// Predicate that is true if we are at least evergreen series
41def HasDeviceIDInst : Predicate<"Subtarget.device()"
42 "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
43
44// Predicate that is true if we have region address space.
45def hasRegionAS : Predicate<"Subtarget.device()"
46 "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
47
48// Predicate that is false if we don't have region address space.
49def noRegionAS : Predicate<"!Subtarget.device()"
50 "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
51
52
53// Predicate that is set to true if 64bit Mul is supported in the IL or not
54def HasHW64Mul : Predicate<"Subtarget.calVersion()"
55 ">= CAL_VERSION_SC_139"
56 "&& Subtarget.device()"
57 "->getGeneration() >="
58 "AMDGPUDeviceInfo::HD5XXX">;
59def HasSW64Mul : Predicate<"Subtarget.calVersion()"
60 "< CAL_VERSION_SC_139">;
61// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
62def HasHW64DivMod : Predicate<"Subtarget.device()"
63 "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
64def HasSW64DivMod : Predicate<"Subtarget.device()"
65 "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
66
67// Predicate that is set to true if 64bit pointer are used.
68def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
69def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
70//===--------------------------------------------------------------------===//
71// Custom Operands
72//===--------------------------------------------------------------------===//
73def brtarget : Operand<OtherVT>;
74
75//===--------------------------------------------------------------------===//
76// Custom Selection DAG Type Profiles
77//===--------------------------------------------------------------------===//
78//===----------------------------------------------------------------------===//
79// Generic Profile Types
80//===----------------------------------------------------------------------===//
81
82def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
83 SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
84 ]>;
85def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
86 SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
87 ]>;
88def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
89 SDTCisEltOfVec<1, 0>
90 ]>;
91
92//===----------------------------------------------------------------------===//
93// Flow Control Profile Types
94//===----------------------------------------------------------------------===//
95// Branch instruction where second and third are basic blocks
96def SDTIL_BRCond : SDTypeProfile<0, 2, [
97 SDTCisVT<0, OtherVT>
98 ]>;
99
100//===--------------------------------------------------------------------===//
101// Custom Selection DAG Nodes
102//===--------------------------------------------------------------------===//
103//===----------------------------------------------------------------------===//
104// Flow Control DAG Nodes
105//===----------------------------------------------------------------------===//
106def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
107
108//===----------------------------------------------------------------------===//
109// Call/Return DAG Nodes
110//===----------------------------------------------------------------------===//
111def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
112 [SDNPHasChain, SDNPOptInGlue]>;
113
114//===--------------------------------------------------------------------===//
115// Instructions
116//===--------------------------------------------------------------------===//
117// Floating point math functions
118def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
119def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
120
121//===----------------------------------------------------------------------===//
122// Integer functions
123//===----------------------------------------------------------------------===//
124def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
125 [SDNPCommutative, SDNPAssociative]>;
126
127//===--------------------------------------------------------------------===//
128// Custom Pattern DAG Nodes
129//===--------------------------------------------------------------------===//
130def global_store : PatFrag<(ops node:$val, node:$ptr),
131 (store node:$val, node:$ptr), [{
132 return isGlobalStore(dyn_cast<StoreSDNode>(N));
133}]>;
134
135//===----------------------------------------------------------------------===//
136// Load pattern fragments
137//===----------------------------------------------------------------------===//
138// Global address space loads
139def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
140 return isGlobalLoad(dyn_cast<LoadSDNode>(N));
141}]>;
142// Constant address space loads
143def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
144 return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
145}]>;
146
147//===----------------------------------------------------------------------===//
148// Complex addressing mode patterns
149//===----------------------------------------------------------------------===//
150def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
151def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
152def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
153def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
154
155//===----------------------------------------------------------------------===//
156// Instruction format classes
157//===----------------------------------------------------------------------===//
158class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
159: Instruction {
160
161 let Namespace = "AMDGPU";
162 dag OutOperandList = outs;
163 dag InOperandList = ins;
164 let Pattern = pattern;
165 let AsmString = !strconcat(asmstr, "\n");
166 let isPseudo = 1;
167 let Itinerary = NullALU;
168 bit hasIEEEFlag = 0;
169 bit hasZeroOpFlag = 0;
170}
171
172//===--------------------------------------------------------------------===//
173// Multiclass Instruction formats
174//===--------------------------------------------------------------------===//
175// Multiclass that handles branch instructions
176multiclass BranchConditional<SDNode Op> {
177 def _i32 : ILFormat<(outs),
178 (ins brtarget:$target, GPRI32:$src0),
179 "; i32 Pseudo branch instruction",
180 [(Op bb:$target, GPRI32:$src0)]>;
181 def _f32 : ILFormat<(outs),
182 (ins brtarget:$target, GPRF32:$src0),
183 "; f32 Pseudo branch instruction",
184 [(Op bb:$target, GPRF32:$src0)]>;
185}
186
187// Only scalar types should generate flow control
188multiclass BranchInstr<string name> {
189 def _i32 : ILFormat<(outs), (ins GPRI32:$src),
190 !strconcat(name, " $src"), []>;
191 def _f32 : ILFormat<(outs), (ins GPRF32:$src),
192 !strconcat(name, " $src"), []>;
193}
194// Only scalar types should generate flow control
195multiclass BranchInstr2<string name> {
196 def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
197 !strconcat(name, " $src0, $src1"), []>;
198 def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
199 !strconcat(name, " $src0, $src1"), []>;
200}
201
202//===--------------------------------------------------------------------===//
203// Intrinsics support
204//===--------------------------------------------------------------------===//
205include "AMDILIntrinsics.td"
206
207//===--------------------------------------------------------------------===//
208// Instructions support
209//===--------------------------------------------------------------------===//
210//===---------------------------------------------------------------------===//
211// Custom Inserter for Branches and returns, this eventually will be a
212// separate pass
213//===---------------------------------------------------------------------===//
214let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
215 def BRANCH : ILFormat<(outs), (ins brtarget:$target),
216 "; Pseudo unconditional branch instruction",
217 [(br bb:$target)]>;
218 defm BRANCH_COND : BranchConditional<IL_brcond>;
219}
220
221//===---------------------------------------------------------------------===//
222// Flow and Program control Instructions
223//===---------------------------------------------------------------------===//
224let isTerminator=1 in {
225 def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
226 !strconcat("SWITCH", " $src"), []>;
227 def CASE : ILFormat< (outs), (ins GPRI32:$src),
228 !strconcat("CASE", " $src"), []>;
229 def BREAK : ILFormat< (outs), (ins),
230 "BREAK", []>;
231 def CONTINUE : ILFormat< (outs), (ins),
232 "CONTINUE", []>;
233 def DEFAULT : ILFormat< (outs), (ins),
234 "DEFAULT", []>;
235 def ELSE : ILFormat< (outs), (ins),
236 "ELSE", []>;
237 def ENDSWITCH : ILFormat< (outs), (ins),
238 "ENDSWITCH", []>;
239 def ENDMAIN : ILFormat< (outs), (ins),
240 "ENDMAIN", []>;
241 def END : ILFormat< (outs), (ins),
242 "END", []>;
243 def ENDFUNC : ILFormat< (outs), (ins),
244 "ENDFUNC", []>;
245 def ENDIF : ILFormat< (outs), (ins),
246 "ENDIF", []>;
247 def WHILELOOP : ILFormat< (outs), (ins),
248 "WHILE", []>;
249 def ENDLOOP : ILFormat< (outs), (ins),
250 "ENDLOOP", []>;
251 def FUNC : ILFormat< (outs), (ins),
252 "FUNC", []>;
253 def RETDYN : ILFormat< (outs), (ins),
254 "RET_DYN", []>;
255 // This opcode has custom swizzle pattern encoded in Swizzle Encoder
256 defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
257 // This opcode has custom swizzle pattern encoded in Swizzle Encoder
258 defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
259 // This opcode has custom swizzle pattern encoded in Swizzle Encoder
260 defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
261 // This opcode has custom swizzle pattern encoded in Swizzle Encoder
262 defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
263 // This opcode has custom swizzle pattern encoded in Swizzle Encoder
264 defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
265 // This opcode has custom swizzle pattern encoded in Swizzle Encoder
266 defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
267 defm IFC : BranchInstr2<"IFC">;
268 defm BREAKC : BranchInstr2<"BREAKC">;
269 defm CONTINUEC : BranchInstr2<"CONTINUEC">;
270}
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp
deleted file mode 100644
index 23df3822a5a..00000000000
--- a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file contains the AMDIL Implementation of the IntrinsicInfo class.
11//
12//===-----------------------------------------------------------------------===//
13
14#include "AMDILIntrinsicInfo.h"
15#include "AMDIL.h"
16#include "AMDGPUSubtarget.h"
17#include "llvm/DerivedTypes.h"
18#include "llvm/Intrinsics.h"
19#include "llvm/Module.h"
20
21using namespace llvm;
22
23#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
24#include "AMDGPUGenIntrinsics.inc"
25#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
26
27AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
28 : TargetIntrinsicInfo()
29{
30}
31
32std::string
33AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
34 unsigned int numTys) const
35{
36 static const char* const names[] = {
37#define GET_INTRINSIC_NAME_TABLE
38#include "AMDGPUGenIntrinsics.inc"
39#undef GET_INTRINSIC_NAME_TABLE
40 };
41
42 //assert(!isOverloaded(IntrID)
43 //&& "AMDGPU Intrinsics are not overloaded");
44 if (IntrID < Intrinsic::num_intrinsics) {
45 return 0;
46 }
47 assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
48 && "Invalid intrinsic ID");
49
50 std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
51 return Result;
52}
53
54unsigned int
55AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
56{
57#define GET_FUNCTION_RECOGNIZER
58#include "AMDGPUGenIntrinsics.inc"
59#undef GET_FUNCTION_RECOGNIZER
60 AMDGPUIntrinsic::ID IntrinsicID
61 = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
62 IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
63
64 if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
65 return IntrinsicID;
66 }
67 return 0;
68}
69
70bool
71AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const
72{
73 // Overload Table
74#define GET_INTRINSIC_OVERLOAD_TABLE
75#include "AMDGPUGenIntrinsics.inc"
76#undef GET_INTRINSIC_OVERLOAD_TABLE
77}
78
79/// This defines the "getAttributes(ID id)" method.
80#define GET_INTRINSIC_ATTRIBUTES
81#include "AMDGPUGenIntrinsics.inc"
82#undef GET_INTRINSIC_ATTRIBUTES
83
84Function*
85AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
86 Type **Tys,
87 unsigned numTys) const
88{
89 //Silence a warning
90 AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
91 (void)List;
92 assert(!"Not implemented");
93}
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h
deleted file mode 100644
index 9ae3d4dc112..00000000000
--- a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h
+++ /dev/null
@@ -1,47 +0,0 @@
1//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Interface for the AMDIL Implementation of the Intrinsic Info class.
11//
12//===-----------------------------------------------------------------------===//
13#ifndef _AMDIL_INTRINSICS_H_
14#define _AMDIL_INTRINSICS_H_
15
16#include "llvm/Intrinsics.h"
17#include "llvm/Target/TargetIntrinsicInfo.h"
18
19namespace llvm {
20 class TargetMachine;
21 namespace AMDGPUIntrinsic {
22 enum ID {
23 last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
24#define GET_INTRINSIC_ENUM_VALUES
25#include "AMDGPUGenIntrinsics.inc"
26#undef GET_INTRINSIC_ENUM_VALUES
27 , num_AMDGPU_intrinsics
28 };
29
30 }
31
32
33 class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
34 public:
35 AMDGPUIntrinsicInfo(TargetMachine *tm);
36 std::string getName(unsigned int IntrId, Type **Tys = 0,
37 unsigned int numTys = 0) const;
38 unsigned int lookupName(const char *Name, unsigned int Len) const;
39 bool isOverloaded(unsigned int IID) const;
40 Function *getDeclaration(Module *M, unsigned int ID,
41 Type **Tys = 0,
42 unsigned int numTys = 0) const;
43 }; // AMDGPUIntrinsicInfo
44}
45
46#endif // _AMDIL_INTRINSICS_H_
47
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsics.td b/src/gallium/drivers/radeon/AMDILIntrinsics.td
deleted file mode 100644
index 3f9e20f0c85..00000000000
--- a/src/gallium/drivers/radeon/AMDILIntrinsics.td
+++ /dev/null
@@ -1,242 +0,0 @@
1//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file defines all of the amdil-specific intrinsics
11//
12//===---------------------------------------------------------------===//
13//===--------------------------------------------------------------------===//
14// Intrinsic classes
15// Generic versions of the above classes but for Target specific intrinsics
16// instead of SDNode patterns.
17//===--------------------------------------------------------------------===//
18let TargetPrefix = "AMDIL", isTarget = 1 in {
19 class VoidIntLong :
20 Intrinsic<[llvm_i64_ty], [], []>;
21 class VoidIntInt :
22 Intrinsic<[llvm_i32_ty], [], []>;
23 class VoidIntBool :
24 Intrinsic<[llvm_i32_ty], [], []>;
25 class UnaryIntInt :
26 Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
27 class UnaryIntFloat :
28 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
29 class ConvertIntFTOI :
30 Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
31 class ConvertIntITOF :
32 Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
33 class UnaryIntNoRetInt :
34 Intrinsic<[], [llvm_anyint_ty], []>;
35 class UnaryIntNoRetFloat :
36 Intrinsic<[], [llvm_anyfloat_ty], []>;
37 class BinaryIntInt :
38 Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
39 class BinaryIntFloat :
40 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
41 class BinaryIntNoRetInt :
42 Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
43 class BinaryIntNoRetFloat :
44 Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
45 class TernaryIntInt :
46 Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
47 LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
48 class TernaryIntFloat :
49 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
50 LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
51 class QuaternaryIntInt :
52 Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
53 LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
54 class UnaryAtomicInt :
55 Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
56 class BinaryAtomicInt :
57 Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
58 class TernaryAtomicInt :
59 Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
60 class UnaryAtomicIntNoRet :
61 Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
62 class BinaryAtomicIntNoRet :
63 Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
64 class TernaryAtomicIntNoRet :
65 Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
66}
67
68let TargetPrefix = "AMDIL", isTarget = 1 in {
69 def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
70
71 def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
72 TernaryIntInt;
73 def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
74 TernaryIntInt;
75 def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
76 UnaryIntInt;
77 def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
78 UnaryIntInt;
79 def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
80 UnaryIntInt;
81 def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
82 UnaryIntInt;
83 def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
84 UnaryIntInt;
85 def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
86 TernaryIntInt;
87 def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
88 TernaryIntInt;
89 def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
90 QuaternaryIntInt;
91 def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
92 TernaryIntInt;
93 def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
94 BinaryIntInt;
95 def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
96 TernaryIntInt;
97 def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
98 TernaryIntInt;
99 def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
100 TernaryIntFloat;
101 def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
102 BinaryIntInt;
103 def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
104 BinaryIntInt;
105 def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
106 BinaryIntInt;
107 def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
108 BinaryIntInt;
109 def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
110 BinaryIntInt;
111 def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
112 BinaryIntInt;
113 def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
114 TernaryIntInt;
115 def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
116 TernaryIntInt;
117 def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
118 BinaryIntInt;
119 def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
120 BinaryIntInt;
121 def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
122 BinaryIntInt;
123 def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
124 BinaryIntInt;
125 def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
126 BinaryIntFloat;
127 def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
128 BinaryIntInt;
129 def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
130 BinaryIntInt;
131 def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
132 BinaryIntFloat;
133 def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
134 TernaryIntInt;
135 def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
136 TernaryIntInt;
137 def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
138 TernaryIntInt;
139 def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
140 UnaryIntFloat;
141 def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
142 TernaryIntFloat;
143 def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
144 UnaryIntFloat;
145 def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
146 UnaryIntFloat;
147 def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
148 UnaryIntFloat;
149 def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
150 UnaryIntFloat;
151 def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
152 UnaryIntFloat;
153 def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
154 UnaryIntFloat;
155 def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
156 UnaryIntFloat;
157 def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
158 UnaryIntFloat;
159 def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
160 UnaryIntFloat;
161 def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
162 UnaryIntFloat;
163 def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
164 UnaryIntFloat;
165 def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
166 UnaryIntFloat;
167 def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
168 def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
169 def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
170 def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
171 UnaryIntFloat;
172 def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
173 UnaryIntFloat;
174 def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
175 UnaryIntFloat;
176 def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
177 UnaryIntFloat;
178 def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
179 UnaryIntFloat;
180 def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
181 UnaryIntFloat;
182 def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
183 UnaryIntFloat;
184 def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
185 UnaryIntFloat;
186 def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
187 TernaryIntFloat;
188 def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
189 UnaryIntFloat;
190 def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
191 UnaryIntFloat;
192 def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
193 UnaryIntFloat;
194 def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
195 TernaryIntFloat;
196 def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
197 Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
198 llvm_v4i32_ty, llvm_i32_ty], []>;
199
200 def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
201 Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
202 def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
203 Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
204 def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
205 Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
206 def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
207 ConvertIntITOF;
208 def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
209 ConvertIntFTOI;
210 def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
211 ConvertIntFTOI;
212 def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
213 ConvertIntFTOI;
214 def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
215 ConvertIntFTOI;
216 def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
217 ConvertIntFTOI;
218 def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
219 ConvertIntFTOI;
220 def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
221 Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
222 def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
223 ConvertIntITOF;
224 def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
225 ConvertIntITOF;
226 def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
227 ConvertIntITOF;
228 def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
229 ConvertIntITOF;
230 def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
231 Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
232 llvm_v2f32_ty, llvm_float_ty], []>;
233 def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
234 Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
235 llvm_v2f32_ty], []>;
236 def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
237 Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
238 llvm_v4f32_ty], []>;
239 def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
240 Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
241 llvm_v4f32_ty], []>;
242}
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp
deleted file mode 100644
index 0ebbc9d1e06..00000000000
--- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
1//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#include "AMDILNIDevice.h"
10#include "AMDILEvergreenDevice.h"
11#include "AMDGPUSubtarget.h"
12
13using namespace llvm;
14
15AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
16 : AMDGPUEvergreenDevice(ST)
17{
18 std::string name = ST->getDeviceName();
19 if (name == "caicos") {
20 mDeviceFlag = OCL_DEVICE_CAICOS;
21 } else if (name == "turks") {
22 mDeviceFlag = OCL_DEVICE_TURKS;
23 } else if (name == "cayman") {
24 mDeviceFlag = OCL_DEVICE_CAYMAN;
25 } else {
26 mDeviceFlag = OCL_DEVICE_BARTS;
27 }
28}
29AMDGPUNIDevice::~AMDGPUNIDevice()
30{
31}
32
33size_t
34AMDGPUNIDevice::getMaxLDSSize() const
35{
36 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
37 return MAX_LDS_SIZE_900;
38 } else {
39 return 0;
40 }
41}
42
43uint32_t
44AMDGPUNIDevice::getGeneration() const
45{
46 return AMDGPUDeviceInfo::HD6XXX;
47}
48
49
50AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
51 : AMDGPUNIDevice(ST)
52{
53 setCaps();
54}
55
56AMDGPUCaymanDevice::~AMDGPUCaymanDevice()
57{
58}
59
60void
61AMDGPUCaymanDevice::setCaps()
62{
63 if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
64 mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
65 mHWBits.set(AMDGPUDeviceInfo::FMA);
66 }
67 mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
68 mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
69 mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
70}
71
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.h b/src/gallium/drivers/radeon/AMDILNIDevice.h
deleted file mode 100644
index 387f7d1c3b7..00000000000
--- a/src/gallium/drivers/radeon/AMDILNIDevice.h
+++ /dev/null
@@ -1,59 +0,0 @@
1//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Interface for the subtarget data classes.
11//
12//===---------------------------------------------------------------------===//
13// This file will define the interface that each generation needs to
14// implement in order to correctly answer queries on the capabilities of the
15// specific hardware.
16//===---------------------------------------------------------------------===//
17#ifndef _AMDILNIDEVICE_H_
18#define _AMDILNIDEVICE_H_
19#include "AMDILEvergreenDevice.h"
20#include "AMDGPUSubtarget.h"
21
22namespace llvm {
23 class AMDGPUSubtarget;
24//===---------------------------------------------------------------------===//
25// NI generation of devices and their respective sub classes
26//===---------------------------------------------------------------------===//
27
28// The AMDGPUNIDevice is the base class for all Northern Island series of
29// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
30// exception being differences in wavefront size and hardware capabilities. The
31// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
32// integer operations
33
34 class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
35 public:
36 AMDGPUNIDevice(AMDGPUSubtarget*);
37 virtual ~AMDGPUNIDevice();
38 virtual size_t getMaxLDSSize() const;
39 virtual uint32_t getGeneration() const;
40 protected:
41 }; // AMDGPUNIDevice
42
43// Just as the AMDGPUCypressDevice is the double capable version of the
44// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version of
45// the AMDGPUNIDevice. The other major difference that is not as useful from
46// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
47// NI family is a 5 wide.
48
49 class AMDGPUCaymanDevice: public AMDGPUNIDevice {
50 public:
51 AMDGPUCaymanDevice(AMDGPUSubtarget*);
52 virtual ~AMDGPUCaymanDevice();
53 private:
54 virtual void setCaps();
55 }; // AMDGPUCaymanDevice
56
57 static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
58} // namespace llvm
59#endif // _AMDILNIDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
deleted file mode 100644
index f869b332e53..00000000000
--- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
+++ /dev/null
@@ -1,1275 +0,0 @@
1//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9
10#include "AMDILDevices.h"
11#include "AMDGPUInstrInfo.h"
12#include "llvm/ADT/Statistic.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/ADT/Twine.h"
16#include "llvm/Constants.h"
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/MachineFunctionAnalysis.h"
19#include "llvm/Function.h"
20#include "llvm/Instructions.h"
21#include "llvm/Module.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/MathExtras.h"
24
25#include <sstream>
26
27#if 0
28STATISTIC(PointerAssignments, "Number of dynamic pointer "
29 "assigments discovered");
30STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
31#endif
32
33using namespace llvm;
34// The Peephole optimization pass is used to do simple last minute optimizations
35// that are required for correct code or to remove redundant functions
36namespace {
37
38class OpaqueType;
39
40class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
41public:
42 TargetMachine &TM;
43 static char ID;
44 AMDGPUPeepholeOpt(TargetMachine &tm);
45 ~AMDGPUPeepholeOpt();
46 const char *getPassName() const;
47 bool runOnFunction(Function &F);
48 bool doInitialization(Module &M);
49 bool doFinalization(Module &M);
50 void getAnalysisUsage(AnalysisUsage &AU) const;
51protected:
52private:
53 // Function to initiate all of the instruction level optimizations.
54 bool instLevelOptimizations(BasicBlock::iterator *inst);
55 // Quick check to see if we need to dump all of the pointers into the
56 // arena. If this is correct, then we set all pointers to exist in arena. This
57 // is a workaround for aliasing of pointers in a struct/union.
58 bool dumpAllIntoArena(Function &F);
59 // Because I don't want to invalidate any pointers while in the
60 // safeNestedForEachFunction. I push atomic conversions to a vector and handle
61 // it later. This function does the conversions if required.
62 void doAtomicConversionIfNeeded(Function &F);
63 // Because __amdil_is_constant cannot be properly evaluated if
64 // optimizations are disabled, the call's are placed in a vector
65 // and evaluated after the __amdil_image* functions are evaluated
66 // which should allow the __amdil_is_constant function to be
67 // evaluated correctly.
68 void doIsConstCallConversionIfNeeded();
69 bool mChanged;
70 bool mDebug;
71 bool mConvertAtomics;
72 CodeGenOpt::Level optLevel;
73 // Run a series of tests to see if we can optimize a CALL instruction.
74 bool optimizeCallInst(BasicBlock::iterator *bbb);
75 // A peephole optimization to optimize bit extract sequences.
76 bool optimizeBitExtract(Instruction *inst);
77 // A peephole optimization to optimize bit insert sequences.
78 bool optimizeBitInsert(Instruction *inst);
79 bool setupBitInsert(Instruction *base,
80 Instruction *&src,
81 Constant *&mask,
82 Constant *&shift);
83 // Expand the bit field insert instruction on versions of OpenCL that
84 // don't support it.
85 bool expandBFI(CallInst *CI);
86 // Expand the bit field mask instruction on version of OpenCL that
87 // don't support it.
88 bool expandBFM(CallInst *CI);
89 // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
90 // this case we need to expand them. These functions check for 24bit functions
91 // and then expand.
92 bool isSigned24BitOps(CallInst *CI);
93 void expandSigned24BitOps(CallInst *CI);
94 // One optimization that can occur is that if the required workgroup size is
95 // specified then the result of get_local_size is known at compile time and
96 // can be returned accordingly.
97 bool isRWGLocalOpt(CallInst *CI);
98 // On northern island cards, the division is slightly less accurate than on
99 // previous generations, so we need to utilize a more accurate division. So we
100 // can translate the accurate divide to a normal divide on all other cards.
101 bool convertAccurateDivide(CallInst *CI);
102 void expandAccurateDivide(CallInst *CI);
103 // If the alignment is set incorrectly, it can produce really inefficient
104 // code. This checks for this scenario and fixes it if possible.
105 bool correctMisalignedMemOp(Instruction *inst);
106
107 // If we are in no opt mode, then we need to make sure that
108 // local samplers are properly propagated as constant propagation
109 // doesn't occur and we need to know the value of kernel defined
110 // samplers at compile time.
111 bool propagateSamplerInst(CallInst *CI);
112
113 // Helper functions
114
115 // Group of functions that recursively calculate the size of a structure based
116 // on it's sub-types.
117 size_t getTypeSize(Type * const T, bool dereferencePtr = false);
118 size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
119 size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
120 size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
121 size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
122 size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
123 size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
124 size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
125
126 LLVMContext *mCTX;
127 Function *mF;
128 const AMDGPUSubtarget *mSTM;
129 SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
130 SmallVector<CallInst *, 16> isConstVec;
131}; // class AMDGPUPeepholeOpt
132 char AMDGPUPeepholeOpt::ID = 0;
133
134// A template function that has two levels of looping before calling the
135// function with a pointer to the current iterator.
136template<class InputIterator, class SecondIterator, class Function>
137Function safeNestedForEach(InputIterator First, InputIterator Last,
138 SecondIterator S, Function F)
139{
140 for ( ; First != Last; ++First) {
141 SecondIterator sf, sl;
142 for (sf = First->begin(), sl = First->end();
143 sf != sl; ) {
144 if (!F(&sf)) {
145 ++sf;
146 }
147 }
148 }
149 return F;
150}
151
152} // anonymous namespace
153
154namespace llvm {
155 FunctionPass *
156 createAMDGPUPeepholeOpt(TargetMachine &tm)
157 {
158 return new AMDGPUPeepholeOpt(tm);
159 }
160} // llvm namespace
161
162AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
163 : FunctionPass(ID), TM(tm)
164{
165 mDebug = false;
166 optLevel = TM.getOptLevel();
167
168}
169
170AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()
171{
172}
173
174const char *
175AMDGPUPeepholeOpt::getPassName() const
176{
177 return "AMDGPU PeepHole Optimization Pass";
178}
179
180bool
181containsPointerType(Type *Ty)
182{
183 if (!Ty) {
184 return false;
185 }
186 switch(Ty->getTypeID()) {
187 default:
188 return false;
189 case Type::StructTyID: {
190 const StructType *ST = dyn_cast<StructType>(Ty);
191 for (StructType::element_iterator stb = ST->element_begin(),
192 ste = ST->element_end(); stb != ste; ++stb) {
193 if (!containsPointerType(*stb)) {
194 continue;
195 }
196 return true;
197 }
198 break;
199 }
200 case Type::VectorTyID:
201 case Type::ArrayTyID:
202 return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
203 case Type::PointerTyID:
204 return true;
205 };
206 return false;
207}
208
209bool
210AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)
211{
212 bool dumpAll = false;
213 for (Function::const_arg_iterator cab = F.arg_begin(),
214 cae = F.arg_end(); cab != cae; ++cab) {
215 const Argument *arg = cab;
216 const PointerType *PT = dyn_cast<PointerType>(arg->getType());
217 if (!PT) {
218 continue;
219 }
220 Type *DereferencedType = PT->getElementType();
221 if (!dyn_cast<StructType>(DereferencedType)
222 ) {
223 continue;
224 }
225 if (!containsPointerType(DereferencedType)) {
226 continue;
227 }
228 // FIXME: Because a pointer inside of a struct/union may be aliased to
229 // another pointer we need to take the conservative approach and place all
230 // pointers into the arena until more advanced detection is implemented.
231 dumpAll = true;
232 }
233 return dumpAll;
234}
235void
236AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded()
237{
238 if (isConstVec.empty()) {
239 return;
240 }
241 for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
242 CallInst *CI = isConstVec[x];
243 Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
244 Type *aType = Type::getInt32Ty(*mCTX);
245 Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
246 : ConstantInt::get(aType, 0);
247 CI->replaceAllUsesWith(Val);
248 CI->eraseFromParent();
249 }
250 isConstVec.clear();
251}
252void
253AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
254{
255 // Don't do anything if we don't have any atomic operations.
256 if (atomicFuncs.empty()) {
257 return;
258 }
259 // Change the function name for the atomic if it is required
260 uint32_t size = atomicFuncs.size();
261 for (uint32_t x = 0; x < size; ++x) {
262 atomicFuncs[x].first->setOperand(
263 atomicFuncs[x].first->getNumOperands()-1,
264 atomicFuncs[x].second);
265
266 }
267 mChanged = true;
268 if (mConvertAtomics) {
269 return;
270 }
271}
272
273bool
274AMDGPUPeepholeOpt::runOnFunction(Function &MF)
275{
276 mChanged = false;
277 mF = &MF;
278 mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
279 if (mDebug) {
280 MF.dump();
281 }
282 mCTX = &MF.getType()->getContext();
283 mConvertAtomics = true;
284 safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
285 std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
286 this));
287
288 doAtomicConversionIfNeeded(MF);
289 doIsConstCallConversionIfNeeded();
290
291 if (mDebug) {
292 MF.dump();
293 }
294 return mChanged;
295}
296
297bool
298AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
299{
300 Instruction *inst = (*bbb);
301 CallInst *CI = dyn_cast<CallInst>(inst);
302 if (!CI) {
303 return false;
304 }
305 if (isSigned24BitOps(CI)) {
306 expandSigned24BitOps(CI);
307 ++(*bbb);
308 CI->eraseFromParent();
309 return true;
310 }
311 if (propagateSamplerInst(CI)) {
312 return false;
313 }
314 if (expandBFI(CI) || expandBFM(CI)) {
315 ++(*bbb);
316 CI->eraseFromParent();
317 return true;
318 }
319 if (convertAccurateDivide(CI)) {
320 expandAccurateDivide(CI);
321 ++(*bbb);
322 CI->eraseFromParent();
323 return true;
324 }
325
326 StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
327 if (calleeName.startswith("__amdil_is_constant")) {
328 // If we do not have optimizations, then this
329 // cannot be properly evaluated, so we add the
330 // call instruction to a vector and process
331 // them at the end of processing after the
332 // samplers have been correctly handled.
333 if (optLevel == CodeGenOpt::None) {
334 isConstVec.push_back(CI);
335 return false;
336 } else {
337 Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
338 Type *aType = Type::getInt32Ty(*mCTX);
339 Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
340 : ConstantInt::get(aType, 0);
341 CI->replaceAllUsesWith(Val);
342 ++(*bbb);
343 CI->eraseFromParent();
344 return true;
345 }
346 }
347
348 if (calleeName.equals("__amdil_is_asic_id_i32")) {
349 ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
350 Type *aType = Type::getInt32Ty(*mCTX);
351 Value *Val = CV;
352 if (Val) {
353 Val = ConstantInt::get(aType,
354 mSTM->device()->getDeviceFlag() & CV->getZExtValue());
355 } else {
356 Val = ConstantInt::get(aType, 0);
357 }
358 CI->replaceAllUsesWith(Val);
359 ++(*bbb);
360 CI->eraseFromParent();
361 return true;
362 }
363 Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
364 if (!F) {
365 return false;
366 }
367 if (F->getName().startswith("__atom") && !CI->getNumUses()
368 && F->getName().find("_xchg") == StringRef::npos) {
369 std::string buffer(F->getName().str() + "_noret");
370 F = dyn_cast<Function>(
371 F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
372 atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
373 }
374
375 if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
376 && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
377 return false;
378 }
379 if (!mConvertAtomics) {
380 return false;
381 }
382 StringRef name = F->getName();
383 if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
384 mConvertAtomics = false;
385 }
386 return false;
387}
388
// Decompose one operand of an OR into the pieces of a potential bit-insert
// pattern.  'base' must be a Shl or an And instruction; on success 'src'
// receives the underlying value and 'mask'/'shift' receive whichever
// constant operands were found (operands left untouched stay as the caller
// initialized them, i.e. NULL).  Returns false when the operand cannot
// participate in the pattern.
bool
AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
    Instruction *&src,
    Constant *&mask,
    Constant *&shift)
{
  if (!base) {
    if (mDebug) {
      dbgs() << "Null pointer passed into function.\n";
    }
    return false;
  }
  bool andOp = false;
  if (base->getOpcode() == Instruction::Shl) {
    // (X << C): record the shift amount if it is constant.
    shift = dyn_cast<Constant>(base->getOperand(1));
  } else if (base->getOpcode() == Instruction::And) {
    // (X & M): record the mask if it is constant.
    mask = dyn_cast<Constant>(base->getOperand(1));
    andOp = true;
  } else {
    if (mDebug) {
      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
    }
    // If the base is neither a Shl or a And, we don't fit any of the patterns above.
    return false;
  }
  src = dyn_cast<Instruction>(base->getOperand(0));
  if (!src) {
    if (mDebug) {
      dbgs() << "Failed setup since the base operand is not an instruction!\n";
    }
    return false;
  }
  // If we find an 'and' operation, then we don't need to
  // find the next operation as we already know the
  // bits that are valid at this point.
  if (andOp) {
    return true;
  }
  // base was a Shl; look one level deeper for ((X & M) << C) or
  // ((X << C2) << C) shapes, filling in whichever constant is still NULL.
  if (src->getOpcode() == Instruction::Shl && !shift) {
    shift = dyn_cast<Constant>(src->getOperand(1));
    src = dyn_cast<Instruction>(src->getOperand(0));
  } else if (src->getOpcode() == Instruction::And && !mask) {
    mask = dyn_cast<Constant>(src->getOperand(1));
  }
  if (!mask && !shift) {
    if (mDebug) {
      dbgs() << "Failed setup since both mask and shift are NULL!\n";
    }
    // Did not find a constant mask or a shift.
    return false;
  }
  return true;
}
// Recognize an OR of masked/shifted values and rewrite it into a call to the
// __amdil_ubit_insert intrinsic (a single ISA instruction on HD5XXX+).
// Returns true when the instruction was rewritten (the caller is expected to
// erase 'inst'); false when the pattern does not match or is unsupported.
bool
AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::Or) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do an optimization on a sequence of ops that in the end equals a
  // single ISA instruction.
  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
  // Some simplified versions of this pattern are as follows:
  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
  // (A & B) | (D << F) when (1 << F) >= B
  // (A << C) | (D & E) when (1 << C) >= E
  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
    // The HD4XXX hardware doesn't support the ubit_insert instruction.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();
  int numEle = 1;
  // This optimization only works on 32bit integers.
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in a intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  // TODO: Handle vectors.
  // NOTE(review): this early return makes the later isVector name-suffix
  // branch dead code until vector support is implemented.
  if (isVector) {
    if (mDebug) {
      dbgs() << "!!! Vectors are not supported yet!\n";
    }
    return false;
  }
  // Decompose both OR operands into (source, mask, shift) triples.
  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
  Constant *LHSMask = NULL, *RHSMask = NULL;
  Constant *LHSShift = NULL, *RHSShift = NULL;
  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (LHS) { LHS->dump(); }
      if (LHSSrc) { LHSSrc->dump(); }
      if (LHSMask) { LHSMask->dump(); }
      if (LHSShift) { LHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (RHS) { RHS->dump(); }
      if (RHSSrc) { RHSSrc->dump(); }
      if (RHSMask) { RHSMask->dump(); }
      if (RHSShift) { RHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (mDebug) {
    dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
    dbgs() << "Op: "; inst->dump();
    dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
  }
  Constant *offset = NULL;
  Constant *width = NULL;
  // Derive numeric mask/shift values; a missing mask means "all bits above
  // the shift", a missing shift means 0.
  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
  lhsMaskVal = (int32_t)(LHSMask
      ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
  rhsMaskVal = (int32_t)(RHSMask
      ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
  lhsShiftVal = (int32_t)(LHSShift
      ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
  rhsShiftVal = (int32_t)(RHSShift
      ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
  // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
  if (mDebug) {
    dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
    dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
    dbgs() << (RHSMask ? " & E)" : ")");
    dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
    dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
    dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
    dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
    dbgs() << "width(B) = " << lhsMaskWidth;
    dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
    dbgs() << "offset(B) = " << lhsMaskOffset;
    dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
    dbgs() << "Constraints: \n";
    dbgs() << "\t(1) B ^ E == 0\n";
    dbgs() << "\t(2-LHS) B is a mask\n";
    dbgs() << "\t(2-LHS) E is a mask\n";
    dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
    dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
  }
  // Constraint 1: the two masks must not overlap (their XOR must be nonzero
  // when either is present).
  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
    if (mDebug) {
      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
      dbgs() << "Failed constraint 1!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "LHS = " << lhsMaskOffset << "";
    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
    dbgs() << "\nRHS = " << rhsMaskOffset << "";
    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
    dbgs() << "\n";
  }
  // Constraint 3: the inserted field must sit entirely above the other
  // operand's bits; whichever side satisfies it becomes the inserted value.
  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
    offset = ConstantInt::get(aType, lhsMaskOffset, false);
    width = ConstantInt::get(aType, lhsMaskWidth, false);
    RHSSrc = RHS;
    // Constraint 2: the field selector must be a (possibly shifted) mask.
    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    // Pre-shift the source down so ubit_insert sees the unshifted field.
    if (!LHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    } else if (lhsShiftVal != lhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing LHS!\n";
    }
  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
    offset = ConstantInt::get(aType, rhsMaskOffset, false);
    width = ConstantInt::get(aType, rhsMaskWidth, false);
    LHSSrc = RHSSrc;
    RHSSrc = LHS;
    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    if (!RHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    } else if (rhsShiftVal != rhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing RHS!\n";
    }
  } else {
    if (mDebug) {
      dbgs() << "Failed constraint 3!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
  }
  if (!offset || !width) {
    if (mDebug) {
      dbgs() << "Either width or offset are NULL, failed detection!\n";
    }
    return false;
  }
  // Lets create the function signature: i32 (i32, i32, i32, i32).
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "__amdil_ubit_insert";
  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
  Function *Func =
    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[4] = {
    width,
    offset,
    LHSSrc,
    RHSSrc
  };
  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
  if (mDebug) {
    dbgs() << "Old Inst: ";
    inst->dump();
    dbgs() << "New Inst: ";
    CI->dump();
    dbgs() << "\n\n";
  }
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}
681
// Rewrite "(A >> B) & C" (C a constant mask) into a single bit-extract
// intrinsic call.  Returns true when the instruction was rewritten (caller
// erases the original AND); false otherwise.
bool
AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::And) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do some simple optimizations on Shift right/And patterns. The
  // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
  // value smaller than 32 and C is a mask. If C is a constant value, then the
  // following transformation can occur. For signed integers, it turns into the
  // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
  // integers, it turns into the function call dst =
  // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
  // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
  // Evergreen hardware.
  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
    // This does not work on HD4XXX hardware.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();

  // XXX Support vector types
  // NOTE(review): this early return makes both isVector branches further
  // down dead code until vector support is added.
  if (isVector) {
    return false;
  }
  int numEle = 1;
  // This only works on 32bit integers
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in a intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
  // If the first operand is not a shift instruction, then we can return as it
  // doesn't match this pattern.
  if (!ShiftInst || !ShiftInst->isShift()) {
    return false;
  }
  // If we are a shift left, then we need don't match this pattern.
  if (ShiftInst->getOpcode() == Instruction::Shl) {
    return false;
  }
  bool isSigned = ShiftInst->isArithmeticShift();
  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
  // Lets make sure that the shift value and the and mask are constant integers.
  if (!AndMask || !ShrVal) {
    return false;
  }
  Constant *newMaskConst;
  Constant *shiftValConst;
  if (isVector) {
    // Handle the vector case
    std::vector<Constant *> maskVals;
    std::vector<Constant *> shiftVals;
    ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
    ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
    Type *scalarType = AndMaskVec->getType()->getScalarType();
    assert(AndMaskVec->getNumOperands() ==
        ShrValVec->getNumOperands() && "cannot have a "
        "combination where the number of elements to a "
        "shift and an and are different!");
    for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
      if (!AndCI || !ShiftIC) {
        return false;
      }
      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
      if (!isMask_32(maskVal)) {
        return false;
      }
      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
      // If the mask or shiftval is greater than the bitcount, then break out.
      if (maskVal >= 32 || shiftVal >= 32) {
        return false;
      }
      // If the mask val is greater than the the number of original bits left
      // then this optimization is invalid.
      if (maskVal > (32 - shiftVal)) {
        return false;
      }
      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
    }
    newMaskConst = ConstantVector::get(maskVals);
    shiftValConst = ConstantVector::get(shiftVals);
  } else {
    // Handle the scalar case
    uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
    // This must be a mask value where all lower bits are set to 1 and then any
    // bit higher is set to 0.
    if (!isMask_32(maskVal)) {
      return false;
    }
    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
    // Count the number of bits set in the mask, this is the width of the
    // resulting bit set that is extracted from the source value.
    uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
    // If the mask or shift val is greater than the bitcount, then break out.
    if (maskVal >= 32 || shiftVal >= 32) {
      return false;
    }
    // If the mask val is greater than the the number of original bits left then
    // this optimization is invalid.
    if (maskVal > (32 - shiftVal)) {
      return false;
    }
    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
  }
  // Lets create the function signature: i32 (i32, i32, i32).
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "llvm.AMDIL.bit.extract.u32";
  // NOTE(review): the scalar path appends a bare "." so the final name ends
  // with a trailing dot -- presumably matched by the intrinsic tables;
  // confirm before changing.
  if (isVector) {
    name += ".v" + itostr(numEle) + "i32";
  } else {
    name += ".";
  }
  // Lets create the function.
  Function *Func =
    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[3] = {
    ShiftInst->getOperand(0),
    shiftValConst,
    newMaskConst
  };
  // Lets create the Call with the operands
  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
  CI->setDoesNotAccessMemory();
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}
840
841bool
842AMDGPUPeepholeOpt::expandBFI(CallInst *CI)
843{
844 if (!CI) {
845 return false;
846 }
847 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
848 if (!LHS->getName().startswith("__amdil_bfi")) {
849 return false;
850 }
851 Type* type = CI->getOperand(0)->getType();
852 Constant *negOneConst = NULL;
853 if (type->isVectorTy()) {
854 std::vector<Constant *> negOneVals;
855 negOneConst = ConstantInt::get(CI->getContext(),
856 APInt(32, StringRef("-1"), 10));
857 for (size_t x = 0,
858 y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
859 negOneVals.push_back(negOneConst);
860 }
861 negOneConst = ConstantVector::get(negOneVals);
862 } else {
863 negOneConst = ConstantInt::get(CI->getContext(),
864 APInt(32, StringRef("-1"), 10));
865 }
866 // __amdil_bfi => (A & B) | (~A & C)
867 BinaryOperator *lhs =
868 BinaryOperator::Create(Instruction::And, CI->getOperand(0),
869 CI->getOperand(1), "bfi_and", CI);
870 BinaryOperator *rhs =
871 BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
872 "bfi_not", CI);
873 rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
874 "bfi_and", CI);
875 lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
876 CI->replaceAllUsesWith(lhs);
877 return true;
878}
879
880bool
881AMDGPUPeepholeOpt::expandBFM(CallInst *CI)
882{
883 if (!CI) {
884 return false;
885 }
886 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
887 if (!LHS->getName().startswith("__amdil_bfm")) {
888 return false;
889 }
890 // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
891 Constant *newMaskConst = NULL;
892 Constant *newShiftConst = NULL;
893 Type* type = CI->getOperand(0)->getType();
894 if (type->isVectorTy()) {
895 std::vector<Constant*> newMaskVals, newShiftVals;
896 newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
897 newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
898 for (size_t x = 0,
899 y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
900 newMaskVals.push_back(newMaskConst);
901 newShiftVals.push_back(newShiftConst);
902 }
903 newMaskConst = ConstantVector::get(newMaskVals);
904 newShiftConst = ConstantVector::get(newShiftVals);
905 } else {
906 newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
907 newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
908 }
909 BinaryOperator *lhs =
910 BinaryOperator::Create(Instruction::And, CI->getOperand(0),
911 newMaskConst, "bfm_mask", CI);
912 lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
913 lhs, "bfm_shl", CI);
914 lhs = BinaryOperator::Create(Instruction::Sub, lhs,
915 newShiftConst, "bfm_sub", CI);
916 BinaryOperator *rhs =
917 BinaryOperator::Create(Instruction::And, CI->getOperand(1),
918 newMaskConst, "bfm_mask", CI);
919 lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
920 CI->replaceAllUsesWith(lhs);
921 return true;
922}
923
924bool
925AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
926{
927 Instruction *inst = (*bbb);
928 if (optimizeCallInst(bbb)) {
929 return true;
930 }
931 if (optimizeBitExtract(inst)) {
932 return false;
933 }
934 if (optimizeBitInsert(inst)) {
935 return false;
936 }
937 if (correctMisalignedMemOp(inst)) {
938 return false;
939 }
940 return false;
941}
942bool
943AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
944{
945 LoadInst *linst = dyn_cast<LoadInst>(inst);
946 StoreInst *sinst = dyn_cast<StoreInst>(inst);
947 unsigned alignment;
948 Type* Ty = inst->getType();
949 if (linst) {
950 alignment = linst->getAlignment();
951 Ty = inst->getType();
952 } else if (sinst) {
953 alignment = sinst->getAlignment();
954 Ty = sinst->getValueOperand()->getType();
955 } else {
956 return false;
957 }
958 unsigned size = getTypeSize(Ty);
959 if (size == alignment || size < alignment) {
960 return false;
961 }
962 if (!Ty->isStructTy()) {
963 return false;
964 }
965 if (alignment < 4) {
966 if (linst) {
967 linst->setAlignment(0);
968 return true;
969 } else if (sinst) {
970 sinst->setAlignment(0);
971 return true;
972 }
973 }
974 return false;
975}
976bool
977AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)
978{
979 if (!CI) {
980 return false;
981 }
982 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
983 std::string namePrefix = LHS->getName().substr(0, 14);
984 if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
985 && namePrefix != "__amdil__imul24_high") {
986 return false;
987 }
988 if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
989 return false;
990 }
991 return true;
992}
993
// Expand the signed 24-bit intrinsics into their 32-bit equivalents for
// devices that lack hardware support:
//   imul24      -> plain 32-bit mul
//   imad24      -> call to __amdil_imad (32-bit multiply-add)
//   imul24_high -> call to __amdil_imul_high
// The original call's uses are rewritten; the call itself is erased by the
// caller.
void
AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)
{
  assert(isSigned24BitOps(CI) && "Must be a "
      "signed 24 bit operation to call this function!");
  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
  // On 7XX and 8XX we do not have signed 24bit, so we need to
  // expand it to the following:
  // imul24 turns into 32bit imul
  // imad24 turns into 32bit imad
  // imul24_high turns into 32bit imulhigh
  if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
    Type *aType = CI->getOperand(0)->getType();
    bool isVector = aType->isVectorTy();
    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
    // Signature: same type for all three operands and the result.
    std::vector<Type*> callTypes;
    callTypes.push_back(CI->getOperand(0)->getType());
    callTypes.push_back(CI->getOperand(1)->getType());
    callTypes.push_back(CI->getOperand(2)->getType());
    FunctionType *funcType =
      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
    std::string name = "__amdil_imad";
    if (isVector) {
      name += "_v" + itostr(numEle) + "i32";
    } else {
      name += "_i32";
    }
    Function *Func = dyn_cast<Function>(
        CI->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[3] = {
      CI->getOperand(0),
      CI->getOperand(1),
      CI->getOperand(2)
    };
    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
    nCI->insertBefore(CI);
    CI->replaceAllUsesWith(nCI);
  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
    // NOTE: "__amdil_imul24_high" also matches this 14-char prefix, but the
    // more specific 19-char check below never runs for it -- the expansion
    // order here makes plain imul24 win for any "__amdil_imul24*" name.
    BinaryOperator *mulOp =
      BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
          CI->getOperand(1), "imul24", CI);
    CI->replaceAllUsesWith(mulOp);
  } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
    Type *aType = CI->getOperand(0)->getType();

    bool isVector = aType->isVectorTy();
    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
    std::vector<Type*> callTypes;
    callTypes.push_back(CI->getOperand(0)->getType());
    callTypes.push_back(CI->getOperand(1)->getType());
    FunctionType *funcType =
      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
    std::string name = "__amdil_imul_high";
    if (isVector) {
      name += "_v" + itostr(numEle) + "i32";
    } else {
      name += "_i32";
    }
    Function *Func = dyn_cast<Function>(
        CI->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[2] = {
      CI->getOperand(0),
      CI->getOperand(1)
    };
    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
    nCI->insertBefore(CI);
    CI->replaceAllUsesWith(nCI);
  }
}
1065
1066bool
1067AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)
1068{
1069 return (CI != NULL
1070 && CI->getOperand(CI->getNumOperands() - 1)->getName()
1071 == "__amdil_get_local_size_int");
1072}
1073
1074bool
1075AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)
1076{
1077 if (!CI) {
1078 return false;
1079 }
1080 if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
1081 && (mSTM->getDeviceName() == "cayman")) {
1082 return false;
1083 }
1084 return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
1085 == "__amdil_improved_div";
1086}
1087
1088void
1089AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)
1090{
1091 assert(convertAccurateDivide(CI)
1092 && "expanding accurate divide can only happen if it is expandable!");
1093 BinaryOperator *divOp =
1094 BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
1095 CI->getOperand(1), "fdiv32", CI);
1096 CI->replaceAllUsesWith(divOp);
1097}
1098
1099bool
1100AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI)
1101{
1102 if (optLevel != CodeGenOpt::None) {
1103 return false;
1104 }
1105
1106 if (!CI) {
1107 return false;
1108 }
1109
1110 unsigned funcNameIdx = 0;
1111 funcNameIdx = CI->getNumOperands() - 1;
1112 StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
1113 if (calleeName != "__amdil_image2d_read_norm"
1114 && calleeName != "__amdil_image2d_read_unnorm"
1115 && calleeName != "__amdil_image3d_read_norm"
1116 && calleeName != "__amdil_image3d_read_unnorm") {
1117 return false;
1118 }
1119
1120 unsigned samplerIdx = 2;
1121 samplerIdx = 1;
1122 Value *sampler = CI->getOperand(samplerIdx);
1123 LoadInst *lInst = dyn_cast<LoadInst>(sampler);
1124 if (!lInst) {
1125 return false;
1126 }
1127
1128 if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1129 return false;
1130 }
1131
1132 GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
1133 // If we are loading from what is not a global value, then we
1134 // fail and return.
1135 if (!gv) {
1136 return false;
1137 }
1138
1139 // If we don't have an initializer or we have an initializer and
1140 // the initializer is not a 32bit integer, we fail.
1141 if (!gv->hasInitializer()
1142 || !gv->getInitializer()->getType()->isIntegerTy(32)) {
1143 return false;
1144 }
1145
1146 // Now that we have the global variable initializer, lets replace
1147 // all uses of the load instruction with the samplerVal and
1148 // reparse the __amdil_is_constant() function.
1149 Constant *samplerVal = gv->getInitializer();
1150 lInst->replaceAllUsesWith(samplerVal);
1151 return true;
1152}
1153
// Module-level setup hook of the pass; nothing to initialize and the module
// is left unmodified.
bool
AMDGPUPeepholeOpt::doInitialization(Module &M)
{
  return false;
}
1159
// Module-level teardown hook of the pass; nothing to release and the module
// is left unmodified.
bool
AMDGPUPeepholeOpt::doFinalization(Module &M)
{
  return false;
}
1165
// Declare this pass's analysis dependencies to the pass manager.
void
AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
{
  AU.addRequired<MachineFunctionAnalysis>();
  FunctionPass::getAnalysisUsage(AU);
  // NOTE(review): setPreservesAll() looks optimistic for a pass that
  // rewrites IR -- confirm whether downstream passes rely on it.
  AU.setPreservesAll();
}
1173
// Compute the size in bytes of a type as this backend models it, dispatching
// to the per-type-kind overloads.  Unknown type IDs fall back to the
// primitive bit width.
size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
  size_t size = 0;
  if (!T) {
    return size;
  }
  switch (T->getTypeID()) {
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
    assert(0 && "These types are not supported by this backend");
    // NOTE(review): in NDEBUG builds the assert is a no-op and these cases
    // fall through into the default/primitive-size path below.
  default:
  case Type::FloatTyID:
  case Type::DoubleTyID:
    size = T->getPrimitiveSizeInBits() >> 3;
    break;
  case Type::PointerTyID:
    size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
    break;
  case Type::IntegerTyID:
    size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
    break;
  case Type::StructTyID:
    size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
    break;
  case Type::ArrayTyID:
    size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
    break;
  case Type::FunctionTyID:
    size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
    break;
  case Type::VectorTyID:
    size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
    break;
  };
  return size;
}
1211
1212size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
1213 bool dereferencePtr) {
1214 size_t size = 0;
1215 if (!ST) {
1216 return size;
1217 }
1218 Type *curType;
1219 StructType::element_iterator eib;
1220 StructType::element_iterator eie;
1221 for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
1222 curType = *eib;
1223 size += getTypeSize(curType, dereferencePtr);
1224 }
1225 return size;
1226}
1227
1228size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
1229 bool dereferencePtr) {
1230 return IT ? (IT->getBitWidth() >> 3) : 0;
1231}
1232
// Function types have no storage size; reaching this overload is a
// programmer error, so assert and return 0 in release builds.
size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
    bool dereferencePtr) {
  assert(0 && "Should not be able to calculate the size of an function type");
  return 0;
}
1238
1239size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
1240 bool dereferencePtr) {
1241 return (size_t)(AT ? (getTypeSize(AT->getElementType(),
1242 dereferencePtr) * AT->getNumElements())
1243 : 0);
1244}
1245
1246size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
1247 bool dereferencePtr) {
1248 return VT ? (VT->getBitWidth() >> 3) : 0;
1249}
1250
1251size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
1252 bool dereferencePtr) {
1253 if (!PT) {
1254 return 0;
1255 }
1256 Type *CT = PT->getElementType();
1257 if (CT->getTypeID() == Type::StructTyID &&
1258 PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
1259 return getTypeSize(dyn_cast<StructType>(CT));
1260 } else if (dereferencePtr) {
1261 size_t size = 0;
1262 for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
1263 size += getTypeSize(PT->getContainedType(x), dereferencePtr);
1264 }
1265 return size;
1266 } else {
1267 return 4;
1268 }
1269}
1270
// Opaque types have no knowable layout; this backend models them as a
// 4-byte handle rather than asserting (see the disabled assert below).
size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
    bool dereferencePtr) {
  //assert(0 && "Should not be able to calculate the size of an opaque type");
  return 4;
}
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.td b/src/gallium/drivers/radeon/AMDILRegisterInfo.td
deleted file mode 100644
index 42235ff37a1..00000000000
--- a/src/gallium/drivers/radeon/AMDILRegisterInfo.td
+++ /dev/null
@@ -1,110 +0,0 @@
1//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Declarations that describe the AMDIL register file
11//
12//===----------------------------------------------------------------------===//
13
14class AMDILReg<bits<16> num, string n> : Register<n> {
15 field bits<16> Value;
16 let Value = num;
17 let Namespace = "AMDGPU";
18}
19
20// We will start with 8 registers for each class before expanding to more
21// Since the swizzle is added based on the register class, we can leave it
22// off here and just specify different registers for different register classes
23def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
24def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
25def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
26def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
27def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
28def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
29def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
30def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
31def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
32def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
33def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
34def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
35def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
36def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
37def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
38def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
39def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
40def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
41def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
42def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
43
44// All registers between 1000 and 1024 are reserved and cannot be used
45// unless commented in this section
46// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
47// r1020 is used to hold the frame index for local arrays
48// r1019 is used to hold the dynamic stack allocation pointer
49// r1018 is used as a temporary register for handwritten code
50// r1017 is used as a temporary register for handwritten code
51// r1016 is used as a temporary register for load/store code
52// r1015 is used as a temporary register for data segment offset
53// r1014 is used as a temporary register for store code
54// r1013 is used as the section data pointer register
55// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
56// r1009 is used as the frame pointer register
57// r999 is used as the mem register.
58// r998 is used as the return address register.
59//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
60//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
61//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
62//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
63//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
64//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
65def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
66def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
67def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
68def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
69def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
70def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
71def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
72def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
73def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
74def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
75def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
76def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
77def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
78def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
79def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
80def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
81def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
82def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
83def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
84def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
85def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
86def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
87def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
88 (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
89{
90 let AltOrders = [(add (sequence "R%u", 1, 20))];
91 let AltOrderSelect = [{
92 return 1;
93 }];
94 }
95def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
96 (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
97{
98 let AltOrders = [(add (sequence "R%u", 1, 20))];
99 let AltOrderSelect = [{
100 return 1;
101 }];
102 }
103def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
104 (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
105{
106 let AltOrders = [(add (sequence "R%u", 1, 20))];
107 let AltOrderSelect = [{
108 return 1;
109 }];
110 }
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp
deleted file mode 100644
index 856b00f894a..00000000000
--- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
1//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9#include "AMDILSIDevice.h"
10#include "AMDILEvergreenDevice.h"
11#include "AMDILNIDevice.h"
12#include "AMDGPUSubtarget.h"
13
14using namespace llvm;
15
16AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
17 : AMDGPUEvergreenDevice(ST)
18{
19}
20AMDGPUSIDevice::~AMDGPUSIDevice()
21{
22}
23
24size_t
25AMDGPUSIDevice::getMaxLDSSize() const
26{
27 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
28 return MAX_LDS_SIZE_900;
29 } else {
30 return 0;
31 }
32}
33
34uint32_t
35AMDGPUSIDevice::getGeneration() const
36{
37 return AMDGPUDeviceInfo::HD7XXX;
38}
39
40std::string
41AMDGPUSIDevice::getDataLayout() const
42{
43 return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
44 "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
45 "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
46 "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
47 "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
48 "-n8:16:32:64");
49}
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h
deleted file mode 100644
index 6a684cb6095..00000000000
--- a/src/gallium/drivers/radeon/AMDILSIDevice.h
+++ /dev/null
@@ -1,45 +0,0 @@
1//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// Interface for the subtarget data classes.
11//
12//===---------------------------------------------------------------------===//
13// This file will define the interface that each generation needs to
14// implement in order to correctly answer queries on the capabilities of the
15// specific hardware.
16//===---------------------------------------------------------------------===//
17#ifndef _AMDILSIDEVICE_H_
18#define _AMDILSIDEVICE_H_
19#include "AMDILEvergreenDevice.h"
20#include "AMDGPUSubtarget.h"
21
22namespace llvm {
23 class AMDGPUSubtarget;
24//===---------------------------------------------------------------------===//
25// SI generation of devices and their respective sub classes
26//===---------------------------------------------------------------------===//
27
28// The AMDGPUSIDevice is the base class for all Northern Island series of
29// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
30// exception being differences in wavefront size and hardware capabilities. The
31// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
32// integer operations
33
34 class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
35 public:
36 AMDGPUSIDevice(AMDGPUSubtarget*);
37 virtual ~AMDGPUSIDevice();
38 virtual size_t getMaxLDSSize() const;
39 virtual uint32_t getGeneration() const;
40 virtual std::string getDataLayout() const;
41 protected:
42 }; // AMDGPUSIDevice
43
44} // namespace llvm
45#endif // _AMDILSIDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
deleted file mode 100644
index e6666f97705..00000000000
--- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
+++ /dev/null
@@ -1,75 +0,0 @@
1//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file provides helper macros for expanding case statements.
11//
12//===----------------------------------------------------------------------===//
13#ifndef AMDILUTILITYFUNCTIONS_H_
14#define AMDILUTILITYFUNCTIONS_H_
15
16// Macros that are used to help with switch statements for various data types
17// However, these macro's do not return anything unlike the second set below.
18#define ExpandCaseTo32bitIntTypes(Instr) \
19case Instr##_i32:
20
21#define ExpandCaseTo32bitIntTruncTypes(Instr) \
22case Instr##_i32i8: \
23case Instr##_i32i16:
24
25#define ExpandCaseToIntTypes(Instr) \
26 ExpandCaseTo32bitIntTypes(Instr)
27
28#define ExpandCaseToIntTruncTypes(Instr) \
29 ExpandCaseTo32bitIntTruncTypes(Instr)
30
31#define ExpandCaseToFloatTypes(Instr) \
32 case Instr##_f32:
33
34#define ExpandCaseTo32bitScalarTypes(Instr) \
35 ExpandCaseTo32bitIntTypes(Instr) \
36case Instr##_f32:
37
38#define ExpandCaseToAllScalarTypes(Instr) \
39 ExpandCaseToFloatTypes(Instr) \
40ExpandCaseToIntTypes(Instr)
41
42#define ExpandCaseToAllScalarTruncTypes(Instr) \
43 ExpandCaseToFloatTruncTypes(Instr) \
44ExpandCaseToIntTruncTypes(Instr)
45
46#define ExpandCaseToAllTypes(Instr) \
47ExpandCaseToAllScalarTypes(Instr)
48
49#define ExpandCaseToAllTruncTypes(Instr) \
50ExpandCaseToAllScalarTruncTypes(Instr)
51
52// Macros that expand into statements with return values
53#define ExpandCaseTo32bitIntReturn(Instr, Return) \
54case Instr##_i32: return Return##_i32;
55
56#define ExpandCaseToIntReturn(Instr, Return) \
57 ExpandCaseTo32bitIntReturn(Instr, Return)
58
59#define ExpandCaseToFloatReturn(Instr, Return) \
60 case Instr##_f32: return Return##_f32;\
61
62#define ExpandCaseToAllScalarReturn(Instr, Return) \
63 ExpandCaseToFloatReturn(Instr, Return) \
64ExpandCaseToIntReturn(Instr, Return)
65
66// These macros expand to common groupings of RegClass ID's
67#define ExpandCaseTo1CompRegID \
68case AMDGPU::GPRI32RegClassID: \
69case AMDGPU::GPRF32RegClassID:
70
71#define ExpandCaseTo32BitType(Instr) \
72case Instr##_i32: \
73case Instr##_f32:
74
75#endif // AMDILUTILITYFUNCTIONS_H_
diff --git a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp b/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp
deleted file mode 100644
index b6ab9b22fb1..00000000000
--- a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
1
2#include "AMDGPUInstPrinter.h"
3#include "llvm/MC/MCInst.h"
4
5using namespace llvm;
6
7void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
8 StringRef Annot) {
9 printInstruction(MI, OS);
10
11 printAnnotation(OS, Annot);
12}
13
14void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
15 raw_ostream &O) {
16
17 const MCOperand &Op = MI->getOperand(OpNo);
18 if (Op.isReg()) {
19 O << getRegisterName(Op.getReg());
20 } else if (Op.isImm()) {
21 O << Op.getImm();
22 } else if (Op.isFPImm()) {
23 O << Op.getFPImm();
24 } else {
25 assert(!"unknown operand type in printOperand");
26 }
27}
28
29void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
30 raw_ostream &O) {
31 printOperand(MI, OpNo, O);
32}
33
34#include "AMDGPUGenAsmWriter.inc"
diff --git a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h b/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h
deleted file mode 100644
index 62c1a5ee04f..00000000000
--- a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h
+++ /dev/null
@@ -1,34 +0,0 @@
1
2#ifndef AMDGPUINSTPRINTER_H
3#define AMDGPUINSTPRINTER_H
4
5#include "llvm/ADT/StringRef.h"
6#include "llvm/MC/MCInstPrinter.h"
7#include "llvm/Support/raw_ostream.h"
8
9namespace llvm {
10
11class AMDGPUInstPrinter : public MCInstPrinter {
12public:
13 AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
14 const MCRegisterInfo &MRI)
15 : MCInstPrinter(MAI, MII, MRI) {}
16
17 //Autogenerated by tblgen
18 void printInstruction(const MCInst *MI, raw_ostream &O);
19 static const char *getRegisterName(unsigned RegNo);
20
21// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
22 virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
23
24private:
25 void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
26// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
27 void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
28
29
30};
31
32} // End namespace llvm
33
34#endif // AMDGPUINSTRPRINTER_H
diff --git a/src/gallium/drivers/radeon/LICENSE.TXT b/src/gallium/drivers/radeon/LICENSE.TXT
deleted file mode 100644
index a57de2e87a1..00000000000
--- a/src/gallium/drivers/radeon/LICENSE.TXT
+++ /dev/null
@@ -1,43 +0,0 @@
1==============================================================================
2LLVM Release License
3==============================================================================
4University of Illinois/NCSA
5Open Source License
6
7Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
8All rights reserved.
9
10Developed by:
11
12 LLVM Team
13
14 University of Illinois at Urbana-Champaign
15
16 http://llvm.org
17
18Permission is hereby granted, free of charge, to any person obtaining a copy of
19this software and associated documentation files (the "Software"), to deal with
20the Software without restriction, including without limitation the rights to
21use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
22of the Software, and to permit persons to whom the Software is furnished to do
23so, subject to the following conditions:
24
25 * Redistributions of source code must retain the above copyright notice,
26 this list of conditions and the following disclaimers.
27
28 * Redistributions in binary form must reproduce the above copyright notice,
29 this list of conditions and the following disclaimers in the
30 documentation and/or other materials provided with the distribution.
31
32 * Neither the names of the LLVM Team, University of Illinois at
33 Urbana-Champaign, nor the names of its contributors may be used to
34 endorse or promote products derived from this Software without specific
35 prior written permission.
36
37THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
39FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
43SOFTWARE.
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp
deleted file mode 100644
index 5b31bc6c8f0..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
1//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/MC/MCAsmBackend.h"
13#include "llvm/MC/MCAssembler.h"
14#include "llvm/MC/MCObjectWriter.h"
15#include "llvm/MC/MCValue.h"
16#include "llvm/Support/TargetRegistry.h"
17
18using namespace llvm;
19
20namespace {
21
22class AMDGPUMCObjectWriter : public MCObjectWriter {
23public:
24 AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
25 virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
26 const MCAsmLayout &Layout) {
27 //XXX: Implement if necessary.
28 }
29 virtual void RecordRelocation(const MCAssembler &Asm,
30 const MCAsmLayout &Layout,
31 const MCFragment *Fragment,
32 const MCFixup &Fixup,
33 MCValue Target, uint64_t &FixedValue) {
34 assert(!"Not implemented");
35 }
36
37 virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
38
39};
40
41class AMDGPUAsmBackend : public MCAsmBackend {
42public:
43 AMDGPUAsmBackend(const Target &T)
44 : MCAsmBackend() {}
45
46 virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
47 virtual unsigned getNumFixupKinds() const { return 0; };
48 virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
49 uint64_t Value) const { assert(!"Not implemented"); }
50 virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
51 const MCInstFragment *DF,
52 const MCAsmLayout &Layout) const {
53 return false;
54 }
55 virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
56 assert(!"Not implemented");
57 }
58 virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
59 virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
60 return true;
61 }
62};
63
64} //End anonymous namespace
65
66void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
67 const MCAsmLayout &Layout) {
68 for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
69 Asm.writeSectionData(I, Layout);
70 }
71}
72
73MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
74 return new AMDGPUAsmBackend(T);
75}
76
77AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
78 raw_ostream &OS) const {
79 return new AMDGPUMCObjectWriter(OS);
80}
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp
deleted file mode 100644
index ccd5a3bfaa9..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
1//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "AMDGPUMCAsmInfo.h"
11
12using namespace llvm;
13AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
14{
15 HasSingleParameterDotFile = false;
16 WeakDefDirective = 0;
17 //===------------------------------------------------------------------===//
18 HasSubsectionsViaSymbols = true;
19 HasMachoZeroFillDirective = false;
20 HasMachoTBSSDirective = false;
21 HasStaticCtorDtorReferenceInStaticMode = false;
22 LinkerRequiresNonEmptyDwarfLines = true;
23 MaxInstLength = 16;
24 PCSymbol = "$";
25 SeparatorString = "\n";
26 CommentColumn = 40;
27 CommentString = ";";
28 LabelSuffix = ":";
29 GlobalPrefix = "@";
30 PrivateGlobalPrefix = ";.";
31 LinkerPrivateGlobalPrefix = "!";
32 InlineAsmStart = ";#ASMSTART";
33 InlineAsmEnd = ";#ASMEND";
34 AssemblerDialect = 0;
35 AllowQuotesInName = false;
36 AllowNameToStartWithDigit = false;
37 AllowPeriodsInName = false;
38
39 //===--- Data Emission Directives -------------------------------------===//
40 ZeroDirective = ".zero";
41 AsciiDirective = ".ascii\t";
42 AscizDirective = ".asciz\t";
43 Data8bitsDirective = ".byte\t";
44 Data16bitsDirective = ".short\t";
45 Data32bitsDirective = ".long\t";
46 Data64bitsDirective = ".quad\t";
47 GPRel32Directive = 0;
48 SunStyleELFSectionSwitchSyntax = true;
49 UsesELFSectionDirectiveForBSS = true;
50 HasMicrosoftFastStdCallMangling = false;
51
52 //===--- Alignment Information ----------------------------------------===//
53 AlignDirective = ".align\t";
54 AlignmentIsInBytes = true;
55 TextAlignFillValue = 0;
56
57 //===--- Global Variable Emission Directives --------------------------===//
58 GlobalDirective = ".global";
59 ExternDirective = ".extern";
60 HasSetDirective = false;
61 HasAggressiveSymbolFolding = true;
62 LCOMMDirectiveType = LCOMM::None;
63 COMMDirectiveAlignmentIsInBytes = false;
64 HasDotTypeDotSizeDirective = false;
65 HasNoDeadStrip = true;
66 HasSymbolResolver = false;
67 WeakRefDirective = ".weakref\t";
68 LinkOnceDirective = 0;
69 //===--- Dwarf Emission Directives -----------------------------------===//
70 HasLEB128 = true;
71 SupportsDebugInformation = true;
72 ExceptionsType = ExceptionHandling::None;
73 DwarfUsesInlineInfoSection = false;
74 DwarfSectionOffsetDirective = ".offset";
75 DwarfUsesLabelOffsetForRanges = true;
76
77 //===--- CBE Asm Translation Table -----------------------------------===//
78 AsmTransCBE = 0;
79}
80const char*
81AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
82{
83 switch (AS) {
84 default:
85 return 0;
86 case 0:
87 return 0;
88 };
89 return 0;
90}
91
92const MCSection*
93AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
94{
95 return 0;
96}
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h
deleted file mode 100644
index 0ca264beb0f..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ /dev/null
@@ -1,30 +0,0 @@
1//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// TODO: Add full description
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef AMDGPUMCASMINFO_H_
15#define AMDGPUMCASMINFO_H_
16
17#include "llvm/MC/MCAsmInfo.h"
18namespace llvm {
19 class Target;
20 class StringRef;
21
22 class AMDGPUMCAsmInfo : public MCAsmInfo {
23 public:
24 explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
25 const char*
26 getDataASDirective(unsigned int Size, unsigned int AS) const;
27 const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
28 };
29} // namespace llvm
30#endif // AMDGPUMCASMINFO_H_
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h
deleted file mode 100644
index a75a8414e6c..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ /dev/null
@@ -1,59 +0,0 @@
1//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// CodeEmitter interface for R600 and SI codegen.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef AMDGPUCODEEMITTER_H
15#define AMDGPUCODEEMITTER_H
16
17#include "llvm/MC/MCCodeEmitter.h"
18#include "llvm/Support/raw_ostream.h"
19
20namespace llvm {
21
22 class MCInst;
23 class MCOperand;
24
25 class AMDGPUMCCodeEmitter : public MCCodeEmitter {
26 public:
27
28 uint64_t getBinaryCodeForInstr(const MCInst &MI,
29 SmallVectorImpl<MCFixup> &Fixups) const;
30
31 virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
32 SmallVectorImpl<MCFixup> &Fixups) const {
33 return 0;
34 }
35
36 virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
37 SmallVectorImpl<MCFixup> &Fixups) const {
38 return 0;
39 }
40 virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
41 SmallVectorImpl<MCFixup> &Fixups) const {
42 return 0;
43 }
44 virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
45 return Value;
46 }
47 virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
48 SmallVectorImpl<MCFixup> &Fixups) const {
49 return 0;
50 }
51 virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
52 SmallVectorImpl<MCFixup> &Fixups) const {
53 return 0;
54 }
55 };
56
57} // End namespace llvm
58
59#endif // AMDGPUCODEEMITTER_H
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp
deleted file mode 100644
index 6de20fceda6..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
1//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file provides AMDGPU specific target descriptions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUMCTargetDesc.h"
15#include "AMDGPUMCAsmInfo.h"
16#include "InstPrinter/AMDGPUInstPrinter.h"
17#include "llvm/MC/MachineLocation.h"
18#include "llvm/MC/MCCodeGenInfo.h"
19#include "llvm/MC/MCInstrInfo.h"
20#include "llvm/MC/MCRegisterInfo.h"
21#include "llvm/MC/MCStreamer.h"
22#include "llvm/MC/MCSubtargetInfo.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/TargetRegistry.h"
25
26#define GET_INSTRINFO_MC_DESC
27#include "AMDGPUGenInstrInfo.inc"
28
29#define GET_SUBTARGETINFO_MC_DESC
30#include "AMDGPUGenSubtargetInfo.inc"
31
32#define GET_REGINFO_MC_DESC
33#include "AMDGPUGenRegisterInfo.inc"
34
35using namespace llvm;
36
37static MCInstrInfo *createAMDGPUMCInstrInfo() {
38 MCInstrInfo *X = new MCInstrInfo();
39 InitAMDGPUMCInstrInfo(X);
40 return X;
41}
42
43static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
44 MCRegisterInfo *X = new MCRegisterInfo();
45 InitAMDGPUMCRegisterInfo(X, 0);
46 return X;
47}
48
49static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
50 StringRef FS) {
51 MCSubtargetInfo * X = new MCSubtargetInfo();
52 InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
53 return X;
54}
55
56static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
57 CodeModel::Model CM,
58 CodeGenOpt::Level OL) {
59 MCCodeGenInfo *X = new MCCodeGenInfo();
60 X->InitMCCodeGenInfo(RM, CM, OL);
61 return X;
62}
63
64static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
65 unsigned SyntaxVariant,
66 const MCAsmInfo &MAI,
67 const MCInstrInfo &MII,
68 const MCRegisterInfo &MRI,
69 const MCSubtargetInfo &STI) {
70 return new AMDGPUInstPrinter(MAI, MII, MRI);
71}
72
73static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
74 const MCSubtargetInfo &STI,
75 MCContext &Ctx) {
76 if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
77 return createSIMCCodeEmitter(MCII, STI, Ctx);
78 } else {
79 return createR600MCCodeEmitter(MCII, STI, Ctx);
80 }
81}
82
83static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
84 MCContext &Ctx, MCAsmBackend &MAB,
85 raw_ostream &_OS,
86 MCCodeEmitter *_Emitter,
87 bool RelaxAll,
88 bool NoExecStack) {
89 return createPureStreamer(Ctx, MAB, _OS, _Emitter);
90}
91
92extern "C" void LLVMInitializeAMDGPUTargetMC() {
93
94 RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
95
96 TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
97
98 TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
99
100 TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
101
102 TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
103
104 TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
105
106 TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
107
108 TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
109
110 TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
111}
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h
deleted file mode 100644
index 328e367401a..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ /dev/null
@@ -1,51 +0,0 @@
1//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file provides AMDGPU specific target descriptions.
11//
12//===----------------------------------------------------------------------===//
13//
14
15#ifndef AMDGPUMCTARGETDESC_H
16#define AMDGPUMCTARGETDESC_H
17
18#include "llvm/ADT/StringRef.h"
19
20namespace llvm {
21class MCAsmBackend;
22class MCCodeEmitter;
23class MCContext;
24class MCInstrInfo;
25class MCRegisterInfo;
26class MCSubtargetInfo;
27class Target;
28
29extern Target TheAMDGPUTarget;
30
31MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
32 const MCSubtargetInfo &STI,
33 MCContext &Ctx);
34
35MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
36 const MCSubtargetInfo &STI,
37 MCContext &Ctx);
38
39MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
40} // End llvm namespace
41
42#define GET_REGINFO_ENUM
43#include "AMDGPUGenRegisterInfo.inc"
44
45#define GET_INSTRINFO_ENUM
46#include "AMDGPUGenInstrInfo.inc"
47
48#define GET_SUBTARGETINFO_ENUM
49#include "AMDGPUGenSubtargetInfo.inc"
50
51#endif // AMDGPUMCTARGETDESC_H
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
deleted file mode 100644
index a11f48234cb..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
+++ /dev/null
@@ -1,727 +0,0 @@
1//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code emitters outputs bytecode that is understood by the r600g driver
11// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
12// except that the size of the instruction fields are rounded up to the
13// nearest byte.
14//
15// [1] http://www.mesa3d.org/
16//
17//===----------------------------------------------------------------------===//
18
19#include "R600Defines.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
22#include "llvm/MC/MCCodeEmitter.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCInst.h"
25#include "llvm/MC/MCInstrInfo.h"
26#include "llvm/MC/MCRegisterInfo.h"
27#include "llvm/MC/MCSubtargetInfo.h"
28#include "llvm/Support/raw_ostream.h"
29
30#include <stdio.h>
31
32#define SRC_BYTE_COUNT 11
33#define DST_BYTE_COUNT 5
34
35using namespace llvm;
36
37namespace {
38
39class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
40 R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
41 void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
42 const MCInstrInfo &MCII;
43 const MCSubtargetInfo &STI;
44 MCContext &Ctx;
45
46public:
47
48 R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
49 MCContext &ctx)
50 : MCII(mcii), STI(sti), Ctx(ctx) { }
51
52 /// EncodeInstruction - Encode the instruction and write it to the OS.
53 virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
54 SmallVectorImpl<MCFixup> &Fixups) const;
55
56 /// getMachineOpValue - Reutrn the encoding for an MCOperand.
57 virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
58 SmallVectorImpl<MCFixup> &Fixups) const;
59private:
60
61 void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
62 raw_ostream &OS) const;
63 void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
64 void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
65 raw_ostream &OS) const;
66 void EmitDst(const MCInst &MI, raw_ostream &OS) const;
67 void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
68 raw_ostream &OS) const;
69 void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
70
71 void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
72
73 void EmitByte(unsigned int byte, raw_ostream &OS) const;
74
75 void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
76
77 void Emit(uint32_t value, raw_ostream &OS) const;
78 void Emit(uint64_t value, raw_ostream &OS) const;
79
80 unsigned getHWRegIndex(unsigned reg) const;
81 unsigned getHWRegChan(unsigned reg) const;
82 unsigned getHWReg(unsigned regNo) const;
83
84 bool isFCOp(unsigned opcode) const;
85 bool isTexOp(unsigned opcode) const;
86 bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
87
88 /// getHWRegIndexGen - Get the register's hardware index. Implemented in
89 /// R600HwRegInfo.include.
90 unsigned getHWRegIndexGen(unsigned int Reg) const;
91
92 /// getHWRegChanGen - Get the register's channel. Implemented in
93 /// R600HwRegInfo.include.
94 unsigned getHWRegChanGen(unsigned int Reg) const;
95};
96
97} // End anonymous namespace
98
99enum RegElement {
100 ELEMENT_X = 0,
101 ELEMENT_Y,
102 ELEMENT_Z,
103 ELEMENT_W
104};
105
106enum InstrTypes {
107 INSTR_ALU = 0,
108 INSTR_TEX,
109 INSTR_FC,
110 INSTR_NATIVE,
111 INSTR_VTX
112};
113
114enum FCInstr {
115 FC_IF = 0,
116 FC_IF_INT,
117 FC_ELSE,
118 FC_ENDIF,
119 FC_BGNLOOP,
120 FC_ENDLOOP,
121 FC_BREAK,
122 FC_BREAK_NZ_INT,
123 FC_CONTINUE,
124 FC_BREAK_Z_INT,
125 FC_BREAK_NZ
126};
127
128enum TextureTypes {
129 TEXTURE_1D = 1,
130 TEXTURE_2D,
131 TEXTURE_3D,
132 TEXTURE_CUBE,
133 TEXTURE_RECT,
134 TEXTURE_SHADOW1D,
135 TEXTURE_SHADOW2D,
136 TEXTURE_SHADOWRECT,
137 TEXTURE_1D_ARRAY,
138 TEXTURE_2D_ARRAY,
139 TEXTURE_SHADOW1D_ARRAY,
140 TEXTURE_SHADOW2D_ARRAY
141};
142
143MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
144 const MCSubtargetInfo &STI,
145 MCContext &Ctx) {
146 return new R600MCCodeEmitter(MCII, STI, Ctx);
147}
148
149void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
150 SmallVectorImpl<MCFixup> &Fixups) const {
151 if (isTexOp(MI.getOpcode())) {
152 EmitTexInstr(MI, Fixups, OS);
153 } else if (isFCOp(MI.getOpcode())){
154 EmitFCInstr(MI, OS);
155 } else if (MI.getOpcode() == AMDGPU::RETURN ||
156 MI.getOpcode() == AMDGPU::BUNDLE ||
157 MI.getOpcode() == AMDGPU::KILL) {
158 return;
159 } else {
160 switch(MI.getOpcode()) {
161 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
162 case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
163 {
164 uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
165 EmitByte(INSTR_NATIVE, OS);
166 Emit(inst, OS);
167 break;
168 }
169 case AMDGPU::CONSTANT_LOAD_eg:
170 case AMDGPU::VTX_READ_PARAM_i32_eg:
171 case AMDGPU::VTX_READ_PARAM_f32_eg:
172 case AMDGPU::VTX_READ_GLOBAL_i8_eg:
173 case AMDGPU::VTX_READ_GLOBAL_i32_eg:
174 case AMDGPU::VTX_READ_GLOBAL_f32_eg:
175 case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
176 case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
177 {
178 uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
179 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
180
181 EmitByte(INSTR_VTX, OS);
182 Emit(InstWord01, OS);
183 Emit(InstWord2, OS);
184 break;
185 }
186
187 default:
188 EmitALUInstr(MI, Fixups, OS);
189 break;
190 }
191 }
192}
193
194void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
195 SmallVectorImpl<MCFixup> &Fixups,
196 raw_ostream &OS) const {
197 const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
198 unsigned NumOperands = MI.getNumOperands();
199
200 if(MCDesc.findFirstPredOperandIdx() > -1)
201 NumOperands--;
202
203 if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0)
204 NumOperands--;
205
206 if(MI.getOpcode() == AMDGPU::PRED_X)
207 NumOperands = 2;
208
209 // XXX Check if instruction writes a result
210 if (NumOperands < 1) {
211 return;
212 }
213
214 // Emit instruction type
215 EmitByte(INSTR_ALU, OS);
216
217 uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
218
219 //older alu have different encoding for instructions with one or two src
220 //parameters.
221 if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
222 !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
223 uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
224 InstWord01 &= ~(0x3FFULL << 39);
225 InstWord01 |= ISAOpCode << 1;
226 }
227
228 unsigned int OpIndex;
229 for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
230 // Literal constants are always stored as the last operand.
231 if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
232 break;
233 }
234 EmitSrcISA(MI, OpIndex, InstWord01, OS);
235 }
236
237 // Emit zeros for unused sources
238 for ( ; OpIndex < 4; OpIndex++) {
239 EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
240 }
241
242 // Emit destination register
243 const MCOperand &dstOp = MI.getOperand(0);
244 if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) {
245 //element of destination register
246 InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61;
247
248 // isClamped
249 if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
250 InstWord01 |= 1ULL << 63;
251 }
252
253 // write mask
254 if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) {
255 InstWord01 |= 1ULL << 36;
256 }
257
258 // XXX: Emit relative addressing mode
259 }
260
261 // Emit ALU
262
263 // Emit IsLast (for this instruction group) (1 byte)
264 if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
265 InstWord01 |= 1ULL << 31;
266 }
267
268 // XXX: Emit push modifier
269 if(isFlagSet(MI, 1, MO_FLAG_PUSH)) {
270 InstWord01 |= 1ULL << 34;
271 }
272
273 // XXX: Emit predicate (1 byte)
274 int PredIdx = MCDesc.findFirstPredOperandIdx();
275 if (PredIdx != -1) {
276 switch(MI.getOperand(PredIdx).getReg()) {
277 case AMDGPU::PRED_SEL_ZERO:
278 InstWord01 |= 2ULL << 29;
279 break;
280 case AMDGPU::PRED_SEL_ONE:
281 InstWord01 |= 3ULL << 29;
282 break;
283 }
284 }
285
286 //XXX: predicate
287 //XXX: bank swizzle
288 //XXX: OMOD
289 //XXX: index mode
290
291 Emit(InstWord01, OS);
292}
293
294void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
295 raw_ostream &OS) const {
296 const MCOperand &MO = MI.getOperand(OpIdx);
297 union {
298 float f;
299 uint32_t i;
300 } Value;
301 Value.i = 0;
302 // Emit the source select (2 bytes). For GPRs, this is the register index.
303 // For other potential instruction operands, (e.g. constant registers) the
304 // value of the source select is defined in the r600isa docs.
305 if (MO.isReg()) {
306 unsigned reg = MO.getReg();
307 EmitTwoBytes(getHWReg(reg), OS);
308 if (reg == AMDGPU::ALU_LITERAL_X) {
309 unsigned ImmOpIndex = MI.getNumOperands() - 1;
310 MCOperand ImmOp = MI.getOperand(ImmOpIndex);
311 if (ImmOp.isFPImm()) {
312 Value.f = ImmOp.getFPImm();
313 } else {
314 assert(ImmOp.isImm());
315 Value.i = ImmOp.getImm();
316 }
317 }
318 } else {
319 // XXX: Handle other operand types.
320 EmitTwoBytes(0, OS);
321 }
322
323 // Emit the source channel (1 byte)
324 if (MO.isReg()) {
325 EmitByte(getHWRegChan(MO.getReg()), OS);
326 } else {
327 EmitByte(0, OS);
328 }
329
330 // XXX: Emit isNegated (1 byte)
331 if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
332 && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
333 (MO.isReg() &&
334 (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
335 EmitByte(1, OS);
336 } else {
337 EmitByte(0, OS);
338 }
339
340 // Emit isAbsolute (1 byte)
341 if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
342 EmitByte(1, OS);
343 } else {
344 EmitByte(0, OS);
345 }
346
347 // XXX: Emit relative addressing mode (1 byte)
348 EmitByte(0, OS);
349
350 // Emit kc_bank, This will be adjusted later by r600_asm
351 EmitByte(0, OS);
352
353 // Emit the literal value, if applicable (4 bytes).
354 Emit(Value.i, OS);
355
356}
357
358void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
359 uint64_t &Value, raw_ostream &OS) const {
360 const MCOperand &MO = MI.getOperand(OpIdx);
361 union {
362 float f;
363 uint32_t i;
364 } InlineConstant;
365 InlineConstant.i = 0;
366 // Emit the source select (2 bytes). For GPRs, this is the register index.
367 // For other potential instruction operands, (e.g. constant registers) the
368 // value of the source select is defined in the r600isa docs.
369 if (MO.isReg()) {
370 unsigned Reg = MO.getReg();
371 if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
372 EmitByte(1, OS);
373 } else {
374 EmitByte(0, OS);
375 }
376
377 if (Reg == AMDGPU::ALU_LITERAL_X) {
378 unsigned ImmOpIndex = MI.getNumOperands() - 1;
379 MCOperand ImmOp = MI.getOperand(ImmOpIndex);
380 if (ImmOp.isFPImm()) {
381 InlineConstant.f = ImmOp.getFPImm();
382 } else {
383 assert(ImmOp.isImm());
384 InlineConstant.i = ImmOp.getImm();
385 }
386 }
387 } else {
388 // XXX: Handle other operand types.
389 EmitTwoBytes(0, OS);
390 }
391
392 // source channel
393 uint64_t sourceChannelValue = getHWRegChan(MO.getReg());
394 if (OpIdx == 1)
395 Value |= sourceChannelValue << 10;
396 if (OpIdx == 2)
397 Value |= sourceChannelValue << 23;
398 if (OpIdx == 3)
399 Value |= sourceChannelValue << 42;
400
401 // isNegated
402 if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
403 && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
404 (MO.isReg() &&
405 (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
406 if (OpIdx == 1)
407 Value |= 1ULL << 12;
408 else if (OpIdx == 2)
409 Value |= 1ULL << 25;
410 else if (OpIdx == 3)
411 Value |= 1ULL << 44;
412 }
413
414 // isAbsolute
415 if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
416 assert(OpIdx < 3);
417 Value |= 1ULL << (32+OpIdx-1);
418 }
419
420 // XXX: relative addressing mode
421 // XXX: kc_bank
422
423 // Emit the literal value, if applicable (4 bytes).
424 Emit(InlineConstant.i, OS);
425
426}
427
428void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
429 SmallVectorImpl<MCFixup> &Fixups,
430 raw_ostream &OS) const {
431
432 unsigned opcode = MI.getOpcode();
433 bool hasOffsets = (opcode == AMDGPU::TEX_LD);
434 unsigned op_offset = hasOffsets ? 3 : 0;
435 int64_t sampler = MI.getOperand(op_offset+2).getImm();
436 int64_t textureType = MI.getOperand(op_offset+3).getImm();
437 unsigned srcSelect[4] = {0, 1, 2, 3};
438
439 // Emit instruction type
440 EmitByte(1, OS);
441
442 // Emit instruction
443 EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
444
445 // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
446 EmitByte(sampler + 1 + 1, OS);
447
448 // Emit source register
449 EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
450
451 // XXX: Emit src isRelativeAddress
452 EmitByte(0, OS);
453
454 // Emit destination register
455 EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
456
457 // XXX: Emit dst isRealtiveAddress
458 EmitByte(0, OS);
459
460 // XXX: Emit dst select
461 EmitByte(0, OS); // X
462 EmitByte(1, OS); // Y
463 EmitByte(2, OS); // Z
464 EmitByte(3, OS); // W
465
466 // XXX: Emit lod bias
467 EmitByte(0, OS);
468
469 // XXX: Emit coord types
470 unsigned coordType[4] = {1, 1, 1, 1};
471
472 if (textureType == TEXTURE_RECT
473 || textureType == TEXTURE_SHADOWRECT) {
474 coordType[ELEMENT_X] = 0;
475 coordType[ELEMENT_Y] = 0;
476 }
477
478 if (textureType == TEXTURE_1D_ARRAY
479 || textureType == TEXTURE_SHADOW1D_ARRAY) {
480 if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
481 coordType[ELEMENT_Y] = 0;
482 } else {
483 coordType[ELEMENT_Z] = 0;
484 srcSelect[ELEMENT_Z] = ELEMENT_Y;
485 }
486 } else if (textureType == TEXTURE_2D_ARRAY
487 || textureType == TEXTURE_SHADOW2D_ARRAY) {
488 coordType[ELEMENT_Z] = 0;
489 }
490
491 for (unsigned i = 0; i < 4; i++) {
492 EmitByte(coordType[i], OS);
493 }
494
495 // XXX: Emit offsets
496 if (hasOffsets)
497 for (unsigned i = 2; i < 5; i++)
498 EmitByte(MI.getOperand(i).getImm()<<1, OS);
499 else
500 EmitNullBytes(3, OS);
501
502 // Emit sampler id
503 EmitByte(sampler, OS);
504
505 // XXX:Emit source select
506 if ((textureType == TEXTURE_SHADOW1D
507 || textureType == TEXTURE_SHADOW2D
508 || textureType == TEXTURE_SHADOWRECT
509 || textureType == TEXTURE_SHADOW1D_ARRAY)
510 && opcode != AMDGPU::TEX_SAMPLE_C_L
511 && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
512 srcSelect[ELEMENT_W] = ELEMENT_Z;
513 }
514
515 for (unsigned i = 0; i < 4; i++) {
516 EmitByte(srcSelect[i], OS);
517 }
518}
519
520void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
521
522 // Emit instruction type
523 EmitByte(INSTR_FC, OS);
524
525 // Emit SRC
526 unsigned NumOperands = MI.getNumOperands();
527 if (NumOperands > 0) {
528 assert(NumOperands == 1);
529 EmitSrc(MI, 0, OS);
530 } else {
531 EmitNullBytes(SRC_BYTE_COUNT, OS);
532 }
533
534 // Emit FC Instruction
535 enum FCInstr instr;
536 switch (MI.getOpcode()) {
537 case AMDGPU::BREAK_LOGICALZ_f32:
538 instr = FC_BREAK;
539 break;
540 case AMDGPU::BREAK_LOGICALNZ_f32:
541 instr = FC_BREAK_NZ;
542 break;
543 case AMDGPU::BREAK_LOGICALNZ_i32:
544 instr = FC_BREAK_NZ_INT;
545 break;
546 case AMDGPU::BREAK_LOGICALZ_i32:
547 instr = FC_BREAK_Z_INT;
548 break;
549 case AMDGPU::CONTINUE_LOGICALNZ_f32:
550 case AMDGPU::CONTINUE_LOGICALNZ_i32:
551 instr = FC_CONTINUE;
552 break;
553 case AMDGPU::IF_LOGICALNZ_f32:
554 instr = FC_IF;
555 case AMDGPU::IF_LOGICALNZ_i32:
556 instr = FC_IF_INT;
557 break;
558 case AMDGPU::IF_LOGICALZ_f32:
559 abort();
560 break;
561 case AMDGPU::ELSE:
562 instr = FC_ELSE;
563 break;
564 case AMDGPU::ENDIF:
565 instr = FC_ENDIF;
566 break;
567 case AMDGPU::ENDLOOP:
568 instr = FC_ENDLOOP;
569 break;
570 case AMDGPU::WHILELOOP:
571 instr = FC_BGNLOOP;
572 break;
573 default:
574 abort();
575 break;
576 }
577 EmitByte(instr, OS);
578}
579
580void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
581 raw_ostream &OS) const {
582
583 for (unsigned int i = 0; i < ByteCount; i++) {
584 EmitByte(0, OS);
585 }
586}
587
588void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
589 OS.write((uint8_t) Byte & 0xff);
590}
591
592void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
593 raw_ostream &OS) const {
594 OS.write((uint8_t) (Bytes & 0xff));
595 OS.write((uint8_t) ((Bytes >> 8) & 0xff));
596}
597
598void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
599 for (unsigned i = 0; i < 4; i++) {
600 OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
601 }
602}
603
604void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
605 for (unsigned i = 0; i < 8; i++) {
606 EmitByte((Value >> (8 * i)) & 0xff, OS);
607 }
608}
609
610unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const {
611 switch(reg) {
612 case AMDGPU::ZERO: return 248;
613 case AMDGPU::ONE:
614 case AMDGPU::NEG_ONE: return 249;
615 case AMDGPU::ONE_INT: return 250;
616 case AMDGPU::HALF:
617 case AMDGPU::NEG_HALF: return 252;
618 case AMDGPU::ALU_LITERAL_X: return 253;
619 case AMDGPU::PREDICATE_BIT:
620 case AMDGPU::PRED_SEL_OFF:
621 case AMDGPU::PRED_SEL_ZERO:
622 case AMDGPU::PRED_SEL_ONE:
623 return 0;
624 default: return getHWRegIndexGen(reg);
625 }
626}
627
628unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
629 switch(reg) {
630 case AMDGPU::ZERO:
631 case AMDGPU::ONE:
632 case AMDGPU::ONE_INT:
633 case AMDGPU::NEG_ONE:
634 case AMDGPU::HALF:
635 case AMDGPU::NEG_HALF:
636 case AMDGPU::ALU_LITERAL_X:
637 case AMDGPU::PREDICATE_BIT:
638 case AMDGPU::PRED_SEL_OFF:
639 case AMDGPU::PRED_SEL_ZERO:
640 case AMDGPU::PRED_SEL_ONE:
641 return 0;
642 default: return getHWRegChanGen(reg);
643 }
644}
645unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
646 unsigned HWReg;
647
648 HWReg = getHWRegIndex(RegNo);
649 if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) {
650 HWReg += 512;
651 }
652 return HWReg;
653}
654
655uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
656 const MCOperand &MO,
657 SmallVectorImpl<MCFixup> &Fixup) const {
658 if (MO.isReg()) {
659 return getHWRegIndex(MO.getReg());
660 } else if (MO.isImm()) {
661 return MO.getImm();
662 } else {
663 assert(0);
664 return 0;
665 }
666}
667
668//===----------------------------------------------------------------------===//
669// Encoding helper functions
670//===----------------------------------------------------------------------===//
671
672bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
673 switch(opcode) {
674 default: return false;
675 case AMDGPU::BREAK_LOGICALZ_f32:
676 case AMDGPU::BREAK_LOGICALNZ_i32:
677 case AMDGPU::BREAK_LOGICALZ_i32:
678 case AMDGPU::BREAK_LOGICALNZ_f32:
679 case AMDGPU::CONTINUE_LOGICALNZ_f32:
680 case AMDGPU::IF_LOGICALNZ_i32:
681 case AMDGPU::IF_LOGICALZ_f32:
682 case AMDGPU::ELSE:
683 case AMDGPU::ENDIF:
684 case AMDGPU::ENDLOOP:
685 case AMDGPU::IF_LOGICALNZ_f32:
686 case AMDGPU::WHILELOOP:
687 return true;
688 }
689}
690
691bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
692 switch(opcode) {
693 default: return false;
694 case AMDGPU::TEX_LD:
695 case AMDGPU::TEX_GET_TEXTURE_RESINFO:
696 case AMDGPU::TEX_SAMPLE:
697 case AMDGPU::TEX_SAMPLE_C:
698 case AMDGPU::TEX_SAMPLE_L:
699 case AMDGPU::TEX_SAMPLE_C_L:
700 case AMDGPU::TEX_SAMPLE_LB:
701 case AMDGPU::TEX_SAMPLE_C_LB:
702 case AMDGPU::TEX_SAMPLE_G:
703 case AMDGPU::TEX_SAMPLE_C_G:
704 case AMDGPU::TEX_GET_GRADIENTS_H:
705 case AMDGPU::TEX_GET_GRADIENTS_V:
706 case AMDGPU::TEX_SET_GRADIENTS_H:
707 case AMDGPU::TEX_SET_GRADIENTS_V:
708 return true;
709 }
710}
711
712bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
713 unsigned Flag) const {
714 const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
715 unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
716 if (FlagIndex == 0) {
717 return false;
718 }
719 assert(MI.getOperand(FlagIndex).isImm());
720 return !!((MI.getOperand(FlagIndex).getImm() >>
721 (NUM_MO_FLAGS * Operand)) & Flag);
722}
723#define R600RegisterInfo R600MCCodeEmitter
724#include "R600HwRegInfo.include"
725#undef R600RegisterInfo
726
727#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp
deleted file mode 100644
index ca4b579dcce..00000000000
--- a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
1//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The SI code emitter produces machine code that can be executed directly on
11// the GPU device.
12//
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
17#include "llvm/MC/MCCodeEmitter.h"
18#include "llvm/MC/MCContext.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCInstrInfo.h"
21#include "llvm/MC/MCRegisterInfo.h"
22#include "llvm/MC/MCSubtargetInfo.h"
23#include "llvm/Support/raw_ostream.h"
24
25#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
26#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
27
28// These must be kept in sync with SIInstructions.td and also the
29// InstrEncodingInfo array in SIInstrInfo.cpp.
30//
31// NOTE: This enum is only used to identify the encoding type within LLVM,
32// the actual encoding type that is part of the instruction format is different
33namespace SIInstrEncodingType {
34 enum Encoding {
35 EXP = 0,
36 LDS = 1,
37 MIMG = 2,
38 MTBUF = 3,
39 MUBUF = 4,
40 SMRD = 5,
41 SOP1 = 6,
42 SOP2 = 7,
43 SOPC = 8,
44 SOPK = 9,
45 SOPP = 10,
46 VINTRP = 11,
47 VOP1 = 12,
48 VOP2 = 13,
49 VOP3 = 14,
50 VOPC = 15
51 };
52}
53
54using namespace llvm;
55
56namespace {
57class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
58 SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
59 void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
60 const MCInstrInfo &MCII;
61 const MCSubtargetInfo &STI;
62 MCContext &Ctx;
63
64public:
65 SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
66 MCContext &ctx)
67 : MCII(mcii), STI(sti), Ctx(ctx) { }
68
69 ~SIMCCodeEmitter() { }
70
71 /// EncodeInstruction - Encode the instruction and write it to the OS.
72 virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
73 SmallVectorImpl<MCFixup> &Fixups) const;
74
75 /// getMachineOpValue - Reutrn the encoding for an MCOperand.
76 virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
77 SmallVectorImpl<MCFixup> &Fixups) const;
78
79public:
80
81 /// GPRAlign - Encode a sequence of registers with the correct alignment.
82 unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
83
84 /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
85 virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
86 SmallVectorImpl<MCFixup> &Fixup) const;
87
88 /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
89 virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
90 SmallVectorImpl<MCFixup> &Fixup) const;
91
92 /// SMRDmemriEncode - Encoding for SMRD indexed loads
93 virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
94 SmallVectorImpl<MCFixup> &Fixup) const;
95
96 /// VOPPostEncode - Post-Encoder method for VOP instructions
97 virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
98
99private:
100
101 ///getEncodingType = Return this SIInstrEncodingType for this instruction.
102 unsigned getEncodingType(const MCInst &MI) const;
103
104 ///getEncodingBytes - Get then size in bytes of this instructions encoding.
105 unsigned getEncodingBytes(const MCInst &MI) const;
106
107 /// getRegBinaryCode - Returns the hardware encoding for a register
108 unsigned getRegBinaryCode(unsigned reg) const;
109
110 /// getHWRegNum - Generated function that returns the hardware encoding for
111 /// a register
112 unsigned getHWRegNum(unsigned reg) const;
113
114};
115
116} // End anonymous namespace
117
118MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
119 const MCSubtargetInfo &STI,
120 MCContext &Ctx) {
121 return new SIMCCodeEmitter(MCII, STI, Ctx);
122}
123
124void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
125 SmallVectorImpl<MCFixup> &Fixups) const {
126 uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
127 unsigned bytes = getEncodingBytes(MI);
128 for (unsigned i = 0; i < bytes; i++) {
129 OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
130 }
131}
132
133uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
134 const MCOperand &MO,
135 SmallVectorImpl<MCFixup> &Fixups) const {
136 if (MO.isReg()) {
137 return getRegBinaryCode(MO.getReg());
138 } else if (MO.isImm()) {
139 return MO.getImm();
140 } else if (MO.isFPImm()) {
141 // XXX: Not all instructions can use inline literals
142 // XXX: We should make sure this is a 32-bit constant
143 union {
144 float F;
145 uint32_t I;
146 } Imm;
147 Imm.F = MO.getFPImm();
148 return Imm.I;
149 } else{
150 llvm_unreachable("Encoding of this operand type is not supported yet.");
151 }
152 return 0;
153}
154
155//===----------------------------------------------------------------------===//
156// Custom Operand Encodings
157//===----------------------------------------------------------------------===//
158
159unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
160 unsigned shift) const {
161 unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
162 return regCode >> shift;
163 return 0;
164}
165unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
166 unsigned OpNo ,
167 SmallVectorImpl<MCFixup> &Fixup) const {
168 return GPRAlign(MI, OpNo, 1);
169}
170
171unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
172 unsigned OpNo,
173 SmallVectorImpl<MCFixup> &Fixup) const {
174 return GPRAlign(MI, OpNo, 2);
175}
176
177#define SMRD_OFFSET_MASK 0xff
178#define SMRD_IMM_SHIFT 8
179#define SMRD_SBASE_MASK 0x3f
180#define SMRD_SBASE_SHIFT 9
181/// SMRDmemriEncode - This function is responsibe for encoding the offset
182/// and the base ptr for SMRD instructions it should return a bit string in
183/// this format:
184///
185/// OFFSET = bits{7-0}
186/// IMM = bits{8}
187/// SBASE = bits{14-9}
188///
189uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
190 SmallVectorImpl<MCFixup> &Fixup) const {
191 uint32_t Encoding;
192
193 const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
194
195 //XXX: Use this function for SMRD loads with register offsets
196 assert(OffsetOp.isImm());
197
198 Encoding =
199 (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
200 | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
201 | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
202 ;
203
204 return Encoding;
205}
206
207//===----------------------------------------------------------------------===//
208// Post Encoder Callbacks
209//===----------------------------------------------------------------------===//
210
211uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
212 unsigned encodingType = getEncodingType(MI);
213 unsigned numSrcOps;
214 unsigned vgprBitOffset;
215
216 if (encodingType == SIInstrEncodingType::VOP3) {
217 numSrcOps = 3;
218 vgprBitOffset = 32;
219 } else {
220 numSrcOps = 1;
221 vgprBitOffset = 0;
222 }
223
224 // Add one to skip over the destination reg operand.
225 for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
226 const MCOperand &MO = MI.getOperand(opIdx);
227 if (MO.isReg()) {
228 unsigned reg = MI.getOperand(opIdx).getReg();
229 if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
230 AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
231 Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
232 }
233 } else if (MO.isFPImm()) {
234 union {
235 float f;
236 uint32_t i;
237 } Imm;
238 // XXX: Not all instructions can use inline literals
239 // XXX: We should make sure this is a 32-bit constant
240 Imm.f = MO.getFPImm();
241 Value |= ((uint64_t)Imm.i) << 32;
242 }
243 }
244 return Value;
245}
246
247//===----------------------------------------------------------------------===//
248// Encoding helper functions
249//===----------------------------------------------------------------------===//
250
251unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
252 return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
253}
254
255unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
256
257 // These instructions aren't real instructions with an encoding type, so
258 // we need to manually specify their size.
259 switch (MI.getOpcode()) {
260 default: break;
261 case AMDGPU::SI_LOAD_LITERAL_I32:
262 case AMDGPU::SI_LOAD_LITERAL_F32:
263 return 4;
264 }
265
266 unsigned encoding_type = getEncodingType(MI);
267 switch (encoding_type) {
268 case SIInstrEncodingType::EXP:
269 case SIInstrEncodingType::LDS:
270 case SIInstrEncodingType::MUBUF:
271 case SIInstrEncodingType::MTBUF:
272 case SIInstrEncodingType::MIMG:
273 case SIInstrEncodingType::VOP3:
274 return 8;
275 default:
276 return 4;
277 }
278}
279
280
281unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
282 switch (reg) {
283 case AMDGPU::VCC: return 106;
284 case AMDGPU::M0: return 124;
285 case AMDGPU::EXEC: return 126;
286 case AMDGPU::EXEC_LO: return 126;
287 case AMDGPU::EXEC_HI: return 127;
288 case AMDGPU::SREG_LIT_0: return 128;
289 case AMDGPU::SI_LITERAL_CONSTANT: return 255;
290 default: return getHWRegNum(reg);
291 }
292}
293
294#define SIRegisterInfo SIMCCodeEmitter
295#include "SIRegisterGetHWRegNum.inc"
296#undef SIRegisterInfo
diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile
index 05dc518a9aa..7934c712195 100644
--- a/src/gallium/drivers/radeon/Makefile
+++ b/src/gallium/drivers/radeon/Makefile
@@ -8,74 +8,8 @@ LIBNAME = radeon
8 8
9LIBRARY_INCLUDES = -I$(TOP)/include 9LIBRARY_INCLUDES = -I$(TOP)/include
10 10
11TBLGEN = $(LLVM_BINDIR)/llvm-tblgen
12
13CXXFLAGS+= $(LLVM_CXXFLAGS) 11CXXFLAGS+= $(LLVM_CXXFLAGS)
14 12
15ifeq ($(LLVM_VERSION),3.1)
16 CPP_SOURCES += $(LLVM_CPP_SOURCES)
17 GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES)
18else
19 CXXFLAGS+= -DEXTERNAL_LLVM
20endif
21
22include ../../Makefile.template 13include ../../Makefile.template
23 14
24CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS)) 15CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
25
26tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
27
28HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
29
30SIRegisterInfo.td: SIGenRegisterInfo.pl
31 $(PERL) $^ > $@
32
33SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
34 $(PERL) $^ $@ > /dev/null
35
36R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
37ifeq ($(HAVE_LLVM_INTRINSICS),)
38 cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
39else
40 cp R600IntrinsicsOpenCL.td R600Intrinsics.td
41endif
42
43R600RegisterInfo.td: R600GenRegisterInfo.pl
44 $(PERL) $^ > $@
45
46AMDGPUGenRegisterInfo.inc: $(TD_FILES)
47 $(call tablegen, -gen-register-info, AMDGPU.td, $@)
48
49AMDGPUGenInstrInfo.inc: $(TD_FILES)
50 $(call tablegen, -gen-instr-info, AMDGPU.td, $@)
51
52AMDGPUGenAsmWriter.inc: $(TD_FILES)
53 $(call tablegen, -gen-asm-writer, AMDGPU.td, $@)
54
55AMDGPUGenDAGISel.inc: $(TD_FILES)
56 $(call tablegen, -gen-dag-isel, AMDGPU.td, $@)
57
58AMDGPUGenCallingConv.inc: $(TD_FILES)
59 $(call tablegen, -gen-callingconv, AMDGPU.td, $@)
60
61AMDGPUGenSubtargetInfo.inc: $(TD_FILES)
62 $(call tablegen, -gen-subtarget, AMDGPU.td, $@)
63
64AMDGPUGenEDInfo.inc: $(TD_FILES)
65 $(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@)
66
67AMDGPUGenIntrinsics.inc: $(TD_FILES)
68 $(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@)
69
70AMDGPUGenCodeEmitter.inc: $(TD_FILES)
71 $(call tablegen, -gen-emitter, AMDGPU.td, $@)
72
73AMDGPUGenMCCodeEmitter.inc: $(TD_FILES)
74 $(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@)
75
76AMDGPUGenDFAPacketizer.inc: $(TD_FILES)
77 $(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)
78
79LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser)
80loader: loader.o libradeon.a
81 gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index 5e793422d66..45d2e8f2e76 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -1,86 +1,3 @@
1
2TD_FILES := \
3 AMDGPU.td \
4 AMDGPUInstrInfo.td \
5 AMDGPUInstructions.td \
6 AMDGPUIntrinsics.td \
7 AMDGPURegisterInfo.td \
8 AMDILBase.td \
9 AMDILInstrInfo.td \
10 AMDILIntrinsics.td \
11 AMDILRegisterInfo.td \
12 Processors.td \
13 R600Instructions.td \
14 R600Intrinsics.td \
15 R600IntrinsicsNoOpenCL.td \
16 R600IntrinsicsOpenCL.td \
17 R600RegisterInfo.td \
18 R600Schedule.td \
19 SIInstrFormats.td \
20 SIInstrInfo.td \
21 SIInstructions.td \
22 SIIntrinsics.td \
23 SIRegisterInfo.td \
24 SISchedule.td
25
26LLVM_GENERATED_SOURCES := \
27 R600Intrinsics.td \
28 R600RegisterInfo.td \
29 SIRegisterInfo.td \
30 SIRegisterGetHWRegNum.inc \
31 AMDGPUGenRegisterInfo.inc \
32 AMDGPUGenInstrInfo.inc \
33 AMDGPUGenAsmWriter.inc \
34 AMDGPUGenDAGISel.inc \
35 AMDGPUGenCallingConv.inc \
36 AMDGPUGenSubtargetInfo.inc \
37 AMDGPUGenEDInfo.inc \
38 AMDGPUGenIntrinsics.inc \
39 AMDGPUGenCodeEmitter.inc \
40 AMDGPUGenMCCodeEmitter.inc \
41 AMDGPUGenDFAPacketizer.inc
42
43LLVM_CPP_SOURCES := \
44 AMDIL7XXDevice.cpp \
45 AMDILCFGStructurizer.cpp \
46 AMDILDevice.cpp \
47 AMDILDeviceInfo.cpp \
48 AMDILEvergreenDevice.cpp \
49 AMDILFrameLowering.cpp \
50 AMDILIntrinsicInfo.cpp \
51 AMDILISelDAGToDAG.cpp \
52 AMDILISelLowering.cpp \
53 AMDILNIDevice.cpp \
54 AMDILPeepholeOptimizer.cpp \
55 AMDILSIDevice.cpp \
56 AMDGPUAsmPrinter.cpp \
57 AMDGPUMCInstLower.cpp \
58 AMDGPUSubtarget.cpp \
59 AMDGPUTargetMachine.cpp \
60 AMDGPUISelLowering.cpp \
61 AMDGPUConvertToISA.cpp \
62 AMDGPUInstrInfo.cpp \
63 AMDGPURegisterInfo.cpp \
64 R600ExpandSpecialInstrs.cpp \
65 R600ISelLowering.cpp \
66 R600InstrInfo.cpp \
67 R600MachineFunctionInfo.cpp \
68 R600RegisterInfo.cpp \
69 SIAssignInterpRegs.cpp \
70 SIInstrInfo.cpp \
71 SIISelLowering.cpp \
72 SILowerLiteralConstants.cpp \
73 SILowerFlowControl.cpp \
74 SIMachineFunctionInfo.cpp \
75 SIRegisterInfo.cpp \
76 InstPrinter/AMDGPUInstPrinter.cpp \
77 MCTargetDesc/AMDGPUMCAsmInfo.cpp \
78 MCTargetDesc/AMDGPUAsmBackend.cpp \
79 MCTargetDesc/AMDGPUMCTargetDesc.cpp \
80 MCTargetDesc/SIMCCodeEmitter.cpp \
81 MCTargetDesc/R600MCCodeEmitter.cpp \
82 TargetInfo/AMDGPUTargetInfo.cpp \
83
84CPP_SOURCES := \ 1CPP_SOURCES := \
85 radeon_llvm_emit.cpp 2 radeon_llvm_emit.cpp
86 3
diff --git a/src/gallium/drivers/radeon/Processors.td b/src/gallium/drivers/radeon/Processors.td
deleted file mode 100644
index 3469f828fc0..00000000000
--- a/src/gallium/drivers/radeon/Processors.td
+++ /dev/null
@@ -1,29 +0,0 @@
1//===-- Processors.td - TODO: Add brief description -------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// AMDIL processors supported.
11//
12//===----------------------------------------------------------------------===//
13
14class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
15: Processor<Name, itin, Features>;
16def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
17def : Proc<"rv710", R600_EG_Itin, []>;
18def : Proc<"rv730", R600_EG_Itin, []>;
19def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
20def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
21def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
22def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
23def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
24def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
25def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
26def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
27def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
28def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
29
diff --git a/src/gallium/drivers/radeon/R600Defines.h b/src/gallium/drivers/radeon/R600Defines.h
deleted file mode 100644
index 20c357cc15f..00000000000
--- a/src/gallium/drivers/radeon/R600Defines.h
+++ /dev/null
@@ -1,35 +0,0 @@
1//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10
11// Operand Flags
12#define MO_FLAG_CLAMP (1 << 0)
13#define MO_FLAG_NEG (1 << 1)
14#define MO_FLAG_ABS (1 << 2)
15#define MO_FLAG_MASK (1 << 3)
16#define MO_FLAG_PUSH (1 << 4)
17#define MO_FLAG_NOT_LAST (1 << 5)
18#define NUM_MO_FLAGS 6
19
20// Helper for finding getting the operand index for the instruction flags
21// operand.
22#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
23
24namespace R600_InstFlag {
25 enum TIF {
26 TRANS_ONLY = (1 << 0),
27 TEX = (1 << 1),
28 REDUCTION = (1 << 2),
29 FC = (1 << 3),
30 TRIG = (1 << 4),
31 OP3 = (1 << 5),
32 VECTOR = (1 << 6)
33 //FlagOperand bits 7, 8
34 };
35}
diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
deleted file mode 100644
index d6184e55302..00000000000
--- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
+++ /dev/null
@@ -1,292 +0,0 @@
1//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Vector, Reduction, and Cube instructions need to fill the entire instruction
10// group to work correctly. This pass expands these individual instructions
11// into several instructions that will completely fill the instruction group.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "R600Defines.h"
16#include "R600InstrInfo.h"
17#include "R600RegisterInfo.h"
18#include "R600MachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineFunctionPass.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22
23using namespace llvm;
24
25namespace {
26
27class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
28
29private:
30 static char ID;
31 const R600InstrInfo *TII;
32
33 bool ExpandInputPerspective(MachineInstr& MI);
34 bool ExpandInputConstant(MachineInstr& MI);
35
36public:
37 R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
38 TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
39
40 virtual bool runOnMachineFunction(MachineFunction &MF);
41
42 const char *getPassName() const {
43 return "R600 Expand special instructions pass";
44 }
45};
46
47} // End anonymous namespace
48
49char R600ExpandSpecialInstrsPass::ID = 0;
50
51FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
52 return new R600ExpandSpecialInstrsPass(TM);
53}
54
55bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
56{
57 const R600RegisterInfo &TRI = TII->getRegisterInfo();
58 if (MI.getOpcode() != AMDGPU::input_perspective)
59 return false;
60
61 MachineBasicBlock::iterator I = &MI;
62 unsigned DstReg = MI.getOperand(0).getReg();
63 R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
64 ->getInfo<R600MachineFunctionInfo>();
65 unsigned IJIndexBase;
66
67 // In Evergreen ISA doc section 8.3.2 :
68 // We need to interpolate XY and ZW in two different instruction groups.
69 // An INTERP_* must occupy all 4 slots of an instruction group.
70 // Output of INTERP_XY is written in X,Y slots
71 // Output of INTERP_ZW is written in Z,W slots
72 //
73 // Thus interpolation requires the following sequences :
74 //
75 // AnyGPR.x = INTERP_ZW; (Write Masked Out)
76 // AnyGPR.y = INTERP_ZW; (Write Masked Out)
77 // DstGPR.z = INTERP_ZW;
78 // DstGPR.w = INTERP_ZW; (End of first IG)
79 // DstGPR.x = INTERP_XY;
80 // DstGPR.y = INTERP_XY;
81 // AnyGPR.z = INTERP_XY; (Write Masked Out)
82 // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
83 //
84 switch (MI.getOperand(1).getImm()) {
85 case 0:
86 IJIndexBase = MFI->GetIJPerspectiveIndex();
87 break;
88 case 1:
89 IJIndexBase = MFI->GetIJLinearIndex();
90 break;
91 default:
92 assert(0 && "Unknow ij index");
93 }
94
95 for (unsigned i = 0; i < 8; i++) {
96 unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
97 2 * IJIndexBase + ((i + 1) % 2));
98 unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
99 4 * MI.getOperand(2).getImm());
100
101 unsigned Sel;
102 switch (i % 4) {
103 case 0:Sel = AMDGPU::sel_x;break;
104 case 1:Sel = AMDGPU::sel_y;break;
105 case 2:Sel = AMDGPU::sel_z;break;
106 case 3:Sel = AMDGPU::sel_w;break;
107 default:break;
108 }
109
110 unsigned Res = TRI.getSubReg(DstReg, Sel);
111
112 const MCInstrDesc &Opcode = (i < 4)?
113 TII->get(AMDGPU::INTERP_ZW):
114 TII->get(AMDGPU::INTERP_XY);
115
116 MachineInstr *NewMI = BuildMI(*(MI.getParent()),
117 I, MI.getParent()->findDebugLoc(I),
118 Opcode, Res)
119 .addReg(IJIndex)
120 .addReg(ReadReg)
121 .addImm(0);
122
123 if (!(i> 1 && i < 6)) {
124 TII->addFlag(NewMI, 0, MO_FLAG_MASK);
125 }
126
127 if (i % 4 != 3)
128 TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
129 }
130
131 MI.eraseFromParent();
132
133 return true;
134}
135
136bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
137{
138 const R600RegisterInfo &TRI = TII->getRegisterInfo();
139 if (MI.getOpcode() != AMDGPU::input_constant)
140 return false;
141
142 MachineBasicBlock::iterator I = &MI;
143 unsigned DstReg = MI.getOperand(0).getReg();
144
145 for (unsigned i = 0; i < 4; i++) {
146 unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
147 4 * MI.getOperand(1).getImm() + i);
148
149 unsigned Sel;
150 switch (i % 4) {
151 case 0:Sel = AMDGPU::sel_x;break;
152 case 1:Sel = AMDGPU::sel_y;break;
153 case 2:Sel = AMDGPU::sel_z;break;
154 case 3:Sel = AMDGPU::sel_w;break;
155 default:break;
156 }
157
158 unsigned Res = TRI.getSubReg(DstReg, Sel);
159
160 MachineInstr *NewMI = BuildMI(*(MI.getParent()),
161 I, MI.getParent()->findDebugLoc(I),
162 TII->get(AMDGPU::INTERP_LOAD_P0), Res)
163 .addReg(ReadReg)
164 .addImm(0);
165
166 if (i % 4 != 3)
167 TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
168 }
169
170 MI.eraseFromParent();
171
172 return true;
173}
174
175bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
176
177 const R600RegisterInfo &TRI = TII->getRegisterInfo();
178
179 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180 BB != BB_E; ++BB) {
181 MachineBasicBlock &MBB = *BB;
182 MachineBasicBlock::iterator I = MBB.begin();
183 while (I != MBB.end()) {
184 MachineInstr &MI = *I;
185 I = llvm::next(I);
186
187 if (ExpandInputPerspective(MI))
188 continue;
189 if (ExpandInputConstant(MI))
190 continue;
191
192 bool IsReduction = TII->isReductionOp(MI.getOpcode());
193 bool IsVector = TII->isVector(MI);
194 bool IsCube = TII->isCubeOp(MI.getOpcode());
195 if (!IsReduction && !IsVector && !IsCube) {
196 continue;
197 }
198
199 // Expand the instruction
200 //
201 // Reduction instructions:
202 // T0_X = DP4 T1_XYZW, T2_XYZW
203 // becomes:
204 // TO_X = DP4 T1_X, T2_X
205 // TO_Y (write masked) = DP4 T1_Y, T2_Y
206 // TO_Z (write masked) = DP4 T1_Z, T2_Z
207 // TO_W (write masked) = DP4 T1_W, T2_W
208 //
209 // Vector instructions:
210 // T0_X = MULLO_INT T1_X, T2_X
211 // becomes:
212 // T0_X = MULLO_INT T1_X, T2_X
213 // T0_Y (write masked) = MULLO_INT T1_X, T2_X
214 // T0_Z (write masked) = MULLO_INT T1_X, T2_X
215 // T0_W (write masked) = MULLO_INT T1_X, T2_X
216 //
217 // Cube instructions:
218 // T0_XYZW = CUBE T1_XYZW
219 // becomes:
220 // TO_X = CUBE T1_Z, T1_Y
221 // T0_Y = CUBE T1_Z, T1_X
222 // T0_Z = CUBE T1_X, T1_Z
223 // T0_W = CUBE T1_Y, T1_Z
224 for (unsigned Chan = 0; Chan < 4; Chan++) {
225 unsigned DstReg = MI.getOperand(0).getReg();
226 unsigned Src0 = MI.getOperand(1).getReg();
227 unsigned Src1 = 0;
228
229 // Determine the correct source registers
230 if (!IsCube) {
231 Src1 = MI.getOperand(2).getReg();
232 }
233 if (IsReduction) {
234 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
235 Src0 = TRI.getSubReg(Src0, SubRegIndex);
236 Src1 = TRI.getSubReg(Src1, SubRegIndex);
237 } else if (IsCube) {
238 static const int CubeSrcSwz[] = {2, 2, 0, 1};
239 unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
240 unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
241 Src1 = TRI.getSubReg(Src0, SubRegIndex1);
242 Src0 = TRI.getSubReg(Src0, SubRegIndex0);
243 }
244
245 // Determine the correct destination registers;
246 unsigned Flags = 0;
247 if (IsCube) {
248 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
249 DstReg = TRI.getSubReg(DstReg, SubRegIndex);
250 } else {
251 // Mask the write if the original instruction does not write to
252 // the current Channel.
253 Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
254 unsigned DstBase = TRI.getHWRegIndex(DstReg);
255 DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
256 }
257
258 // Set the IsLast bit
259 Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
260
261 // Add the new instruction
262 unsigned Opcode;
263 if (IsCube) {
264 switch (MI.getOpcode()) {
265 case AMDGPU::CUBE_r600_pseudo:
266 Opcode = AMDGPU::CUBE_r600_real;
267 break;
268 case AMDGPU::CUBE_eg_pseudo:
269 Opcode = AMDGPU::CUBE_eg_real;
270 break;
271 default:
272 assert(!"Unknown CUBE instruction");
273 Opcode = 0;
274 break;
275 }
276 } else {
277 Opcode = MI.getOpcode();
278 }
279 MachineInstr *NewMI =
280 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
281 .addReg(Src0)
282 .addReg(Src1)
283 .addImm(0); // Flag
284
285 NewMI->setIsInsideBundle(Chan != 0);
286 TII->addFlag(NewMI, 0, Flags);
287 }
288 MI.eraseFromParent();
289 }
290 }
291 return false;
292}
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
deleted file mode 100644
index c0a05f54cae..00000000000
--- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
+++ /dev/null
@@ -1,206 +0,0 @@
1#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
2#
3# The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9#
10# This perl script prints to stdout .td code to be used as R600RegisterInfo.td
11# it also generates a file called R600HwRegInfo.include, which contains helper
12# functions for determining the hw encoding of registers.
13#
14#===------------------------------------------------------------------------===#
15
16use strict;
17use warnings;
18
19use constant CONST_REG_COUNT => 512;
20use constant TEMP_REG_COUNT => 128;
21
22my $CREG_MAX = CONST_REG_COUNT - 1;
23my $TREG_MAX = TEMP_REG_COUNT - 1;
24
25print <<STRING;
26
27class R600Reg <string name> : Register<name> {
28 let Namespace = "AMDGPU";
29}
30
31class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
32 let Namespace = "AMDGPU";
33 let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
34}
35
36STRING
37
38my $i;
39
40### REG DEFS ###
41
42my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
43my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
44
45my @t128reg;
46my @treg_x;
47for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
48 my $name = "T$i\_XYZW";
49 print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n};
50 $t128reg[$i] = $name;
51 $treg_x[$i] = "T$i\_X";
52}
53
54my $treg_string = join(",", @treg_list);
55my $creg_list = join(",", @creg_list);
56my $t128_string = join(",", @t128reg);
57my $treg_x_string = join(",", @treg_x);
58print <<STRING;
59
60class RegSet <dag s> {
61 dag set = s;
62}
63
64def ZERO : R600Reg<"0.0">;
65def HALF : R600Reg<"0.5">;
66def ONE : R600Reg<"1.0">;
67def ONE_INT : R600Reg<"1">;
68def NEG_HALF : R600Reg<"-0.5">;
69def NEG_ONE : R600Reg<"-1.0">;
70def PV_X : R600Reg<"pv.x">;
71def ALU_LITERAL_X : R600Reg<"literal.x">;
72def PREDICATE_BIT : R600Reg<"PredicateBit">;
73def PRED_SEL_OFF: R600Reg<"Pred_sel_off">;
74def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">;
75def PRED_SEL_ONE : R600Reg<"Pred_sel_one">;
76
77def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
78 $creg_list)>;
79
80def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
81 $treg_string)>;
82
83def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
84 $treg_x_string)>;
85
86def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
87 R600_TReg32,
88 R600_CReg32,
89 ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
90
91def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
92 PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
93
94def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
95 PREDICATE_BIT)>;
96
97def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
98 $t128_string)>
99{
100 let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
101 let CopyCost = -1;
102}
103
104STRING
105
106my %index_map;
107my %chan_map;
108
109for ($i = 0; $i <= $#creg_list; $i++) {
110 push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
111 push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
112}
113
114for ($i = 0; $i <= $#treg_list; $i++) {
115 push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
116 push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
117}
118
119for ($i = 0; $i <= $#t128reg; $i++) {
120 push(@{$index_map{$i}}, $t128reg[$i]);
121 push(@{$chan_map{'X'}}, $t128reg[$i]);
122}
123
124open(OUTFILE, ">", "R600HwRegInfo.include");
125
126print OUTFILE <<STRING;
127
128unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const
129{
130 switch(reg) {
131 default: assert(!"Unknown register"); return 0;
132STRING
133foreach my $key (keys(%index_map)) {
134 foreach my $reg (@{$index_map{$key}}) {
135 print OUTFILE " case AMDGPU::$reg:\n";
136 }
137 print OUTFILE " return $key;\n\n";
138}
139
140print OUTFILE " }\n}\n\n";
141
142print OUTFILE <<STRING;
143
144unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
145{
146 switch(reg) {
147 default: assert(!"Unknown register"); return 0;
148STRING
149
150foreach my $key (keys(%chan_map)) {
151 foreach my $reg (@{$chan_map{$key}}) {
152 print OUTFILE " case AMDGPU::$reg:\n";
153 }
154 my $val;
155 if ($key eq 'X') {
156 $val = 0;
157 } elsif ($key eq 'Y') {
158 $val = 1;
159 } elsif ($key eq 'Z') {
160 $val = 2;
161 } elsif ($key eq 'W') {
162 $val = 3;
163 } else {
164 die("Unknown chan value; $key");
165 }
166 print OUTFILE " return $val;\n\n";
167}
168
169print OUTFILE " }\n}\n\n";
170
171sub print_reg_defs {
172 my ($count, $prefix) = @_;
173
174 my @reg_list;
175
176 for ($i = 0; $i < $count; $i++) {
177 my $hw_index = get_hw_index($i);
178 my $chan= get_chan_str($i);
179 my $name = "$prefix$hw_index\_$chan";
180 print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n};
181 $reg_list[$i] = $name;
182 }
183 return @reg_list;
184}
185
186#Helper functions
187sub get_hw_index {
188 my ($index) = @_;
189 return int($index / 4);
190}
191
192sub get_chan_str {
193 my ($index) = @_;
194 my $chan = $index % 4;
195 if ($chan == 0 ) {
196 return 'X';
197 } elsif ($chan == 1) {
198 return 'Y';
199 } elsif ($chan == 2) {
200 return 'Z';
201 } elsif ($chan == 3) {
202 return 'W';
203 } else {
204 die("Unknown chan value: $chan");
205 }
206}
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
deleted file mode 100644
index 5dd2f5334c5..00000000000
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ /dev/null
@@ -1,740 +0,0 @@
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
11// is mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
19#include "llvm/Argument.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
23
24using namespace llvm;
25
26R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
27 AMDGPUTargetLowering(TM),
28 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
29{
30 setOperationAction(ISD::MUL, MVT::i64, Expand);
31 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
35 computeRegisterProperties();
36
37 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
39
40 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
41 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
42
43 setOperationAction(ISD::FSUB, MVT::f32, Expand);
44
45 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
46 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
47 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
48
49 setOperationAction(ISD::ROTL, MVT::i32, Custom);
50
51 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
52 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
53
54 setOperationAction(ISD::SETCC, MVT::i32, Custom);
55 setOperationAction(ISD::SETCC, MVT::f32, Custom);
56 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
57
58 setTargetDAGCombine(ISD::FP_ROUND);
59
60 setSchedulingPreference(Sched::VLIW);
61}
62
63MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
64 MachineInstr * MI, MachineBasicBlock * BB) const
65{
66 MachineFunction * MF = BB->getParent();
67 MachineRegisterInfo &MRI = MF->getRegInfo();
68 MachineBasicBlock::iterator I = *MI;
69
70 switch (MI->getOpcode()) {
71 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
72 case AMDGPU::SHADER_TYPE: break;
73 case AMDGPU::CLAMP_R600:
74 {
75 MachineInstr *NewMI =
76 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
77 .addOperand(MI->getOperand(0))
78 .addOperand(MI->getOperand(1))
79 .addImm(0) // Flags
80 .addReg(AMDGPU::PRED_SEL_OFF);
81 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
82 break;
83 }
84 case AMDGPU::FABS_R600:
85 {
86 MachineInstr *NewMI =
87 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
88 .addOperand(MI->getOperand(0))
89 .addOperand(MI->getOperand(1))
90 .addImm(0) // Flags
91 .addReg(AMDGPU::PRED_SEL_OFF);
92 TII->addFlag(NewMI, 1, MO_FLAG_ABS);
93 break;
94 }
95
96 case AMDGPU::FNEG_R600:
97 {
98 MachineInstr *NewMI =
99 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
100 .addOperand(MI->getOperand(0))
101 .addOperand(MI->getOperand(1))
102 .addImm(0) // Flags
103 .addReg(AMDGPU::PRED_SEL_OFF);
104 TII->addFlag(NewMI, 1, MO_FLAG_NEG);
105 break;
106 }
107
108 case AMDGPU::R600_LOAD_CONST:
109 {
110 int64_t RegIndex = MI->getOperand(1).getImm();
111 unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
112 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
113 .addOperand(MI->getOperand(0))
114 .addReg(ConstantReg);
115 break;
116 }
117
118 case AMDGPU::MASK_WRITE:
119 {
120 unsigned maskedRegister = MI->getOperand(0).getReg();
121 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
122 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
123 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
124 // Return early so the instruction is not erased
125 return BB;
126 }
127
128 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
129 case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
130 {
131 // Convert to DWORD address
132 unsigned NewAddr = MRI.createVirtualRegister(
133 &AMDGPU::R600_TReg32_XRegClass);
134 unsigned ShiftValue = MRI.createVirtualRegister(
135 &AMDGPU::R600_TReg32RegClass);
136 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
137
138 // XXX In theory, we should be able to pass ShiftValue directly to
139 // the LSHR_eg instruction as an inline literal, but I tried doing it
140 // this way and it didn't produce the correct results.
141 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
142 ShiftValue)
143 .addReg(AMDGPU::ALU_LITERAL_X)
144 .addReg(AMDGPU::PRED_SEL_OFF)
145 .addImm(2);
146 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
147 .addOperand(MI->getOperand(1))
148 .addReg(ShiftValue)
149 .addReg(AMDGPU::PRED_SEL_OFF);
150 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
151 .addOperand(MI->getOperand(0))
152 .addReg(NewAddr)
153 .addImm(EOP); // Set End of program bit
154 break;
155 }
156
157 case AMDGPU::RESERVE_REG:
158 {
159 R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
160 int64_t ReservedIndex = MI->getOperand(0).getImm();
161 unsigned ReservedReg =
162 AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
163 MFI->ReservedRegs.push_back(ReservedReg);
164 break;
165 }
166
167 case AMDGPU::TXD:
168 {
169 unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
170 unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
171
172 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
173 .addOperand(MI->getOperand(3))
174 .addOperand(MI->getOperand(4))
175 .addOperand(MI->getOperand(5));
176 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
177 .addOperand(MI->getOperand(2))
178 .addOperand(MI->getOperand(4))
179 .addOperand(MI->getOperand(5));
180 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
181 .addOperand(MI->getOperand(0))
182 .addOperand(MI->getOperand(1))
183 .addOperand(MI->getOperand(4))
184 .addOperand(MI->getOperand(5))
185 .addReg(t0, RegState::Implicit)
186 .addReg(t1, RegState::Implicit);
187 break;
188 }
189 case AMDGPU::TXD_SHADOW:
190 {
191 unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
192 unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
193
194 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
195 .addOperand(MI->getOperand(3))
196 .addOperand(MI->getOperand(4))
197 .addOperand(MI->getOperand(5));
198 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
199 .addOperand(MI->getOperand(2))
200 .addOperand(MI->getOperand(4))
201 .addOperand(MI->getOperand(5));
202 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
203 .addOperand(MI->getOperand(0))
204 .addOperand(MI->getOperand(1))
205 .addOperand(MI->getOperand(4))
206 .addOperand(MI->getOperand(5))
207 .addReg(t0, RegState::Implicit)
208 .addReg(t1, RegState::Implicit);
209 break;
210 }
211 case AMDGPU::BRANCH:
212 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
213 .addOperand(MI->getOperand(0))
214 .addReg(0);
215 break;
216 case AMDGPU::BRANCH_COND_f32:
217 {
218 MachineInstr *NewMI =
219 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
220 .addReg(AMDGPU::PREDICATE_BIT)
221 .addOperand(MI->getOperand(1))
222 .addImm(OPCODE_IS_NOT_ZERO)
223 .addImm(0); // Flags
224 TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
225 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
226 .addOperand(MI->getOperand(0))
227 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
228 break;
229 }
230 case AMDGPU::BRANCH_COND_i32:
231 {
232 MachineInstr *NewMI =
233 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
234 .addReg(AMDGPU::PREDICATE_BIT)
235 .addOperand(MI->getOperand(1))
236 .addImm(OPCODE_IS_NOT_ZERO_INT)
237 .addImm(0); // Flags
238 TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
239 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
240 .addOperand(MI->getOperand(0))
241 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
242 break;
243 }
244 case AMDGPU::input_perspective:
245 {
246 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
247
248 // XXX Be more fine about register reservation
249 for (unsigned i = 0; i < 4; i ++) {
250 unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
251 MFI->ReservedRegs.push_back(ReservedReg);
252 }
253
254 switch (MI->getOperand(1).getImm()) {
255 case 0:// Perspective
256 MFI->HasPerspectiveInterpolation = true;
257 break;
258 case 1:// Linear
259 MFI->HasLinearInterpolation = true;
260 break;
261 default:
262 assert(0 && "Unknow ij index");
263 }
264
265 return BB;
266 }
267 }
268
269 MI->eraseFromParent();
270 return BB;
271}
272
273//===----------------------------------------------------------------------===//
274// Custom DAG Lowering Operations
275//===----------------------------------------------------------------------===//
276
277using namespace llvm::Intrinsic;
278using namespace llvm::AMDGPUIntrinsic;
279
280SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
281{
282 switch (Op.getOpcode()) {
283 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
284 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
285 case ISD::ROTL: return LowerROTL(Op, DAG);
286 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
287 case ISD::SETCC: return LowerSETCC(Op, DAG);
288 case ISD::INTRINSIC_VOID: {
289 SDValue Chain = Op.getOperand(0);
290 unsigned IntrinsicID =
291 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
292 switch (IntrinsicID) {
293 case AMDGPUIntrinsic::AMDGPU_store_output: {
294 MachineFunction &MF = DAG.getMachineFunction();
295 MachineRegisterInfo &MRI = MF.getRegInfo();
296 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
297 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
298 if (!MRI.isLiveOut(Reg)) {
299 MRI.addLiveOut(Reg);
300 }
301 return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
302 }
303 // default for switch(IntrinsicID)
304 default: break;
305 }
306 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
307 break;
308 }
309 case ISD::INTRINSIC_WO_CHAIN: {
310 unsigned IntrinsicID =
311 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
312 EVT VT = Op.getValueType();
313 DebugLoc DL = Op.getDebugLoc();
314 switch(IntrinsicID) {
315 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
316 case AMDGPUIntrinsic::R600_load_input: {
317 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
318 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
319 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
320 }
321 case AMDGPUIntrinsic::R600_load_input_perspective: {
322 unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
323 SDValue FullVector = DAG.getNode(
324 AMDGPUISD::INTERP,
325 DL, MVT::v4f32,
326 DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
327 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
328 DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
329 }
330 case AMDGPUIntrinsic::R600_load_input_linear: {
331 unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
332 SDValue FullVector = DAG.getNode(
333 AMDGPUISD::INTERP,
334 DL, MVT::v4f32,
335 DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
336 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
337 DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
338 }
339 case AMDGPUIntrinsic::R600_load_input_constant: {
340 unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
341 SDValue FullVector = DAG.getNode(
342 AMDGPUISD::INTERP_P0,
343 DL, MVT::v4f32,
344 DAG.getConstant(slot / 4 , MVT::i32));
345 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
346 DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
347 }
348 case AMDGPUIntrinsic::R600_load_input_position: {
349 unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
350 unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
351 SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
352 RegIndex, MVT::f32);
353 if ((slot % 4) == 3) {
354 return DAG.getNode(ISD::FDIV,
355 DL, VT,
356 DAG.getConstantFP(1.0f, MVT::f32),
357 Reg);
358 } else {
359 return Reg;
360 }
361 }
362
363 case r600_read_ngroups_x:
364 return LowerImplicitParameter(DAG, VT, DL, 0);
365 case r600_read_ngroups_y:
366 return LowerImplicitParameter(DAG, VT, DL, 1);
367 case r600_read_ngroups_z:
368 return LowerImplicitParameter(DAG, VT, DL, 2);
369 case r600_read_global_size_x:
370 return LowerImplicitParameter(DAG, VT, DL, 3);
371 case r600_read_global_size_y:
372 return LowerImplicitParameter(DAG, VT, DL, 4);
373 case r600_read_global_size_z:
374 return LowerImplicitParameter(DAG, VT, DL, 5);
375 case r600_read_local_size_x:
376 return LowerImplicitParameter(DAG, VT, DL, 6);
377 case r600_read_local_size_y:
378 return LowerImplicitParameter(DAG, VT, DL, 7);
379 case r600_read_local_size_z:
380 return LowerImplicitParameter(DAG, VT, DL, 8);
381
382 case r600_read_tgid_x:
383 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
384 AMDGPU::T1_X, VT);
385 case r600_read_tgid_y:
386 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
387 AMDGPU::T1_Y, VT);
388 case r600_read_tgid_z:
389 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
390 AMDGPU::T1_Z, VT);
391 case r600_read_tidig_x:
392 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
393 AMDGPU::T0_X, VT);
394 case r600_read_tidig_y:
395 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
396 AMDGPU::T0_Y, VT);
397 case r600_read_tidig_z:
398 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
399 AMDGPU::T0_Z, VT);
400 }
401 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
402 break;
403 }
404 } // end switch(Op.getOpcode())
405 return SDValue();
406}
407
408void R600TargetLowering::ReplaceNodeResults(SDNode *N,
409 SmallVectorImpl<SDValue> &Results,
410 SelectionDAG &DAG) const
411{
412 switch (N->getOpcode()) {
413 default: return;
414 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
415 case ISD::INTRINSIC_WO_CHAIN:
416 {
417 unsigned IntrinsicID =
418 cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
419 if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
420 Results.push_back(LowerInputFace(N, DAG));
421 } else {
422 return;
423 }
424 }
425 }
426}
427
428SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
429{
430 unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
431 unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
432 SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
433 RegIndex, MVT::f32);
434 return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
435 Reg, DAG.getConstantFP(0.0f, MVT::f32),
436 DAG.getCondCode(ISD::SETUGT));
437}
438
439SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
440{
441 return DAG.getNode(
442 ISD::SETCC,
443 Op.getDebugLoc(),
444 MVT::i1,
445 Op, DAG.getConstantFP(0.0f, MVT::f32),
446 DAG.getCondCode(ISD::SETNE)
447 );
448}
449
450SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
451{
452 SDValue Chain = Op.getOperand(0);
453 SDValue CC = Op.getOperand(1);
454 SDValue LHS = Op.getOperand(2);
455 SDValue RHS = Op.getOperand(3);
456 SDValue JumpT = Op.getOperand(4);
457 SDValue CmpValue;
458 SDValue Result;
459
460 if (LHS.getValueType() == MVT::i32) {
461 CmpValue = DAG.getNode(
462 ISD::SELECT_CC,
463 Op.getDebugLoc(),
464 MVT::i32,
465 LHS, RHS,
466 DAG.getConstant(-1, MVT::i32),
467 DAG.getConstant(0, MVT::i32),
468 CC);
469 } else if (LHS.getValueType() == MVT::f32) {
470 CmpValue = DAG.getNode(
471 ISD::SELECT_CC,
472 Op.getDebugLoc(),
473 MVT::f32,
474 LHS, RHS,
475 DAG.getConstantFP(1.0f, MVT::f32),
476 DAG.getConstantFP(0.0f, MVT::f32),
477 CC);
478 } else {
479 assert(0 && "Not valid type for br_cc");
480 }
481 Result = DAG.getNode(
482 AMDGPUISD::BRANCH_COND,
483 CmpValue.getDebugLoc(),
484 MVT::Other, Chain,
485 JumpT, CmpValue);
486 return Result;
487}
488
489SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
490 DebugLoc DL,
491 unsigned DwordOffset) const
492{
493 unsigned ByteOffset = DwordOffset * 4;
494 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
495 AMDGPUAS::PARAM_I_ADDRESS);
496
497 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
498 assert(isInt<16>(ByteOffset));
499
500 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
501 DAG.getConstant(ByteOffset, MVT::i32), // PTR
502 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
503 false, false, false, 0);
504}
505
506SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
507{
508 DebugLoc DL = Op.getDebugLoc();
509 EVT VT = Op.getValueType();
510
511 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
512 Op.getOperand(0),
513 Op.getOperand(0),
514 DAG.getNode(ISD::SUB, DL, VT,
515 DAG.getConstant(32, MVT::i32),
516 Op.getOperand(1)));
517}
518
519bool R600TargetLowering::isZero(SDValue Op) const
520{
521 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
522 return Cst->isNullValue();
523 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
524 return CstFP->isZero();
525 } else {
526 return false;
527 }
528}
529
/// LowerSELECT_CC - Custom lowering for SELECT_CC.  Tries, in order:
///  1. emit a native SELECT_CC when True/False are already hardware
///     true/false values (bitcasting through the compare type if needed);
///  2. emit a native CND*-style SELECT_CC when one side of the compare is
///     zero (swapping/inverting the condition so zero ends up on the RHS);
///  3. otherwise split into two SELECT_CC nodes: one producing a hardware
///     boolean, one selecting between True/False on that boolean.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      // Result type and compare type differ: produce the boolean in the
      // compare type, then convert it to the result type.
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        // -1/0 integer boolean becomes 1.0f/0.0f via UINT_TO_FP
        // (UINT_TO_FP of a boolean is folded by the target).
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        // See LowerFPTOUINT: this becomes a setne against 0.0f.
        return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type parings in SELECT_CC");
      }
    } else {
      // Types already match: this is a native SELECT_CC.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }


  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  // NOTE(review): this branch is intentionally empty -- the optimization
  // described above is not implemented yet.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // Check if we can lower this to a native operation.
  // CND* instructions requires all operands to have the same type,
  // and RHS to be zero.

  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True/False into the compare type so all four value
      // operands of the SELECT_CC agree.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      // Zero must end up as the RHS of the compare.
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    // CND* only implements the "greater/equal" style conditions, so the
    // "less/not-equal" conditions are inverted and True/False swapped.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
648
649SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
650{
651 SDValue Cond;
652 SDValue LHS = Op.getOperand(0);
653 SDValue RHS = Op.getOperand(1);
654 SDValue CC = Op.getOperand(2);
655 DebugLoc DL = Op.getDebugLoc();
656 assert(Op.getValueType() == MVT::i32);
657 if (LHS.getValueType() == MVT::i32) {
658 Cond = DAG.getNode(
659 ISD::SELECT_CC,
660 Op.getDebugLoc(),
661 MVT::i32,
662 LHS, RHS,
663 DAG.getConstant(-1, MVT::i32),
664 DAG.getConstant(0, MVT::i32),
665 CC);
666 } else if (LHS.getValueType() == MVT::f32) {
667 Cond = DAG.getNode(
668 ISD::SELECT_CC,
669 Op.getDebugLoc(),
670 MVT::f32,
671 LHS, RHS,
672 DAG.getConstantFP(1.0f, MVT::f32),
673 DAG.getConstantFP(0.0f, MVT::f32),
674 CC);
675 Cond = DAG.getNode(
676 ISD::FP_TO_SINT,
677 DL,
678 MVT::i32,
679 Cond);
680 } else {
681 assert(0 && "Not valid type for set_cc");
682 }
683 Cond = DAG.getNode(
684 ISD::AND,
685 DL,
686 MVT::i32,
687 DAG.getConstant(1, MVT::i32),
688 Cond);
689 return Cond;
690}
691
// XXX Only kernel functions are supported, so we can assume for now that
// every function is a kernel function, but in the future we should use
// separate calling conventions for kernel and non-kernel functions.
/// LowerFormalArguments - Lower incoming kernel arguments as loads from the
/// PARAM_I constant address space.  Explicit arguments start at byte offset
/// 36 because the first nine dwords (36 bytes) hold the implicit parameters
/// read by LowerImplicitParameter.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const
{
  unsigned ParamOffsetBytes = 36;
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    EVT VT = Ins[i].VT;
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    // NOTE(review): the Argument allocated here is never freed; it exists
    // only to give MachinePointerInfo a distinct IR value.  Presumably the
    // leak is tolerated for the lifetime of the compile -- confirm.
    SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
                              DAG.getConstant(ParamOffsetBytes, MVT::i32),
                              MachinePointerInfo(new Argument(PtrTy)),
                              false, false, false, 4);
    InVals.push_back(Arg);
    // Arguments are packed back to back at their natural store sizes.
    ParamOffsetBytes += (VT.getStoreSize());
  }
  return Chain;
}
718
719//===----------------------------------------------------------------------===//
720// Custom DAG Optimizations
721//===----------------------------------------------------------------------===//
722
723SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
724 DAGCombinerInfo &DCI) const
725{
726 SelectionDAG &DAG = DCI.DAG;
727
728 switch (N->getOpcode()) {
729 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
730 case ISD::FP_ROUND: {
731 SDValue Arg = N->getOperand(0);
732 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
733 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
734 Arg.getOperand(0));
735 }
736 break;
737 }
738 }
739 return SDValue();
740}
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
deleted file mode 100644
index 7df2dd13787..00000000000
--- a/src/gallium/drivers/radeon/R600ISelLowering.h
+++ /dev/null
@@ -1,69 +0,0 @@
1//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 DAG Lowering interface definition
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef R600ISELLOWERING_H
15#define R600ISELLOWERING_H
16
17#include "AMDGPUISelLowering.h"
18
19namespace llvm {
20
21class R600InstrInfo;
22
/// R600TargetLowering - DAG lowering for the R600 family of GPUs.
class R600TargetLowering : public AMDGPUTargetLowering
{
public:
  R600TargetLowering(TargetMachine &TM);
  /// Expand pseudo instructions that need custom MachineInstr-level lowering.
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock * BB) const;
  /// Custom-lower operations this target marks as Custom.
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  /// Target-specific DAG combines (e.g. fp_round(uint_to_fp) folding).
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  /// Replace results of nodes with illegal result types.
  void ReplaceNodeResults(SDNode * N,
      SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  /// Lower incoming kernel arguments as constant-buffer loads.
  virtual SDValue LowerFormalArguments(
      SDValue Chain,
      CallingConv::ID CallConv,
      bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      DebugLoc DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const;
private:
  const R600InstrInfo * TII;

  /// LowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
  /// that are stored in the first nine dwords of a Vertex Buffer.  These
  /// implicit parameters are lowered to load instructions which retrieve the
  /// values from the Vertex Buffer.
  SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
      DebugLoc DL, unsigned DwordOffset) const;

  // MachineInstr-level variant of the above (lowercase name).
  void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
      MachineRegisterInfo & MRI, unsigned dword_offset) const;

  /// Lower BR_CC via SELECT_CC + BRANCH_COND.
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;

  /// LowerROTL - Lower ROTL opcode to BITALIGN
  SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  /// Lower the R600_load_input_face intrinsic to a live-in compare.
  SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
  SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;

  /// True if Op is an integer or floating point constant zero.
  bool isZero(SDValue Op) const;
};
66
67} // End namespace llvm;
68
69#endif // R600ISELLOWERING_H
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
deleted file mode 100644
index e990dd9370b..00000000000
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ /dev/null
@@ -1,512 +0,0 @@
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Implementation of TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600InstrInfo.h"
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPUSubtarget.h"
17#include "R600Defines.h"
18#include "R600RegisterInfo.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "AMDILUtilityFunctions.h"
21
22#define GET_INSTRINFO_CTOR
23#include "AMDGPUGenDFAPacketizer.inc"
24
25using namespace llvm;
26
27R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
28 : AMDGPUInstrInfo(tm),
29 RI(tm, *this),
30 TM(tm)
31 { }
32
33const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
34{
35 return RI;
36}
37
38bool R600InstrInfo::isTrig(const MachineInstr &MI) const
39{
40 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
41}
42
43bool R600InstrInfo::isVector(const MachineInstr &MI) const
44{
45 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
46}
47
48void
49R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50 MachineBasicBlock::iterator MI, DebugLoc DL,
51 unsigned DestReg, unsigned SrcReg,
52 bool KillSrc) const
53{
54 if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
55 && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
56 for (unsigned I = 0; I < 4; I++) {
57 unsigned SubRegIndex = RI.getSubRegFromChannel(I);
58 BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
59 .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
60 .addReg(RI.getSubReg(SrcReg, SubRegIndex))
61 .addImm(0) // Flag
62 .addReg(0) // PREDICATE_BIT
63 .addReg(DestReg, RegState::Define | RegState::Implicit);
64 }
65 } else {
66
67 /* We can't copy vec4 registers */
68 assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
69 && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
70
71 BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
72 .addReg(SrcReg, getKillRegState(KillSrc))
73 .addImm(0) // Flag
74 .addReg(0); // PREDICATE_BIT
75 }
76}
77
78MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
79 unsigned DstReg, int64_t Imm) const
80{
81 MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
82 MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
83 MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
84 MachineInstrBuilder(MI).addImm(Imm);
85 MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
86
87 return MI;
88}
89
90unsigned R600InstrInfo::getIEQOpcode() const
91{
92 return AMDGPU::SETE_INT;
93}
94
95bool R600InstrInfo::isMov(unsigned Opcode) const
96{
97
98
99 switch(Opcode) {
100 default: return false;
101 case AMDGPU::MOV:
102 case AMDGPU::MOV_IMM_F32:
103 case AMDGPU::MOV_IMM_I32:
104 return true;
105 }
106}
107
108// Some instructions act as place holders to emulate operations that the GPU
109// hardware does automatically. This function can be used to check if
110// an opcode falls into this category.
111bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
112{
113 switch (Opcode) {
114 default: return false;
115 case AMDGPU::RETURN:
116 case AMDGPU::MASK_WRITE:
117 case AMDGPU::RESERVE_REG:
118 return true;
119 }
120}
121
122bool R600InstrInfo::isReductionOp(unsigned Opcode) const
123{
124 switch(Opcode) {
125 default: return false;
126 case AMDGPU::DOT4_r600:
127 case AMDGPU::DOT4_eg:
128 return true;
129 }
130}
131
132bool R600InstrInfo::isCubeOp(unsigned Opcode) const
133{
134 switch(Opcode) {
135 default: return false;
136 case AMDGPU::CUBE_r600_pseudo:
137 case AMDGPU::CUBE_r600_real:
138 case AMDGPU::CUBE_eg_pseudo:
139 case AMDGPU::CUBE_eg_real:
140 return true;
141 }
142}
143
144DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
145 const ScheduleDAG *DAG) const
146{
147 const InstrItineraryData *II = TM->getInstrItineraryData();
148 return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
149}
150
151static bool
152isPredicateSetter(unsigned Opcode)
153{
154 switch (Opcode) {
155 case AMDGPU::PRED_X:
156 return true;
157 default:
158 return false;
159 }
160}
161
162static MachineInstr *
163findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
164 MachineBasicBlock::iterator I)
165{
166 while (I != MBB.begin()) {
167 --I;
168 MachineInstr *MI = I;
169 if (isPredicateSetter(MI->getOpcode()))
170 return MI;
171 }
172
173 return NULL;
174}
175
/// AnalyzeBranch - Standard TargetInstrInfo branch analysis: fill in
/// TBB/FBB/Cond for the terminators of MBB.  Returns false on success,
/// true if the terminator sequence cannot be analyzed.  Only JUMP
/// terminators (optionally predicated by a preceding PRED_X) are handled.
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; they are not terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  // NOTE: `--I` here moves the iterator back a step as a side effect; the
  // predicate-setter search below continues decrementing from there.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
          static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Unconditional branch: only TBB is set, Cond stays empty.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: locate the PRED_X that feeds it and record
        // its condition operands plus the PRED_SEL_ONE selector.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    // Conditional branch to TBB, unconditional fall-back branch to FBB.
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
249
250int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
251 const MachineInstr *MI = op.getParent();
252
253 switch (MI->getDesc().OpInfo->RegClass) {
254 default: // FIXME: fallthrough??
255 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
256 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
257 };
258}
259
/// InsertBranch - Insert branch code at the end of MBB and return the number
/// of instructions inserted.  A conditional branch reuses the preceding
/// PRED_X setter: its push flag is set and its compare opcode is patched
/// from Cond[1] (filled in by AnalyzeBranch / ReverseBranchCondition).
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      // Single conditional branch predicated on the nearest PRED_X.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 1, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    // Conditional branch to TBB plus unconditional branch to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 1, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}
296
/// RemoveBranch - Erase up to two trailing JUMP terminators from MBB and
/// return how many were removed.  For a predicated JUMP the push flag that
/// InsertBranch set on the feeding PRED_X is cleared, but the PRED_X itself
/// is kept (see note below).
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  // First (last-in-block) terminator.
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  // Possible second terminator (the conditional JUMP of a two-way branch).
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}
341
342bool
343R600InstrInfo::isPredicated(const MachineInstr *MI) const
344{
345 int idx = MI->findFirstPredOperandIdx();
346 if (idx < 0)
347 return false;
348
349 unsigned Reg = MI->getOperand(idx).getReg();
350 switch (Reg) {
351 default: return false;
352 case AMDGPU::PRED_SEL_ONE:
353 case AMDGPU::PRED_SEL_ZERO:
354 case AMDGPU::PREDICATE_BIT:
355 return true;
356 }
357}
358
359bool
360R600InstrInfo::isPredicable(MachineInstr *MI) const
361{
362 return AMDGPUInstrInfo::isPredicable(MI);
363}
364
365
366bool
367R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
368 unsigned NumCyles,
369 unsigned ExtraPredCycles,
370 const BranchProbability &Probability) const{
371 return true;
372}
373
374bool
375R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
376 unsigned NumTCycles,
377 unsigned ExtraTCycles,
378 MachineBasicBlock &FMBB,
379 unsigned NumFCycles,
380 unsigned ExtraFCycles,
381 const BranchProbability &Probability) const
382{
383 return true;
384}
385
386bool
387R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
388 unsigned NumCyles,
389 const BranchProbability &Probability)
390 const
391{
392 return true;
393}
394
395bool
396R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
397 MachineBasicBlock &FMBB) const
398{
399 return false;
400}
401
402
403bool
404R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
405{
406 MachineOperand &MO = Cond[1];
407 switch (MO.getImm()) {
408 case OPCODE_IS_ZERO_INT:
409 MO.setImm(OPCODE_IS_NOT_ZERO_INT);
410 break;
411 case OPCODE_IS_NOT_ZERO_INT:
412 MO.setImm(OPCODE_IS_ZERO_INT);
413 break;
414 case OPCODE_IS_ZERO:
415 MO.setImm(OPCODE_IS_NOT_ZERO);
416 break;
417 case OPCODE_IS_NOT_ZERO:
418 MO.setImm(OPCODE_IS_ZERO);
419 break;
420 default:
421 return true;
422 }
423
424 MachineOperand &MO2 = Cond[2];
425 switch (MO2.getReg()) {
426 case AMDGPU::PRED_SEL_ZERO:
427 MO2.setReg(AMDGPU::PRED_SEL_ONE);
428 break;
429 case AMDGPU::PRED_SEL_ONE:
430 MO2.setReg(AMDGPU::PRED_SEL_ZERO);
431 break;
432 default:
433 return true;
434 }
435 return false;
436}
437
438bool
439R600InstrInfo::DefinesPredicate(MachineInstr *MI,
440 std::vector<MachineOperand> &Pred) const
441{
442 return isPredicateSetter(MI->getOpcode());
443}
444
445
446bool
447R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
448 const SmallVectorImpl<MachineOperand> &Pred2) const
449{
450 return false;
451}
452
453
454bool
455R600InstrInfo::PredicateInstruction(MachineInstr *MI,
456 const SmallVectorImpl<MachineOperand> &Pred) const
457{
458 int PIdx = MI->findFirstPredOperandIdx();
459
460 if (PIdx != -1) {
461 MachineOperand &PMO = MI->getOperand(PIdx);
462 PMO.setReg(Pred[2].getReg());
463 MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
464 return true;
465 }
466
467 return false;
468}
469
470int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
471 const MachineInstr *MI,
472 unsigned *PredCost) const
473{
474 if (PredCost)
475 *PredCost = 2;
476 return 2;
477}
478
479//===----------------------------------------------------------------------===//
480// Instruction flag getters/setters
481//===----------------------------------------------------------------------===//
482
483bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
484{
485 return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
486}
487
488MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
489{
490 unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
491 assert(FlagIndex != 0 &&
492 "Instruction flags not supported for this instruction");
493 MachineOperand &FlagOp = MI->getOperand(FlagIndex);
494 assert(FlagOp.isImm());
495 return FlagOp;
496}
497
498void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
499 unsigned Flag) const
500{
501 MachineOperand &FlagOp = getFlagOp(MI);
502 FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
503}
504
505void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
506 unsigned Flag) const
507{
508 MachineOperand &FlagOp = getFlagOp(MI);
509 unsigned InstFlags = FlagOp.getImm();
510 InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
511 FlagOp.setImm(InstFlags);
512}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
deleted file mode 100644
index de82542fa2c..00000000000
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ /dev/null
@@ -1,132 +0,0 @@
1//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Interface definition for R600InstrInfo
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef R600INSTRUCTIONINFO_H_
15#define R600INSTRUCTIONINFO_H_
16
17#include "AMDIL.h"
18#include "AMDGPUInstrInfo.h"
19#include "R600RegisterInfo.h"
20
21#include <map>
22
23namespace llvm {
24
25 class AMDGPUTargetMachine;
26 class DFAPacketizer;
27 class ScheduleDAG;
28 class MachineFunction;
29 class MachineInstr;
30 class MachineInstrBuilder;
31
/// R600InstrInfo - Instruction information for the R600 family of GPUs.
class R600InstrInfo : public AMDGPUInstrInfo {
private:
  const R600RegisterInfo RI;
  AMDGPUTargetMachine &TM;

  /// Map a branch condition operand to the matching BRANCH_COND_* opcode.
  int getBranchInstr(const MachineOperand &op) const;

public:
  explicit R600InstrInfo(AMDGPUTargetMachine &tm);

  const R600RegisterInfo &getRegisterInfo() const;
  /// Emit a register copy; 128-bit copies expand to four per-channel MOVs.
  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const;

  /// isTrig - True if the instruction has the TRIG TSFlags bit set.
  bool isTrig(const MachineInstr &MI) const;
  /// isPlaceHolderOpcode - True for opcodes that only emulate work the
  /// hardware performs automatically (RETURN, MASK_WRITE, RESERVE_REG).
  bool isPlaceHolderOpcode(unsigned opcode) const;
  bool isReductionOp(unsigned opcode) const;
  bool isCubeOp(unsigned opcode) const;

  /// isVector - Vector instructions are instructions that must fill all
  /// instruction slots within an instruction group.
  bool isVector(const MachineInstr &MI) const;

  /// Create (but do not insert) a MOV of Imm into DstReg.
  virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                        int64_t Imm) const;

  virtual unsigned getIEQOpcode() const;
  virtual bool isMov(unsigned Opcode) const;

  /// Build a DFA packetizer from the subtarget's itineraries.
  DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                           const ScheduleDAG *DAG) const;

  // Branch analysis interface (see TargetInstrInfo for the contract).
  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;

  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;

  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;

  unsigned RemoveBranch(MachineBasicBlock &MBB) const;

  bool isPredicated(const MachineInstr *MI) const;

  bool isPredicable(MachineInstr *MI) const;

  // If-conversion cost model: always profitable, except unpredication.
  bool
  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
                            const BranchProbability &Probability) const;

  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
                           unsigned ExtraPredCycles,
                           const BranchProbability &Probability) const ;

  bool
  isProfitableToIfCvt(MachineBasicBlock &TMBB,
                      unsigned NumTCycles, unsigned ExtraTCycles,
                      MachineBasicBlock &FMBB,
                      unsigned NumFCycles, unsigned ExtraFCycles,
                      const BranchProbability &Probability) const;

  bool DefinesPredicate(MachineInstr *MI,
                        std::vector<MachineOperand> &Pred) const;

  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                         const SmallVectorImpl<MachineOperand> &Pred2) const;

  bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                 MachineBasicBlock &FMBB) const;

  bool PredicateInstruction(MachineInstr *MI,
                            const SmallVectorImpl<MachineOperand> &Pred) const;

  /// Flat latency model: every instruction takes 2 cycles.
  int getInstrLatency(const InstrItineraryData *ItinData,
                      const MachineInstr *MI,
                      unsigned *PredCost = 0) const;

  virtual int getInstrLatency(const InstrItineraryData *ItinData,
                              SDNode *Node) const { return 1;}

  ///hasFlagOperand - Returns true if this instruction has an operand for
  /// storing target flags.
  bool hasFlagOperand(const MachineInstr &MI) const;

  ///addFlag - Add one of the MO_FLAG* flags to the specified Operand.
  void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;

  ///isFlagSet - Determine if the specified flag is set on this Operand.
  bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;

  ///getFlagOp - Return the operand containing the flags for this instruction.
  MachineOperand &getFlagOp(MachineInstr *MI) const;

  ///clearFlag - Clear the specified flag on the instruction.
  void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
129
130} // End llvm namespace
131
132#endif // R600INSTRINFO_H_
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
deleted file mode 100644
index 120a71c5b9e..00000000000
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ /dev/null
@@ -1,1458 +0,0 @@
1//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Tablegen instruction definitions
11//
12//===----------------------------------------------------------------------===//
13
14include "R600Intrinsics.td"
15
16class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
17 InstrItinClass itin>
18 : AMDGPUInst <outs, ins, asm, pattern> {
19
20 field bits<64> Inst;
21 bit Trig = 0;
22 bit Op3 = 0;
23 bit isVector = 0;
24 bits<2> FlagOperandIdx = 0;
25
26 bits<11> op_code = inst;
27 //let Inst = inst;
28 let Namespace = "AMDGPU";
29 let OutOperandList = outs;
30 let InOperandList = ins;
31 let AsmString = asm;
32 let Pattern = pattern;
33 let Itinerary = itin;
34
35 let TSFlags{4} = Trig;
36 let TSFlags{5} = Op3;
37
38 // Vector instructions are instructions that must fill all slots in an
39 // instruction group
40 let TSFlags{6} = isVector;
41 let TSFlags{8-7} = FlagOperandIdx;
42}
43
44class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
45 AMDGPUInst <outs, ins, asm, pattern>
46{
47 field bits<64> Inst;
48
49 let Namespace = "AMDGPU";
50}
51
52def MEMxi : Operand<iPTR> {
53 let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
54}
55
56def MEMrr : Operand<iPTR> {
57 let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
58}
59
60def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
61def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
62def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
63
64class R600_ALU {
65
66 bits<7> DST_GPR = 0;
67 bits<9> SRC0_SEL = 0;
68 bits<1> SRC0_NEG = 0;
69 bits<9> SRC1_SEL = 0;
70 bits<1> SRC1_NEG = 0;
71 bits<1> CLAMP = 0;
72
73}
74
75def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
76 (ops PRED_SEL_OFF)>;
77
78
79class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
80 InstrItinClass itin = AnyALU> :
81 InstR600 <inst,
82 (outs R600_Reg32:$dst),
83 (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
84 !strconcat(opName, " $dst, $src ($p)"),
85 pattern,
86 itin>{
87 bits<7> dst;
88 bits<9> src;
89 let Inst{8-0} = src;
90 let Inst{49-39} = inst;
91 let Inst{59-53} = dst;
92 }
93
94class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
95 InstrItinClass itin = AnyALU> :
96 InstR600 <inst,
97 (outs R600_Reg32:$dst),
98 (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
99 !strconcat(opName, " $dst, $src0, $src1"),
100 pattern,
101 itin>{
102 bits<7> dst;
103 bits<9> src0;
104 bits<9> src1;
105 let Inst{8-0} = src0;
106 let Inst{21-13} = src1;
107 let Inst{49-39} = inst;
108 let Inst{59-53} = dst;
109 }
110
111class R600_3OP <bits<11> inst, string opName, list<dag> pattern,
112 InstrItinClass itin = AnyALU> :
113 InstR600 <inst,
114 (outs R600_Reg32:$dst),
115 (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops),
116 !strconcat(opName, " $dst, $src0, $src1, $src2"),
117 pattern,
118 itin>{
119 bits<7> dst;
120 bits<9> src0;
121 bits<9> src1;
122 bits<9> src2;
123 let Inst{8-0} = src0;
124 let Inst{21-13} = src1;
125 let Inst{40-32} = src2;
126 let Inst{49-45} = inst{4-0};
127 let Inst{59-53} = dst;
128 let Op3 = 1;
129 }
130
131
132
133def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
134 (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
135 "PRED $dst, $src0, $src1",
136 [], NullALU>
137{
138 bits<7> dst;
139 bits<9> src0;
140 bits<11> src1;
141 let Inst{8-0} = src0;
142 let Inst{49-39} = src1;
143 let Inst{59-53} = dst;
144 let FlagOperandIdx = 3;
145}
146
147let isTerminator = 1, isBranch = 1, isPseudo = 1 in {
148def JUMP : InstR600 <0x10,
149 (outs),
150 (ins brtarget:$target, R600_Pred:$p),
151 "JUMP $target ($p)",
152 [], AnyALU
153 >;
154}
155
156class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
157 InstrItinClass itin = VecALU> :
158 InstR600 <inst,
159 (outs R600_Reg32:$dst),
160 ins,
161 asm,
162 pattern,
163 itin>{
164 bits<7> dst;
165 let Inst{49-39} = inst;
166 let Inst{59-53} = dst;
167 }
168
169class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
170 InstrItinClass itin = AnyALU> :
171 InstR600 <inst,
172 (outs R600_Reg128:$dst),
173 (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
174 !strconcat(opName, "$dst, $src0, $src1, $src2"),
175 pattern,
176 itin>{
177 let Inst {10-0} = inst;
178 }
179
180def TEX_SHADOW : PatLeaf<
181 (imm),
182 [{uint32_t TType = (uint32_t)N->getZExtValue();
183 return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
184 }]
185>;
186
187class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
188 dag ins, string asm, list<dag> pattern> :
189 InstR600ISA <outs, ins, asm, pattern>
190{
191 bits<7> RW_GPR;
192 bits<7> INDEX_GPR;
193
194 bits<2> RIM;
195 bits<2> TYPE;
196 bits<1> RW_REL;
197 bits<2> ELEM_SIZE;
198
199 bits<12> ARRAY_SIZE;
200 bits<4> COMP_MASK;
201 bits<4> BURST_COUNT;
202 bits<1> VPM;
203 bits<1> eop;
204 bits<1> MARK;
205 bits<1> BARRIER;
206
207 // CF_ALLOC_EXPORT_WORD0_RAT
208 let Inst{3-0} = rat_id;
209 let Inst{9-4} = rat_inst;
210 let Inst{10} = 0; // Reserved
211 let Inst{12-11} = RIM;
212 let Inst{14-13} = TYPE;
213 let Inst{21-15} = RW_GPR;
214 let Inst{22} = RW_REL;
215 let Inst{29-23} = INDEX_GPR;
216 let Inst{31-30} = ELEM_SIZE;
217
218 // CF_ALLOC_EXPORT_WORD1_BUF
219 let Inst{43-32} = ARRAY_SIZE;
220 let Inst{47-44} = COMP_MASK;
221 let Inst{51-48} = BURST_COUNT;
222 let Inst{52} = VPM;
223 let Inst{53} = eop;
224 let Inst{61-54} = cf_inst;
225 let Inst{62} = MARK;
226 let Inst{63} = BARRIER;
227}
228
229def load_param : PatFrag<(ops node:$ptr),
230 (load node:$ptr),
231 [{
232 const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
233 if (Src) {
234 PointerType * PT = dyn_cast<PointerType>(Src->getType());
235 return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
236 }
237 return false;
238 }]>;
239
240def isR600 : Predicate<"Subtarget.device()"
241 "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
242def isR700 : Predicate<"Subtarget.device()"
243 "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
244 "Subtarget.device()->getDeviceFlag()"
245 ">= OCL_DEVICE_RV710">;
246def isEG : Predicate<
247 "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
248 "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
249 "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
250
251def isCayman : Predicate<"Subtarget.device()"
252 "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
253def isEGorCayman : Predicate<"Subtarget.device()"
254 "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
255 "|| Subtarget.device()->getGeneration() =="
256 "AMDGPUDeviceInfo::HD6XXX">;
257
258def isR600toCayman : Predicate<
259 "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
260
261//===----------------------------------------------------------------------===//
262// Interpolation Instructions
263//===----------------------------------------------------------------------===//
264
265def INTERP: SDNode<"AMDGPUISD::INTERP",
266 SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
267 >;
268
269def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
270 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
271 >;
272
273let usesCustomInserter = 1 in {
274def input_perspective : AMDGPUShaderInst <
275 (outs R600_Reg128:$dst),
276 (ins i32imm:$src0, i32imm:$src1),
277 "input_perspective $src0 $src1 : dst",
278 [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
279} // End usesCustomInserter = 1
280
281def input_constant : AMDGPUShaderInst <
282 (outs R600_Reg128:$dst),
283 (ins i32imm:$src),
284 "input_perspective $src : dst",
285 [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
286
287
288
289def INTERP_XY : InstR600 <0xD6,
290 (outs R600_Reg32:$dst),
291 (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
292 "INTERP_XY dst",
293 [], AnyALU>
294{
295 let FlagOperandIdx = 3;
296}
297
298def INTERP_ZW : InstR600 <0xD7,
299 (outs R600_Reg32:$dst),
300 (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
301 "INTERP_ZW dst",
302 [], AnyALU>
303{
304 let FlagOperandIdx = 3;
305}
306
307def INTERP_LOAD_P0 : InstR600 <0xE0,
308 (outs R600_Reg32:$dst),
309 (ins R600_Reg32:$src, i32imm:$flags),
310 "INTERP_LOAD_P0 dst",
311 [], AnyALU>
312{
313 let FlagOperandIdx = 2;
314}
315
316let Predicates = [isR600toCayman] in {
317
318//===----------------------------------------------------------------------===//
319// Common Instructions R600, R700, Evergreen, Cayman
320//===----------------------------------------------------------------------===//
321
322def ADD : R600_2OP <
323 0x0, "ADD",
324 [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))]
325>;
326
327// Non-IEEE MUL: 0 * anything = 0
328def MUL : R600_2OP <
329 0x1, "MUL NON-IEEE",
330 [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))]
331>;
332
333def MUL_IEEE : R600_2OP <
334 0x2, "MUL_IEEE",
335 [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))]
336>;
337
338def MAX : R600_2OP <
339 0x3, "MAX",
340 [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))]
341>;
342
343def MIN : R600_2OP <
344 0x4, "MIN",
345 [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))]
346>;
347
348// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
349// so some of the instruction names don't match the asm string.
350// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
351
352def SETE : R600_2OP <
353 0x08, "SETE",
354 [(set R600_Reg32:$dst,
355 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
356 COND_EQ))]
357>;
358
359def SGT : R600_2OP <
360 0x09, "SETGT",
361 [(set R600_Reg32:$dst,
362 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
363 COND_GT))]
364>;
365
366def SGE : R600_2OP <
367 0xA, "SETGE",
368 [(set R600_Reg32:$dst,
369 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
370 COND_GE))]
371>;
372
373def SNE : R600_2OP <
374 0xB, "SETNE",
375 [(set R600_Reg32:$dst,
376 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
377 COND_NE))]
378>;
379
380def FRACT : R600_1OP <
381 0x10, "FRACT",
382 [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))]
383>;
384
385def TRUNC : R600_1OP <
386 0x11, "TRUNC",
387 [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
388>;
389
390def CEIL : R600_1OP <
391 0x12, "CEIL",
392 [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))]
393>;
394
395def RNDNE : R600_1OP <
396 0x13, "RNDNE",
397 [(set R600_Reg32:$dst, (frint R600_Reg32:$src))]
398>;
399
400def FLOOR : R600_1OP <
401 0x14, "FLOOR",
402 [(set R600_Reg32:$dst, (ffloor R600_Reg32:$src))]
403>;
404
405def MOV : InstR600 <0x19, (outs R600_Reg32:$dst),
406 (ins R600_Reg32:$src0, i32imm:$flags,
407 R600_Pred:$p),
408 "MOV $dst, $src0", [], AnyALU> {
409 let FlagOperandIdx = 2;
410 bits<7> dst;
411 bits<9> src0;
412 let Inst{8-0} = src0;
413 let Inst{49-39} = op_code;
414 let Inst{59-53} = dst;
415}
416
417class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
418 (outs R600_Reg32:$dst),
419 (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
420 "MOV_IMM $dst, $imm",
421 [], AnyALU
422>{
423 bits<7> dst;
424 bits<9> alu_literal;
425 bits<9> p;
426 let Inst{8-0} = alu_literal;
427 let Inst{21-13} = p;
428 let Inst{49-39} = op_code;
429 let Inst{59-53} = dst;
430}
431
432def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
433def : Pat <
434 (imm:$val),
435 (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val)
436>;
437
438def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
439def : Pat <
440 (fpimm:$val),
441 (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
442>;
443
444def KILLGT : InstR600 <0x2D,
445 (outs R600_Reg32:$dst),
446 (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p,
447 variable_ops),
448 "KILLGT $dst, $src0, $src1, $flags ($p)",
449 [],
450 NullALU>{
451 let FlagOperandIdx = 3;
452 bits<7> dst;
453 bits<9> src0;
454 bits<9> src1;
455 let Inst{8-0} = src0;
456 let Inst{21-13} = src1;
457 let Inst{49-39} = op_code;
458 let Inst{59-53} = dst;
459}
460
461def AND_INT : R600_2OP <
462 0x30, "AND_INT",
463 [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))]
464>;
465
466def OR_INT : R600_2OP <
467 0x31, "OR_INT",
468 [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))]
469>;
470
471def XOR_INT : R600_2OP <
472 0x32, "XOR_INT",
473 [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))]
474>;
475
476def NOT_INT : R600_1OP <
477 0x33, "NOT_INT",
478 [(set R600_Reg32:$dst, (not R600_Reg32:$src))]
479>;
480
481def ADD_INT : R600_2OP <
482 0x34, "ADD_INT",
483 [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))]
484>;
485
486def SUB_INT : R600_2OP <
487 0x35, "SUB_INT",
488 [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))]
489>;
490
491def MAX_INT : R600_2OP <
492 0x36, "MAX_INT",
493 [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>;
494
495def MIN_INT : R600_2OP <
496 0x37, "MIN_INT",
497 [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>;
498
499def MAX_UINT : R600_2OP <
500 0x38, "MAX_UINT",
501 [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]
502>;
503
504def MIN_UINT : R600_2OP <
505 0x39, "MIN_UINT",
506 [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))]
507>;
508
509def SETE_INT : R600_2OP <
510 0x3A, "SETE_INT",
511 [(set (i32 R600_Reg32:$dst),
512 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
513>;
514
515def SETGT_INT : R600_2OP <
516 0x3B, "SGT_INT",
517 [(set (i32 R600_Reg32:$dst),
518 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
519>;
520
521def SETGE_INT : R600_2OP <
522 0x3C, "SETGE_INT",
523 [(set (i32 R600_Reg32:$dst),
524 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
525>;
526
527def SETNE_INT : R600_2OP <
528 0x3D, "SETNE_INT",
529 [(set (i32 R600_Reg32:$dst),
530 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
531>;
532
533def SETGT_UINT : R600_2OP <
534 0x3E, "SETGT_UINT",
535 [(set (i32 R600_Reg32:$dst),
536 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
537>;
538
539def SETGE_UINT : R600_2OP <
540 0x3F, "SETGE_UINT",
541 [(set (i32 R600_Reg32:$dst),
542 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
543>;
544
545def CNDE_INT : R600_3OP <
546 0x1C, "CNDE_INT",
547 [(set (i32 R600_Reg32:$dst),
548 (selectcc (i32 R600_Reg32:$src0), 0,
549 (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
550 COND_EQ))]
551>;
552
553def CNDGE_INT : R600_3OP <
554 0x1E, "CNDGE_INT",
555 [(set (i32 R600_Reg32:$dst),
556 (selectcc (i32 R600_Reg32:$src0), 0,
557 (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
558 COND_GE))]
559>;
560
561def CNDGT_INT : R600_3OP <
562 0x1D, "CNDGT_INT",
563 [(set (i32 R600_Reg32:$dst),
564 (selectcc (i32 R600_Reg32:$src0), 0,
565 (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
566 COND_GT))]
567>;
568
569//===----------------------------------------------------------------------===//
570// Texture instructions
571//===----------------------------------------------------------------------===//
572
573def TEX_LD : R600_TEX <
574 0x03, "TEX_LD",
575 [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))]
576> {
577let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5";
578let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5);
579}
580
581def TEX_GET_TEXTURE_RESINFO : R600_TEX <
582 0x04, "TEX_GET_TEXTURE_RESINFO",
583 [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
584>;
585
586def TEX_GET_GRADIENTS_H : R600_TEX <
587 0x07, "TEX_GET_GRADIENTS_H",
588 [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
589>;
590
591def TEX_GET_GRADIENTS_V : R600_TEX <
592 0x08, "TEX_GET_GRADIENTS_V",
593 [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
594>;
595
596def TEX_SET_GRADIENTS_H : R600_TEX <
597 0x0B, "TEX_SET_GRADIENTS_H",
598 []
599>;
600
601def TEX_SET_GRADIENTS_V : R600_TEX <
602 0x0C, "TEX_SET_GRADIENTS_V",
603 []
604>;
605
606def TEX_SAMPLE : R600_TEX <
607 0x10, "TEX_SAMPLE",
608 [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
609>;
610
611def TEX_SAMPLE_C : R600_TEX <
612 0x18, "TEX_SAMPLE_C",
613 [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
614>;
615
616def TEX_SAMPLE_L : R600_TEX <
617 0x11, "TEX_SAMPLE_L",
618 [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))]
619>;
620
621def TEX_SAMPLE_C_L : R600_TEX <
622 0x19, "TEX_SAMPLE_C_L",
623 [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
624>;
625
626def TEX_SAMPLE_LB : R600_TEX <
627 0x12, "TEX_SAMPLE_LB",
628 [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))]
629>;
630
631def TEX_SAMPLE_C_LB : R600_TEX <
632 0x1A, "TEX_SAMPLE_C_LB",
633 [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
634>;
635
636def TEX_SAMPLE_G : R600_TEX <
637 0x14, "TEX_SAMPLE_G",
638 []
639>;
640
641def TEX_SAMPLE_C_G : R600_TEX <
642 0x1C, "TEX_SAMPLE_C_G",
643 []
644>;
645
646//===----------------------------------------------------------------------===//
647// Helper classes for common instructions
648//===----------------------------------------------------------------------===//
649
650class MUL_LIT_Common <bits<11> inst> : R600_3OP <
651 inst, "MUL_LIT",
652 []
653>;
654
655class MULADD_Common <bits<11> inst> : R600_3OP <
656 inst, "MULADD",
657 [(set (f32 R600_Reg32:$dst),
658 (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
659>;
660
661class CNDE_Common <bits<11> inst> : R600_3OP <
662 inst, "CNDE",
663 [(set R600_Reg32:$dst,
664 (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
665 (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
666 COND_EQ))]
667>;
668
669class CNDGT_Common <bits<11> inst> : R600_3OP <
670 inst, "CNDGT",
671 [(set R600_Reg32:$dst,
672 (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
673 (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
674 COND_GT))]
675>;
676
677class CNDGE_Common <bits<11> inst> : R600_3OP <
678 inst, "CNDGE",
679 [(set R600_Reg32:$dst,
680 (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
681 (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
682 COND_GE))]
683>;
684
685class DOT4_Common <bits<11> inst> : R600_REDUCTION <
686 inst,
687 (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags),
688 "DOT4 $dst $src0, $src1",
689 []
690 > {
691 bits<9> src0;
692 bits<9> src1;
693 let Inst{8-0} = src0;
694 let Inst{21-13} = src1;
695 let FlagOperandIdx = 3;
696}
697
698class DOT4_Pat <Instruction dot4> : Pat <
699 (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1),
700 (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0)
701>;
702
703multiclass CUBE_Common <bits<11> inst> {
704
705 def _pseudo : InstR600 <
706 inst,
707 (outs R600_Reg128:$dst),
708 (ins R600_Reg128:$src),
709 "CUBE $dst $src",
710 [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
711 VecALU
712 >;
713
714 def _real : InstR600 <
715 inst,
716 (outs R600_Reg32:$dst),
717 (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
718 "CUBE $dst, $src0, $src1",
719 [], VecALU
720 >{
721 let FlagOperandIdx = 3;
722 bits<7> dst;
723 bits<9> src0;
724 bits<9> src1;
725 let Inst{8-0} = src0;
726 let Inst{21-13} = src1;
727 let Inst{49-39} = inst;
728 let Inst{59-53} = dst;
729 }
730}
731
732class EXP_IEEE_Common <bits<11> inst> : R600_1OP <
733 inst, "EXP_IEEE",
734 [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
735>;
736
737class FLT_TO_INT_Common <bits<11> inst> : R600_1OP <
738 inst, "FLT_TO_INT",
739 [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
740>;
741
742class INT_TO_FLT_Common <bits<11> inst> : R600_1OP <
743 inst, "INT_TO_FLT",
744 [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
745>;
746
747class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP <
748 inst, "FLT_TO_UINT",
749 [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
750>;
751
752class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP <
753 inst, "UINT_TO_FLT",
754 [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
755>;
756
757class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
758 inst, "LOG_CLAMPED",
759 []
760>;
761
762class LOG_IEEE_Common <bits<11> inst> : R600_1OP <
763 inst, "LOG_IEEE",
764 [(set R600_Reg32:$dst, (flog2 R600_Reg32:$src))]
765>;
766
767class LSHL_Common <bits<11> inst> : R600_2OP <
768 inst, "LSHL $dst, $src0, $src1",
769 [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))]
770>;
771
772class LSHR_Common <bits<11> inst> : R600_2OP <
773 inst, "LSHR $dst, $src0, $src1",
774 [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))]
775>;
776
777class ASHR_Common <bits<11> inst> : R600_2OP <
778 inst, "ASHR $dst, $src0, $src1",
779 [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))]
780>;
781
782class MULHI_INT_Common <bits<11> inst> : R600_2OP <
783 inst, "MULHI_INT $dst, $src0, $src1",
784 [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))]
785>;
786
787class MULHI_UINT_Common <bits<11> inst> : R600_2OP <
788 inst, "MULHI $dst, $src0, $src1",
789 [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))]
790>;
791
792class MULLO_INT_Common <bits<11> inst> : R600_2OP <
793 inst, "MULLO_INT $dst, $src0, $src1",
794 [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))]
795>;
796
797class MULLO_UINT_Common <bits<11> inst> : R600_2OP <
798 inst, "MULLO_UINT $dst, $src0, $src1",
799 []
800>;
801
802class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
803 inst, "RECIP_CLAMPED",
804 []
805>;
806
807class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
808 inst, "RECIP_IEEE",
809 [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
810>;
811
812class RECIP_UINT_Common <bits<11> inst> : R600_1OP <
813 inst, "RECIP_INT $dst, $src",
814 [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
815>;
816
817class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP <
818 inst, "RECIPSQRT_CLAMPED",
819 [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
820>;
821
822class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
823 inst, "RECIPSQRT_IEEE",
824 []
825>;
826
827class SIN_Common <bits<11> inst> : R600_1OP <
828 inst, "SIN", []>{
829 let Trig = 1;
830}
831
832class COS_Common <bits<11> inst> : R600_1OP <
833 inst, "COS", []> {
834 let Trig = 1;
835}
836
837//===----------------------------------------------------------------------===//
838// Helper patterns for complex intrinsics
839//===----------------------------------------------------------------------===//
840
841multiclass DIV_Common <InstR600 recip_ieee> {
842def : Pat<
843 (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
844 (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
845>;
846
847def : Pat<
848 (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
849 (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
850>;
851}
852
853class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat <
854 (int_AMDGPU_ssg R600_Reg32:$src),
855 (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE)))
856>;
857
858class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
859 (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
860 (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
861>;
862
863//===----------------------------------------------------------------------===//
864// R600 / R700 Instructions
865//===----------------------------------------------------------------------===//
866
867let Predicates = [isR600] in {
868
869 def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
870 def MULADD_r600 : MULADD_Common<0x10>;
871 def CNDE_r600 : CNDE_Common<0x18>;
872 def CNDGT_r600 : CNDGT_Common<0x19>;
873 def CNDGE_r600 : CNDGE_Common<0x1A>;
874 def DOT4_r600 : DOT4_Common<0x50>;
875 def : DOT4_Pat <DOT4_r600>;
876 defm CUBE_r600 : CUBE_Common<0x52>;
877 def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
878 def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
879 def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
880 def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
881 def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
882 def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
883 def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
884 def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
885 def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
886 def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
887 def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
888 def SIN_r600 : SIN_Common<0x6E>;
889 def COS_r600 : COS_Common<0x6F>;
890 def ASHR_r600 : ASHR_Common<0x70>;
891 def LSHR_r600 : LSHR_Common<0x71>;
892 def LSHL_r600 : LSHL_Common<0x72>;
893 def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
894 def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
895 def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
896 def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
897 def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
898
899 defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
900 def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>;
901 def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
902 def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
903
904}
905
906// Helper pattern for normalizing inputs to triginomic instructions for R700+
907// cards.
908class COS_PAT <InstR600 trig> : Pat<
909 (fcos R600_Reg32:$src),
910 (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src))
911>;
912
913class SIN_PAT <InstR600 trig> : Pat<
914 (fsin R600_Reg32:$src),
915 (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src))
916>;
917
918//===----------------------------------------------------------------------===//
919// R700 Only instructions
920//===----------------------------------------------------------------------===//
921
922let Predicates = [isR700] in {
923 def SIN_r700 : SIN_Common<0x6E>;
924 def COS_r700 : COS_Common<0x6F>;
925
926 // R700 normalizes inputs to SIN/COS the same as EG
927 def : SIN_PAT <SIN_r700>;
928 def : COS_PAT <COS_r700>;
929}
930
931//===----------------------------------------------------------------------===//
932// Evergreen Only instructions
933//===----------------------------------------------------------------------===//
934
935let Predicates = [isEG] in {
936
937def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
938
939def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
940def MULHI_INT_eg : MULHI_INT_Common<0x90>;
941def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
942def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
943def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
944
945} // End Predicates = [isEG]
946
947//===----------------------------------------------------------------------===//
948// Evergreen / Cayman Instructions
949//===----------------------------------------------------------------------===//
950
951let Predicates = [isEGorCayman] in {
952
953 // BFE_UINT - bit_extract, an optimization for mask and shift
954 // Src0 = Input
955 // Src1 = Offset
956 // Src2 = Width
957 //
958 // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
959 //
960 // Example Usage:
961 // (Offset, Width)
962 //
963 // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
964 // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
965 // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
966 // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
967 def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
968 [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
969 R600_Reg32:$src1,
970 R600_Reg32:$src2))],
971 VecALU
972 >;
973
974 def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
975 [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
976 R600_Reg32:$src2))],
977 VecALU
978 >;
979
980 def MULADD_eg : MULADD_Common<0x14>;
981 def ASHR_eg : ASHR_Common<0x15>;
982 def LSHR_eg : LSHR_Common<0x16>;
983 def LSHL_eg : LSHL_Common<0x17>;
984 def CNDE_eg : CNDE_Common<0x19>;
985 def CNDGT_eg : CNDGT_Common<0x1A>;
986 def CNDGE_eg : CNDGE_Common<0x1B>;
987 def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
988 def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
989 def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
990 def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
991 def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
992 def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
993 def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
994 def SIN_eg : SIN_Common<0x8D>;
995 def COS_eg : COS_Common<0x8E>;
996 def DOT4_eg : DOT4_Common<0xBE>;
997 def : DOT4_Pat <DOT4_eg>;
998 defm CUBE_eg : CUBE_Common<0xC0>;
999
1000 defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
1001 def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;
1002 def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>;
1003 def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
1004
1005 def : SIN_PAT <SIN_eg>;
1006 def : COS_PAT <COS_eg>;
1007
1008 def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
1009 let Pattern = [];
1010 }
1011
1012 def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
1013
1014 def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
1015 let Pattern = [];
1016 }
1017
1018 def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
1019
1020 def : Pat<(fp_to_sint R600_Reg32:$src),
1021 (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>;
1022
1023 def : Pat<(fp_to_uint R600_Reg32:$src),
1024 (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>;
1025
1026 def : Pat<(fsqrt R600_Reg32:$src),
1027 (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
1028
1029//===----------------------------------------------------------------------===//
1030// Memory read/write instructions
1031//===----------------------------------------------------------------------===//
1032
1033let usesCustomInserter = 1 in {
1034
1035class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT <
1036 0x57, 0x2, 0, (outs), ins, !strconcat(name, " $rw_gpr, $index_gpr, $eop"), []>
1037{
1038 let RIM = 0;
1039 // XXX: Have a separate instruction for non-indexed writes.
1040 let TYPE = 1;
1041 let RW_REL = 0;
1042 let ELEM_SIZE = 0;
1043
1044 let ARRAY_SIZE = 0;
1045 let COMP_MASK = comp_mask;
1046 let BURST_COUNT = 0;
1047 let VPM = 0;
1048 let MARK = 0;
1049 let BARRIER = 1;
1050}
1051
1052} // End usesCustomInserter = 1
1053
1054// 32-bit store
1055def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
1056 (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
1057 0x1, "RAT_WRITE_CACHELESS_32_eg"
1058>;
1059
1060// i32 global_store
1061def : Pat <
1062 (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
1063 (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
1064>;
1065
1066// Floating point global_store
1067def : Pat <
1068 (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
1069 (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
1070>;
1071
1072//128-bit store
1073def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
1074 (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
1075 0xf, "RAT_WRITE_CACHELESS_128"
1076>;
1077
1078// v4f32 global store
1079def : Pat <
1080 (global_store (v4f32 R600_Reg128:$val), R600_TReg32_X:$ptr),
1081 (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0)
1082>;
1083
1084class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern>
1085 : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> {
1086
1087 // Operands
1088 bits<7> DST_GPR;
1089 bits<7> SRC_GPR;
1090
1091 // Static fields
1092 bits<5> VC_INST = 0;
1093 bits<2> FETCH_TYPE = 2;
1094 bits<1> FETCH_WHOLE_QUAD = 0;
1095 bits<8> BUFFER_ID = buffer_id;
1096 bits<1> SRC_REL = 0;
1097 // XXX: We can infer this field based on the SRC_GPR. This would allow us
1098 // to store vertex addresses in any channel, not just X.
1099 bits<2> SRC_SEL_X = 0;
1100 bits<6> MEGA_FETCH_COUNT;
1101 bits<1> DST_REL = 0;
1102 bits<3> DST_SEL_X;
1103 bits<3> DST_SEL_Y;
1104 bits<3> DST_SEL_Z;
1105 bits<3> DST_SEL_W;
1106 // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
1107 // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
1108 // however, based on my testing if USE_CONST_FIELDS is set, then all
1109 // these fields need to be set to 0.
1110 bits<1> USE_CONST_FIELDS = 0;
1111 bits<6> DATA_FORMAT;
1112 bits<2> NUM_FORMAT_ALL = 1;
1113 bits<1> FORMAT_COMP_ALL = 0;
1114 bits<1> SRF_MODE_ALL = 0;
1115
1116 // LLVM can only encode 64-bit instructions, so these fields are manually
1117 // encoded in R600CodeEmitter
1118 //
1119 // bits<16> OFFSET;
1120 // bits<2> ENDIAN_SWAP = 0;
1121 // bits<1> CONST_BUF_NO_STRIDE = 0;
1122 // bits<1> MEGA_FETCH = 0;
1123 // bits<1> ALT_CONST = 0;
1124 // bits<2> BUFFER_INDEX_MODE = 0;
1125
1126 // VTX_WORD0
1127 let Inst{4-0} = VC_INST;
1128 let Inst{6-5} = FETCH_TYPE;
1129 let Inst{7} = FETCH_WHOLE_QUAD;
1130 let Inst{15-8} = BUFFER_ID;
1131 let Inst{22-16} = SRC_GPR;
1132 let Inst{23} = SRC_REL;
1133 let Inst{25-24} = SRC_SEL_X;
1134 let Inst{31-26} = MEGA_FETCH_COUNT;
1135
1136 // VTX_WORD1_GPR
1137 let Inst{38-32} = DST_GPR;
1138 let Inst{39} = DST_REL;
1139 let Inst{40} = 0; // Reserved
1140 let Inst{43-41} = DST_SEL_X;
1141 let Inst{46-44} = DST_SEL_Y;
1142 let Inst{49-47} = DST_SEL_Z;
1143 let Inst{52-50} = DST_SEL_W;
1144 let Inst{53} = USE_CONST_FIELDS;
1145 let Inst{59-54} = DATA_FORMAT;
1146 let Inst{61-60} = NUM_FORMAT_ALL;
1147 let Inst{62} = FORMAT_COMP_ALL;
1148 let Inst{63} = SRF_MODE_ALL;
1149
1150 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
1151 // is done in R600CodeEmitter)
1152 //
1153 // Inst{79-64} = OFFSET;
1154 // Inst{81-80} = ENDIAN_SWAP;
1155 // Inst{82} = CONST_BUF_NO_STRIDE;
1156 // Inst{83} = MEGA_FETCH;
1157 // Inst{84} = ALT_CONST;
1158 // Inst{86-85} = BUFFER_INDEX_MODE;
1159 // Inst{95-87} = 0; Reserved
1160
1161 // VTX_WORD3 (Padding)
1162 //
1163 // Inst{127-96} = 0;
1164}
1165
1166class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
1167 : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> {
1168
1169 let MEGA_FETCH_COUNT = 1;
1170 let DST_SEL_X = 0;
1171 let DST_SEL_Y = 7; // Masked
1172 let DST_SEL_Z = 7; // Masked
1173 let DST_SEL_W = 7; // Masked
1174 let DATA_FORMAT = 1; // FMT_8
1175}
1176
1177class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
1178 : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> {
1179
1180 let MEGA_FETCH_COUNT = 4;
1181 let DST_SEL_X = 0;
1182 let DST_SEL_Y = 7; // Masked
1183 let DST_SEL_Z = 7; // Masked
1184 let DST_SEL_W = 7; // Masked
1185 let DATA_FORMAT = 0xD; // COLOR_32
1186
1187 // This is not really necessary, but there were some GPU hangs that appeared
1188 // to be caused by ALU instructions in the next instruction group that wrote
1189 // to the $ptr registers of the VTX_READ.
1190 // e.g.
1191 // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24
1192 // %T2_X<def> = MOV %ZERO
1193 // Adding this constraint prevents this from happening.
1194 let Constraints = "$ptr.ptr = $dst";
1195}
1196
1197class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
1198 : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> {
1199
1200 let MEGA_FETCH_COUNT = 16;
1201 let DST_SEL_X = 0;
1202 let DST_SEL_Y = 1;
1203 let DST_SEL_Z = 2;
1204 let DST_SEL_W = 3;
1205 let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
1206
1207 // XXX: Need to force VTX_READ_128 instructions to write to the same register
1208 // that holds its buffer address to avoid potential hangs. We can't use
1209 // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
1210 // registers are different sizes.
1211}
1212
1213//===----------------------------------------------------------------------===//
1214// VTX Read from parameter memory space
1215//===----------------------------------------------------------------------===//
1216
1217class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0,
1218 [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
1219>;
1220
1221def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>;
1222def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>;
1223
1224
1225//===----------------------------------------------------------------------===//
1226// VTX Read from global memory space
1227//===----------------------------------------------------------------------===//
1228
1229// 8-bit reads
1230def VTX_READ_GLOBAL_i8_eg : VTX_READ_8_eg <1,
1231 [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
1232>;
1233
1234// 32-bit reads
1235
1236class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1,
1237 [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
1238>;
1239
1240def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>;
1241def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>;
1242
1243// 128-bit reads
1244
1245class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1,
1246 [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
1247>;
1248
1249def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>;
1250def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>;
1251
1252//===----------------------------------------------------------------------===//
1253// Constant Loads
1254// XXX: We are currently storing all constants in the global address space.
1255//===----------------------------------------------------------------------===//
1256
1257def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
1258 [(set (f32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
1259>;
1260
1261}
1262
1263let Predicates = [isCayman] in {
1264
1265let isVector = 1 in {
1266
1267def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
1268
1269def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
1270def MULHI_INT_cm : MULHI_INT_Common<0x90>;
1271def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
1272def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
1273
1274} // End isVector = 1
1275
1276// RECIP_UINT emulation for Cayman
1277def : Pat <
1278 (AMDGPUurecip R600_Reg32:$src0),
1279 (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
1280 (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000)))
1281>;
1282
1283} // End isCayman
1284
1285let isCodeGenOnly = 1 in {
1286
1287 def MULLIT : AMDGPUShaderInst <
1288 (outs R600_Reg128:$dst),
1289 (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
1290 "MULLIT $dst, $src0, $src1",
1291 [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
1292 >;
1293
1294let usesCustomInserter = 1, isPseudo = 1 in {
1295
1296class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst <
1297 (outs R600_TReg32:$dst),
1298 (ins),
1299 asm,
1300 [(set R600_TReg32:$dst, (intr))]
1301>;
1302
1303def R600_LOAD_CONST : AMDGPUShaderInst <
1304 (outs R600_Reg32:$dst),
1305 (ins i32imm:$src0),
1306 "R600_LOAD_CONST $dst, $src0",
1307 [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
1308>;
1309
1310def RESERVE_REG : AMDGPUShaderInst <
1311 (outs),
1312 (ins i32imm:$src),
1313 "RESERVE_REG $src",
1314 [(int_AMDGPU_reserve_reg imm:$src)]
1315>;
1316
1317def TXD: AMDGPUShaderInst <
1318 (outs R600_Reg128:$dst),
1319 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
1320 "TXD $dst, $src0, $src1, $src2, $src3, $src4",
1321 [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))]
1322>;
1323
1324def TXD_SHADOW: AMDGPUShaderInst <
1325 (outs R600_Reg128:$dst),
1326 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
1327 "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4",
1328 [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))]
1329>;
1330
1331} // End usesCustomInserter = 1, isPseudo = 1
1332
1333} // End isCodeGenOnly = 1
1334
1335def CLAMP_R600 : CLAMP <R600_Reg32>;
1336def FABS_R600 : FABS<R600_Reg32>;
1337def FNEG_R600 : FNEG<R600_Reg32>;
1338
1339let usesCustomInserter = 1 in {
1340
1341def MASK_WRITE : AMDGPUShaderInst <
1342 (outs),
1343 (ins R600_Reg32:$src),
1344 "MASK_WRITE $src",
1345 []
1346>;
1347
1348} // End usesCustomInserter = 1
1349
1350//===---------------------------------------------------------------------===//
1351// Return instruction
1352//===---------------------------------------------------------------------===//
1353let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
1354 def RETURN : ILFormat<(outs), (ins variable_ops),
1355 "RETURN", [(IL_retflag)]>;
1356}
1357
1358//===----------------------------------------------------------------------===//
1359// ISel Patterns
1360//===----------------------------------------------------------------------===//
1361
1362// KIL Patterns
1363def KILP : Pat <
1364 (int_AMDGPU_kilp),
1365 (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0))
1366>;
1367
1368def KIL : Pat <
1369 (int_AMDGPU_kill R600_Reg32:$src0),
1370 (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0))
1371>;
1372
1373// SGT Reverse args
1374def : Pat <
1375 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
1376 (SGT R600_Reg32:$src1, R600_Reg32:$src0)
1377>;
1378
1379// SGE Reverse args
1380def : Pat <
1381 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
1382 (SGE R600_Reg32:$src1, R600_Reg32:$src0)
1383>;
1384
1385// SETGT_INT reverse args
1386def : Pat <
1387 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
1388 (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
1389>;
1390
1391// SETGE_INT reverse args
1392def : Pat <
1393 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
1394 (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
1395>;
1396
1397// SETGT_UINT reverse args
1398def : Pat <
1399 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
1400 (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
1401>;
1402
1403// SETGE_UINT reverse args
1404def : Pat <
1405 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
1406 (SETGE_UINT R600_Reg32:$src0, R600_Reg32:$src1)
1407>;
1408
1409// The next two patterns are special cases for handling 'true if ordered' and
1410// 'true if unordered' conditionals. The assumption here is that the behavior of
1411// SETE and SNE conforms to the Direct3D 10 rules for floating point values
1412// described here:
1413// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
1414// We assume that SETE returns false when one of the operands is NAN and
1415// SNE returns true when one of the operands is NAN
1416
1417//SETE - 'true if ordered'
1418def : Pat <
1419 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
1420 (SETE R600_Reg32:$src0, R600_Reg32:$src1)
1421>;
1422
1423//SNE - 'true if unordered'
1424def : Pat <
1425 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
1426 (SNE R600_Reg32:$src0, R600_Reg32:$src1)
1427>;
1428
1429def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
1430def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
1431def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
1432def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
1433
1434def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
1435def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
1436def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
1437def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
1438
1439def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
1440def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
1441def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
1442def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
1443
1444def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
1445def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
1446def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
1447def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
1448
1449def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
1450def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
1451
1452// bitconvert patterns
1453
1454def : BitConvert <i32, f32, R600_Reg32>;
1455def : BitConvert <f32, i32, R600_Reg32>;
1456def : BitConvert <v4f32, v4i32, R600_Reg128>;
1457
1458} // End isR600toCayman Predicate
diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
deleted file mode 100644
index 3b62f0a7303..00000000000
--- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
+++ /dev/null
@@ -1,50 +0,0 @@
1//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Intrinsic Definitions
11//
12//===----------------------------------------------------------------------===//
13
14let TargetPrefix = "R600", isTarget = 1 in {
15 def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
16 def int_R600_load_input_perspective :
17 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
18 def int_R600_load_input_constant :
19 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
20 def int_R600_load_input_linear :
21 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
22 def int_R600_load_input_position :
23 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
24 def int_R600_load_input_face :
25 Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
26}
27
28let TargetPrefix = "r600", isTarget = 1 in {
29
30class R600ReadPreloadRegisterIntrinsic<string name>
31 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
32 GCCBuiltin<name>;
33
34multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
35 def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
36 def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
37 def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
38}
39
40defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
41 "__builtin_r600_read_global_size">;
42defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
43 "__builtin_r600_read_local_size">;
44defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
45 "__builtin_r600_read_ngroups">;
46defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
47 "__builtin_r600_read_tgid">;
48defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
49 "__builtin_r600_read_tidig">;
50} // End TargetPrefix = "r600"
diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
deleted file mode 100644
index 00877caf2b9..00000000000
--- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
+++ /dev/null
@@ -1,26 +0,0 @@
1//===-- R600Intrinsics.td - TODO: Add brief description -------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// TODO: Add full description
11//
12//===----------------------------------------------------------------------===//
13
14let TargetPrefix = "R600", isTarget = 1 in {
15 def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
16 def int_R600_load_input_perspective :
17 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
18 def int_R600_load_input_constant :
19 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
20 def int_R600_load_input_linear :
21 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
22 def int_R600_load_input_position :
23 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
24 def int_R600_load_input_face :
25 Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
26}
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
deleted file mode 100644
index a31848efc99..00000000000
--- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
1//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "R600MachineFunctionInfo.h"
11
12using namespace llvm;
13
14R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
15 : MachineFunctionInfo(),
16 HasLinearInterpolation(false),
17 HasPerspectiveInterpolation(false)
18 { }
19
20unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
21{
22 assert(HasPerspectiveInterpolation);
23 return 0;
24}
25
26unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
27{
28 assert(HasLinearInterpolation);
29 if (HasPerspectiveInterpolation)
30 return 1;
31 else
32 return 0;
33}
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
deleted file mode 100644
index 68211b25813..00000000000
--- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
1//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600MachineFunctionInfo is used for keeping track of which registers have
11// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef R600MACHINEFUNCTIONINFO_H
16#define R600MACHINEFUNCTIONINFO_H
17
18#include "llvm/CodeGen/MachineFunction.h"
19#include <vector>
20
21namespace llvm {
22
23class R600MachineFunctionInfo : public MachineFunctionInfo {
24
25public:
26 R600MachineFunctionInfo(const MachineFunction &MF);
27 std::vector<unsigned> ReservedRegs;
28 bool HasLinearInterpolation;
29 bool HasPerspectiveInterpolation;
30
31 unsigned GetIJLinearIndex() const;
32 unsigned GetIJPerspectiveIndex() const;
33
34};
35
36} // End llvm namespace
37
38#endif //R600MACHINEFUNCTIONINFO_H
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
deleted file mode 100644
index 4096cb050bf..00000000000
--- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
1//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The file contains the R600 implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600RegisterInfo.h"
15#include "AMDGPUTargetMachine.h"
16#include "R600MachineFunctionInfo.h"
17
18using namespace llvm;
19
20R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
21 const TargetInstrInfo &tii)
22: AMDGPURegisterInfo(tm, tii),
23 TM(tm),
24 TII(tii)
25 { }
26
27BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
28{
29 BitVector Reserved(getNumRegs());
30 const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
31
32 Reserved.set(AMDGPU::ZERO);
33 Reserved.set(AMDGPU::HALF);
34 Reserved.set(AMDGPU::ONE);
35 Reserved.set(AMDGPU::ONE_INT);
36 Reserved.set(AMDGPU::NEG_HALF);
37 Reserved.set(AMDGPU::NEG_ONE);
38 Reserved.set(AMDGPU::PV_X);
39 Reserved.set(AMDGPU::ALU_LITERAL_X);
40 Reserved.set(AMDGPU::PREDICATE_BIT);
41 Reserved.set(AMDGPU::PRED_SEL_OFF);
42 Reserved.set(AMDGPU::PRED_SEL_ZERO);
43 Reserved.set(AMDGPU::PRED_SEL_ONE);
44
45 for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
46 E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
47 Reserved.set(*I);
48 }
49
50 for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
51 E = MFI->ReservedRegs.end(); I != E; ++I) {
52 Reserved.set(*I);
53 Reserved.set(*(getSuperRegisters(*I)));
54 }
55
56 return Reserved;
57}
58
59const TargetRegisterClass *
60R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
61{
62 switch (rc->getID()) {
63 case AMDGPU::GPRF32RegClassID:
64 case AMDGPU::GPRI32RegClassID:
65 return &AMDGPU::R600_Reg32RegClass;
66 default: return rc;
67 }
68}
69
70unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
71{
72 switch(reg) {
73 case AMDGPU::ZERO: return 248;
74 case AMDGPU::ONE:
75 case AMDGPU::NEG_ONE: return 249;
76 case AMDGPU::ONE_INT: return 250;
77 case AMDGPU::HALF:
78 case AMDGPU::NEG_HALF: return 252;
79 case AMDGPU::ALU_LITERAL_X: return 253;
80 case AMDGPU::PREDICATE_BIT:
81 case AMDGPU::PRED_SEL_OFF:
82 case AMDGPU::PRED_SEL_ZERO:
83 case AMDGPU::PRED_SEL_ONE:
84 return 0;
85 default: return getHWRegIndexGen(reg);
86 }
87}
88
89unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
90{
91 switch(reg) {
92 case AMDGPU::ZERO:
93 case AMDGPU::ONE:
94 case AMDGPU::ONE_INT:
95 case AMDGPU::NEG_ONE:
96 case AMDGPU::HALF:
97 case AMDGPU::NEG_HALF:
98 case AMDGPU::ALU_LITERAL_X:
99 case AMDGPU::PREDICATE_BIT:
100 case AMDGPU::PRED_SEL_OFF:
101 case AMDGPU::PRED_SEL_ZERO:
102 case AMDGPU::PRED_SEL_ONE:
103 return 0;
104 default: return getHWRegChanGen(reg);
105 }
106}
107
108const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
109 MVT VT) const
110{
111 switch(VT.SimpleTy) {
112 default:
113 case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
114 }
115}
116
117unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const
118{
119 switch (Channel) {
120 default: assert(!"Invalid channel index"); return 0;
121 case 0: return AMDGPU::sel_x;
122 case 1: return AMDGPU::sel_y;
123 case 2: return AMDGPU::sel_z;
124 case 3: return AMDGPU::sel_w;
125 }
126}
127
128#include "R600HwRegInfo.include"
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h
deleted file mode 100644
index 60f6d53b2d8..00000000000
--- a/src/gallium/drivers/radeon/R600RegisterInfo.h
+++ /dev/null
@@ -1,63 +0,0 @@
1//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Interface definition for R600RegisterInfo
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef R600REGISTERINFO_H_
15#define R600REGISTERINFO_H_
16
17#include "AMDGPUTargetMachine.h"
18#include "AMDGPURegisterInfo.h"
19
20namespace llvm {
21
22class R600TargetMachine;
23class TargetInstrInfo;
24
25struct R600RegisterInfo : public AMDGPURegisterInfo
26{
27 AMDGPUTargetMachine &TM;
28 const TargetInstrInfo &TII;
29
30 R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
31
32 virtual BitVector getReservedRegs(const MachineFunction &MF) const;
33
34 /// getISARegClass - rc is an AMDIL reg class. This function returns the
35 /// R600 reg class that is equivalent to the given AMDIL reg class.
36 virtual const TargetRegisterClass * getISARegClass(
37 const TargetRegisterClass * rc) const;
38
39 /// getHWRegIndex - get the HW encoding for a register.
40 unsigned getHWRegIndex(unsigned reg) const;
41
42 /// getHWRegChan - get the HW encoding for a register's channel.
43 unsigned getHWRegChan(unsigned reg) const;
44
45 /// getCFGStructurizerRegClass - get the register class of the specified
46 /// type to use in the CFGStructurizer
47 virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
48
49 /// getSubRegFromChannel - Return the sub reg enum value for the given
50 /// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
51 unsigned getSubRegFromChannel(unsigned Channel) const;
52
53private:
54 /// getHWRegIndexGen - Generated function returns a register's encoding
55 unsigned getHWRegIndexGen(unsigned reg) const;
56 /// getHWRegChanGen - Generated function returns a register's channel
57 /// encoding.
58 unsigned getHWRegChanGen(unsigned reg) const;
59};
60
61} // End namespace llvm
62
63#endif // R600REGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td
deleted file mode 100644
index 7ede181c51d..00000000000
--- a/src/gallium/drivers/radeon/R600Schedule.td
+++ /dev/null
@@ -1,36 +0,0 @@
1//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
11// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
12// slot has been removed.
13//
14//===----------------------------------------------------------------------===//
15
16
17def ALU_X : FuncUnit;
18def ALU_Y : FuncUnit;
19def ALU_Z : FuncUnit;
20def ALU_W : FuncUnit;
21def TRANS : FuncUnit;
22
23def AnyALU : InstrItinClass;
24def VecALU : InstrItinClass;
25def TransALU : InstrItinClass;
26
27def R600_EG_Itin : ProcessorItineraries <
28 [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
29 [],
30 [
31 InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
32 InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
33 InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
34 InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
35 ]
36>;
diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
deleted file mode 100644
index 1fc0a873eb6..00000000000
--- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
1//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass maps the pseudo interpolation registers to the correct physical
11// registers. Prior to executing a fragment shader, the GPU loads interpolation
12// parameters into physical registers. The specific physical register that each
13// interpolation parameter ends up in depends on the type of the interpolation
14// parameter as well as how many interpolation parameters are used by the
15// shader.
16//
17//===----------------------------------------------------------------------===//
18
19
20
21#include "AMDGPU.h"
22#include "AMDIL.h"
23#include "SIMachineFunctionInfo.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27
28using namespace llvm;
29
30namespace {
31
32class SIAssignInterpRegsPass : public MachineFunctionPass {
33
34private:
35 static char ID;
36 TargetMachine &TM;
37
38 void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
39 unsigned physReg, unsigned virtReg);
40
41public:
42 SIAssignInterpRegsPass(TargetMachine &tm) :
43 MachineFunctionPass(ID), TM(tm) { }
44
45 virtual bool runOnMachineFunction(MachineFunction &MF);
46
47 const char *getPassName() const { return "SI Assign intrpolation registers"; }
48};
49
50} // End anonymous namespace
51
52char SIAssignInterpRegsPass::ID = 0;
53
54#define INTERP_VALUES 16
55#define REQUIRED_VALUE_MAX_INDEX 7
56
57struct InterpInfo {
58 bool Enabled;
59 unsigned Regs[3];
60 unsigned RegCount;
61};
62
63
64FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
65 return new SIAssignInterpRegsPass(tm);
66}
67
68bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
69{
70
71 struct InterpInfo InterpUse[INTERP_VALUES] = {
72 {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
73 {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
74 {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
75 {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
76 {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
77 {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
78 {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
79 {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
80 {false, {AMDGPU::POS_X_FLOAT}, 1},
81 {false, {AMDGPU::POS_Y_FLOAT}, 1},
82 {false, {AMDGPU::POS_Z_FLOAT}, 1},
83 {false, {AMDGPU::POS_W_FLOAT}, 1},
84 {false, {AMDGPU::FRONT_FACE}, 1},
85 {false, {AMDGPU::ANCILLARY}, 1},
86 {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
87 {false, {AMDGPU::POS_FIXED_PT}, 1}
88 };
89
90 SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
91 // This pass is only needed for pixel shaders.
92 if (MFI->ShaderType != ShaderType::PIXEL) {
93 return false;
94 }
95 MachineRegisterInfo &MRI = MF.getRegInfo();
96 bool ForceEnable = true;
97
98 // First pass, mark the interpolation values that are used.
99 for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
100 for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
101 RegIdx++) {
102 InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
103 !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
104 if (InterpUse[InterpIdx].Enabled &&
105 InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
106 ForceEnable = false;
107 }
108 }
109 }
110
111 // At least one interpolation mode must be enabled or else the GPU will hang.
112 if (ForceEnable) {
113 InterpUse[0].Enabled = true;
114 }
115
116 unsigned UsedVgprs = 0;
117
118 // Second pass, replace with VGPRs.
119 for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
120 if (!InterpUse[InterpIdx].Enabled) {
121 continue;
122 }
123 MFI->SPIPSInputAddr |= (1 << InterpIdx);
124
125 for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
126 RegIdx++, UsedVgprs++) {
127 unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
128 unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
129 MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
130 addLiveIn(&MF, MRI, NewReg, VirtReg);
131 }
132 }
133
134 return false;
135}
136
137void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
138 MachineRegisterInfo & MRI,
139 unsigned physReg, unsigned virtReg)
140{
141 const TargetInstrInfo * TII = TM.getInstrInfo();
142 if (!MRI.isLiveIn(physReg)) {
143 MRI.addLiveIn(physReg, virtReg);
144 MF->front().addLiveIn(physReg);
145 BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
146 TII->get(TargetOpcode::COPY), virtReg)
147 .addReg(physReg);
148 } else {
149 MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
150 }
151}
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
deleted file mode 100644
index 48bd5676eb0..00000000000
--- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
+++ /dev/null
@@ -1,291 +0,0 @@
1#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
2#
3# The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9#
10# This perl script prints to stdout .td code to be used as SIRegisterInfo.td
11# it also generates a file called SIHwRegInfo.include, which contains helper
12# functions for determining the hw encoding of registers.
13#
14#===------------------------------------------------------------------------===#
15
use strict;
use warnings;

# Number of 32-bit scalar / vector general purpose registers on SI.
my $SGPR_COUNT = 104;
my $VGPR_COUNT = 256;

my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
my $VGPR_MAX_IDX = $VGPR_COUNT - 1;

# Optional first argument: output path for the generated
# SIHwRegInfo.include helper file.
my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';

# Static prologue: sub-register indices, register base classes, and the
# special / interpolation registers.
# NOTE(review): SI_128 references sel_x..sel_w indices that are not defined
# in this prologue -- presumably they come from another .td file; confirm
# before using this script standalone.
# Fixed: PERSP_CENTROID_J previously emitted the misspelled hardware name
# "PERP_CENTROID_J".
print <<STRING;

let Namespace = "AMDGPU" in {
  def low : SubRegIndex;
  def high : SubRegIndex;

  def sub0 : SubRegIndex;
  def sub1 : SubRegIndex;
  def sub2 : SubRegIndex;
  def sub3 : SubRegIndex;
  def sub4 : SubRegIndex;
  def sub5 : SubRegIndex;
  def sub6 : SubRegIndex;
  def sub7 : SubRegIndex;
}

class SIReg <string n> : Register<n> {
  let Namespace = "AMDGPU";
}

class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [low, high];
}

class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
}

class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
}

class SGPR_32 <bits<8> num, string name> : SIReg<name> {
  field bits<8> Num;

  let Num = num;
}


class VGPR_32 <bits<9> num, string name> : SIReg<name> {
  field bits<9> Num;

  let Num = num;
}

class SGPR_64 <bits<8> num, string name, list<Register> subregs> :
    SI_64 <name, subregs>;

class VGPR_64 <bits<9> num, string name, list<Register> subregs> :
    SI_64 <name, subregs>;

class SGPR_128 <bits<8> num, string name, list<Register> subregs> :
    SI_128 <name, subregs>;

class VGPR_128 <bits<9> num, string name, list<Register> subregs> :
    SI_128 <name, subregs>;

class SGPR_256 <bits<8> num, string name, list<Register> subregs> :
    SI_256 <name, subregs>;

def VCC : SIReg<"VCC">;
def EXEC_LO : SIReg<"EXEC LO">;
def EXEC_HI : SIReg<"EXEC HI">;
def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>;
def SCC : SIReg<"SCC">;
def SREG_LIT_0 : SIReg <"S LIT 0">;
def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;

def M0 : SIReg <"M0">;

//Interpolation registers

def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
def PERSP_I_W : SIReg <"PERSP_I_W">;
def PERSP_J_W : SIReg <"PERSP_J_W">;
def PERSP_1_W : SIReg <"PERSP_1_W">;
def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
def FRONT_FACE : SIReg <"FRONT_FACE">;
def ANCILLARY : SIReg <"ANCILLARY">;
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;

STRING
128
# Emit the 32-bit scalar and vector registers, remembering each name so the
# hw-index table below can reference it.

my @SGPR;
foreach my $i (0 .. $SGPR_MAX_IDX) {
  print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
  $SGPR[$i] = "SGPR$i";
}

my @VGPR;
foreach my $i (0 .. $VGPR_MAX_IDX) {
  print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
  $VGPR[$i] = "VGPR$i";
}

# The 32-bit register classes, including the special interpolation inputs.
print <<STRING;

def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
  (add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
>;

def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
  (add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
    PERSP_SAMPLE_I, PERSP_SAMPLE_J,
    PERSP_CENTER_I, PERSP_CENTER_J,
    PERSP_CENTROID_I, PERSP_CENTROID_J,
    PERSP_I_W, PERSP_J_W, PERSP_1_W,
    LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
    LINEAR_CENTER_I, LINEAR_CENTER_J,
    LINEAR_CENTROID_I, LINEAR_CENTROID_J,
    LINE_STIPPLE_TEX_COORD,
    POS_X_FLOAT,
    POS_Y_FLOAT,
    POS_Z_FLOAT,
    POS_W_FLOAT,
    FRONT_FACE,
    ANCILLARY,
    SAMPLE_COVERAGE,
    POS_FIXED_PT
  )
>;

def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
  (add VReg_32, SReg_32)
>;

def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;


STRING
181
# Sub-register index names used by each tuple width.
my @subregs_64  = qw(low high);
my @subregs_128 = qw(sel_x sel_y sel_z sel_w);
my @subregs_256 = qw(sub0 sub1 sub2 sub3 sub4 sub5 sub6 sub7);

# Emit the wide (tuple) register classes; each call returns the list of
# tuple register names it defined.
my @SGPR64  = print_sgpr_class(64,  \@subregs_64,  'i64');
my @SGPR128 = print_sgpr_class(128, \@subregs_128, 'v4f32', 'v4i32');
my @SGPR256 = print_sgpr_class(256, \@subregs_256, 'v8i32');

my @VGPR64  = print_vgpr_class(64,  \@subregs_64,  'i64');
my @VGPR128 = print_vgpr_class(128, \@subregs_128, 'v4f32');


# AllReg_64 combines the scalar and vector 64-bit registers.
my $sgpr64_list = join(',', @SGPR64);
my $vgpr64_list = join(',', @VGPR64);
print <<STRING;

def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
  (add $sgpr64_list, $vgpr64_list)
>;

STRING
203
# Optionally emit SIHwRegInfo.include: a SIRegisterInfo::getHWRegNum()
# helper mapping each register enum value to its hardware encoding.
if ($INDEX_FILE ne '') {
  # Fail loudly if the output file cannot be created (previously the open
  # result was unchecked and all writes were silently lost).
  open(my $fh, ">", $INDEX_FILE) or die "Cannot open $INDEX_FILE: $!\n";
  my %hw_values;

  # A 32-bit register's hw number is its index; wider registers occupy
  # aligned tuples, so their hw number is index * (width / 32).
  foreach my $i (0 .. $#SGPR) {
    push (@{$hw_values{$i}}, $SGPR[$i]);
  }

  foreach my $i (0 .. $#SGPR64) {
    push (@{$hw_values{$i * 2}}, $SGPR64[$i]);
  }

  foreach my $i (0 .. $#SGPR128) {
    push (@{$hw_values{$i * 4}}, $SGPR128[$i]);
  }

  foreach my $i (0 .. $#SGPR256) {
    push (@{$hw_values{$i * 8}}, $SGPR256[$i]);
  }

  # Vector registers share hw numbers with the scalar ones; the generated
  # switch lists every register that maps to a given value.
  foreach my $i (0 .. $#VGPR) {
    push (@{$hw_values{$i}}, $VGPR[$i]);
  }
  foreach my $i (0 .. $#VGPR64) {
    push (@{$hw_values{$i * 2}}, $VGPR64[$i]);
  }

  foreach my $i (0 .. $#VGPR128) {
    push (@{$hw_values{$i * 4}}, $VGPR128[$i]);
  }


  print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n";
  for my $key (keys(%hw_values)) {
    for my $regname (@{$hw_values{$key}}) {
      print $fh " case AMDGPU::$regname:\n";
    }
    print $fh " return $key;\n";
  }
  print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n";
}
246
247
248
249
# Emit a scalar (SReg) register class of the given bit width; returns the
# list of register names that were defined.
sub print_sgpr_class {
  my $reg_width = shift;
  my $sub_reg_ref = shift;
  return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @_);
}
254
# Emit a vector (VReg) register class of the given bit width; returns the
# list of register names that were defined.
sub print_vgpr_class {
  my $reg_width = shift;
  my $sub_reg_ref = shift;
  return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @_);
}
259
# Emit one register class of the given width, built from consecutive
# tuples of 32-bit registers, and print each tuple register definition.
# Returns the list of register names placed in the class.
sub print_reg_class {
  my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
  my @registers;
  my $component_count = $reg_width / 32;

  # Define one tuple register per aligned group of 32-bit components.
  for (my $i = 0; $i < $reg_count; $i += $component_count) {
    my $reg_name = $reg_prefix . $i . '_' . $reg_width;
    my @sub_regs;
    for (my $idx = 0; $idx < $component_count; $idx++) {
      my $sub_idx = $i + $idx;
      push(@sub_regs, $reg_prefix . $sub_idx);
    }
    print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
    push (@registers, $reg_name);
  }

  # VCC and EXEC are 64-bit scalar registers, so they belong in SReg_64.
  # (Previously two identical if-blocks pushed them separately.)
  if ($class_prefix eq 'SReg' and $reg_width == 64) {
    push (@registers, 'VCC', 'EXEC');
  }

  my $reg_list = join(', ', @registers);

  print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
  print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n";
  return @registers;
}
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
deleted file mode 100644
index 7c2739cf5b6..00000000000
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
1//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
11// mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "SIISelLowering.h"
16#include "AMDIL.h"
17#include "AMDILIntrinsicInfo.h"
18#include "SIInstrInfo.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIRegisterInfo.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24
25using namespace llvm;
26
// Set up the SI lowering: register classes per value type, custom-lowered
// operations, and target DAG combines.
SITargetLowering::SITargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
{
  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
  // NOTE(review): i1 is registered twice; the second call leaves VCCReg as
  // the class recorded for i1 -- confirm the SCCReg registration is
  // intentional.
  addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
  addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);

  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);

  computeRegisterProperties();

  // i1 AND must be promoted to a 64-bit SALU op; see Loweri1ContextSwitch.
  setOperationAction(ISD::AND, MVT::i1, Custom);

  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::ADD, MVT::i32, Legal);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We need to custom lower loads from the USER_SGPR address space, so we can
  // add the SGPRs as livein registers.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::i64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setTargetDAGCombine(ISD::SELECT_CC);

  setTargetDAGCombine(ISD::SETCC);
}
65
66MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
67 MachineInstr * MI, MachineBasicBlock * BB) const
68{
69 const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
70 MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
71 MachineBasicBlock::iterator I = MI;
72
73 if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
74 AppendS_WAITCNT(MI, *BB, llvm::next(I));
75 return BB;
76 }
77
78 switch (MI->getOpcode()) {
79 default:
80 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
81 case AMDGPU::BRANCH: return BB;
82 case AMDGPU::CLAMP_SI:
83 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
84 .addOperand(MI->getOperand(0))
85 .addOperand(MI->getOperand(1))
86 // VSRC1-2 are unused, but we still need to fill all the
87 // operand slots, so we just reuse the VSRC0 operand
88 .addOperand(MI->getOperand(1))
89 .addOperand(MI->getOperand(1))
90 .addImm(0) // ABS
91 .addImm(1) // CLAMP
92 .addImm(0) // OMOD
93 .addImm(0); // NEG
94 MI->eraseFromParent();
95 break;
96
97 case AMDGPU::FABS_SI:
98 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
99 .addOperand(MI->getOperand(0))
100 .addOperand(MI->getOperand(1))
101 // VSRC1-2 are unused, but we still need to fill all the
102 // operand slots, so we just reuse the VSRC0 operand
103 .addOperand(MI->getOperand(1))
104 .addOperand(MI->getOperand(1))
105 .addImm(1) // ABS
106 .addImm(0) // CLAMP
107 .addImm(0) // OMOD
108 .addImm(0); // NEG
109 MI->eraseFromParent();
110 break;
111
112 case AMDGPU::FNEG_SI:
113 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
114 .addOperand(MI->getOperand(0))
115 .addOperand(MI->getOperand(1))
116 // VSRC1-2 are unused, but we still need to fill all the
117 // operand slots, so we just reuse the VSRC0 operand
118 .addOperand(MI->getOperand(1))
119 .addOperand(MI->getOperand(1))
120 .addImm(0) // ABS
121 .addImm(0) // CLAMP
122 .addImm(0) // OMOD
123 .addImm(1); // NEG
124 MI->eraseFromParent();
125 break;
126 case AMDGPU::SHADER_TYPE:
127 BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
128 MI->getOperand(0).getImm();
129 MI->eraseFromParent();
130 break;
131
132 case AMDGPU::SI_INTERP:
133 LowerSI_INTERP(MI, *BB, I, MRI);
134 break;
135 case AMDGPU::SI_INTERP_CONST:
136 LowerSI_INTERP_CONST(MI, *BB, I, MRI);
137 break;
138 case AMDGPU::SI_KIL:
139 LowerSI_KIL(MI, *BB, I, MRI);
140 break;
141 case AMDGPU::SI_WQM:
142 LowerSI_WQM(MI, *BB, I, MRI);
143 break;
144 case AMDGPU::SI_V_CNDLT:
145 LowerSI_V_CNDLT(MI, *BB, I, MRI);
146 break;
147 }
148 return BB;
149}
150
// Insert an S_WAITCNT with immediate 0 before iterator I.  Per the header
// comment, this is the most conservative synchronization strategy: wait
// after every memory read and write.
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I) const
{
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
          .addImm(0);
}
157
158
// Lower the SI_WQM pseudo: apply S_WQM_B64 to EXEC in place (EXEC is both
// source and destination), then delete the pseudo instruction.
void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
          .addReg(AMDGPU::EXEC);

  MI->eraseFromParent();
}
167
168void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
169 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
170{
171 unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
172 unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
173 MachineOperand dst = MI->getOperand(0);
174 MachineOperand iReg = MI->getOperand(1);
175 MachineOperand jReg = MI->getOperand(2);
176 MachineOperand attr_chan = MI->getOperand(3);
177 MachineOperand attr = MI->getOperand(4);
178 MachineOperand params = MI->getOperand(5);
179
180 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
181 .addOperand(params);
182
183 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
184 .addOperand(iReg)
185 .addOperand(attr_chan)
186 .addOperand(attr)
187 .addReg(M0);
188
189 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
190 .addOperand(dst)
191 .addReg(tmp)
192 .addOperand(jReg)
193 .addOperand(attr_chan)
194 .addOperand(attr)
195 .addReg(M0);
196
197 MI->eraseFromParent();
198}
199
200void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
201 MachineBasicBlock &BB, MachineBasicBlock::iterator I,
202 MachineRegisterInfo &MRI) const
203{
204 MachineOperand dst = MI->getOperand(0);
205 MachineOperand attr_chan = MI->getOperand(1);
206 MachineOperand attr = MI->getOperand(2);
207 MachineOperand params = MI->getOperand(3);
208 unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
209
210 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
211 .addOperand(params);
212
213 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
214 .addOperand(dst)
215 .addOperand(attr_chan)
216 .addOperand(attr)
217 .addReg(M0);
218
219 MI->eraseFromParent();
220}
221
// Lower the SI_KIL pseudo: mask off any pixel whose operand is negative,
// and terminate the wavefront entirely once no pixels remain live.
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  // Clear this pixel from the exec mask if the operand is negative
  // (compare is 0 <= operand, i.e. failing lanes are cleared).
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
          AMDGPU::VCC)
          .addReg(AMDGPU::SREG_LIT_0)
          .addOperand(MI->getOperand(0));

  // If the exec mask is non-zero, skip the next two instructions
  // (immediate 3 is the branch offset -- presumably in instruction words;
  // TODO confirm against the ISA encoding of EXP + S_ENDPGM below).
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
          .addImm(3)
          .addReg(AMDGPU::EXEC);

  // Exec mask is zero: Export to NULL target...
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
          .addImm(0)
          .addImm(0x09) // V_008DFC_SQ_EXP_NULL
          .addImm(0)
          .addImm(1)
          .addImm(1)
          .addReg(AMDGPU::SREG_LIT_0)
          .addReg(AMDGPU::SREG_LIT_0)
          .addReg(AMDGPU::SREG_LIT_0)
          .addReg(AMDGPU::SREG_LIT_0);

  // ... and terminate wavefront
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));

  MI->eraseFromParent();
}
253
// Lower the SI_V_CNDLT pseudo: select between operands 2 and 3 depending
// on whether operand 1 is less than zero (computed as 0 > src into VCC).
// Operands 3 and 2 are passed to V_CNDMASK in swapped order -- presumably
// because V_CNDMASK picks its first source when the mask bit is clear;
// confirm against the ISA docs.
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32),
          AMDGPU::VCC)
          .addReg(AMDGPU::SREG_LIT_0)
          .addOperand(MI->getOperand(1));

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(2))
          .addReg(AMDGPU::VCC);

  MI->eraseFromParent();
}
270
// SetCC results on SI are a single condition bit, so always use i1.
EVT SITargetLowering::getSetCCResultType(EVT VT) const
{
  return MVT::i1;
}
275
276//===----------------------------------------------------------------------===//
277// Custom DAG Lowering Operations
278//===----------------------------------------------------------------------===//
279
280SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
281{
282 switch (Op.getOpcode()) {
283 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
284 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
285 case ISD::LOAD: return LowerLOAD(Op, DAG);
286 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
287 case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
288 case ISD::INTRINSIC_WO_CHAIN: {
289 unsigned IntrinsicID =
290 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
291 EVT VT = Op.getValueType();
292 switch (IntrinsicID) {
293 case AMDGPUIntrinsic::SI_vs_load_buffer_index:
294 return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
295 AMDGPU::VGPR0, VT);
296 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
297 }
298 break;
299 }
300 }
301 return SDValue();
302}
303
304/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
305/// VCC register. In the VALU context, VCC is a one bit register, but in the
306/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
307/// the SALU can perform operations on the VCC register, we need to promote
308/// the operand types from i1 to i64 in order for tablegen to be able to match
309/// this operation to the correct SALU instruction. We do this promotion by
310/// wrapping the operands in a CopyToReg node.
311///
312SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
313 SelectionDAG &DAG,
314 unsigned VCCNode) const
315{
316 DebugLoc DL = Op.getDebugLoc();
317
318 SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
319 DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
320 Op.getOperand(0)),
321 DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
322 Op.getOperand(1)));
323
324 return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
325}
326
327SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
328{
329 SDValue Chain = Op.getOperand(0);
330 SDValue CC = Op.getOperand(1);
331 SDValue LHS = Op.getOperand(2);
332 SDValue RHS = Op.getOperand(3);
333 SDValue JumpT = Op.getOperand(4);
334 SDValue CmpValue;
335 SDValue Result;
336 CmpValue = DAG.getNode(
337 ISD::SETCC,
338 Op.getDebugLoc(),
339 MVT::i1,
340 LHS, RHS,
341 CC);
342
343 Result = DAG.getNode(
344 AMDGPUISD::BRANCH_COND,
345 CmpValue.getDebugLoc(),
346 MVT::Other, Chain,
347 JumpT, CmpValue);
348 return Result;
349}
350
351SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
352{
353 EVT VT = Op.getValueType();
354 LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
355
356 assert(Ptr);
357
358 unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
359
360 // We only need to lower USER_SGPR address space loads
361 if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
362 return SDValue();
363 }
364
365 // Loads from the USER_SGPR address space can only have constant value
366 // pointers.
367 ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
368 assert(BasePtr);
369
370 unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
371 const TargetRegisterClass * dstClass;
372 switch (TypeDwordWidth) {
373 default:
374 assert(!"USER_SGPR value size not implemented");
375 return SDValue();
376 case 1:
377 dstClass = &AMDGPU::SReg_32RegClass;
378 break;
379 case 2:
380 dstClass = &AMDGPU::SReg_64RegClass;
381 break;
382 }
383 uint64_t Index = BasePtr->getZExtValue();
384 assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
385 unsigned SGPRIndex = Index / TypeDwordWidth;
386 unsigned Reg = dstClass->getRegister(SGPRIndex);
387
388 DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
389 VT));
390 return SDValue();
391}
392
393SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
394{
395 SDValue LHS = Op.getOperand(0);
396 SDValue RHS = Op.getOperand(1);
397 SDValue True = Op.getOperand(2);
398 SDValue False = Op.getOperand(3);
399 SDValue CC = Op.getOperand(4);
400 EVT VT = Op.getValueType();
401 DebugLoc DL = Op.getDebugLoc();
402
403 SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
404 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
405}
406
407//===----------------------------------------------------------------------===//
408// Custom DAG optimizations
409//===----------------------------------------------------------------------===//
410
411SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
412 DAGCombinerInfo &DCI) const {
413 SelectionDAG &DAG = DCI.DAG;
414 DebugLoc DL = N->getDebugLoc();
415 EVT VT = N->getValueType(0);
416
417 switch (N->getOpcode()) {
418 default: break;
419 case ISD::SELECT_CC: {
420 N->dump();
421 ConstantSDNode *True, *False;
422 // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
423 if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
424 && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
425 && True->isAllOnesValue()
426 && False->isNullValue()
427 && VT == MVT::i1) {
428 return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
429 N->getOperand(1), N->getOperand(4));
430
431 }
432 break;
433 }
434 case ISD::SETCC: {
435 SDValue Arg0 = N->getOperand(0);
436 SDValue Arg1 = N->getOperand(1);
437 SDValue CC = N->getOperand(2);
438 ConstantSDNode * C = NULL;
439 ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
440
441 // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
442 if (VT == MVT::i1
443 && Arg0.getOpcode() == ISD::SIGN_EXTEND
444 && Arg0.getOperand(0).getValueType() == MVT::i1
445 && (C = dyn_cast<ConstantSDNode>(Arg1))
446 && C->isNullValue()
447 && CCOp == ISD::SETNE) {
448 return SimplifySetCC(VT, Arg0.getOperand(0),
449 DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
450 }
451 break;
452 }
453 }
454 return SDValue();
455}
456
457#define NODE_NAME_CASE(node) case SIISD::node: return #node;
458
459const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
460{
461 switch (Opcode) {
462 default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
463 NODE_NAME_CASE(VCC_AND)
464 NODE_NAME_CASE(VCC_BITCAST)
465 }
466}
diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h
deleted file mode 100644
index 4407bf04667..00000000000
--- a/src/gallium/drivers/radeon/SIISelLowering.h
+++ /dev/null
@@ -1,63 +0,0 @@
1//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// SI DAG Lowering interface definition
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef SIISELLOWERING_H
15#define SIISELLOWERING_H
16
17#include "AMDGPUISelLowering.h"
18#include "SIInstrInfo.h"
19
20namespace llvm {
21
22class SITargetLowering : public AMDGPUTargetLowering
23{
24 const SIInstrInfo * TII;
25
26 /// AppendS_WAITCNT - Memory reads and writes are syncronized using the
27 /// S_WAITCNT instruction. This function takes the most conservative
28 /// approach and inserts an S_WAITCNT instruction after every read and
29 /// write.
30 void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
31 MachineBasicBlock::iterator I) const;
32 void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
33 MachineBasicBlock::iterator I, unsigned Opocde) const;
34 void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
35 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
36 void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
37 MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
38 void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
39 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
40 void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
41 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
42 void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
43 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
44
45 SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
46 unsigned VCCNode) const;
47 SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
48 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
49 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
50
51public:
52 SITargetLowering(TargetMachine &tm);
53 virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
54 MachineBasicBlock * BB) const;
55 virtual EVT getSetCCResultType(EVT VT) const;
56 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
57 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
58 virtual const char* getTargetNodeName(unsigned Opcode) const;
59};
60
61} // End namespace llvm
62
63#endif //SIISELLOWERING_H
diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td
deleted file mode 100644
index 8f56e21f5a6..00000000000
--- a/src/gallium/drivers/radeon/SIInstrFormats.td
+++ /dev/null
@@ -1,131 +0,0 @@
1//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// SI Instruction format definitions.
11//
12// Instructions with _32 take 32-bit operands.
13// Instructions with _64 take 64-bit operands.
14//
15// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
16// encoding is the standard encoding, but instruction that make use of
17// any of the instruction modifiers must use the 64-bit encoding.
18//
19// Instructions with _e32 use the 32-bit encoding.
20// Instructions with _e64 use the 64-bit encoding.
21//
22//===----------------------------------------------------------------------===//
23
24
// VOP3: three-source vector-ALU encoding.  The trailing i32imm operands
// $src3-$src6 carry the instruction modifiers, filled as ABS, CLAMP, OMOD,
// NEG in that order by SIISelLowering.cpp.
class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
  : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;

class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
  : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;


// SOP1: scalar ALU with a single source operand.
class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
  : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;

class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
  : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;

// SOP2: scalar ALU with two source operands.
class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;

class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;

// SOP2 variant whose result is the VCC mask rather than an SGPR.
class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
46
// Helper for the compact 32-bit encoding of single-source vector ALU ops.
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP1 <
    op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
  >;

// Each VOP1 comes in two encodings: _e32 is the compact form, _e64 is the
// VOP3 form used when instruction modifiers are needed.  The 9-bit VOP3
// opcode is the 8-bit VOP1 opcode prefixed with {1, 1}.  Only the _e32
// form carries the selection pattern.
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
  def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
                      opName, []
  >;
}

multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {

  def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;

  def _e64 : VOP3_64 <
    {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

// Helper for the compact 32-bit encoding of two-source vector ALU ops.
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP2 <
    op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
  >;

// VOP2 instructions: the VOP3 opcode is the 6-bit VOP2 opcode prefixed
// with {1, 0, 0}.
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {

  def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;

  def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
                      opName, []
  >;
}

multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
  def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;

  def _e64 : VOP3_64 <
    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
93
// SOPK: scalar ALU op with a 16-bit immediate source.
class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
  : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;

class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
  : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;

// Helper for vector compares.  Note there is no explicit outs list: the
// result mask destination is implicit in the _e32 encoding (presumably
// VCC -- confirm against the VOPC base class).
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOPC <
    op, (ins arc:$src0, vrc:$src1), opName, pattern
  >;

// Vector compares: the 9-bit VOP3 opcode is the 8-bit VOPC opcode
// prefixed with {0}.  Only the _e32 form carries the selection pattern.
multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {

  def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;

  def _e64 : VOP3_32 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {

  def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;

  def _e64 : VOP3_64 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

// SOPC: scalar compare writing the SCC bit.
class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
  : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;

class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
  : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
131
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp
deleted file mode 100644
index 42fef6ba468..00000000000
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
1//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// SI Implementation of TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14
15#include "SIInstrInfo.h"
16#include "AMDGPUTargetMachine.h"
17#include "llvm/CodeGen/MachineInstrBuilder.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19#include "llvm/MC/MCInstrDesc.h"
20
21#include <stdio.h>
22
23using namespace llvm;
24
// Construct the SI instruction-info object: initialize the AMDGPU base
// class and the SI register-info instance, and remember the target machine.
25SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
26 : AMDGPUInstrInfo(tm),
27 RI(tm, *this),
28 TM(tm)
29 { }
30
// Accessor for the SI register info owned by this object (member RI).
31const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
32{
33 return RI;
34}
35
// copyPhysReg - Insert a physical-register copy before MI in MBB.
// Always lowers the copy to a single V_MOV_B32_e32 (vector ALU move).
// NOTE(review): no distinction is made between scalar and vector
// destinations here — presumably all copies reaching this hook are
// VGPR-sized; confirm against callers before relying on it.
36void
37SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
38 MachineBasicBlock::iterator MI, DebugLoc DL,
39 unsigned DestReg, unsigned SrcReg,
40 bool KillSrc) const
41{
42
43 // If we are trying to copy to or from SCC, there is a bug somewhere else in
44 // the backend. While it may be theoretically possible to do this, it should
45 // never be necessary.
46 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
47
48 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
49 .addReg(SrcReg, getKillRegState(KillSrc));
50}
51
52MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
53 int64_t Imm) const
54{
55 MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
56 MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
57 MachineInstrBuilder(MI).addImm(Imm);
58
59 return MI;
60
61}
62
// isMov - Return true if Opcode is one of the SI move instructions
// (scalar and vector register moves plus the immediate-move pseudos).
63bool SIInstrInfo::isMov(unsigned Opcode) const
64{
65 switch(Opcode) {
66 default: return false;
67 case AMDGPU::S_MOV_B32:
68 case AMDGPU::S_MOV_B64:
69 case AMDGPU::V_MOV_B32_e32:
70 case AMDGPU::V_MOV_B32_e64:
71 case AMDGPU::V_MOV_IMM_F32:
72 case AMDGPU::V_MOV_IMM_I32:
73 case AMDGPU::S_MOV_IMM_I32:
74 return true;
75 }
76}
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h
deleted file mode 100644
index 5915a00f779..00000000000
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ /dev/null
@@ -1,62 +0,0 @@
1//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Interface definition for SIInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14
15#ifndef SIINSTRINFO_H
16#define SIINSTRINFO_H
17
18#include "AMDGPUInstrInfo.h"
19#include "SIRegisterInfo.h"
20
21namespace llvm {
22
/// SIInstrInfo - SI (Southern Islands) specialization of the AMDGPU
/// TargetInstrInfo interface.
23class SIInstrInfo : public AMDGPUInstrInfo {
24private:
25 const SIRegisterInfo RI;
26 AMDGPUTargetMachine &TM;
27
28public:
29 explicit SIInstrInfo(AMDGPUTargetMachine &tm);
30
31 const SIRegisterInfo &getRegisterInfo() const;
32
33 virtual void copyPhysReg(MachineBasicBlock &MBB,
34 MachineBasicBlock::iterator MI, DebugLoc DL,
35 unsigned DestReg, unsigned SrcReg,
36 bool KillSrc) const;
37
38 /// getEncodingType - Returns the encoding type of this instruction.
39 unsigned getEncodingType(const MachineInstr &MI) const;
40
41 /// getEncodingBytes - Returns the size of this instructions encoding in
42 /// number of bytes.
43 unsigned getEncodingBytes(const MachineInstr &MI) const;
44
45 /// getMovImmInstr - Create a detached move-immediate instruction defining
46 /// DstReg with Imm; the caller inserts it.
45 virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
46 int64_t Imm) const;
47
48 // getIEQOpcode is not implemented for SI yet; asserting stub.
48 virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
49 virtual bool isMov(unsigned Opcode) const;
50
51 };
52
53} // End namespace llvm
54
// TSFlags bit assignments for SI instructions; must stay in sync with the
// TSFlags layout defined by the InstSI TableGen class (bits 3-0 hold the
// encoding type, bit 4 is the wait flag).
55namespace SIInstrFlags {
56 enum Flags {
57 // First 4 bits are the instruction encoding
58 NEED_WAIT = 1 << 4
59 };
60}
61
62#endif //SIINSTRINFO_H
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td
deleted file mode 100644
index 49ef342a154..00000000000
--- a/src/gallium/drivers/radeon/SIInstrInfo.td
+++ /dev/null
@@ -1,506 +0,0 @@
1//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10//===----------------------------------------------------------------------===//
11// SI DAG Profiles
12//===----------------------------------------------------------------------===//
// Type profile for binary operations on vcc: one integer result and two
// integer operands, where both operands have the same type.
13def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
14 SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
15]>;
16
17//===----------------------------------------------------------------------===//
18// SI DAG Nodes
19//===----------------------------------------------------------------------===//
20
21// and operation on 64-bit wide vcc
22def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
23 [SDNPCommutative, SDNPAssociative]
24>;
25
26// Special bitcast node for sharing VCC register between VALU and SALU
27def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
28 SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
29>;
30
// InstSI - Common base for all SI instructions. Packs the 4-bit encoding
// type and the wait flag into TSFlags so the C++ side (SIInstrFlags) can
// read them back from MCInstrDesc.
31class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
32 AMDGPUInst<outs, ins, asm, pattern> {
33
34 field bits<4> EncodingType = 0;
35 field bits<1> NeedWait = 0;
36
37 let TSFlags{3-0} = EncodingType;
38 let TSFlags{4} = NeedWait;
39
40}
41
// Enc32 / Enc64 - Bases for instructions with a 32- or 64-bit encoded form;
// subclasses fill in the Inst bits.
42class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
43 InstSI <outs, ins, asm, pattern> {
44
45 field bits<32> Inst;
46}
47
48class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
49 InstSI <outs, ins, asm, pattern> {
50
51 field bits<64> Inst;
52}
53
// SIOperand - Operand wrapper that routes encoding through the custom
// "encodeOperand" method of the SI code emitter.
54class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
55 let EncoderMethod = "encodeOperand";
56 let MIOperandInfo = opInfo;
57}
58
// Immediate predicates used by the selection patterns below.
// IMM16bit: any signed 16-bit value; IMM8bit / IMM12bit: non-negative
// values fitting in 8 / 12 bits; IMM32bitIn64bit: 64-bit immediates that
// fit in a signed 32-bit field.
59def IMM16bit : ImmLeaf <
60 i16,
61 [{return isInt<16>(Imm);}]
62>;
63
64def IMM8bit : ImmLeaf <
65 i32,
66 [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
67>;
68
69def IMM12bit : ImmLeaf <
70 i16,
71 [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
72>;
73
74def IMM32bitIn64bit : ImmLeaf <
75 i64,
76 [{return isInt<32>(Imm);}]
77>;
78
// GPR4Align / GPR2Align - Register operands whose encoding enforces 4- or
// 2-register alignment via custom encoder methods.
79class GPR4Align <RegisterClass rc> : Operand <vAny> {
80 let EncoderMethod = "GPR4AlignEncode";
81 let MIOperandInfo = (ops rc:$reg);
82}
83
84class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
85 let EncoderMethod = "GPR2AlignEncode";
86 let MIOperandInfo = (ops rc:$reg);
87}
88
// SMRD memory operands: 64-bit base plus either a register (rr) or an
// immediate (ri) offset.
89def SMRDmemrr : Operand<iPTR> {
90 let MIOperandInfo = (ops SReg_64, SReg_32);
91 let EncoderMethod = "GPR2AlignEncode";
92}
93
94def SMRDmemri : Operand<iPTR> {
95 let MIOperandInfo = (ops SReg_64, i32imm);
96 let EncoderMethod = "SMRDmemriEncode";
97}
98
// Address selection patterns implemented in the C++ instruction selector.
99def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
100def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
101
// The memory/export formats below all execute under the EXEC mask.
102let Uses = [EXEC] in {
// EXP - Export instruction (64-bit encoding, major opcode 0x3e at bits
// 31-26). Exports up to four VGPR sources to the target selected by $tgt.
103def EXP : Enc64<
104 (outs),
105 (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
106 VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
107 "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
108 [] > {
109
110 bits<4> EN;
111 bits<6> TGT;
112 bits<1> COMPR;
113 bits<1> DONE;
114 bits<1> VM;
115 bits<8> VSRC0;
116 bits<8> VSRC1;
117 bits<8> VSRC2;
118 bits<8> VSRC3;
119
120 let Inst{3-0} = EN;
121 let Inst{9-4} = TGT;
122 let Inst{10} = COMPR;
123 let Inst{11} = DONE;
124 let Inst{12} = VM;
125 let Inst{31-26} = 0x3e;
126 let Inst{39-32} = VSRC0;
127 let Inst{47-40} = VSRC1;
128 let Inst{55-48} = VSRC2;
129 let Inst{63-56} = VSRC3;
130 let EncodingType = 0; //SIInstrEncodingType::EXP
131
132 let NeedWait = 1;
133 let usesCustomInserter = 1;
134}
135
// MIMG - Image memory format (64-bit encoding, major opcode 0x3c at bits
// 31-26). Carries image address/resource/sampler operands and per-access
// modifier bits (DMASK, UNORM, GLC, ...).
136class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
137 Enc64 <outs, ins, asm, pattern> {
138
139 bits<8> VDATA;
140 bits<4> DMASK;
141 bits<1> UNORM;
142 bits<1> GLC;
143 bits<1> DA;
144 bits<1> R128;
145 bits<1> TFE;
146 bits<1> LWE;
147 bits<1> SLC;
148 bits<8> VADDR;
149 bits<5> SRSRC;
150 bits<5> SSAMP;
151
152 let Inst{11-8} = DMASK;
153 let Inst{12} = UNORM;
154 let Inst{13} = GLC;
155 let Inst{14} = DA;
156 let Inst{15} = R128;
157 let Inst{16} = TFE;
158 let Inst{17} = LWE;
159 let Inst{24-18} = op;
160 let Inst{25} = SLC;
161 let Inst{31-26} = 0x3c;
162 let Inst{39-32} = VADDR;
163 let Inst{47-40} = VDATA;
164 let Inst{52-48} = SRSRC;
165 let Inst{57-53} = SSAMP;
166
167 let EncodingType = 2; //SIInstrEncodingType::MIMG
168
169 let NeedWait = 1;
170 let usesCustomInserter = 1;
171}
172
// MTBUF - Typed buffer memory format (64-bit encoding, major opcode 0x3a).
// Unlike MUBUF, the data format is given explicitly via DFMT/NFMT.
173class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
174 Enc64<outs, ins, asm, pattern> {
175
176 bits<8> VDATA;
177 bits<12> OFFSET;
178 bits<1> OFFEN;
179 bits<1> IDXEN;
180 bits<1> GLC;
181 bits<1> ADDR64;
182 bits<4> DFMT;
183 bits<3> NFMT;
184 bits<8> VADDR;
185 bits<5> SRSRC;
186 bits<1> SLC;
187 bits<1> TFE;
188 bits<8> SOFFSET;
189
190 let Inst{11-0} = OFFSET;
191 let Inst{12} = OFFEN;
192 let Inst{13} = IDXEN;
193 let Inst{14} = GLC;
194 let Inst{15} = ADDR64;
195 let Inst{18-16} = op;
196 let Inst{22-19} = DFMT;
197 let Inst{25-23} = NFMT;
198 let Inst{31-26} = 0x3a; //encoding
199 let Inst{39-32} = VADDR;
200 let Inst{47-40} = VDATA;
201 let Inst{52-48} = SRSRC;
202 let Inst{54} = SLC;
203 let Inst{55} = TFE;
204 let Inst{63-56} = SOFFSET;
205 let EncodingType = 3; //SIInstrEncodingType::MTBUF
206
207 let NeedWait = 1;
208 let usesCustomInserter = 1;
209 let neverHasSideEffects = 1;
210}
211
// MUBUF - Untyped buffer memory format (64-bit encoding, major opcode
// 0x38). The data format comes from the buffer resource, so there are no
// DFMT/NFMT fields; an LDS bit selects LDS as the data destination.
212class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
213 Enc64<outs, ins, asm, pattern> {
214
215 bits<8> VDATA;
216 bits<12> OFFSET;
217 bits<1> OFFEN;
218 bits<1> IDXEN;
219 bits<1> GLC;
220 bits<1> ADDR64;
221 bits<1> LDS;
222 bits<8> VADDR;
223 bits<5> SRSRC;
224 bits<1> SLC;
225 bits<1> TFE;
226 bits<8> SOFFSET;
227
228 let Inst{11-0} = OFFSET;
229 let Inst{12} = OFFEN;
230 let Inst{13} = IDXEN;
231 let Inst{14} = GLC;
232 let Inst{15} = ADDR64;
233 let Inst{16} = LDS;
234 let Inst{24-18} = op;
235 let Inst{31-26} = 0x38; //encoding
236 let Inst{39-32} = VADDR;
237 let Inst{47-40} = VDATA;
238 let Inst{52-48} = SRSRC;
239 let Inst{54} = SLC;
240 let Inst{55} = TFE;
241 let Inst{63-56} = SOFFSET;
242 let EncodingType = 4; //SIInstrEncodingType::MUBUF
243
244 let NeedWait = 1;
245 let usesCustomInserter = 1;
246 let neverHasSideEffects = 1;
247}
248} // End Uses = [EXEC]
249
// SMRD - Scalar memory read format (32-bit encoding, 0x18 at bits 31-27).
// The 15-bit PTR field is decomposed into OFFSET / IMM / SBASE sub-fields.
250class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
251 Enc32<outs, ins, asm, pattern> {
252
253 bits<7> SDST;
254 bits<15> PTR;
255 bits<8> OFFSET = PTR{7-0};
256 bits<1> IMM = PTR{8};
257 bits<6> SBASE = PTR{14-9};
258
259 let Inst{7-0} = OFFSET;
260 let Inst{8} = IMM;
261 let Inst{14-9} = SBASE;
262 let Inst{21-15} = SDST;
263 let Inst{26-22} = op;
264 let Inst{31-27} = 0x18; //encoding
265 let EncodingType = 5; //SIInstrEncodingType::SMRD
266
267 let NeedWait = 1;
268 let usesCustomInserter = 1;
269}
270
// SOP1 - Scalar format with one source and one destination (32-bit
// encoding, 0x17d at bits 31-23).
271class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
272 Enc32<outs, ins, asm, pattern> {
273
274 bits<7> SDST;
275 bits<8> SSRC0;
276
277 let Inst{7-0} = SSRC0;
278 let Inst{15-8} = op;
279 let Inst{22-16} = SDST;
280 let Inst{31-23} = 0x17d; //encoding;
281 let EncodingType = 6; //SIInstrEncodingType::SOP1
282}
283
// SOP2 - Scalar format with two sources and one destination (32-bit
// encoding, 0b10 at bits 31-30).
284class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
285 Enc32 <outs, ins, asm, pattern> {
286
287 bits<7> SDST;
288 bits<8> SSRC0;
289 bits<8> SSRC1;
290
291 let Inst{7-0} = SSRC0;
292 let Inst{15-8} = SSRC1;
293 let Inst{22-16} = SDST;
294 let Inst{29-23} = op;
295 let Inst{31-30} = 0x2; // encoding
296 let EncodingType = 7; // SIInstrEncodingType::SOP2
297}
298
// SOPC - Scalar compare format (32-bit encoding, 0x17e at bits 31-23).
// The SCC destination is implicit, so $dst is excluded from the encoding.
299class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
300 Enc32<outs, ins, asm, pattern> {
301
302 bits<8> SSRC0;
303 bits<8> SSRC1;
304
305 let Inst{7-0} = SSRC0;
306 let Inst{15-8} = SSRC1;
307 let Inst{22-16} = op;
308 let Inst{31-23} = 0x17e;
309 let EncodingType = 8; // SIInstrEncodingType::SOPC
310
311 let DisableEncoding = "$dst";
312}
313
// SOPK - Scalar format with a 16-bit inline constant (32-bit encoding,
// 0xb at bits 31-28).
314class SOPK <bits<5> op, dag outs, dag ins , asm, pattern follow below
315 Enc32 <outs, ins , asm, pattern> {
316
317 bits <7> SDST;
318 bits <16> SIMM16;
319
320 let Inst{15-0} = SIMM16;
321 let Inst{22-16} = SDST;
322 let Inst{27-23} = op;
323 let Inst{31-28} = 0xb; //encoding
324 let EncodingType = 9; // SIInstrEncodingType::SOPK
325}
326
// SOPP - Scalar program-control format (no outputs; 32-bit encoding,
// 0x17f at bits 31-23) with a 16-bit immediate payload.
327class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
328 (outs),
329 ins,
330 asm,
331 pattern > {
332
333 bits <16> SIMM16;
334
335 let Inst{15-0} = SIMM16;
336 let Inst{22-16} = op;
337 let Inst{31-23} = 0x17f; // encoding
338 let EncodingType = 10; // SIInstrEncodingType::SOPP
339}
340
341
// The vector formats below all execute under the EXEC mask.
342let Uses = [EXEC] in {
// VINTRP - Parameter interpolation format (32-bit encoding, 0x32 at bits
// 31-26); selects an attribute and channel to interpolate into VDST.
343class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
344 Enc32 <outs, ins, asm, pattern> {
345
346 bits<8> VDST;
347 bits<8> VSRC;
348 bits<2> ATTRCHAN;
349 bits<6> ATTR;
350
351 let Inst{7-0} = VSRC;
352 let Inst{9-8} = ATTRCHAN;
353 let Inst{15-10} = ATTR;
354 let Inst{17-16} = op;
355 let Inst{25-18} = VDST;
356 let Inst{31-26} = 0x32; // encoding
357 let EncodingType = 11; // SIInstrEncodingType::VINTRP
358
359 let neverHasSideEffects = 1;
360}
361
// VOP1 - Vector ALU format, one source (32-bit encoding, 0x3f at bits
// 31-25). SRC0 is 9 bits wide so it can also address SGPRs/constants.
362class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
363 Enc32 <outs, ins, asm, pattern> {
364
365 bits<8> VDST;
366 bits<9> SRC0;
367
368 let Inst{8-0} = SRC0;
369 let Inst{16-9} = op;
370 let Inst{24-17} = VDST;
371 let Inst{31-25} = 0x3f; //encoding
372
373 let EncodingType = 12; // SIInstrEncodingType::VOP1
374 let PostEncoderMethod = "VOPPostEncode";
375}
376
// VOP2 - Vector ALU format, two sources (32-bit encoding, bit 31 clear).
// SRC0 is 9 bits (may address SGPRs/constants); VSRC1 must be a VGPR.
377class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
378 Enc32 <outs, ins, asm, pattern> {
379
380 bits<8> VDST;
381 bits<9> SRC0;
382 bits<8> VSRC1;
383
384 let Inst{8-0} = SRC0;
385 let Inst{16-9} = VSRC1;
386 let Inst{24-17} = VDST;
387 let Inst{30-25} = op;
388 let Inst{31} = 0x0; //encoding
389
390 let EncodingType = 13; // SIInstrEncodingType::VOP2
391 let PostEncoderMethod = "VOPPostEncode";
392}
393
// VOP3 - Three-source vector ALU format (64-bit encoding, 0x34 at bits
// 31-26) with input modifiers: ABS/NEG per source, CLAMP and OMOD on the
// result.
394class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
395 Enc64 <outs, ins, asm, pattern> {
396
397 bits<8> VDST;
398 bits<9> SRC0;
399 bits<9> SRC1;
400 bits<9> SRC2;
401 bits<3> ABS;
402 bits<1> CLAMP;
403 bits<2> OMOD;
404 bits<3> NEG;
405
406 let Inst{7-0} = VDST;
407 let Inst{10-8} = ABS;
408 let Inst{11} = CLAMP;
409 let Inst{25-17} = op;
410 let Inst{31-26} = 0x34; //encoding
411 let Inst{40-32} = SRC0;
412 let Inst{49-41} = SRC1;
413 let Inst{58-50} = SRC2;
414 let Inst{60-59} = OMOD;
415 let Inst{63-61} = NEG;
416
417 let EncodingType = 14; // SIInstrEncodingType::VOP3
418 let PostEncoderMethod = "VOPPostEncode";
419}
420
// VOPC - Vector compare format (32-bit encoding, 0x3e at bits 31-25).
// The result always goes to VCC, so $dst is fixed here and excluded from
// the encoding.
421class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
422 Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
423
424 bits<9> SRC0;
425 bits<8> VSRC1;
426
427 let Inst{8-0} = SRC0;
428 let Inst{16-9} = VSRC1;
429 let Inst{24-17} = op;
430 let Inst{31-25} = 0x3e;
431
432 let EncodingType = 15; //SIInstrEncodingType::VOPC
433 let PostEncoderMethod = "VOPPostEncode";
434 let DisableEncoding = "$dst";
435}
436} // End Uses = [EXEC]
437
// MIMG_Load_Helper - Image load/sample returning a 128-bit result; takes
// the full set of MIMG modifier immediates plus address, resource and
// sampler registers.
438class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
439 op,
440 (outs VReg_128:$vdata),
441 (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
442 i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
443 GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
444 asm,
445 []
446>;
447
// MUBUF_Load_Helper - Untyped buffer load into regClass; marked mayLoad.
448class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
449 op,
450 (outs regClass:$dst),
451 (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
452 i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
453 i1imm:$tfe, SReg_32:$soffset),
454 asm,
455 []> {
456 let mayLoad = 1;
457}
458
// MTBUF_Load_Helper - Typed buffer load into regClass (explicit dfmt/nfmt);
// marked mayLoad.
459class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
460 op,
461 (outs regClass:$dst),
462 (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
463 i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
464 i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
465 asm,
466 []> {
467 let mayLoad = 1;
468}
469
// MTBUF_Store_Helper - Typed buffer store of $vdata (no outputs);
// marked mayStore.
470class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
471 op,
472 (outs),
473 (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
474 i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
475 GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
476 asm,
477 []> {
478 let mayStore = 1;
479}
480
// SMRD_Helper - Declares both addressing variants of a scalar memory read:
//  _IMM:  base register + immediate offset (ADDR_Offset8 selection)
//  _SGPR: base register + register offset  (ADDR_Reg selection)
// Both select from constant_load of the given value type.
481multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
482 ValueType vt> {
483 def _IMM : SMRD <
484 op,
485 (outs dstClass:$dst),
486 (ins SMRDmemri:$src0),
487 asm,
488 [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
489 >;
490
491 def _SGPR : SMRD <
492 op,
493 (outs dstClass:$dst),
494 (ins SMRDmemrr:$src0),
495 asm,
496 [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
497 >;
498}
499
// SMRD_32 - Instantiates SMRD_Helper for both 32-bit result types (f32 and
// i32) under _F32/_I32 suffixes.
500multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
501 defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
502 defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
503}
504
505include "SIInstrFormats.td"
506include "SIInstructions.td"
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
deleted file mode 100644
index f9bdc63e3e5..00000000000
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ /dev/null
@@ -1,1180 +0,0 @@
1//===-- SIInstructions.td - SI Instruction Defintions ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
// isSI - Subtarget predicate gating all definitions in this file to
// Southern Islands (HD7XXX generation) devices.
10def isSI : Predicate<"Subtarget.device()"
11 "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
12
13let Predicates = [isSI] in {
14
15let neverHasSideEffects = 1 in {
16def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
17def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
18def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
19def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
20def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
21def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
22def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
23def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
24def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
25def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
26} // End neverHasSideEffects = 1
27////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
28////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
29////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
30////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
31////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
32////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
33////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
34////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
35//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
36//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
37def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
38//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
39//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
40//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
41////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
42////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
43////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
44////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
45def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
46def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
47def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
48def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
49def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
50def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
51def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
52////def S_ANDN2_SAVEEXEC_B64 : SOP1_ANDN2 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
53////def S_ORN2_SAVEEXEC_B64 : SOP1_ORN2 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
54def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
55def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
56def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
57def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
58def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
59def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
60def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
61def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
62def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
63//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
64def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
65def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
66def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
67def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
68def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
69
70/*
71This instruction is disabled for now until we can figure out how to teach
72the instruction selector to correctly use the S_CMP* vs V_CMP*
73instructions.
74
75When this instruction is enabled the code generator sometimes produces this
76invalid sequence:
77
78SCC = S_CMPK_EQ_I32 SGPR0, imm
79VCC = COPY SCC
80VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
81
82def S_CMPK_EQ_I32 : SOPK <
83 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
84 "S_CMPK_EQ_I32",
85 [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
86>;
87*/
88
89def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
90def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
91def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
92def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
93def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
94def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
95def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
96def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
97def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
98def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
99def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
100def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
101def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
102//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
103def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
104def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
105def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
106//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
107//def EXP : EXP_ <0x00000000, "EXP", []>;
108
109defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
110defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32",
111 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT))]
112>;
113defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32",
114 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ))]
115>;
116defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32",
117 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE))]
118>;
119defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32",
120 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT))]
121>;
122defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32",
123 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))]
124>;
125defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32",
126 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE))]
127>;
128defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
129defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
130defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
131defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
132defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
133defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
134defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32",
135 [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))]
136>;
137defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
138defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
139defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
140defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
141defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
142defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
143defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
144defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
145defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
146defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
147defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
148defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
149defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
150defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
151defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
152defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
153defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
154defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
155defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
156defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
157defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
158defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
159defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
160defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
161defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
162defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
163defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
164defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
165defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
166defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
167defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
168defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
169defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
170defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
171defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
172defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
173defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
174defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
175defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
176defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
177defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
178defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
179defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
180defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
181defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
182defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
183defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
184defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
185defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
186defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
187defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
188defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
189defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
190defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
191defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
192defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
193defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
194defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
195defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
196defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
197defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
198defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
199defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
200defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
201defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
202defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
203defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
204defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
205defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
206defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
207defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
208defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
// VOPC floating-point "compare with signalling" definitions.
// VOPC_32 = 32-bit float operands, VOPC_64 = 64-bit float operands.
// None of these variants has an ISel pattern attached yet.
defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
// VOPC integer compare definitions.  The common signed-i32 orderings carry
// setcc selection patterns writing the result into VCCReg:$dst; the X,
// 64-bit, and unsigned variants have no patterns yet.
defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32",
  [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLT))]
>;
defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32",
  [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETEQ))]
>;
defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32",
  [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLE))]
>;
defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32",
  [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGT))]
>;
defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32",
  [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETNE))]
>;
defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32",
  [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGE))]
>;
defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
// MUBUF / MTBUF buffer memory definitions.  Only the XYZW format loads are
// live (via the *_Load_Helper multiclasses, loading into VReg_128); the rest
// are placeholders kept commented out until encodings/patterns exist.
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
// SMRD scalar memory read definitions.  DWORD loads a single SReg_32;
// DWORDX4/X8 use SMRD_Helper with the wide register class and vector type.

defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;

//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;

//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
// MIMG image memory definitions.  Only IMAGE_SAMPLE is live (via
// MIMG_Load_Helper); every other opcode is a commented-out placeholder.
//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
//def IMAGE_SAMPLE_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_D", 0x00000022>;
//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
//def IMAGE_SAMPLE_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_L", 0x00000024>;
//def IMAGE_SAMPLE_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_B", 0x00000025>;
//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
// VOP1 single-operand vector definitions.  Conversions, rounding, and the
// transcendental approximations; patterns are attached where a matching
// SelectionDAG node or AMDGPU intrinsic exists.
//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;

let neverHasSideEffects = 1 in {
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
} // End neverHasSideEffects
defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
  [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
>;
//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
  [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
  [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))]
>;
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
  [(set VReg_32:$dst, (frint AllReg_32:$src0))]
>;
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
  [(set VReg_32:$dst, (ffloor AllReg_32:$src0))]
>;
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
  [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))]
>;
defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
  [(set VReg_32:$dst, (int_AMDGPU_rcp AllReg_32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
  0x0000002d, "V_RSQ_LEGACY_F32",
  [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
>;
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;

// VINTRP interpolation definitions.  All three read M0; $m0 (and, for P2,
// the tied $src0) is excluded from the printed operand list via
// DisableEncoding.
def V_INTERP_P1_F32 : VINTRP <
  0x00000000,
  (outs VReg_32:$dst),
  (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
  "V_INTERP_P1_F32",
  []> {
  let DisableEncoding = "$m0";
}

def V_INTERP_P2_F32 : VINTRP <
  0x00000001,
  (outs VReg_32:$dst),
  (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
  "V_INTERP_P2_F32",
  []> {

  // Second interpolation step accumulates into the first step's result.
  let Constraints = "$src0 = $dst";
  let DisableEncoding = "$src0,$m0";

}

def V_INTERP_MOV_F32 : VINTRP <
  0x00000002,
  (outs VReg_32:$dst),
  (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
  "V_INTERP_MOV_F32",
  []> {
  let VSRC = 0;
  let DisableEncoding = "$m0";
}

// SOPP scalar program-control definitions: program end, branches, and wait
// counts.  S_ENDPGM selects from the IL_retflag return node.
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;

let isTerminator = 1 in {

def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
  [(IL_retflag)]> {
  let SIMM16 = 0;
  let isBarrier = 1;
  let hasCtrlDep = 1;
}

let isBranch = 1 in {
def S_BRANCH : SOPP <
  0x00000002, (ins brtarget:$target), "S_BRANCH",
  []
>;

let DisableEncoding = "$scc" in {
def S_CBRANCH_SCC0 : SOPP <
  0x00000004, (ins brtarget:$target, SCCReg:$scc),
  "S_CBRANCH_SCC0", []
>;
def S_CBRANCH_SCC1 : SOPP <
  0x00000005, (ins brtarget:$target, SCCReg:$scc),
  "S_CBRANCH_SCC1",
  []
>;
} // End DisableEncoding = "$scc"

def S_CBRANCH_VCCZ : SOPP <
  0x00000006, (ins brtarget:$target, VCCReg:$vcc),
  "S_CBRANCH_VCCZ",
  []
>;
def S_CBRANCH_VCCNZ : SOPP <
  0x00000007, (ins brtarget:$target, VCCReg:$vcc),
  "S_CBRANCH_VCCNZ",
  []
>;

let DisableEncoding = "$exec" in {
def S_CBRANCH_EXECZ : SOPP <
  0x00000008, (ins brtarget:$target, EXECReg:$exec),
  "S_CBRANCH_EXECZ",
  []
>;
def S_CBRANCH_EXECNZ : SOPP <
  0x00000009, (ins brtarget:$target, EXECReg:$exec),
  "S_CBRANCH_EXECNZ",
  []
>;
} // End DisableEncoding = "$exec"


} // End isBranch = 1
} // End isTerminator = 1

//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
  []
>;
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;

// VOP2 two-operand vector definitions: select (CNDMASK), float/int
// arithmetic, min/max, shifts, and bitwise ops.  Patterns are attached
// where a matching DAG node or intrinsic exists.

/* XXX: No VOP3 version of this instruction yet */
def V_CNDMASK_B32 : VOP2 <0x00000000, (outs VReg_32:$dst),
  (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32",
  [(set (i32 VReg_32:$dst),
   (select VCCReg:$vcc, VReg_32:$src1, AllReg_32:$src0))] > {

  let DisableEncoding = "$vcc";
}

//f32 pattern for V_CNDMASK_B32
def : Pat <
  (f32 (select VCCReg:$vcc, VReg_32:$src0, AllReg_32:$src1)),
  (V_CNDMASK_B32 AllReg_32:$src1, VReg_32:$src0, VCCReg:$vcc)
>;

defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;

defm V_ADD_F32 : VOP2_32 <
  0x00000003, "V_ADD_F32",
  [(set VReg_32:$dst, (fadd AllReg_32:$src0, VReg_32:$src1))]
>;

defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
  [(set VReg_32:$dst, (fsub AllReg_32:$src0, VReg_32:$src1))]
>;
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
defm V_MUL_LEGACY_F32 : VOP2_32 <
  0x00000007, "V_MUL_LEGACY_F32",
  [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
>;

defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
  [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))]
>;
//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
  [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
>;

defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
  [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
>;
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
  [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))]
>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
  [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))]
>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
  [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))]
>;
defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let Defs = [VCC] in { // Carry-out goes to VCC
defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
  [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
>;
defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32",
  [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
>;
} // End Defs = [VCC]
defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
  [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
// SOPC scalar compare definitions (signed then unsigned 32-bit forms).
// No ISel patterns are attached yet.
def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
802
803let neverHasSideEffects = 1 in {
804
805def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
806def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
807//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
808//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
809
810} // End neverHasSideEffects
811def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
812def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
813def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
814def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
815def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
816def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
817def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
818def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
819def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
820//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
821def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
822def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
823def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
824////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
825////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
826////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
827////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
828////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
829////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
830////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
831////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
832////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
833//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
834//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
835//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
836def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
837////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
838def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
839def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
840def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
841def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
842def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
843def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
844def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
845def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
846def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
847def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
848def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
849def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
850def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
851def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
852def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
853def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
854def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
855def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
856//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
857//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
858//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
859def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
860def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
861def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
862def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
863def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
864def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
865def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
866def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
867def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
868def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
869def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
870
871def S_CSELECT_B32 : SOP2 <
872 0x0000000a, (outs SReg_32:$dst),
873 (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
874 [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
875>;
876
877def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
878
879// f32 pattern for S_CSELECT_B32
880def : Pat <
881 (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
882 (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
883>;
884
885def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
886
887def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
888 [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
889>;
890def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
891 [(set VCCReg:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
892>;
893def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
894def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
895def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
896def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
897////def S_ANDN2_B32 : SOP2_ANDN2 <0x00000014, "S_ANDN2_B32", []>;
898////def S_ANDN2_B64 : SOP2_ANDN2 <0x00000015, "S_ANDN2_B64", []>;
899////def S_ORN2_B32 : SOP2_ORN2 <0x00000016, "S_ORN2_B32", []>;
900////def S_ORN2_B64 : SOP2_ORN2 <0x00000017, "S_ORN2_B64", []>;
901def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
902def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
903def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
904def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
905def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
906def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
907def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
908def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
909def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
910def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
911def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
912def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
913def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
914def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
915def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
916def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
917def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
918def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
919def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
920//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
921def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
922
923class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
924 (outs VReg_32:$dst),
925 (ins immType:$src0),
926 "V_MOV_IMM",
927 [(set VReg_32:$dst, (immNode:$src0))]
928>;
929
930let isCodeGenOnly = 1, isPseudo = 1 in {
931
932def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
933def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
934
935def S_MOV_IMM_I32 : InstSI <
936 (outs SReg_32:$dst),
937 (ins i32imm:$src0),
938 "S_MOV_IMM_I32",
939 [(set SReg_32:$dst, (imm:$src0))]
940>;
941
942// i64 immediates aren't really supported in hardware, but LLVM will use the i64
943// type for indices on load and store instructions. The pattern for
944// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
945// which the hardware can handle.
946def S_MOV_IMM_I64 : InstSI <
947 (outs SReg_64:$dst),
948 (ins i64imm:$src0),
949 "S_MOV_IMM_I64 $dst, $src0",
950 [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
951>;
952
953} // End isCodeGenOnly, isPseudo = 1
954
955class SI_LOAD_LITERAL<Operand ImmType> :
956 Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
957
958 bits<32> imm;
959 let Inst{31-0} = imm;
960}
961
962def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
963def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
964
965let isCodeGenOnly = 1, isPseudo = 1 in {
966
967def SET_M0 : InstSI <
968 (outs SReg_32:$dst),
969 (ins i32imm:$src0),
970 "SET_M0",
971 [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
972>;
973
974def CONFIG_WRITE : InstSI <
975 (outs i32imm:$reg),
976 (ins i32imm:$val),
977 "CONFIG_WRITE $reg, $val",
978 [] > {
979 field bits<32> Inst = 0;
980}
981
982def LOAD_CONST : AMDGPUShaderInst <
983 (outs GPRF32:$dst),
984 (ins i32imm:$src),
985 "LOAD_CONST $dst, $src",
986 [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
987>;
988
989let usesCustomInserter = 1 in {
990
991def SI_V_CNDLT : InstSI <
992 (outs VReg_32:$dst),
993 (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
994 "SI_V_CNDLT $dst, $src0, $src1, $src2",
995 [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
996>;
997
998def SI_INTERP : InstSI <
999 (outs VReg_32:$dst),
1000 (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
1001 "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
1002 []
1003>;
1004
1005def SI_INTERP_CONST : InstSI <
1006 (outs VReg_32:$dst),
1007 (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
1008 "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
1009 [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
1010 imm:$attr, SReg_32:$params))]
1011>;
1012
1013def SI_KIL : InstSI <
1014 (outs),
1015 (ins VReg_32:$src),
1016 "SI_KIL $src",
1017 [(int_AMDGPU_kill VReg_32:$src)]
1018>;
1019
1020def SI_WQM : InstSI <
1021 (outs),
1022 (ins),
1023 "SI_WQM",
1024 [(int_SI_wqm)]
1025>;
1026
1027} // end usesCustomInserter
1028
1029// SI Pseudo branch instructions. These are used by the CFG structurizer pass
1030// and should be lowered to ISA instructions prior to codegen.
1031
1032let isBranch = 1, isTerminator = 1 in {
1033def SI_IF_NZ : InstSI <
1034 (outs),
1035 (ins brtarget:$target, VCCReg:$vcc),
1036 "SI_BRANCH_NZ",
1037 [(IL_brcond bb:$target, VCCReg:$vcc)]
1038>;
1039
1040def SI_IF_Z : InstSI <
1041 (outs),
1042 (ins brtarget:$target, VCCReg:$vcc),
1043 "SI_BRANCH_Z",
1044 []
1045>;
1046} // end isBranch = 1, isTerminator = 1
1047} // end isCodeGenOnly, isPseudo
1048
1049/* int_SI_vs_load_input */
1050def : Pat<
1051 (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
1052 VReg_32:$buf_idx_vgpr),
1053 (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
1054 VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
1055 0, 0, (i32 SREG_LIT_0))
1056>;
1057
1058/* int_SI_export */
1059def : Pat <
1060 (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
1061 VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
1062 (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
1063 VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
1064>;
1065
1066/* int_SI_sample */
1067def : Pat <
1068 (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
1069 (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
1070 SReg_256:$rsrc, SReg_128:$sampler)
1071>;
1072
1073def CLAMP_SI : CLAMP<VReg_32>;
1074def FABS_SI : FABS<VReg_32>;
1075def FNEG_SI : FNEG<VReg_32>;
1076
1077def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
1078def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
1079def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
1080def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
1081
1082def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
1083def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
1084def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
1085def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
1086
1087def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
1088def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
1089
1090def : BitConvert <i32, f32, SReg_32>;
1091def : BitConvert <i32, f32, VReg_32>;
1092
1093def : BitConvert <f32, i32, SReg_32>;
1094def : BitConvert <f32, i32, VReg_32>;
1095
1096def : Pat <
1097 (i64 (SIvcc_bitcast VCCReg:$vcc)),
1098 (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
1099>;
1100
1101def : Pat <
1102 (i1 (SIvcc_bitcast SReg_64:$vcc)),
1103 (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
1104>;
1105
1106/********** ====================== **********/
1107/********** Interpolation Patterns **********/
1108/********** ====================== **********/
1109
1110def : Pat <
1111 (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
1112 (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
1113 imm:$attr, SReg_32:$params)
1114>;
1115
1116def : Pat <
1117 (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
1118 (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
1119 imm:$attr, SReg_32:$params)
1120>;
1121
1122def : Pat <
1123 (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
1124 (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
1125 imm:$attr, SReg_32:$params)
1126>;
1127
1128def : Pat <
1129 (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
1130 (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
1131 imm:$attr, SReg_32:$params)
1132>;
1133
1134def : Pat <
1135 (int_SI_fs_read_face),
1136 (f32 FRONT_FACE)
1137>;
1138
1139def : Pat <
1140 (int_SI_fs_read_pos 0),
1141 (f32 POS_X_FLOAT)
1142>;
1143
1144def : Pat <
1145 (int_SI_fs_read_pos 1),
1146 (f32 POS_Y_FLOAT)
1147>;
1148
1149def : Pat <
1150 (int_SI_fs_read_pos 2),
1151 (f32 POS_Z_FLOAT)
1152>;
1153
1154def : Pat <
1155 (int_SI_fs_read_pos 3),
1156 (f32 POS_W_FLOAT)
1157>;
1158
1159/********** ================== **********/
1160/********** Intrinsic Patterns **********/
1161/********** ================== **********/
1162
1163/* llvm.AMDGPU.pow */
1164/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
1165def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
1166
1167def : Pat <
1168 (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
1169 (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
1170>;
1171
1172/********** ================== **********/
1173/********** VOP3 Patterns **********/
1174/********** ================== **********/
1175
1176def : Pat <(f32 (IL_mad AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2)),
1177 (V_MAD_LEGACY_F32 AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2,
1178 0, 0, 0, 0)>;
1179
1180} // End isSI predicate
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
deleted file mode 100644
index b3ecba3e2d0..00000000000
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ /dev/null
@@ -1,40 +0,0 @@
1//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// SI Intrinsic Definitions
11//
12//===----------------------------------------------------------------------===//
13
14
15let TargetPrefix = "SI", isTarget = 1 in {
16
17 def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
18 def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
 19 /* XXX: We may need a separate intrinsic here for loading integer values */
20 def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
21 def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
22 def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
23 def int_SI_wqm : Intrinsic <[], [], []>;
24
25 def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
26
27 /* Interpolation Intrinsics */
28
29 def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
30 class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
31
32 def int_SI_fs_interp_linear_center : Interp;
33 def int_SI_fs_interp_linear_centroid : Interp;
34 def int_SI_fs_interp_persp_center : Interp;
35 def int_SI_fs_interp_persp_centroid : Interp;
36 def int_SI_fs_interp_constant : Interp;
37
38 def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
39 def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
40}
diff --git a/src/gallium/drivers/radeon/SILowerFlowControl.cpp b/src/gallium/drivers/radeon/SILowerFlowControl.cpp
deleted file mode 100644
index bf5192efe3d..00000000000
--- a/src/gallium/drivers/radeon/SILowerFlowControl.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
1//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
11// to predicated instructions.
12//
13// All flow control (except loops) is handled using predicated instructions and
14// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
15// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
 16// by writing to the 64-bit EXEC register (each bit corresponds to a
17// single vector ALU). Typically, for predicates, a vector ALU will write
18// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
19// Vector ALU) and then the ScalarALU will AND the VCC register with the
20// EXEC to update the predicates.
21//
22// For example:
23// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
24// SI_IF_NZ %VCC
25// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
26// ELSE
27// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
28// ENDIF
29//
30// becomes:
31//
32// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
33// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
34// S_CBRANCH_EXECZ label0 // This instruction is an
35// // optimization which allows us to
36// // branch if all the bits of
37// // EXEC are zero.
38// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
39//
40// label0:
41// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
42// // Then block.
43// %EXEC = S_AND_B64 %SGPR0, %EXEC
44// S_BRANCH_EXECZ label1 // Use our branch optimization
45// // instruction again.
 46// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the THEN block
47// label1:
48// S_MOV_B64 // Restore the old EXEC value
49//===----------------------------------------------------------------------===//
50
51#include "AMDGPU.h"
52#include "SIInstrInfo.h"
53#include "llvm/CodeGen/MachineFunction.h"
54#include "llvm/CodeGen/MachineFunctionPass.h"
55#include "llvm/CodeGen/MachineInstrBuilder.h"
56#include "llvm/CodeGen/MachineRegisterInfo.h"
57
58using namespace llvm;
59
60namespace {
61
62class SILowerFlowControlPass : public MachineFunctionPass {
63
64private:
65 static char ID;
66 const TargetInstrInfo *TII;
67 std::vector<unsigned> PredicateStack;
68 std::vector<unsigned> UnusedRegisters;
69
70 void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
71 void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
72
73public:
74 SILowerFlowControlPass(TargetMachine &tm) :
75 MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
76
77 virtual bool runOnMachineFunction(MachineFunction &MF);
78
79 const char *getPassName() const {
80 return "SI Lower flow control instructions";
81 }
82
83};
84
85} // End anonymous namespace
86
87char SILowerFlowControlPass::ID = 0;
88
89FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
90 return new SILowerFlowControlPass(tm);
91}
92
93bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
94
95 // Find all the unused registers that can be used for the predicate stack.
96 for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(),
97 I = AMDGPU::SReg_64RegClass.end();
98 I != S; --I) {
99 unsigned Reg = *I;
100 if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
101 UnusedRegisters.push_back(Reg);
102 }
103 }
104
105 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
106 BB != BB_E; ++BB) {
107 MachineBasicBlock &MBB = *BB;
108 for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
109 I != MBB.end(); I = Next, Next = llvm::next(I)) {
110 MachineInstr &MI = *I;
111 switch (MI.getOpcode()) {
112 default: break;
113 case AMDGPU::SI_IF_NZ:
114 pushExecMask(MBB, I);
115 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
116 AMDGPU::EXEC)
117 .addOperand(MI.getOperand(0)) // VCC
118 .addReg(AMDGPU::EXEC);
119 MI.eraseFromParent();
120 break;
121 case AMDGPU::ELSE:
122 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
123 AMDGPU::EXEC)
124 .addReg(AMDGPU::EXEC);
125 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
126 AMDGPU::EXEC)
127 .addReg(PredicateStack.back())
128 .addReg(AMDGPU::EXEC);
129 MI.eraseFromParent();
130 break;
131 case AMDGPU::ENDIF:
132 popExecMask(MBB, I);
133 MI.eraseFromParent();
134 break;
135 }
136 }
137 }
138 return false;
139}
140
141void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
142 MachineBasicBlock::iterator I) {
143
144 assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
145 unsigned StackReg = UnusedRegisters.back();
146 UnusedRegisters.pop_back();
147 PredicateStack.push_back(StackReg);
148 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
149 StackReg)
150 .addReg(AMDGPU::EXEC);
151}
152
153void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
154 MachineBasicBlock::iterator I) {
155 unsigned StackReg = PredicateStack.back();
156 PredicateStack.pop_back();
157 UnusedRegisters.push_back(StackReg);
158 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
159 AMDGPU::EXEC)
160 .addReg(StackReg);
161}
diff --git a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp
deleted file mode 100644
index 720245091f7..00000000000
--- a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
1//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// This pass performs the following transformation on instructions with
10// literal constants:
11//
12// %VGPR0 = V_MOV_IMM_I32 1
13//
14// becomes:
15//
16// BUNDLE
17// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
18// * SI_LOAD_LITERAL 1
19//
20// The resulting sequence matches exactly how the hardware handles immediate
21// operands, so this transformation greatly simplifies the code generator.
22//
23// Only the *_MOV_IMM_* support immediate operands at the moment, but when
24// support for immediate operands is added to other instructions, they
25// will be lowered here as well.
26//===----------------------------------------------------------------------===//
27
28#include "AMDGPU.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineInstrBundle.h"
33
34using namespace llvm;
35
36namespace {
37
38class SILowerLiteralConstantsPass : public MachineFunctionPass {
39
40private:
41 static char ID;
42 const TargetInstrInfo *TII;
43
44public:
45 SILowerLiteralConstantsPass(TargetMachine &tm) :
46 MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
47
48 virtual bool runOnMachineFunction(MachineFunction &MF);
49
50 const char *getPassName() const {
51 return "SI Lower literal constants pass";
52 }
53};
54
55} // End anonymous namespace
56
57char SILowerLiteralConstantsPass::ID = 0;
58
59FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
60 return new SILowerLiteralConstantsPass(tm);
61}
62
63bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
64 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
65 BB != BB_E; ++BB) {
66 MachineBasicBlock &MBB = *BB;
67 for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
68 I != MBB.end(); I = Next, Next = llvm::next(I)) {
69 MachineInstr &MI = *I;
70 switch (MI.getOpcode()) {
71 default: break;
72 case AMDGPU::S_MOV_IMM_I32:
73 case AMDGPU::S_MOV_IMM_I64:
74 case AMDGPU::V_MOV_IMM_F32:
75 case AMDGPU::V_MOV_IMM_I32: {
76 unsigned MovOpcode;
77 unsigned LoadLiteralOpcode;
78 MachineOperand LiteralOp = MI.getOperand(1);
79 if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
80 MovOpcode = AMDGPU::V_MOV_B32_e32;
81 } else {
82 MovOpcode = AMDGPU::S_MOV_B32;
83 }
84 if (LiteralOp.isImm()) {
85 LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
86 } else {
87 LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
88 }
89 MachineInstr *First =
90 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
91 MI.getOperand(0).getReg())
92 .addReg(AMDGPU::SI_LITERAL_CONSTANT);
93 MachineInstr *Last =
94 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
95 .addOperand(MI.getOperand(1));
96 Last->setIsInsideBundle();
97 llvm::finalizeBundle(MBB, First, Last);
98 MI.eraseFromParent();
99 break;
100 }
101 }
102 }
103 }
104 return false;
105}
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
deleted file mode 100644
index 3cc1cd67c47..00000000000
--- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
1//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10
11#include "SIMachineFunctionInfo.h"
12
13using namespace llvm;
14
15SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
16 : MachineFunctionInfo(),
17 SPIPSInputAddr(0),
18 ShaderType(0)
19 { }
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
deleted file mode 100644
index 68097b44c52..00000000000
--- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
1//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
11// register, which is to tell the hardware which interpolation parameters to
12// load.
13//
14//===----------------------------------------------------------------------===//
15
16
17#ifndef _SIMACHINEFUNCTIONINFO_H_
18#define _SIMACHINEFUNCTIONINFO_H_
19
20#include "llvm/CodeGen/MachineFunction.h"
21
22namespace llvm {
23
24class SIMachineFunctionInfo : public MachineFunctionInfo {
25
26 private:
27
28 public:
29 SIMachineFunctionInfo(const MachineFunction &MF);
30 unsigned SPIPSInputAddr;
31 unsigned ShaderType;
32
33};
34
35} // End namespace llvm
36
37
38#endif //_SIMACHINEFUNCTIONINFO_H_
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp
deleted file mode 100644
index 8fe8ae584e2..00000000000
--- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14
15#include "SIRegisterInfo.h"
16#include "AMDGPUTargetMachine.h"
17
18using namespace llvm;
19
20SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
21 const TargetInstrInfo &tii)
22: AMDGPURegisterInfo(tm, tii),
23 TM(tm),
24 TII(tii)
25 { }
26
27BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
28{
29 BitVector Reserved(getNumRegs());
30 return Reserved;
31}
32
33unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const
34{
35 switch (reg) {
36 case AMDGPU::M0: return 124;
37 case AMDGPU::SREG_LIT_0: return 128;
38 default: return getHWRegNum(reg);
39 }
40}
41
42const TargetRegisterClass *
43SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
44{
45 switch (rc->getID()) {
46 case AMDGPU::GPRF32RegClassID:
47 return &AMDGPU::VReg_32RegClass;
48 default: return rc;
49 }
50}
51
52const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
53 MVT VT) const
54{
55 switch(VT.SimpleTy) {
56 default:
57 case MVT::i32: return &AMDGPU::VReg_32RegClass;
58 }
59}
60#include "SIRegisterGetHWRegNum.inc"
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h
deleted file mode 100644
index 99005cbccc1..00000000000
--- a/src/gallium/drivers/radeon/SIRegisterInfo.h
+++ /dev/null
@@ -1,54 +0,0 @@
1//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Interface definition for SIRegisterInfo
11//
12//===----------------------------------------------------------------------===//
13
14
15#ifndef SIREGISTERINFO_H_
16#define SIREGISTERINFO_H_
17
18#include "AMDGPURegisterInfo.h"
19
20namespace llvm {
21
22class AMDGPUTargetMachine;
23class TargetInstrInfo;
24
25struct SIRegisterInfo : public AMDGPURegisterInfo
26{
27 AMDGPUTargetMachine &TM;
28 const TargetInstrInfo &TII;
29
30 SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
31
32 virtual BitVector getReservedRegs(const MachineFunction &MF) const;
33
34 /// getBinaryCode - Returns the hardware encoding for a register
35 virtual unsigned getBinaryCode(unsigned reg) const;
36
37 /// getISARegClass - rc is an AMDIL reg class. This function returns the
38 /// SI register class that is equivalent to the given AMDIL register class.
39 virtual const TargetRegisterClass *
40 getISARegClass(const TargetRegisterClass * rc) const;
41
42 /// getHWRegNum - Generated function that returns the hardware encoding for
43 /// a register
44 unsigned getHWRegNum(unsigned reg) const;
45
46 /// getCFGStructurizerRegClass - get the register class of the specified
47 /// type to use in the CFGStructurizer
48 virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
49
50};
51
52} // End namespace llvm
53
54#endif // SIREGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td
deleted file mode 100644
index 28b65b82585..00000000000
--- a/src/gallium/drivers/radeon/SISchedule.td
+++ /dev/null
@@ -1,15 +0,0 @@
1//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// TODO: This is just a place holder for now.
11//
12//===----------------------------------------------------------------------===//
13
14
15def SI_Itin : ProcessorItineraries <[], [], []>;
diff --git a/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp b/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp
deleted file mode 100644
index 380e7deb280..00000000000
--- a/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
1//===-- TargetInfo/AMDGPUTargetInfo.cpp - TODO: Add brief description -------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// TODO: Add full description
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "llvm/Support/TargetRegistry.h"
16
17using namespace llvm;
18
19/// The target for the AMDGPU backend
20Target llvm::TheAMDGPUTarget;
21
22/// Extern function to initialize the targets for the AMDGPU backend
23extern "C" void LLVMInitializeAMDGPUTargetInfo() {
24 RegisterTarget<Triple::r600, false>
25 R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
26}
diff --git a/src/gallium/drivers/radeon/loader.cpp b/src/gallium/drivers/radeon/loader.cpp
deleted file mode 100644
index 3ea8cd8900e..00000000000
--- a/src/gallium/drivers/radeon/loader.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
1
2#include "radeon_llvm_emit.h"
3
4#include <llvm/Support/CommandLine.h>
5#include <llvm/Support/IRReader.h>
6#include <llvm/Support/SourceMgr.h>
7#include <llvm/LLVMContext.h>
8#include <llvm/Module.h>
9#include <stdio.h>
10
11#include <llvm-c/Core.h>
12
13using namespace llvm;
14
15static cl::opt<std::string>
16InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
17
18static cl::opt<std::string>
19TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name"));
20
21int main(int argc, char ** argv)
22{
23 unsigned char * bytes;
24 unsigned byte_count;
25
26 std::auto_ptr<Module> M;
27 LLVMContext &Context = getGlobalContext();
28 SMDiagnostic Err;
29 cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
30 M.reset(ParseIRFile(InputFilename, Err, Context));
31
32 Module * mod = M.get();
33
34 radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1);
35}
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
index b889f029694..903e1028a09 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
@@ -39,12 +39,7 @@
39#include <llvm/Target/TargetMachine.h> 39#include <llvm/Target/TargetMachine.h>
40#include <llvm/Transforms/Scalar.h> 40#include <llvm/Transforms/Scalar.h>
41#include <llvm-c/Target.h> 41#include <llvm-c/Target.h>
42
43#if HAVE_LLVM < 0x0302
44#include <llvm/Target/TargetData.h>
45#else
46#include <llvm/DataLayout.h> 42#include <llvm/DataLayout.h>
47#endif
48 43
49#include <iostream> 44#include <iostream>
50#include <stdlib.h> 45#include <stdlib.h>
@@ -52,16 +47,6 @@
52 47
53using namespace llvm; 48using namespace llvm;
54 49
55#ifndef EXTERNAL_LLVM
56extern "C" {
57
58void LLVMInitializeAMDGPUAsmPrinter(void);
59void LLVMInitializeAMDGPUTargetMC(void);
60void LLVMInitializeAMDGPUTarget(void);
61void LLVMInitializeAMDGPUTargetInfo(void);
62}
63#endif
64
65namespace { 50namespace {
66 51
67class LLVMEnsureMultithreaded { 52class LLVMEnsureMultithreaded {
@@ -89,17 +74,10 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
89 74
90 Triple AMDGPUTriple(sys::getDefaultTargetTriple()); 75 Triple AMDGPUTriple(sys::getDefaultTargetTriple());
91 76
92#if HAVE_LLVM == 0x0302
93 LLVMInitializeAMDGPUTargetInfo();
94 LLVMInitializeAMDGPUTarget();
95 LLVMInitializeAMDGPUTargetMC();
96 LLVMInitializeAMDGPUAsmPrinter();
97#else
98 LLVMInitializeR600TargetInfo(); 77 LLVMInitializeR600TargetInfo();
99 LLVMInitializeR600Target(); 78 LLVMInitializeR600Target();
100 LLVMInitializeR600TargetMC(); 79 LLVMInitializeR600TargetMC();
101 LLVMInitializeR600AsmPrinter(); 80 LLVMInitializeR600AsmPrinter();
102#endif
103 81
104 std::string err; 82 std::string err;
105 const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err); 83 const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
@@ -130,11 +108,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
130 )); 108 ));
131 TargetMachine &AMDGPUTargetMachine = *tm.get(); 109 TargetMachine &AMDGPUTargetMachine = *tm.get();
132 PassManager PM; 110 PassManager PM;
133#if HAVE_LLVM < 0x0302
134 PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
135#else
136 PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout())); 111 PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout()));
137#endif
138 PM.add(createPromoteMemoryToRegisterPass()); 112 PM.add(createPromoteMemoryToRegisterPass());
139 AMDGPUTargetMachine.setAsmVerbosityDefault(true); 113 AMDGPUTargetMachine.setAsmVerbosityDefault(true);
140 114