summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-04-11 13:47:43 -0700
committerTom Stellard <thomas.stellard@amd.com>2013-04-29 12:01:42 -0400
commitac0cff51e403ccff9c6feb2b6375b64916a8bd45 (patch)
tree0e7ea5d9210ecd2e54e1267cb5f5c33185487e15
parentd66346c4ba87929f21b8188b4175223c1dd09b31 (diff)
R600: Add FetchInst bit to instruction defs to denote vertex/tex instructions
v2[Vincent Lejeune]: Split FetchInst into usesTextureCache/usesVertexCache
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp5
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h2
-rw-r--r--lib/Target/R600/AMDILBase.td4
-rw-r--r--lib/Target/R600/Processors.td53
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp45
-rw-r--r--lib/Target/R600/R600Defines.h4
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp24
-rw-r--r--lib/Target/R600/R600InstrInfo.h7
-rw-r--r--lib/Target/R600/R600Instructions.td18
-rw-r--r--test/CodeGen/R600/loop-address.ll4
10 files changed, 102 insertions, 64 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 0f356a1c3f1..a7e1d7b6d54 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
DefaultSize[0] = 64;
DefaultSize[1] = 1;
DefaultSize[2] = 1;
+ HasVertexCache = false;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
@@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const {
return Is64bit;
}
bool
+AMDGPUSubtarget::hasVertexCache() const {
+ return HasVertexCache;
+}
+bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1973fc6d544..b6501a45628 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -36,6 +36,7 @@ private:
bool Is32on64bit;
bool DumpCode;
bool R600ALUInst;
+ bool HasVertexCache;
InstrItineraryData InstrItins;
@@ -48,6 +49,7 @@ public:
bool isOverride(AMDGPUDeviceInfo::Caps) const;
bool is64bit() const;
+ bool hasVertexCache() const;
// Helper functions to simplify if statements
bool isTargetELF() const;
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
index c12cedcf7fd..e2211106f3f 100644
--- a/lib/Target/R600/AMDILBase.td
+++ b/lib/Target/R600/AMDILBase.td
@@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"false",
"Older version of ALU instructions encoding.">;
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+ "HasVertexCache",
+ "true",
+ "Specify use of dedicated vertex cache.">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 730335dc3f6..928416111d8 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -9,24 +9,35 @@
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
: Processor<Name, itin, Features>;
-def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rs880", R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rv670", R600_EG_Itin, [FeatureR600ALUInst, FeatureFP64]>;
-def : Proc<"rv710", R600_EG_Itin, []>;
-def : Proc<"rv730", R600_EG_Itin, []>;
-def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
-def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"sumo", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
-def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
-def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
-def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
-def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"", R600_EG_Itin,
+ [FeatureR600ALUInst, FeatureVertexCache]>;
+def : Proc<"r600", R600_EG_Itin,
+ [FeatureR600ALUInst , FeatureVertexCache]>;
+def : Proc<"rs880", R600_EG_Itin,
+ [FeatureR600ALUInst]>;
+def : Proc<"rv670", R600_EG_Itin,
+ [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"rv710", R600_EG_Itin,
+ [FeatureVertexCache]>;
+def : Proc<"rv730", R600_EG_Itin,
+ [FeatureVertexCache]>;
+def : Proc<"rv770", R600_EG_Itin,
+ [FeatureFP64, FeatureVertexCache]>;
+def : Proc<"cedar", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"redwood", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"sumo", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"cypress", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"barts", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"turks", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"caicos", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_EG_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureFP64]>;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 9271b3976b5..cc6e0b24f9d 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -32,6 +32,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
enum ControlFlowInstruction {
CF_TC,
+ CF_VC,
CF_CALL_FS,
CF_WHILE_LOOP,
CF_END_LOOP,
@@ -48,39 +49,6 @@ private:
unsigned MaxFetchInst;
const AMDGPUSubtarget &ST;
- bool isFetch(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case AMDGPU::TEX_VTX_CONSTBUF:
- case AMDGPU::TEX_VTX_TEXBUF:
- case AMDGPU::TEX_LD:
- case AMDGPU::TEX_GET_TEXTURE_RESINFO:
- case AMDGPU::TEX_GET_GRADIENTS_H:
- case AMDGPU::TEX_GET_GRADIENTS_V:
- case AMDGPU::TEX_SET_GRADIENTS_H:
- case AMDGPU::TEX_SET_GRADIENTS_V:
- case AMDGPU::TEX_SAMPLE:
- case AMDGPU::TEX_SAMPLE_C:
- case AMDGPU::TEX_SAMPLE_L:
- case AMDGPU::TEX_SAMPLE_C_L:
- case AMDGPU::TEX_SAMPLE_LB:
- case AMDGPU::TEX_SAMPLE_C_LB:
- case AMDGPU::TEX_SAMPLE_G:
- case AMDGPU::TEX_SAMPLE_C_G:
- case AMDGPU::TXD:
- case AMDGPU::TXD_SHADOW:
- case AMDGPU::VTX_READ_GLOBAL_8_eg:
- case AMDGPU::VTX_READ_GLOBAL_32_eg:
- case AMDGPU::VTX_READ_GLOBAL_128_eg:
- case AMDGPU::VTX_READ_PARAM_8_eg:
- case AMDGPU::VTX_READ_PARAM_16_eg:
- case AMDGPU::VTX_READ_PARAM_32_eg:
- case AMDGPU::VTX_READ_PARAM_128_eg:
- return true;
- default:
- return false;
- }
- }
-
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
case AMDGPU::KILL:
@@ -98,6 +66,9 @@ private:
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
break;
+ case CF_VC:
+ Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
+ break;
case CF_CALL_FS:
Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
break;
@@ -139,17 +110,19 @@ private:
unsigned CfAddress) const {
MachineBasicBlock::iterator ClauseHead = I;
unsigned AluInstCount = 0;
+ bool IsTex = TII->usesTextureCache(ClauseHead);
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I))
continue;
- if (!isFetch(I))
+ if ((IsTex && !TII->usesTextureCache(I)) ||
+ (!IsTex && !TII->usesVertexCache(I)))
break;
AluInstCount ++;
if (AluInstCount > MaxFetchInst)
break;
}
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
- getHWInstrDesc(CF_TC))
+ getHWInstrDesc(IsTex?CF_TC:CF_VC))
.addImm(CfAddress) // ADDR
.addImm(AluInstCount); // COUNT
return I;
@@ -211,7 +184,7 @@ public:
}
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
- if (isFetch(I)) {
+ if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
DEBUG(dbgs() << CfCount << ":"; I->dump(););
I = MakeFetchClause(MBB, I, 0);
CfCount++;
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index 16cfcf59eb3..bdda2325214 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -39,7 +39,9 @@ namespace R600_InstFlag {
//FlagOperand bits 7, 8
NATIVE_OPERANDS = (1 << 9),
OP1 = (1 << 10),
- OP2 = (1 << 11)
+ OP2 = (1 << 11),
+ VTX_INST = (1 << 12),
+ TEX_INST = (1 << 13)
};
}
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index b232188a264..b3996bac3c6 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -29,7 +29,8 @@ using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
- RI(tm, *this)
+ RI(tm, *this),
+ ST(tm.getSubtarget<AMDGPUSubtarget>())
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
@@ -139,6 +140,27 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
+bool R600InstrInfo::isCayman() const {
+ return ST.device()->getGeneration() > AMDGPUDeviceInfo::HD5XXX;
+}
+
+bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
+ return ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST;
+}
+
+bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
+ return usesVertexCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
+ return (!ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST) ||
+ (get(Opcode).TSFlags & R600_InstFlag::TEX_INST);
+}
+
+bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
+ return usesTextureCache(MI->getOpcode());
+}
+
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index dbae90013d2..136023f5a91 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -33,7 +33,9 @@ namespace llvm {
class R600InstrInfo : public AMDGPUInstrInfo {
private:
const R600RegisterInfo RI;
+ const AMDGPUSubtarget &ST;
+ bool isCayman() const;
int getBranchInstr(const MachineOperand &op) const;
public:
@@ -53,6 +55,11 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
+ bool usesVertexCache(unsigned Opcode) const;
+ bool usesVertexCache(const MachineInstr *MI) const;
+ bool usesTextureCache(unsigned Opcode) const;
+ bool usesTextureCache(const MachineInstr *MI) const;
+
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
bool canBundle(const std::vector<MachineInstr *> &) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 1bde3d57e4f..5eb67977b39 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -25,6 +25,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
bit Op1 = 0;
bit Op2 = 0;
bit HasNativeOperands = 0;
+ bit VTXInst = 0;
+ bit TEXInst = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
@@ -43,6 +45,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let TSFlags{9} = HasNativeOperands;
let TSFlags{10} = Op1;
let TSFlags{11} = Op2;
+ let TSFlags{12} = VTXInst;
+ let TSFlags{13} = TEXInst;
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -478,6 +482,8 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
let COORD_TYPE_Y = 0;
let COORD_TYPE_Z = 0;
let COORD_TYPE_W = 0;
+
+ let TEXInst = 1;
}
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -1783,6 +1789,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
+
+ let VTXInst = 1;
}
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
@@ -2011,15 +2019,17 @@ def TXD: InstR600 <
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
[(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))], NullALU> {
->;
+ let TEXInst = 1;
+}
def TXD_SHADOW: InstR600 <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
[(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], NullALU
->;
-
+> {
+ let TEXInst = 1;
+}
} // End isPseudo = 1
} // End usesCustomInserter = 1
@@ -2105,6 +2115,7 @@ def TEX_VTX_CONSTBUF :
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
+ let VTXInst = 1;
}
def TEX_VTX_TEXBUF:
@@ -2158,6 +2169,7 @@ let Inst{63-32} = Word1;
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
+ let VTXInst = 1;
}
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index dc9295e8e77..a3986b23626 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -1,10 +1,10 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;CHECK: TEX
+;CHECK: VTX
;CHECK: ALU_PUSH
;CHECK: JUMP @4
;CHECK: ELSE @16
-;CHECK: TEX
+;CHECK: VTX
;CHECK: LOOP_START_DX10 @15
;CHECK: LOOP_BREAK @14
;CHECK: POP @16