summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-01-22 21:55:46 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-01-22 21:55:46 +0000
commitefa1355495c836f13f98efbb27a99a4aabd60598 (patch)
treeaa40f7c618f3ac713ebf749a9968f23ec2d0654b /lib
parent39210ac1c62c9d6c055d8450ed52c09d094d9b85 (diff)
R600: Add work-around for the CF stack entry HW bug
The CF stack can be corrupted if you use CF_ALU_PUSH_BEFORE, CF_ALU_ELSE_AFTER, CF_ALU_BREAK, or CF_ALU_CONTINUE when the number of sub-entries on the stack is greater than or equal to the stack entry size and sub-entries modulo 4 is either 0 or 3 (on cedar the bug is present when number of sub-entries module 8 is either 7 or 0) We choose to be conservative and always apply the work-around when the number of sub-enries is greater than or equal to the stack entry size, so that we can safely over-allocate the stack when we are unsure of the stack allocation rules. reviewed-by: Vincent Lejeune <vljn at ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199842 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/R600/AMDGPU.td5
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp6
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h2
-rw-r--r--lib/Target/R600/Processors.td14
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp43
5 files changed, 63 insertions, 7 deletions
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index c4e5efc8d6e..d1e2cf5319c 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -63,6 +63,11 @@ def FeatureCaymanISA : SubtargetFeature<"caymanISA",
"true",
"Use Cayman ISA">;
+def FeatureCFALUBug : SubtargetFeature<"cfalubug",
+ "CFALUBug",
+ "true",
+ "GPU has CF_ALU bug">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index f36aa2071c7..e77ab5e6d14 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -39,6 +39,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
EnableIRStructurizer = true;
EnableIfCvt = true;
WavefrontSize = 0;
+ CFALUBug = false;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
}
@@ -97,6 +98,11 @@ AMDGPUSubtarget::getStackEntrySize() const {
}
}
bool
+AMDGPUSubtarget::hasCFAluBug() const {
+ assert(getGeneration() <= NORTHERN_ISLANDS);
+ return CFALUBug;
+}
+bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 68d853218ba..7e7f4d0c004 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -52,6 +52,7 @@ private:
bool EnableIRStructurizer;
bool EnableIfCvt;
unsigned WavefrontSize;
+ bool CFALUBug;
InstrItineraryData InstrItins;
@@ -71,6 +72,7 @@ public:
bool isIfCvtEnabled() const;
unsigned getWavefrontSize() const;
unsigned getStackEntrySize() const;
+ bool hasCFAluBug() const;
virtual bool enableMachineScheduler() const {
return getGeneration() <= NORTHERN_ISLANDS;
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index e601f353163..fde44814970 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -46,13 +46,15 @@ def : Proc<"rv770", R600_VLIW5_Itin,
//===----------------------------------------------------------------------===//
def : Proc<"cedar", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>;
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
+ FeatureCFALUBug]>;
def : Proc<"redwood", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
+ FeatureCFALUBug]>;
def : Proc<"sumo", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureWavefrontSize64]>;
+ [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
def : Proc<"juniper", R600_VLIW5_Itin,
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
@@ -66,13 +68,13 @@ def : Proc<"cypress", R600_VLIW5_Itin,
//===----------------------------------------------------------------------===//
def : Proc<"barts", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache]>;
+ [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
def : Proc<"turks", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache]>;
+ [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
def : Proc<"caicos", R600_VLIW5_Itin,
- [FeatureNorthernIslands]>;
+ [FeatureNorthernIslands, FeatureCFALUBug]>;
def : Proc<"cayman", R600_VLIW4_Itin,
[FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 6b42a7a9faf..470ff2e1079 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -73,6 +73,44 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
return false;
}
+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+ if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
+ getLoopDepth() > 1)
+ return true;
+
+ if (!ST.hasCFAluBug())
+ return false;
+
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ case AMDGPU::CF_ALU_ELSE_AFTER:
+ case AMDGPU::CF_ALU_BREAK:
+ case AMDGPU::CF_ALU_CONTINUE:
+ if (CurrentSubEntries == 0)
+ return false;
+ if (ST.getWavefrontSize() == 64) {
+ // We are being conservative here. We only require this work-around if
+ // CurrentSubEntries > 3 &&
+ // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
+ //
+ // We have to be conservative, because we don't know for certain that
+ // our stack allocation algorithm for Evergreen/NI is correct. Applying this
+ // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
+ // resources without any problems.
+ return CurrentSubEntries > 3;
+ } else {
+ assert(ST.getWavefrontSize() == 32);
+ // We are being conservative here. We only require the work-around if
+ // CurrentSubEntries > 7 &&
+ // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
+ // See the comment on the wavefront size == 64 case for why we are
+ // being conservative.
+ return CurrentSubEntries > 7;
+ }
+ }
+}
+
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
switch(Item) {
default:
@@ -472,9 +510,12 @@ public:
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
+ bool RequiresWorkAround =
+ CFStack.requiresWorkAroundForInst(MI->getOpcode());
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
+ if (RequiresWorkAround) {
+ DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);