summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> 2012-10-31 15:26:11 +0000
committer tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> 2012-10-31 15:26:11 +0000
commit 4eb8f73c194612cfeb45079d3687e6518ed65dc4 (patch)
tree b40d8d5214bbfebc7ab5b7ef89c5db8766ae052b
parent 75f69bfadb208bb5d11cb33db1f6b5d32c37010b (diff)
SI: Alternative handling of EXEC register for control flow
This version handles the EXEC register being modified in the if/else blocks,
e.g. for pixel discard.

Patch by: Michel Dänzer

Reviewed-by: Tom Stellard <thomas.stellar@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@167124 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.cpp 20
-rw-r--r-- lib/Target/AMDGPU/SILowerFlowControl.cpp 42
2 files changed, 36 insertions, 26 deletions
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index c6f93d77a81..45f180f3aa4 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -228,26 +228,6 @@ void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
.addReg(AMDGPU::SREG_LIT_0)
.addOperand(MI->getOperand(0));
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0);
-
- // ... and terminate wavefront
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
-
MI->eraseFromParent();
}
diff --git a/lib/Target/AMDGPU/SILowerFlowControl.cpp b/lib/Target/AMDGPU/SILowerFlowControl.cpp
index 25f113eb62c..b90168844fa 100644
--- a/lib/Target/AMDGPU/SILowerFlowControl.cpp
+++ b/lib/Target/AMDGPU/SILowerFlowControl.cpp
@@ -50,6 +50,7 @@
#include "AMDGPU.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -117,20 +118,48 @@ bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::EXEC)
.addOperand(MI.getOperand(0)) // VCC
.addReg(AMDGPU::EXEC);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
+ PredicateStack.back())
+ .addReg(PredicateStack.back())
+ .addReg(AMDGPU::EXEC);
MI.eraseFromParent();
break;
case AMDGPU::ELSE:
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
- AMDGPU::EXEC)
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+ UnusedRegisters.back())
.addReg(AMDGPU::EXEC);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::EXEC)
- .addReg(PredicateStack.back())
- .addReg(AMDGPU::EXEC);
+ .addReg(PredicateStack.back());
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+ PredicateStack.back())
+ .addReg(UnusedRegisters.back());
MI.eraseFromParent();
break;
case AMDGPU::ENDIF:
popExecMask(MBB, I);
+ if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL &&
+ PredicateStack.empty()) {
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
+ }
MI.eraseFromParent();
break;
}
@@ -156,7 +185,8 @@ void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
unsigned StackReg = PredicateStack.back();
PredicateStack.pop_back();
UnusedRegisters.push_back(StackReg);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
.addReg(StackReg);
}