author     Tom Stellard <thomas.stellard@amd.com>   2014-11-21 22:31:46 +0000
committer  Tom Stellard <thomas.stellard@amd.com>   2014-11-21 22:31:46 +0000
commit     bad4e7b748f615d19ea0b3b5beebd69decd24be3 (patch)
tree       e7d1d8d51a0814a2ac32521f4503b7bc666d94fa
parent     573630a020fedfc4195a41db8d7203e20c89ba8f (diff)
R600/SI: Add an s_mov_b32 to patterns which use the M0RegClass
We need to use an s_mov_b32 rather than a copy, so that CSE will eliminate redundant moves to the m0 register.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222584 91177308-0d34-0410-b5e6-96231b3b80d8
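To make the rationale concrete, here is a minimal standalone C++ sketch (illustration only, not LLVM's MachineCSE; the Inst struct, register names, and runSimpleCSE are assumptions made for this example). It shows why a CSE-style pass that skips copy-like instructions can fold a repeated S_MOV_B32 of m0 but leaves a repeated COPY in place.

// Minimal standalone sketch: models why an explicit S_MOV_B32 lets a
// CSE-style pass drop redundant writes to m0, while copy-like
// instructions are never CSE candidates. Everything here is illustrative.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Inst {
  std::string Opcode; // e.g. "S_MOV_B32" or "COPY"
  std::string Dst;    // destination register
  std::string Src;    // source register
};

// Toy CSE: drops an instruction if it redefines a register with the exact
// same non-copy opcode/source pair as the previous definition. Copies are
// skipped outright, mirroring how CSE passes commonly ignore them.
static std::vector<Inst> runSimpleCSE(const std::vector<Inst> &In) {
  std::vector<Inst> Out;
  std::map<std::string, std::string> LastDef; // Dst -> "Opcode Src"
  for (const Inst &I : In) {
    const bool IsCopy = (I.Opcode == "COPY");
    const std::string Key = I.Opcode + " " + I.Src;
    if (!IsCopy && LastDef.count(I.Dst) && LastDef[I.Dst] == Key)
      continue; // redundant redefinition of the same value: eliminated
    LastDef[I.Dst] = IsCopy ? "<copy, not tracked>" : Key;
    Out.push_back(I);
  }
  return Out;
}

int main() {
  // Two interpolation-style uses that both need m0 loaded from the same SGPR.
  std::vector<Inst> WithMov = {{"S_MOV_B32", "m0", "s7"},
                               {"V_INTERP_P1_F32", "v0", "v2"},
                               {"S_MOV_B32", "m0", "s7"}, // folded away
                               {"V_INTERP_P2_F32", "v0", "v3"}};
  std::vector<Inst> WithCopy = {{"COPY", "m0", "s7"},
                                {"V_INTERP_P1_F32", "v0", "v2"},
                                {"COPY", "m0", "s7"}, // survives: copies skipped
                                {"V_INTERP_P2_F32", "v0", "v3"}};
  std::cout << "S_MOV_B32 version: " << runSimpleCSE(WithMov).size()
            << " instructions\n";
  std::cout << "COPY version:      " << runSimpleCSE(WithCopy).size()
            << " instructions\n";
}

Compiling and running this prints 3 instructions for the S_MOV_B32 version and 4 for the COPY version, matching the redundant-move behavior the commit message describes.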
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp     20
-rw-r--r--  lib/Target/R600/SIInstructions.td   12
2 files changed, 8 insertions, 24 deletions
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 8343362d116..1a0010c03dc 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -326,26 +326,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opcode;
const int16_t *SubIndices;
- if (AMDGPU::M0 == DestReg) {
- // Check if M0 isn't already set to this value
- for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
- I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
-
- if (!I->definesRegister(AMDGPU::M0))
- continue;
-
- unsigned Opc = I->getOpcode();
- if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
- break;
-
- if (!I->readsRegister(SrcReg))
- break;
-
- // The copy isn't necessary
- return;
- }
- }
-
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index e1eb95580ac..00ce9bfcc26 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -2499,17 +2499,21 @@ def : Pat <
/********** Interpolation Paterns **********/
/********** ===================== **********/
+// The value of $params is constant throughout the entire kernel.
+// We need to use S_MOV_B32 $params, because CSE ignores copies, so
+// without it we end up with a lot of redundant moves.
+
def : Pat <
(int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;
def : Pat <
- (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
- imm:$attr_chan, imm:$attr, i32:$params),
+ imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
(EXTRACT_SUBREG $ij, sub1),
- imm:$attr_chan, imm:$attr, $params)
+ imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;
/********** ================== **********/