summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2012-03-06 17:30:35 -0500
committer Tom Stellard <thomas.stellard@amd.com> 2012-03-06 17:30:35 -0500
commit ee087912c586d61aa699574bc7daafda6d7026b3 (patch)
tree a5d8c4e7e1f3d576046442565e5fed0a2af0b5f4
parent 47e6432c71e5c875210b2a95fc41c19b7c8d4b8e (diff)
r600/llvm: Implement integer division
-rw-r--r-- src/gallium/drivers/radeon/R600GenRegisterInfo.pl 3
-rw-r--r-- src/gallium/drivers/radeon/R600InstrInfo.cpp 11
-rw-r--r-- src/gallium/drivers/radeon/R600InstrInfo.h 1
-rw-r--r-- src/gallium/drivers/radeon/R600Instructions.td 27
-rw-r--r-- src/gallium/drivers/radeon/R600LowerInstructions.cpp 186
-rw-r--r-- src/gallium/drivers/radeon/R600RegisterInfo.cpp 3
6 files changed, 213 insertions, 18 deletions
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
index fd7f62d3308..3d2db2fe931 100644
--- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
@@ -80,6 +80,7 @@ class RegSet <dag s> {
def ZERO : R600Reg<"0.0">;
def HALF : R600Reg<"0.5">;
def ONE : R600Reg<"1.0">;
+def ONE_INT : R600Reg<"1">; // integer 1 constant; reserved and mapped to HW index 250 in R600RegisterInfo.cpp
def NEG_HALF : R600Reg<"-0.5">;
def NEG_ONE : R600Reg<"-1.0">;
def PV_X : R600Reg<"pv.x">;
@@ -97,7 +98,7 @@ def R600_TReg32_X : RegisterClass <"AMDIL", [f32, i32], 32, (add
def R600_Reg32 : RegisterClass <"AMDIL", [f32, i32], 32, (add
R600_TReg32,
R600_CReg32,
- ZERO, HALF, ONE, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
def R600_Reg128 : RegisterClass<"AMDIL", [v4f32], 128, (add
$t128_string)>
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index ab1a292a5cb..2790def7253 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -76,6 +76,17 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const
}
}
+unsigned R600InstrInfo::getMULHI_UINT() const
+{
+ unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+ /* Pick the MULHI_UINT opcode for this generation: _r600 before HD5XXX, _eg otherwise. */
+ if (gen < AMDILDeviceInfo::HD5XXX) {
+ return AMDIL::MULHI_UINT_r600;
+ } else {
+ return AMDIL::MULHI_UINT_eg;
+ }
+}
+
unsigned R600InstrInfo::getMULLO_UINT() const
{
unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index 5395f409d5f..170e0e0de0b 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -66,6 +66,7 @@ namespace llvm {
virtual unsigned getISAOpcode(unsigned opcode) const;
bool isTrig(const MachineInstr &MI) const;
+ unsigned getMULHI_UINT() const;
unsigned getMULLO_UINT() const;
unsigned getRECIP_UINT() const;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 6ee8a53f7fc..f0135819c3a 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -354,11 +354,31 @@ def ADD_INT : R600_2OP <
let AMDILOp = AMDILInst.ADD_i32;
}
+def SUB_INT : R600_2OP < // emitted by the integer division lowering in R600LowerInstructions.cpp
+ 0x35, "SUB_INT $dst, $src0, $src1",
+ []
+>;
+
+def SETE_INT : R600_2OP < // not referenced by the lowering code added in this change
+ 0x3A, "SETE_INT $dst, $src0, $src1",
+ []
+>;
+
def SETGT_INT : R600_2OP <
0x3B, "SGT_INT $dst, $src0, $src1",
[]
>;
+def SETGE_INT : R600_2OP < // emitted by the ABS_i32 and integer division lowering
+ 0x3C, "SETGE_INT $dst, $src0, $src1",
+ []
+>;
+ /* CNDE_INT: R600LowerInstructions.cpp uses it as dst = (src0 == 0) ? src1 : src2 */
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT $dst, $src0, $src1, $src2",
+ []
+>;
+
/* Texture instructions */
def TEX_SAMPLE : R600_TEX <
@@ -477,6 +497,11 @@ class LSHR_Common <bits<32> inst> : R600_2OP <
let AMDILOp = AMDILInst.USHR_i32;
}
+class MULHI_UINT_Common <bits<32> inst> : R600_2OP < // shared by MULHI_UINT_r600 (0x76) and MULHI_UINT_eg (0x92)
+ inst, "MULHI $dst, $src0, $src1",
+ []
+>;
+
class MULLO_INT_Common <bits<32> inst> : R600_2OP <
inst, "MULLO_INT $dst, $src0, $src1",
[] >{
@@ -583,6 +608,7 @@ let Gen = AMDGPUGen.R600 in {
def COS_r600 : COS_Common<0x6F>;
def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
def RECIP_UINT_r600 : RECIP_UINT_Common <0x77>;
} // End AMDGPUGen.R600
@@ -764,6 +790,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in {
def COS_eg : COS_Common<0x8E>;
def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+ def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
def DOT4_eg : DOT4_Common<0xBE>;
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
index 35e81965054..51611da1f1f 100644
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -61,6 +61,11 @@ namespace {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ void divMod(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ bool div = true) const;
+
public:
R600LowerInstructionsPass(TargetMachine &tm) :
MachineFunctionPass(ID), TM(tm),
@@ -88,10 +93,10 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next, Next = llvm::next(I) ) {
- MachineInstr &MI = *I;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Prev = NULL;
+ I != MBB.end(); Prev = I, I ? ++I : I = MBB.begin() ) {
+ MachineInstr &MI = *I;
switch(MI.getOpcode()) {
case AMDIL::FLT:
BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FGE))
@@ -100,6 +105,30 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
.addOperand(MI.getOperand(1));
break;
+ case AMDIL::ABS_i32:
+ {
+ unsigned setgt = MRI->createVirtualRegister(
+ &AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT),
+ setgt)
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDIL::ZERO);
+
+ unsigned add_int = MRI->createVirtualRegister(
+ &AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT),
+ add_int)
+ .addReg(setgt)
+ .addOperand(MI.getOperand(1));
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::XOR_INT))
+ .addOperand(MI.getOperand(0))
+ .addReg(setgt)
+ .addReg(add_int);
+
+ break;
+ }
+
/* XXX: We could propagate the ABS flag to all of the uses of Operand0 and
* remove the ABS instruction.*/
case AMDIL::FABS_f32:
@@ -153,20 +182,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
}
case AMDIL::UDIV_i32:
- {
- /* XXX: We need to make sure we are handling precision correctly
- * here. */
- unsigned tmp =
- MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
- BuildMI(MBB, I, MBB.findDebugLoc(I),
- TII->get(TII->getRECIP_UINT()), tmp)
- .addOperand(MI.getOperand(2));
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addReg(tmp);
- break;
- }
+ divMod(MI, MBB, I);
+ break;
/* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
/* case AMDIL::DIV_INF_f32:
@@ -375,6 +392,11 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
continue;
}
MI.eraseFromParent();
+ if (Prev) {
+ I = Prev;
+ } else {
+ I = NULL;
+ }
}
}
return false;
@@ -399,3 +421,133 @@ void R600LowerInstructionsPass::calcAddress(const MachineOperand &ptrOp,
.addOperand(ptrOp);
}
}
+
+/* Mostly copied from tgsi_divmod() in r600_shader.c
+ *
+ * Expands a 32-bit integer divide/modulo (called for AMDIL::UDIV_i32) into a
+ * sequence of R600 ALU instructions built with BuildMI at position I in MBB.
+ *
+ * MI  - instruction being lowered: operand 0 is the destination register,
+ *       operand 1 the numerator, operand 2 the denominator.
+ * MBB - basic block receiving the new instructions.
+ * I   - insertion point passed to every BuildMI call.
+ * div - true: the destination is selected from q, q + 1 and q - 1 (quotient);
+ *       false: from r, r - denominator and r + denominator (remainder).
+ *
+ * MI itself is not erased here. CNDE_INT is used throughout as
+ * dst = (src0 == 0) ? src1 : src2. */
+void R600LowerInstructionsPass::divMod(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ bool div) const
+{
+ unsigned dst = MI.getOperand(0).getReg();
+ MachineOperand &numerator = MI.getOperand(1);
+ MachineOperand &denominator = MI.getOperand(2);
+ /* rcp = RECIP(denominator) = 2^32 / denominator + e
+ * e is rounding error */
+ unsigned rcp = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getRECIP_UINT()), rcp)
+ .addOperand(denominator);
+
+ /* rcp_lo = lo(rcp * denominator) */
+ unsigned rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), rcp_lo)
+ .addReg(rcp)
+ .addOperand(denominator);
+
+ /* rcp_hi = HI (rcp * denominator) */
+ unsigned rcp_hi = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), rcp_hi)
+ .addReg(rcp)
+ .addOperand(denominator);
+ /* neg_rcp_lo = 0 - rcp_lo */
+ unsigned neg_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), neg_rcp_lo)
+ .addReg(AMDIL::ZERO)
+ .addReg(rcp_lo);
+ /* abs_rcp_lo = rcp_hi == 0 ? neg_rcp_lo : rcp_lo */
+ unsigned abs_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), abs_rcp_lo)
+ .addReg(rcp_hi)
+ .addReg(neg_rcp_lo)
+ .addReg(rcp_lo);
+ /* e = HI(abs_rcp_lo * rcp) */
+ unsigned e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), e)
+ .addReg(abs_rcp_lo)
+ .addReg(rcp);
+ /* rcp_plus_e = rcp + e */
+ unsigned rcp_plus_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), rcp_plus_e)
+ .addReg(rcp)
+ .addReg(e);
+ /* rcp_sub_e = rcp - e */
+ unsigned rcp_sub_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), rcp_sub_e)
+ .addReg(rcp)
+ .addReg(e);
+
+ /* tmp0 = rcp_hi == 0 ? rcp_plus_e : rcp_sub_e */
+ unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), tmp0)
+ .addReg(rcp_hi)
+ .addReg(rcp_plus_e)
+ .addReg(rcp_sub_e);
+ /* q = HI(tmp0 * numerator): quotient estimate */
+ unsigned q = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), q)
+ .addReg(tmp0)
+ .addOperand(numerator);
+
+ /* num_sub_r = q * denominator */
+ unsigned num_sub_r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()),
+ num_sub_r)
+ .addReg(q)
+ .addOperand(denominator);
+ /* r = numerator - num_sub_r: remainder estimate */
+ unsigned r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), r)
+ .addOperand(numerator)
+ .addReg(num_sub_r);
+ /* r_ge_den = r >= denominator.
+ * NOTE(review): SETGE_INT looks like a signed compare, but this path lowers
+ * an unsigned divide - confirm against tgsi_divmod(). */
+ unsigned r_ge_den = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_den)
+ .addReg(r)
+ .addOperand(denominator);
+ /* r_ge_zero = numerator >= num_sub_r (same SETGE_INT caveat as above) */
+ unsigned r_ge_zero = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_zero)
+ .addOperand(numerator)
+ .addReg(num_sub_r);
+ /* tmp1 = r_ge_den & r_ge_zero */
+ unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::AND_INT), tmp1)
+ .addReg(r_ge_den)
+ .addReg(r_ge_zero);
+ /* div: val0 = q + 1, val1 = q - 1; mod: val0 = r - denominator, val1 = r + denominator */
+ unsigned val0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ unsigned val1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ unsigned result = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ if (div) {
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val0)
+ .addReg(q)
+ .addReg(AMDIL::ONE_INT);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val1)
+ .addReg(q)
+ .addReg(AMDIL::ONE_INT);
+ /* result = tmp1 == 0 ? q : val0 */
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result)
+ .addReg(tmp1)
+ .addReg(q)
+ .addReg(val0);
+ } else {
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val0)
+ .addReg(r)
+ .addOperand(denominator);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val1)
+ .addReg(r)
+ .addOperand(denominator);
+ /* result = tmp1 == 0 ? r : val0 */
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result)
+ .addReg(tmp1)
+ .addReg(r)
+ .addReg(val0);
+ }
+ /* dst = r_ge_zero == 0 ? val1 : result */
+ /* XXX: Do we need to set to MAX_INT if denominator is 0? */
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), dst)
+ .addReg(r_ge_zero)
+ .addReg(val1)
+ .addReg(result);
+}
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
index 71d8b0b78c2..d655cca7223 100644
--- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
@@ -44,6 +44,7 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
Reserved.set(AMDIL::ZERO);
Reserved.set(AMDIL::HALF);
Reserved.set(AMDIL::ONE);
+ Reserved.set(AMDIL::ONE_INT);
Reserved.set(AMDIL::NEG_HALF);
Reserved.set(AMDIL::NEG_ONE);
Reserved.set(AMDIL::PV_X);
@@ -90,6 +91,7 @@ unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
case AMDIL::ZERO: return 248;
case AMDIL::ONE:
case AMDIL::NEG_ONE: return 249;
+ case AMDIL::ONE_INT: return 250;
case AMDIL::HALF:
case AMDIL::NEG_HALF: return 252;
case AMDIL::ALU_LITERAL_X: return 253;
@@ -102,6 +104,7 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
switch(reg) {
case AMDIL::ZERO:
case AMDIL::ONE:
+ case AMDIL::ONE_INT:
case AMDIL::NEG_ONE:
case AMDIL::HALF:
case AMDIL::NEG_HALF: