diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-03-06 17:30:35 -0500 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-03-06 17:30:35 -0500 |
commit | ee087912c586d61aa699574bc7daafda6d7026b3 (patch) | |
tree | a5d8c4e7e1f3d576046442565e5fed0a2af0b5f4 | |
parent | 47e6432c71e5c875210b2a95fc41c19b7c8d4b8e (diff) |
r600/llvm: Implement integer division
-rw-r--r-- | src/gallium/drivers/radeon/R600GenRegisterInfo.pl | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600InstrInfo.cpp | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600InstrInfo.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600Instructions.td | 27 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600LowerInstructions.cpp | 186 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600RegisterInfo.cpp | 3 |
6 files changed, 213 insertions, 18 deletions
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl index fd7f62d3308..3d2db2fe931 100644 --- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl @@ -80,6 +80,7 @@ class RegSet <dag s> { def ZERO : R600Reg<"0.0">; def HALF : R600Reg<"0.5">; def ONE : R600Reg<"1.0">; +def ONE_INT : R600Reg<"1">; def NEG_HALF : R600Reg<"-0.5">; def NEG_ONE : R600Reg<"-1.0">; def PV_X : R600Reg<"pv.x">; @@ -97,7 +98,7 @@ def R600_TReg32_X : RegisterClass <"AMDIL", [f32, i32], 32, (add def R600_Reg32 : RegisterClass <"AMDIL", [f32, i32], 32, (add R600_TReg32, R600_CReg32, - ZERO, HALF, ONE, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; + ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; def R600_Reg128 : RegisterClass<"AMDIL", [v4f32], 128, (add $t128_string)> diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index ab1a292a5cb..2790def7253 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -76,6 +76,17 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const } } +unsigned R600InstrInfo::getMULHI_UINT() const +{ + unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); + + if (gen < AMDILDeviceInfo::HD5XXX) { + return AMDIL::MULHI_UINT_r600; + } else { + return AMDIL::MULHI_UINT_eg; + } +} + unsigned R600InstrInfo::getMULLO_UINT() const { unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index 5395f409d5f..170e0e0de0b 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -66,6 +66,7 @@ namespace llvm { virtual unsigned getISAOpcode(unsigned opcode) const; bool isTrig(const MachineInstr &MI) const; + unsigned getMULHI_UINT() const; unsigned getMULLO_UINT() const; unsigned getRECIP_UINT() const; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 6ee8a53f7fc..f0135819c3a 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -354,11 +354,31 @@ def ADD_INT : R600_2OP < let AMDILOp = AMDILInst.ADD_i32; } +def SUB_INT : R600_2OP < + 0x35, "SUB_INT $dst, $src0, $src1", + [] +>; + +def SETE_INT : R600_2OP < + 0x3A, "SETE_INT $dst, $src0, $src1", + [] +>; + def SETGT_INT : R600_2OP < 0x3B, "SGT_INT $dst, $src0, $src1", [] >; +def SETGE_INT : R600_2OP < + 0x3C, "SETGE_INT $dst, $src0, $src1", + [] +>; + +def CNDE_INT : R600_3OP < + 0x1C, "CNDE_INT $dst, $src0, $src1, $src2", + [] +>; + /* Texture instructions */ def TEX_SAMPLE : R600_TEX < @@ -477,6 +497,11 @@ class LSHR_Common <bits<32> inst> : R600_2OP < let AMDILOp = AMDILInst.USHR_i32; } +class MULHI_UINT_Common <bits<32> inst> : R600_2OP < + inst, "MULHI $dst, $src0, $src1", + [] +>; + class MULLO_INT_Common <bits<32> inst> : R600_2OP < inst, "MULLO_INT $dst, $src0, $src1", [] >{ @@ -583,6 +608,7 @@ let Gen = AMDGPUGen.R600 in { def COS_r600 : COS_Common<0x6F>; def MULLO_INT_r600 : MULLO_INT_Common<0x73>; def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; + def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; def RECIP_UINT_r600 : RECIP_UINT_Common <0x77>; } // End AMDGPUGen.R600 @@ -764,6 +790,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in { def COS_eg : COS_Common<0x8E>; def MULLO_INT_eg : MULLO_INT_Common<0x8F>; def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; + def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; def DOT4_eg : DOT4_Common<0xBE>; diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index 35e81965054..51611da1f1f 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -61,6 +61,11 @@ namespace { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + void divMod(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + bool div = true) const; + public: R600LowerInstructionsPass(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), @@ -88,10 +93,10 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I) ) { - MachineInstr &MI = *I; + for (MachineBasicBlock::iterator I = MBB.begin(), Prev = NULL; + I != MBB.end(); Prev = I, I ? ++I : I = MBB.begin() ) { + MachineInstr &MI = *I; switch(MI.getOpcode()) { case AMDIL::FLT: BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FGE)) @@ -100,6 +105,30 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) .addOperand(MI.getOperand(1)); break; + case AMDIL::ABS_i32: + { + unsigned setgt = MRI->createVirtualRegister( + &AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), + setgt) + .addOperand(MI.getOperand(1)) + .addReg(AMDIL::ZERO); + + unsigned add_int = MRI->createVirtualRegister( + &AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), + add_int) + .addReg(setgt) + .addOperand(MI.getOperand(1)); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::XOR_INT)) + .addOperand(MI.getOperand(0)) + .addReg(setgt) + .addReg(add_int); + + break; + } + /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and * remove the ABS instruction.*/ case AMDIL::FABS_f32: @@ -153,20 +182,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) } case AMDIL::UDIV_i32: - { - /* XXX: We need to make sure we are handling precision correctly - * here. */ - unsigned tmp = - MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), - TII->get(TII->getRECIP_UINT()), tmp) - .addOperand(MI.getOperand(2)); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT())) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addReg(tmp); - break; - } + divMod(MI, MBB, I); + break; /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */ /* case AMDIL::DIV_INF_f32: @@ -375,6 +392,11 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) continue; } MI.eraseFromParent(); + if (Prev) { + I = Prev; + } else { + I = NULL; + } } } return false; @@ -399,3 +421,133 @@ void R600LowerInstructionsPass::calcAddress(const MachineOperand &ptrOp, .addOperand(ptrOp); } } + +/* Mostly copied from tgsi_divmod() in r600_shader.c */ +void R600LowerInstructionsPass::divMod(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + bool div) const +{ + unsigned dst = MI.getOperand(0).getReg(); + MachineOperand &numerator = MI.getOperand(1); + MachineOperand &denominator = MI.getOperand(2); + /* rcp = RECIP(denominator) = 2^32 / denominator + e + * e is rounding error */ + unsigned rcp = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getRECIP_UINT()), rcp) + .addOperand(denominator); + + /* rcp_lo = lo(rcp * denominator) */ + unsigned rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), rcp_lo) + .addReg(rcp) + .addOperand(denominator); + + /* rcp_hi = HI (rcp * denominator) */ + unsigned rcp_hi = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), rcp_hi) + .addReg(rcp) + .addOperand(denominator); + + unsigned neg_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), neg_rcp_lo) + .addReg(AMDIL::ZERO) + .addReg(rcp_lo); + + unsigned abs_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), abs_rcp_lo) + .addReg(rcp_hi) + .addReg(neg_rcp_lo) + .addReg(rcp_lo); + + unsigned e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), e) + .addReg(abs_rcp_lo) + .addReg(rcp); + + unsigned rcp_plus_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), rcp_plus_e) + .addReg(rcp) + .addReg(e); + + unsigned rcp_sub_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), rcp_sub_e) + .addReg(rcp) + .addReg(e); + + /* tmp0 = rcp_hi == 0 ? rcp_plus_e : rcp_sub_e */ + unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), tmp0) + .addReg(rcp_hi) + .addReg(rcp_plus_e) + .addReg(rcp_sub_e); + + unsigned q = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), q) + .addReg(tmp0) + .addOperand(numerator); + + /* num_sub_r = q * denominator */ + unsigned num_sub_r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), + num_sub_r) + .addReg(q) + .addOperand(denominator); + + unsigned r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), r) + .addOperand(numerator) + .addReg(num_sub_r); + + unsigned r_ge_den = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_den) + .addReg(r) + .addOperand(denominator); + + unsigned r_ge_zero = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_zero) + .addOperand(numerator) + .addReg(num_sub_r); + + unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::AND_INT), tmp1) + .addReg(r_ge_den) + .addReg(r_ge_zero); + + unsigned val0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + unsigned val1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + unsigned result = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + if (div) { + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val0) + .addReg(q) + .addReg(AMDIL::ONE_INT); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val1) + .addReg(q) + .addReg(AMDIL::ONE_INT); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result) + .addReg(tmp1) + .addReg(q) + .addReg(val0); + } else { + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val0) + .addReg(r) + .addOperand(denominator); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val1) + .addReg(r) + .addOperand(denominator); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result) + .addReg(tmp1) + .addReg(r) + .addReg(val0); + } + + /* XXX: Do we need to set to MAX_INT if denominator is 0? */ + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), dst) + .addReg(r_ge_zero) + .addReg(val1) + .addReg(result); +} diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp index 71d8b0b78c2..d655cca7223 100644 --- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp +++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp @@ -44,6 +44,7 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const Reserved.set(AMDIL::ZERO); Reserved.set(AMDIL::HALF); Reserved.set(AMDIL::ONE); + Reserved.set(AMDIL::ONE_INT); Reserved.set(AMDIL::NEG_HALF); Reserved.set(AMDIL::NEG_ONE); Reserved.set(AMDIL::PV_X); @@ -90,6 +91,7 @@ unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const case AMDIL::ZERO: return 248; case AMDIL::ONE: case AMDIL::NEG_ONE: return 249; + case AMDIL::ONE_INT: return 250; case AMDIL::HALF: case AMDIL::NEG_HALF: return 252; case AMDIL::ALU_LITERAL_X: return 253; @@ -102,6 +104,7 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const switch(reg) { case AMDIL::ZERO: case AMDIL::ONE: + case AMDIL::ONE_INT: case AMDIL::NEG_ONE: case AMDIL::HALF: case AMDIL::NEG_HALF: |