diff options
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 56 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 1 |
2 files changed, 56 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 5b4a98d25cb..dc7bf24ba23 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2301,13 +2301,18 @@ AlgebraicOpt::visit(BasicBlock *bb)
 // =============================================================================
 
 // ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
 class LateAlgebraicOpt : public Pass
 {
 private:
    virtual bool visit(Instruction *);
 
    void handleADD(Instruction *);
+   void handleMULMAD(Instruction *);
    bool tryADDToSHLADD(Instruction *);
+
+   BuildUtil bld;
 };
 
 void
@@ -2368,6 +2373,52 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
    return true;
 }
 
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
+void
+LateAlgebraicOpt::handleMULMAD(Instruction *i)
+{
+   // TODO: handle NV50_IR_SUBOP_MUL_HIGH
+   if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32))
+      return;
+   if (isFloatType(i->dType) || typeSizeof(i->dType) != 4)
+      return;
+   if (i->subOp || i->usesFlags() || i->flagsDef >= 0)
+      return;
+
+   assert(!i->src(0).mod);
+   assert(!i->src(1).mod);
+   assert(i->op == OP_MUL ? 1 : !i->src(2).mod);
+
+   bld.setPosition(i, false);
+
+   Value *a = i->getSrc(0);
+   Value *b = i->getSrc(1);
+   Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2);
+
+   Value *tmp0 = bld.getSSA();
+   Value *tmp1 = bld.getSSA();
+
+   Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c);
+   insn->setPredicate(i->cc, i->getPredicate());
+
+   insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0));
+   insn->setPredicate(i->cc, i->getPredicate());
+   insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1);
+
+   Value *pred = i->getPredicate();
+   i->setPredicate(i->cc, NULL);
+
+   i->op = OP_XMAD;
+   i->setSrc(0, b);
+   i->setSrc(1, tmp1);
+   i->setSrc(2, tmp0);
+   i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
+   i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
+
+   i->setPredicate(i->cc, pred);
+}
+
 bool
 LateAlgebraicOpt::visit(Instruction *i)
 {
@@ -2375,6 +2426,11 @@ LateAlgebraicOpt::visit(Instruction *i)
    case OP_ADD:
       handleADD(i);
       break;
+   case OP_MUL:
+   case OP_MAD:
+   case OP_FMA:
+      handleMULMAD(i);
+      break;
    default:
       break;
    }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index de07ad1de89..2e2e40770e1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -170,7 +170,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
       }
       break;
    case OPCLASS_ARITH:
-      // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
       if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
           !isFloatType(insn->dType))
          return true;