summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/nouveau
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 56
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 1
2 files changed, 56 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 5b4a98d25cb..dc7bf24ba23 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2301,13 +2301,18 @@ AlgebraicOpt::visit(BasicBlock *bb)
// =============================================================================
// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
class LateAlgebraicOpt : public Pass
{
private:
virtual bool visit(Instruction *);
void handleADD(Instruction *);
+ void handleMULMAD(Instruction *); // rewrites 32-bit integer MUL/MAD/FMA into a sequence of XMADs
bool tryADDToSHLADD(Instruction *);
+
+ BuildUtil bld; // used by handleMULMAD to create the temporaries and helper XMAD instructions
};
void
@@ -2368,6 +2373,52 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
return true;
}
+// MUL(a, b) -> a few XMADs (two helper XMADs are emitted and i itself becomes the third)
+// MAD/FMA(a, b, c) -> a few XMADs (same sequence; c is the third source of the first XMAD)
+void
+LateAlgebraicOpt::handleMULMAD(Instruction *i)
+{
+ // TODO: handle NV50_IR_SUBOP_MUL_HIGH
+ if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32)) // target cannot do XMAD: leave i untouched
+ return;
+ if (isFloatType(i->dType) || typeSizeof(i->dType) != 4) // only non-float results of size 4 are rewritten
+ return;
+ if (i->subOp || i->usesFlags() || i->flagsDef >= 0) // bail on any subOp (presumably incl. MUL_HIGH, see TODO) and on flag use/definition
+ return;
+
+ assert(!i->src(0).mod); // the XMADs below take the plain source Values, so no modifiers may be set
+ assert(!i->src(1).mod);
+ assert(i->op == OP_MUL ? 1 : !i->src(2).mod); // src(2) is only checked when i is not a plain MUL
+
+ bld.setPosition(i, false); // NOTE(review): presumably makes bld insert before i — confirm against BuildUtil::setPosition
+
+ Value *a = i->getSrc(0);
+ Value *b = i->getSrc(1);
+ Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2); // a plain MUL gets an immediate 0 as its third operand
+
+ Value *tmp0 = bld.getSSA(); // receives the result of the first helper XMAD
+ Value *tmp1 = bld.getSSA(); // receives the result of the second (MRG) helper XMAD
+
+ Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c); // tmp0 = XMAD(b, a, c); no subOp is set on this one
+ insn->setPredicate(i->cc, i->getPredicate()); // helper runs under the same condition code and predicate as i
+
+ insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0)); // tmp1 = XMAD(b, a, 0), subOp set two lines below
+ insn->setPredicate(i->cc, i->getPredicate());
+ insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1); // NOTE(review): presumably H1(n) selects the upper 16 bits of source n — confirm
+
+ Value *pred = i->getPredicate();
+ i->setPredicate(i->cc, NULL); // predicate is detached while the sources are replaced; NOTE(review): presumably because it occupies a source slot — confirm
+
+ i->op = OP_XMAD; // i is reused as the final XMAD; its destination is not changed here
+ i->setSrc(0, b);
+ i->setSrc(1, tmp1);
+ i->setSrc(2, tmp0);
+ i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
+ i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
+
+ i->setPredicate(i->cc, pred); // re-attach the predicate saved above
+}
+
bool
LateAlgebraicOpt::visit(Instruction *i)
{
@@ -2375,6 +2426,11 @@ LateAlgebraicOpt::visit(Instruction *i)
case OP_ADD:
handleADD(i);
break;
+ case OP_MUL:
+ case OP_MAD:
+ case OP_FMA:
+ handleMULMAD(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index de07ad1de89..2e2e40770e1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -170,7 +170,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
}
break;
case OPCLASS_ARITH:
- // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
!isFloatType(insn->dType))
return true;