summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ilia Mirkin <imirkin@alum.mit.edu> 2014-05-14 23:22:32 -0400
committer: Ian Romanick <ian.d.romanick@intel.com> 2014-05-23 09:51:06 -0700
commit: d6a4c3c29c789857eb60016a61f5db0716e463ef (patch)
tree: 0b2537b94241dc6743c865cf66cd5c5c9047e29d
parent: 9028b946703da1d22de91fbfc55932455b482c35 (diff)
nv50/ir: fix constant folding for OP_MUL subop HIGH
These instructions can come in either through the IMUL_HI/UMUL_HI TGSI opcodes or from OP_DIV constant folding. Also make sure that constant foldings which delete the original instruction are still counted as having done something.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.1 10.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
(cherry picked from commit d2a3de19c6aa5881228734c73df706483a4aecf9)
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 47
1 file changed, 43 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index cdae3c8c2ba..bb88b18b609 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -425,7 +425,17 @@ ConstantFolding::expr(Instruction *i,
case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
case TYPE_S32:
- case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
+ break;
+ }
+ /* fallthrough */
+ case TYPE_U32:
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
+ break;
+ }
+ res.data.u32 = a->data.u32 * b->data.u32; break;
default:
return;
}
@@ -691,12 +701,41 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
+ Instruction *newi = i;
switch (i->op) {
case OP_MUL:
if (i->dType == TYPE_F32)
tryCollapseChainedMULs(i, s, imm0);
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ assert(!isFloatType(i->sType));
+ if (imm0.isInteger(1) && i->dType == TYPE_S32) {
+ bld.setPosition(i, false);
+ // Need to set to the sign value, which is a compare.
+ newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
+ TYPE_S32, i->getSrc(t), bld.mkImm(0));
+ delete_Instruction(prog, i);
+ } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
+ // The high bits can't be set in this case (either mul by 0 or
+ // unsigned by 1)
+ i->op = OP_MOV;
+ i->subOp = 0;
+ i->setSrc(0, new_ImmediateValue(prog, 0u));
+ i->src(0).mod = Modifier(0);
+ i->setSrc(1, NULL);
+ } else if (!imm0.isNegative() && imm0.isPow2()) {
+ // Translate into a shift
+ imm0.applyLog2();
+ i->op = OP_SHR;
+ i->subOp = 0;
+ imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
+ i->setSrc(0, i->getSrc(t));
+ i->src(0).mod = i->src(t).mod;
+ i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
+ i->src(1).mod = 0;
+ }
+ } else
if (imm0.isInteger(0)) {
i->op = OP_MOV;
i->setSrc(0, new_ImmediateValue(prog, 0u));
@@ -787,7 +826,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
else
tA = tB;
tB = s ? bld.getSSA() : i->getDef(0);
- bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
+ newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
if (s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
@@ -819,7 +858,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
tA = bld.getSSA();
bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
- bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
+ newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
if (d < 0)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
@@ -897,7 +936,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
default:
return;
}
- if (i->op != op)
+ if (newi->op != op)
foldCount++;
}