summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlia Mirkin <imirkin@alum.mit.edu>2014-05-13 11:23:33 -0400
committerCarl Worth <cworth@cworth.org>2014-05-19 14:50:03 -0700
commit2d6f733979dd3c3e3f32876bb1eb89c0693edf80 (patch)
tree9519f098e5b50be3e38518e0069aead1e1da573f
parent507d2e523ca81823d07e593d08ab6bfb261ceb88 (diff)
nv50/ir: fix integer mul lowering for u32 x u32 -> high u32
UNION appears to expect that all of its sources are conditionally defined. Otherwise it inserts an unpredicated mov instruction which overwrites the desired result. This fixes tests that use UMUL_HI, and much less directly, unsigned integer division by a constant, which uses this functionality in a peephole pass. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Cc: "10.1 10.2" <mesa-stable@lists.freedesktop.org> Reviewed-by: Ben Skeggs <bskeggs@redhat.com> (cherry picked from commit 5b8f1a0f7c5b1412577a913d374192a2329fa615)
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp7
1 files changed, 4 insertions, 3 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 7030c219c0e..c4ae6c41fb3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -75,16 +75,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
if (highResult) {
- Value *r[3];
+ Value *r[4];
Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
c[0] = bld->getSSA(1, FILE_FLAGS);
c[1] = bld->getSSA(1, FILE_FLAGS);
- for (int j = 0; j < 3; ++j)
+ for (int j = 0; j < 4; ++j)
r[j] = bld->getSSA(fullSize);
i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
- bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[0]);
+ bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]);
+ bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]);
i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]);
// set carry defs / sources