summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2015-08-05 20:05:56 -0700
committer: Eric Anholt <eric@anholt.net> 2015-10-26 16:48:34 -0700
commit: 3359ad6cda49fb977d837eb00e8ae4d781d95c2a (patch)
tree: cac61e2a26a4edfce06bafa7f48e5b58a3abbc1d /src/gallium/drivers/vc4
parent: 01ca4f207efac555ff5f729dce1687a68ba65400 (diff)
vc4: Add support for copy propagation with unpack flags present.
total instructions in shared programs: 89251 -> 87862 (-1.56%)
instructions in affected programs: 52971 -> 51582 (-2.62%)
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_copy_propagation.c | 84
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c | 61
2 files changed, 109 insertions, 36 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index b46be24ad0c..0eee5c34e1d 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -41,29 +41,77 @@ qir_opt_copy_propagation(struct vc4_compile *c)
bool debug = false;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
- int index = inst->src[i].index;
- if (inst->src[i].file == QFILE_TEMP &&
- c->defs[index] &&
- qir_is_raw_mov(c->defs[index]) &&
- (c->defs[index]->src[0].file == QFILE_TEMP ||
- c->defs[index]->src[0].file == QFILE_UNIF)) {
- if (debug) {
- fprintf(stderr, "Copy propagate: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
- }
+ int nsrc = qir_get_op_nsrc(inst->op);
+ for (int i = 0; i < nsrc; i++) {
+ if (inst->src[i].file != QFILE_TEMP)
+ continue;
+
+ struct qinst *mov = c->defs[inst->src[i].index];
+ if (!mov ||
+ (mov->op != QOP_MOV &&
+ mov->op != QOP_FMOV &&
+ mov->op != QOP_MMOV)) {
+ continue;
+ }
- inst->src[i] = c->defs[index]->src[0];
+ if (mov->src[0].file != QFILE_TEMP &&
+ mov->src[0].file != QFILE_UNIF) {
+ continue;
+ }
+
+ if (mov->dst.pack)
+ continue;
+
+ uint8_t unpack;
+ if (mov->src[0].pack) {
+ /* Make sure that the meaning of the unpack
+ * would be the same between the two
+ * instructions.
+ */
+ if (qir_is_float_input(inst) !=
+ qir_is_float_input(mov)) {
+ continue;
+ }
- if (debug) {
- fprintf(stderr, "to: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
+ /* There's only one unpack field, so make sure
+ * this instruction doesn't already use it.
+ */
+ bool already_has_unpack = false;
+ for (int j = 0; j < nsrc; j++) {
+ if (inst->src[j].pack)
+ already_has_unpack = true;
}
+ if (already_has_unpack)
+ continue;
- progress = true;
+ /* A destination pack requires the PM bit to
+ * be set to a specific value already, which
+ * may be different from ours.
+ */
+ if (inst->dst.pack)
+ continue;
+
+ unpack = mov->src[0].pack;
+ } else {
+ unpack = inst->src[i].pack;
+ }
+
+ if (debug) {
+ fprintf(stderr, "Copy propagate: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
}
+
+ inst->src[i] = mov->src[0];
+ inst->src[i].pack = unpack;
+
+ if (debug) {
+ fprintf(stderr, "to: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
+ }
+
+ progress = true;
}
}
return progress;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index d06f8b27d29..133e1385178 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -101,7 +101,8 @@ swap_file(struct qpu_reg *src)
static void
fixup_raddr_conflict(struct vc4_compile *c,
struct qpu_reg dst,
- struct qpu_reg *src0, struct qpu_reg *src1)
+ struct qpu_reg *src0, struct qpu_reg *src1,
+ struct qinst *inst, uint64_t *unpack)
{
uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux;
uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux;
@@ -117,7 +118,21 @@ fixup_raddr_conflict(struct vc4_compile *c,
return;
if (mux0 == QPU_MUX_A) {
- queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+ /* Make sure we use the same type of MOV as the instruction,
+ * in case of unpacks.
+ */
+ if (qir_is_float_input(inst))
+ queue(c, qpu_a_FMAX(qpu_rb(31), *src0, *src0));
+ else
+ queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+
+ /* If we had an unpack on this A-file source, we need to put
+ * it into this MOV, not into the later move from regfile B.
+ */
+ if (inst->src[0].pack) {
+ *last_inst(c) |= *unpack;
+ *unpack = 0;
+ }
*src0 = qpu_rb(31);
} else {
queue(c, qpu_a_MOV(qpu_ra(31), *src0));
@@ -296,7 +311,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
case QOP_SEL_X_0_NC:
- queue(c, qpu_a_MOV(dst, src[0]));
+ queue(c, qpu_a_MOV(dst, src[0]) | unpack);
set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
QPU_COND_ZS);
@@ -310,10 +325,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
queue(c, qpu_a_MOV(dst, src[0]));
+ if (qinst->src[0].pack)
+ *(last_inst(c)) |= unpack;
set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
QPU_COND_ZS);
queue(c, qpu_a_MOV(dst, src[1]));
+ if (qinst->src[1].pack)
+ *(last_inst(c)) |= unpack;
set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
1) + QPU_COND_ZS);
@@ -326,19 +345,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
switch (qinst->op) {
case QOP_RCP:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_RSQ:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_EXP2:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_LOG2:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
- src[0]));
+ src[0]) | unpack);
break;
default:
abort();
@@ -373,16 +392,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TLB_DISCARD_SETUP:
discard = true;
- queue(c, qpu_a_MOV(src[0], src[0]));
+ queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
*last_inst(c) |= QPU_SF;
break;
case QOP_TLB_STENCIL_SETUP:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
+ assert(!unpack);
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
+ src[0]) | unpack);
break;
case QOP_TLB_Z_WRITE:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
+ src[0]) | unpack);
if (discard) {
set_last_cond_add(c, QPU_COND_ZS);
}
@@ -398,14 +420,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
case QOP_TLB_COLOR_WRITE:
- queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
+ queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
if (discard) {
set_last_cond_add(c, QPU_COND_ZS);
}
break;
case QOP_VARY_ADD_C:
- queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
+ queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
break;
case QOP_TEX_S:
@@ -414,12 +436,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TEX_B:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
(qinst->op - QOP_TEX_S)),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_TEX_DIRECT:
- fixup_raddr_conflict(c, dst, &src[0], &src[1]);
- queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
+ fixup_raddr_conflict(c, dst, &src[0], &src[1],
+ qinst, &unpack);
+ queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S),
+ src[0], src[1]) | unpack);
break;
case QOP_TEX_RESULT:
@@ -447,16 +471,17 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
if (qir_get_op_nsrc(qinst->op) == 1)
src[1] = src[0];
- fixup_raddr_conflict(c, dst, &src[0], &src[1]);
+ fixup_raddr_conflict(c, dst, &src[0], &src[1],
+ qinst, &unpack);
if (qir_is_mul(qinst)) {
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
- src[0], src[1]));
+ src[0], src[1]) | unpack);
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
- src[0], src[1]));
+ src[0], src[1]) | unpack);
}
set_last_dst_pack(c, qinst);