summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2015-01-08 18:32:29 -0800
committer Eric Anholt <eric@anholt.net> 2015-01-10 13:54:12 +1300
commit 72cb6619cb75a92901d372d687505a747a384571 (patch)
tree 90568090fa72cec2f3c6aa78cd274dd5453cece0
parent 3093bfacf042516cd2a0fa2346da1e9df5f321ec (diff)
vc4: Restructure color packing as a series of channel replacements.

I'm using this in some WIP commits for doing blending in 8888 instead of vec4. But it also gives us these results immediately, thanks to allowing more uniforms/immediates in the arguments:

total instructions in shared programs: 41027 -> 40960 (-0.16%)
instructions in affected programs: 4381 -> 4314 (-1.53%)

-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 34
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 19
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 50
4 files changed, 60 insertions, 49 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index bba02ca93f..6bad1560b2 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1843,32 +1843,22 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- bool color_written = false;
+ struct qreg packed_color = c->undef;
for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file != QFILE_NULL)
- color_written = true;
- }
-
- struct qreg packed_color;
- if (color_written) {
- /* Fill in any undefined colors. The simulator will assertion
- * fail if we read something that wasn't written, and I don't
- * know what hardware does.
- */
- for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file == QFILE_NULL)
- swizzled_outputs[i] = qir_uniform_f(c, 0.0);
+ if (swizzled_outputs[i].file == QFILE_NULL)
+ continue;
+ if (packed_color.file == QFILE_NULL) {
+ packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
+ } else {
+ packed_color = qir_PACK_8_F(c,
+ packed_color,
+ swizzled_outputs[i],
+ i);
}
- packed_color = qir_get_temp(c);
- qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
- swizzled_outputs[0],
- swizzled_outputs[1],
- swizzled_outputs[2],
- swizzled_outputs[3]));
- } else {
- packed_color = qir_uniform_ui(c, 0);
}
+ if (packed_color.file == QFILE_NULL)
+ packed_color = qir_uniform_ui(c, 0);
if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
packed_color = vc4_logicop(c, packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 3fd3941322..5f3b8ddc44 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -73,7 +73,11 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
[QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+ [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
+ [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
+ [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
+ [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
+ [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
[QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
[QOP_VPM_READ] = { "vpm_read", 0, 1, true },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index f7d59a80da..6dac00fbbd 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -100,7 +100,11 @@ enum qop {
QOP_VW_SETUP,
QOP_VR_SETUP,
QOP_PACK_SCALED,
- QOP_PACK_COLORS,
+ QOP_PACK_8888_F,
+ QOP_PACK_8A_F,
+ QOP_PACK_8B_F,
+ QOP_PACK_8C_F,
+ QOP_PACK_8D_F,
QOP_VPM_READ,
QOP_TLB_DISCARD_SETUP,
QOP_TLB_STENCIL_SETUP,
@@ -473,6 +477,11 @@ QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU2(PACK_SCALED)
+QIR_ALU1(PACK_8888_F)
+QIR_ALU2(PACK_8A_F)
+QIR_ALU2(PACK_8B_F)
+QIR_ALU2(PACK_8C_F)
+QIR_ALU2(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
@@ -539,6 +548,14 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
}
static inline struct qreg
+qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
+{
+ struct qreg t = qir_get_temp(c);
+ qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
+ return t;
+}
+
+static inline struct qreg
qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
{
return qir_EXP2(c, qir_FMUL(c,
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 503f32a4c0..857d56e0f4 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -347,40 +347,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
- case QOP_PACK_COLORS: {
- /* We have to be careful not to start writing over one
- * of our source values when incrementally writing the
- * destination. So, if the dst is one of the srcs, we
- * pack that one first (and we pack 4 channels at once
- * for the first pack).
- */
- struct qpu_reg first_pack = src[0];
- for (int i = 0; i < 4; i++) {
- if (src[i].mux == dst.mux &&
- src[i].addr == dst.addr) {
- first_pack = dst;
- break;
- }
- }
- queue(c, qpu_m_MOV(dst, first_pack));
+ case QOP_PACK_8888_F:
+ queue(c, qpu_m_MOV(dst, src[0]));
*last_inst(c) |= QPU_PM;
*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
QPU_PACK);
+ break;
- for (int i = 0; i < 4; i++) {
- if (src[i].mux == first_pack.mux &&
- src[i].addr == first_pack.addr) {
- continue;
+ case QOP_PACK_8A_F:
+ case QOP_PACK_8B_F:
+ case QOP_PACK_8C_F:
+ case QOP_PACK_8D_F:
+ /* If dst doesn't happen to already contain src[0],
+ * then we have to move it in.
+ */
+ if (qinst->src[0].file != QFILE_NULL &&
+ (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
+ /* Don't overwrite src1 while setting up
+ * the dst!
+ */
+ if (dst.mux == src[1].mux &&
+ dst.addr == src[1].addr) {
+ queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
+ src[1] = qpu_rb(31);
}
- queue(c, qpu_m_MOV(dst, src[i]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
- QPU_PACK);
+ queue(c, qpu_m_MOV(dst, src[0]));
}
+ queue(c, qpu_m_MOV(dst, src[1]));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
+ qinst->op - QOP_PACK_8A_F,
+ QPU_PACK);
break;
- }
case QOP_FRAG_X:
queue(c, qpu_a_ITOF(dst,