summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2017-02-06 17:30:59 -0800
committerEmil Velikov <emil.l.velikov@gmail.com>2017-02-16 17:59:42 +0000
commit0683feb18ca4f6010e0714d398d2fc90d3c25413 (patch)
treeeb4d2afc9e7cb2a9acfb16c9640218a6dea62fa1 /src/gallium
parent7d172c6c356b6e2257162c09c4560d3b02ce42d4 (diff)
vc4: Avoid emitting small immediates for UBO indirect load address guards.
The kernel will reject our shader if we emit one here, and having 4, 8, or 12 as the top end of our UBO clamp rare is enough that it's not worth making the kernel let us. Fixes piglit fs-const-array-of-struct and fs-const-array-of-struct-of-array since recent GLSL linking changes made us get this as an indirect load of a uniform, instead of a tempoary. Cc: "13.0 17.0" <mesa-stable@lists.freedesktop.org> (cherry picked from commit b2309393039b2ec0cc00a8e6fd828c60c4ef1e11) [Emil Velikov: resolve trivial conflicts] Signed-off-by: Emil Velikov <emil.velikov@collabora.com> Conflicts: src/gallium/drivers/vc4/vc4_opt_small_immediates.c
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c11
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c2
5 files changed, 20 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index 4c105f37344..e97cb63aec9 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c)
if (uses_small_imm)
continue;
+ /* Don't propagate small immediates into the top-end bounds
+ * checking for indirect UBO loads. The kernel doesn't parse
+ * small immediates and rejects the shader in this case. UBO
+ * loads are much more expensive than the uniform load, and
+ * indirect UBO regions are usually much larger than a small
+ * immediate, so it's not worth updating the kernel to allow
+ * optimizing it.
+ */
+ if (inst->op == QOP_MIN_NOIMM)
+ continue;
+
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
struct qreg src = qir_follow_movs(c, inst->src[i]);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 15e8984ef79..00e16e3db54 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -102,9 +102,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
/* Clamp to [0, array size). Note that MIN/MAX are signed. */
indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
- indirect_offset = qir_MIN(c, indirect_offset,
- qir_uniform_ui(c, (range->dst_offset +
- range->size - 4)));
+ indirect_offset = qir_MIN_NOIMM(c, indirect_offset,
+ qir_uniform_ui(c, (range->dst_offset +
+ range->size - 4)));
qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
c->num_texture_samples++;
@@ -322,7 +322,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
/* Perform the clamping required by kernel validation. */
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
- addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
+ addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4));
qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 446af66affd..4b94fcfb9c5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_ASR] = { "asr", 1, 2 },
[QOP_SHL] = { "shl", 1, 2 },
[QOP_MIN] = { "min", 1, 2 },
+ [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
[QOP_MAX] = { "max", 1, 2 },
[QOP_AND] = { "and", 1, 2 },
[QOP_OR] = { "or", 1, 2 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index c76aeb2bf4e..b3cac6bf26b 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -111,6 +111,7 @@ enum qop {
QOP_SHR,
QOP_ASR,
QOP_MIN,
+ QOP_MIN_NOIMM,
QOP_MAX,
QOP_AND,
QOP_OR,
@@ -709,6 +710,7 @@ QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
+QIR_ALU2(MIN_NOIMM)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index eedee55a9f5..2ee52a497ef 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c,
[QOP_MOV] = { QPU_A_OR },
[QOP_FMOV] = { QPU_A_FMAX },
[QOP_MMOV] = { QPU_M_V8MIN },
+
+ [QOP_MIN_NOIMM] = { QPU_A_MIN },
};
uint64_t unpack = 0;