summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2020-01-21 12:24:49 -0800
committerMarge Bot <eric+marge@anholt.net>2020-01-23 17:38:29 +0000
commitb327501dbf946279c8dff55566af73821d0d576e (patch)
tree9fd7eeb21c36ef31f3db1d6b9c5440af36704bda
parent876824908db342f83cebb7845d01b713f85b577a (diff)
turnip: Add support for fine derivatives.
This does appear to be the required instruction sequence (dsxpp_1 dst src; dsxpp_1.p dst src) as dropping either instruction fails the testsuite. Fixes dEQP-VK.glsl.derivate.* Reviewed-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Rob Clark <robdclark@chromium.org> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3494> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3494>
-rw-r--r--src/freedreno/ir3/ir3.h2
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c8
-rw-r--r--src/freedreno/ir3/ir3_legalize.c7
-rw-r--r--src/freedreno/ir3/ir3_shader.h2
-rw-r--r--src/freedreno/vulkan/tu_pipeline.c7
5 files changed, 25 insertions, 1 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index b5135fa4017..461ff633205 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1437,7 +1437,9 @@ INSTR1(SQRT)
/* cat5 instructions: */
INSTR1(DSX)
+INSTR1(DSXPP_1)
INSTR1(DSY)
+INSTR1(DSYPP_1)
INSTR1F(3D, DSX)
INSTR1F(3D, DSY)
INSTR1(RGETPOS)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 602b3612165..51715025561 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -467,12 +467,20 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
dst[0] = ir3_DSX(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32;
break;
+ case nir_op_fddx_fine:
+ dst[0] = ir3_DSXPP_1(b, src[0], 0);
+ dst[0]->cat5.type = TYPE_F32;
+ break;
case nir_op_fddy:
case nir_op_fddy_coarse:
dst[0] = ir3_DSY(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32;
break;
break;
+ case nir_op_fddy_fine:
+ dst[0] = ir3_DSYPP_1(b, src[0], 0);
+ dst[0]->cat5.type = TYPE_F32;
+ break;
case nir_op_flt16:
case nir_op_flt32:
dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 7894b75c0e8..db21507181c 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -246,6 +246,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
list_addtail(&n->node, &block->instr_list);
}
+ if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) {
+ struct ir3_instruction *op_p = ir3_instr_clone(n);
+ op_p->flags = IR3_INSTR_P;
+
+ ctx->so->need_fine_derivatives = true;
+ }
+
if (is_sfu(n))
regmask_set(&state->needs_ss, n->regs[0]);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index f056a3e5cd6..e6765985676 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -564,6 +564,8 @@ struct ir3_shader_variant {
/* do we need derivatives: */
bool need_pixlod;
+ bool need_fine_derivatives;
+
/* do we have kill, image write, etc (which prevents early-z): */
bool no_earlyz;
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 7593efb7bbd..c6624dd4932 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -378,6 +378,8 @@ tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack);
if (vs->need_pixlod)
sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE;
+ if (vs->need_fine_derivatives)
+ sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_DIFF_FINE;
uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) |
A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc);
@@ -463,6 +465,8 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_VARYING;
if (fs->need_pixlod)
sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
+ if (fs->need_fine_derivatives)
+ sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_DIFF_FINE;
uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) |
A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) |
@@ -515,7 +519,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(v->info.max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_MERGEDREGS |
A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
- COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+ COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE) |
+ COND(v->need_fine_derivatives, A6XX_SP_CS_CTRL_REG0_DIFF_FINE));
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
tu_cs_emit(cs, 0x41);