summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2017-06-06 15:23:42 +0200
committer: Emil Velikov <emil.l.velikov@gmail.com> 2017-06-14 12:47:43 +0100
commit: 2a7279fa8f28ea99b2786e33f7678f92b41f69eb (patch)
tree: 82088f756740e1e8f092451927486c12e3fb6f9f /src/gallium/drivers/radeonsi
parent: ae960d7deee977da5b479539382382aacb99455d (diff)
radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)
The workaround causes a massive performance decrease on 1-SE parts. (Cape Verde, Hainan, Oland) The performance regression is already part of 17.0 and 17.1. v2: check tess_uses_prim_id Cc: 17.0 17.1 <mesa-stable@lists.freedesktop.org> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (cherry picked from commit 391673af7ad1565a5f6ac8fc2f8c9fcdd1fe9908) [Emil Velikov: s/tcs_tes_uses_prim_id/tess_uses_prim_id/] Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h | 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c | 35
2 files changed, 21 insertions, 15 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 09788316a49..529e1e3d97d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -366,6 +366,7 @@ struct si_context {
struct si_shader_selector *last_tcs;
int last_num_tcs_input_cp;
int last_tes_sh_base;
+ bool last_tess_uses_primid;
unsigned last_num_patches;
/* Debug state. */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 0fb90bedf1a..a62b1aa6f27 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -101,6 +101,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
* It would be wrong to think that TCS = TES. */
struct si_shader_selector *tcs =
sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
+ unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
+ bool has_primid_instancing_bug = sctx->b.chip_class == SI &&
+ sctx->b.screen->info.max_se == 1;
unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
@@ -114,7 +117,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->last_ls == ls->current &&
sctx->last_tcs == tcs &&
sctx->last_tes_sh_base == tes_sh_base &&
- sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
+ sctx->last_num_tcs_input_cp == num_tcs_input_cp &&
+ (!has_primid_instancing_bug ||
+ (sctx->last_tess_uses_primid == tess_uses_primid))) {
*num_patches = sctx->last_num_patches;
return;
}
@@ -123,6 +128,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
sctx->last_tcs = tcs;
sctx->last_tes_sh_base = tes_sh_base;
sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+ sctx->last_tess_uses_primid = tess_uses_primid;
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
@@ -178,22 +184,21 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->b.chip_class == SI) {
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
*num_patches = MIN2(*num_patches, one_wave);
-
- if (sctx->screen->b.info.max_se == 1) {
- /* The VGT HS block increments the patch ID unconditionally
- * within a single threadgroup. This results in incorrect
- * patch IDs when instanced draws are used.
- *
- * The intended solution is to restrict threadgroups to
- * a single instance by setting SWITCH_ON_EOI, which
- * should cause IA to split instances up. However, this
- * doesn't work correctly on SI when there is no other
- * SE to switch to.
- */
- *num_patches = 1;
- }
}
+ /* The VGT HS block increments the patch ID unconditionally
+ * within a single threadgroup. This results in incorrect
+ * patch IDs when instanced draws are used.
+ *
+ * The intended solution is to restrict threadgroups to
+ * a single instance by setting SWITCH_ON_EOI, which
+ * should cause IA to split instances up. However, this
+ * doesn't work correctly on SI when there is no other
+ * SE to switch to.
+ */
+ if (has_primid_instancing_bug && tess_uses_primid)
+ *num_patches = 1;
+
sctx->last_num_patches = *num_patches;
output_patch0_offset = input_patch_size * *num_patches;