summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2020-07-29 19:04:38 -0400
committer: Dylan Baker <dylan.c.baker@intel.com> 2020-08-07 10:44:38 -0700
commit: 31836102281038d57e099af3c46fec7dd19dcefb (patch)
tree: 56177302f63214998e56fea47aae9bf1078d098b
parent: 6eadb68e989bd09989de08398a668470fafc0dbe (diff)
radeonsi: increase minimum NGG vertex count requirement per workgroup on gfx 10.3
Fixes: a23802bcb9a - ac,radeonsi: start adding support for gfx10.3
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137>
(cherry picked from commit 7a468fc0f6a02d2ee71b50225459de335ed689b4)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c 7
2 files changed, 5 insertions, 4 deletions
diff --git a/.pick_status.json b/.pick_status.json
index ba473424408..9b6e1066f6a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -58,7 +58,7 @@
"description": "radeonsi: increase minimum NGG vertex count requirement per workgroup on gfx 10.3",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": "a23802bcb9a42a02d34a5a36d6e66d6532813a0d"
},
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 4b639eecf5f..ee242da7ed1 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1920,6 +1920,7 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
unsigned gsprim_lds_size = 0;
/* All these are per subgroup: */
+ const unsigned min_esverts = gs_sel->screen->info.chip_class >= GFX10_3 ? 29 : 24;
bool max_vert_out_per_gs_instance = false;
unsigned max_gsprims_base = 128; /* default prim group size clamp */
unsigned max_esverts_base = 128;
@@ -2033,7 +2034,7 @@ retry_select_mode:
}
/* Hardware restriction: minimum value of max_esverts */
- max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
unsigned max_out_vertices =
max_vert_out_per_gs_instance
@@ -2064,10 +2065,10 @@ retry_select_mode:
shader->gs_info.esgs_ring_size = max_esverts * esvert_lds_size;
shader->ngg.ngg_emit_size = max_gsprims * gsprim_lds_size;
- assert(shader->ngg.hw_max_esverts >= 24); /* HW limitation */
+ assert(shader->ngg.hw_max_esverts >= min_esverts); /* HW limitation */
/* If asserts are disabled, we use the same conditions to return false */
return max_esverts >= max_verts_per_prim && max_gsprims >= 1 &&
max_out_vertices <= 256 &&
- shader->ngg.hw_max_esverts >= 24;
+ shader->ngg.hw_max_esverts >= min_esverts;
}