summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2020-01-09 16:41:13 -0500
committerMarek Olšák <marek.olsak@amd.com>2020-01-13 15:57:07 -0500
commit2bb88b2fdc0d9271079e45a009ddbc6bfa6a0f37 (patch)
tree482cd438bfe5624218f5ab8b5878a1c96180aca3 /src/gallium/drivers
parent363b4027fcbae3cc69ff6e55989f900398c3968a (diff)
radeonsi: don't enable VBOs in user SGPRs if compute-based culling can be used
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/radeonsi/si_compute_prim_discard.c56
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c15
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h5
3 files changed, 46 insertions, 30 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 31c18e098e6..b6d92da8233 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -187,36 +187,38 @@
/* For emulating the rewind packet on CI. */
#define FORCE_REWIND_EMULATION 0
-void si_initialize_prim_discard_tunables(struct si_context *sctx)
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+ bool is_aux_context,
+ unsigned *prim_discard_vertex_count_threshold,
+ unsigned *index_ring_size_per_ib)
{
- sctx->prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
+ *prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
- if (sctx->chip_class == GFX6 || /* SI support is not implemented */
- !sctx->screen->info.has_gds_ordered_append ||
- sctx->screen->debug_flags & DBG(NO_PD) ||
- /* If aux_context == NULL, we are initializing aux_context right now. */
- !sctx->screen->aux_context)
+ if (sscreen->info.chip_class == GFX6 || /* SI support is not implemented */
+ !sscreen->info.has_gds_ordered_append ||
+ sscreen->debug_flags & DBG(NO_PD) ||
+ is_aux_context)
return;
/* TODO: enable this after the GDS kernel memory management is fixed */
bool enable_on_pro_graphics_by_default = false;
- if (sctx->screen->debug_flags & DBG(ALWAYS_PD) ||
- sctx->screen->debug_flags & DBG(PD) ||
+ if (sscreen->debug_flags & DBG(ALWAYS_PD) ||
+ sscreen->debug_flags & DBG(PD) ||
(enable_on_pro_graphics_by_default &&
- sctx->screen->info.is_pro_graphics &&
- (sctx->family == CHIP_BONAIRE ||
- sctx->family == CHIP_HAWAII ||
- sctx->family == CHIP_TONGA ||
- sctx->family == CHIP_FIJI ||
- sctx->family == CHIP_POLARIS10 ||
- sctx->family == CHIP_POLARIS11 ||
- sctx->family == CHIP_VEGA10 ||
- sctx->family == CHIP_VEGA20))) {
- sctx->prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
-
- if (sctx->screen->debug_flags & DBG(ALWAYS_PD))
- sctx->prim_discard_vertex_count_threshold = 0; /* always enable */
+ sscreen->info.is_pro_graphics &&
+ (sscreen->info.family == CHIP_BONAIRE ||
+ sscreen->info.family == CHIP_HAWAII ||
+ sscreen->info.family == CHIP_TONGA ||
+ sscreen->info.family == CHIP_FIJI ||
+ sscreen->info.family == CHIP_POLARIS10 ||
+ sscreen->info.family == CHIP_POLARIS11 ||
+ sscreen->info.family == CHIP_VEGA10 ||
+ sscreen->info.family == CHIP_VEGA20))) {
+ *prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
+
+ if (sscreen->debug_flags & DBG(ALWAYS_PD))
+ *prim_discard_vertex_count_threshold = 0; /* always enable */
const uint32_t MB = 1024 * 1024;
const uint64_t GB = 1024 * 1024 * 1024;
@@ -224,12 +226,12 @@ void si_initialize_prim_discard_tunables(struct si_context *sctx)
/* The total size is double this per context.
* Greater numbers allow bigger gfx IBs.
*/
- if (sctx->screen->info.vram_size <= 2 * GB)
- sctx->index_ring_size_per_ib = 64 * MB;
- else if (sctx->screen->info.vram_size <= 4 * GB)
- sctx->index_ring_size_per_ib = 128 * MB;
+ if (sscreen->info.vram_size <= 2 * GB)
+ *index_ring_size_per_ib = 64 * MB;
+ else if (sscreen->info.vram_size <= 4 * GB)
+ *index_ring_size_per_ib = 128 * MB;
else
- sctx->index_ring_size_per_ib = 256 * MB;
+ *index_ring_size_per_ib = 256 * MB;
}
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index a69f6c07800..f37a4b222bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -593,7 +593,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->queued.named.rasterizer = sctx->discard_rasterizer_state;
si_init_draw_functions(sctx);
- si_initialize_prim_discard_tunables(sctx);
+
+ /* If aux_context == NULL, we are initializing aux_context right now. */
+ bool is_aux_context = !sscreen->aux_context;
+ si_initialize_prim_discard_tunables(sscreen, is_aux_context,
+ &sctx->prim_discard_vertex_count_threshold,
+ &sctx->index_ring_size_per_ib);
}
/* Initialize SDMA functions. */
@@ -1092,7 +1097,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
- sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
+ unsigned prim_discard_vertex_count_threshold, tmp;
+ si_initialize_prim_discard_tunables(sscreen, false,
+ &prim_discard_vertex_count_threshold,
+ &tmp);
+ /* Compute-shader-based culling doesn't support VBOs in user SGPRs. */
+ if (prim_discard_vertex_count_threshold != UINT_MAX)
+ sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
/* Determine tessellation ring info. */
bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6c92dc1a81e..b0287688f9a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1437,7 +1437,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
unsigned base_vertex,
uint64_t input_indexbuf_va,
unsigned input_indexbuf_max_elements);
-void si_initialize_prim_discard_tunables(struct si_context *sctx);
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+ bool is_aux_context,
+ unsigned *prim_discard_vertex_count_threshold,
+ unsigned *index_ring_size_per_ib);
/* si_pipe.c */
void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler);