diff options
author | Marek Olšák <marek.olsak@amd.com> | 2022-05-14 22:16:16 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-06-11 11:14:16 +0000 |
commit | 8edafaa25c5d649af6c016a61383d784a1ebb078 (patch) | |
tree | 1359d12c17bcfd7fc01276c633cabdd988567702 /src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | |
parent | 1592921c599620524fd03575bcc7f81594a712bc (diff) |
winsys/amdgpu: use AMDGPU_IB_FLAG_PREAMBLE for the CS preamble on gfx10+
This skips the preamble for subsequent IBs when the queue receives IBs from
the same context back-to-back. In those cases, it eliminates VGT_FLUSH (for
tess and legacy GS) and PS_PARTIAL_FLUSH (for gfx11) if the preamble
contains them.
v2: only use this on gfx10+ due to stability issues on Stoney and limited
testing
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
Diffstat (limited to 'src/gallium/winsys/amdgpu/drm/amdgpu_cs.c')
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 42 |
1 files changed, 28 insertions, 14 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 0e372a5a155..f507847f773 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1027,16 +1027,9 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs, return true; } -static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib, - unsigned preamble_num_dw, bool preamble_changed) -{ - /* TODO: implement this properly */ - radeon_emit_array(cs, preamble_ib, preamble_num_dw); -} - -static bool -amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, - unsigned preamble_num_dw) +static bool amdgpu_cs_set_preamble(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, + unsigned preamble_num_dw, bool preamble_changed, + bool enable_preemption) { struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys *ws = cs->ws; @@ -1045,6 +1038,23 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i struct pb_buffer *preamble_bo; uint32_t *map; + assert(preamble_ib); + /* The preamble can be set only once for preemption. */ + assert(!enable_preemption || !cs->preamble_ib_bo); + + /* The preamble IB causes GPU hangs on Stoney. To be safe, don't use the preamble IB on + * chips older than gfx10, and instead paste the preamble into the main command buffer. + */ + if (ws->info.gfx_level < GFX10) { + radeon_emit_array(rcs, preamble_ib, preamble_num_dw); + return true; + } + + if (!preamble_changed && !enable_preemption) { + assert(cs->preamble_ib_bo); /* we shouldn't get no-change calls with no preamble */ + return true; + } + /* Create the preamble IB buffer. 
*/ preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment, RADEON_DOMAIN_VRAM, @@ -1070,15 +1080,20 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i map[preamble_num_dw++] = PKT3_NOP_PAD; amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo); + /* Wait until the CS job finishes, so that we don't mess up IB_PREAMBLE while the IB is being + * submitted. + */ + amdgpu_cs_sync_flush(rcs); + for (unsigned i = 0; i < 2; i++) { csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va; csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4; - csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT; + if (enable_preemption) + csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT; } - assert(!cs->preamble_ib_bo); - cs->preamble_ib_bo = preamble_bo; + radeon_bo_reference(&ws->dummy_ws.base, &cs->preamble_ib_bo, preamble_bo); amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ | RADEON_PRIO_IB, 0); @@ -1841,7 +1856,6 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; ws->base.cs_create = amdgpu_cs_create; ws->base.cs_set_preamble = amdgpu_cs_set_preamble; - ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption; ws->base.cs_destroy = amdgpu_cs_destroy; ws->base.cs_add_buffer = amdgpu_cs_add_buffer; ws->base.cs_validate = amdgpu_cs_validate; |