diff options
author | Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> | 2021-09-07 20:12:30 +0200 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-10-07 09:21:05 +0000 |
commit | a1dea665d023df0f0d240a32a5c7df259c3db92c (patch) | |
tree | fd5fcb01047a46f7c8d7a2a5756cd4c27e21f5d2 | |
parent | 22a1b7c5b3a9cdbf05a0fd97fb193763a525b732 (diff) |
radeonsi: make the DRI_PRIME dGPU -> iGPU copy async
Doing this copy using SDMA frees up the dGPU to do more
interesting things while the copy is happening; for instance
the rendering of the next frame.
hw queue activity before:
------------------------
dGPU:
gfx: [renderframe 1][copy->iGPU][renderframe 2][copy->iGPU]...
iGPU:
gfx: [Xorg] [Xorg]
hw queue activity after:
-----------------------
dGPU:
gfx: [renderframe 1][renderframe 2][renderframe 3]....
sdma: [copy->iGPU] [copy->iGPU] [copy->iGPU]
iGPU:
gfx: [Xorg] [Xorg] ...
If SDMA isn't available or can't do the copy, use an async compute
context instead.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12763>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 6de37ab2bd89..bf5ccb1cc9a5 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1205,11 +1205,48 @@ resolve_to_temp: static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) { struct si_context *sctx = (struct si_context *)ctx; + struct si_texture *sdst = (struct si_texture *)info->dst.resource; if (do_hardware_msaa_resolve(ctx, info)) { return; } + if (info->is_dri_blit_image && sdst->surface.is_linear && + sctx->chip_class >= GFX7 && sdst->surface.flags & RADEON_SURF_IMPORTED) { + struct si_texture *ssrc = (struct si_texture *)info->src.resource; + /* Use SDMA or async compute when copying to a DRI_PRIME imported linear surface. */ + bool async_copy = info->dst.box.x == 0 && info->dst.box.y == 0 && info->dst.box.z == 0 && + info->src.box.x == 0 && info->src.box.y == 0 && info->src.box.z == 0 && + info->dst.level == 0 && info->src.level == 0 && + info->src.box.width == info->dst.resource->width0 && + info->src.box.height == info->dst.resource->height0 && + info->src.box.depth == 1 && util_can_blit_via_copy_region(info, true); + /* Try SDMA first... */ + /* TODO: figure out why SDMA copies are slow on GFX10_3 */ + if (async_copy && sctx->chip_class < GFX10_3 && si_sdma_copy_image(sctx, sdst, ssrc)) + return; + + /* ... and use async compute as the fallback. 
*/ + if (async_copy) { + struct si_screen *sscreen = sctx->screen; + + simple_mtx_lock(&sscreen->async_compute_context_lock); + if (!sscreen->async_compute_context) + si_init_aux_async_compute_ctx(sscreen); + + if (sscreen->async_compute_context) { + si_compute_copy_image((struct si_context*)sctx->screen->async_compute_context, + info->dst.resource, 0, info->src.resource, 0, 0, 0, 0, + &info->src.box, false, 0); + si_flush_gfx_cs((struct si_context*)sctx->screen->async_compute_context, 0, NULL); + simple_mtx_unlock(&sscreen->async_compute_context_lock); + return; + } + + simple_mtx_unlock(&sscreen->async_compute_context_lock); + } + } + if (unlikely(sctx->thread_trace_enabled)) sctx->sqtt_next_event = EventCmdCopyImage; |