summary | refs | log | tree | commit | diff
path: root/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2022-05-14 22:16:16 -0400
committer: Marge Bot <emma+marge@anholt.net> 2022-06-11 11:14:16 +0000
commit: 8edafaa25c5d649af6c016a61383d784a1ebb078 (patch)
tree: 1359d12c17bcfd7fc01276c633cabdd988567702 /src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
parent: 1592921c599620524fd03575bcc7f81594a712bc (diff)
winsys/amdgpu: use AMDGPU_IB_FLAG_PREAMBLE for the CS preamble on gfx10+
This skips the preamble for following IBs if the queue receives IBs from the same context back-to-back. This eliminates VGT_FLUSH (for tess and legacy GS) and PS_PARTIAL_FLUSH (for gfx11) in those cases if the preamble contains them.

v2: only use this on gfx10+ due to stability issues on Stoney and limited testing

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
Diffstat (limited to 'src/gallium/winsys/amdgpu/drm/amdgpu_cs.c')
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 42
1 files changed, 28 insertions, 14 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0e372a5a155..f507847f773 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1027,16 +1027,9 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
return true;
}
-static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
- unsigned preamble_num_dw, bool preamble_changed)
-{
- /* TODO: implement this properly */
- radeon_emit_array(cs, preamble_ib, preamble_num_dw);
-}
-
-static bool
-amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
- unsigned preamble_num_dw)
+static bool amdgpu_cs_set_preamble(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
+ unsigned preamble_num_dw, bool preamble_changed,
+ bool enable_preemption)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ws;
@@ -1045,6 +1038,23 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
struct pb_buffer *preamble_bo;
uint32_t *map;
+ assert(preamble_ib);
+ /* The preamble can be set only once for preemption. */
+ assert(!enable_preemption || !cs->preamble_ib_bo);
+
+ /* The preamble IB causes GPU hangs on Stoney. To be safe, don't use the preamble IB on
+ * chips older than gfx10, and instead paste the preamble into the main command buffer.
+ */
+ if (ws->info.gfx_level < GFX10) {
+ radeon_emit_array(rcs, preamble_ib, preamble_num_dw);
+ return true;
+ }
+
+ if (!preamble_changed && !enable_preemption) {
+ assert(cs->preamble_ib_bo); /* we shouldn't get no-change calls with no preamble */
+ return true;
+ }
+
/* Create the preamble IB buffer. */
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
RADEON_DOMAIN_VRAM,
@@ -1070,15 +1080,20 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
map[preamble_num_dw++] = PKT3_NOP_PAD;
amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo);
+ /* Wait until the CS job finishes, so that we don't mess up IB_PREAMBLE while the IB is being
+ * submitted.
+ */
+ amdgpu_cs_sync_flush(rcs);
+
for (unsigned i = 0; i < 2; i++) {
csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
- csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
+ if (enable_preemption)
+ csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
}
- assert(!cs->preamble_ib_bo);
- cs->preamble_ib_bo = preamble_bo;
+ radeon_bo_reference(&ws->dummy_ws.base, &cs->preamble_ib_bo, preamble_bo);
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo,
RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
@@ -1841,7 +1856,6 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
ws->base.cs_set_preamble = amdgpu_cs_set_preamble;
- ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
ws->base.cs_destroy = amdgpu_cs_destroy;
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
ws->base.cs_validate = amdgpu_cs_validate;