diff options
| | | |
|---|---|---|
| author | Boris Brezillon <boris.brezillon@collabora.com> | 2021-04-19 17:58:36 +0200 |
| committer | Marge Bot <eric+marge@anholt.net> | 2021-04-22 16:59:18 +0000 |
| commit | 500616706148d2e340bbdfecad45204b515ae9b5 (patch) | |
| tree | 18b1f1e810edee86231e1cbbf3a31a707d5c131c | |
| parent | 9b22cda364d92bd50c3bed12a28080ba0252f04d (diff) | |

panfrost: Hook-up indirect dispatch support
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10332>

| | | |
|---|---|---|
| -rw-r--r-- | src/gallium/drivers/panfrost/pan_cmdstream.c | 20 |
| -rw-r--r-- | src/gallium/drivers/panfrost/pan_compute.c | 33 |
| -rw-r--r-- | src/gallium/drivers/panfrost/pan_job.h | 3 |
| -rw-r--r-- | src/gallium/drivers/panfrost/pan_screen.c | 3 |

4 files changed, 50 insertions, 9 deletions
```diff
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 1c50dbf443f..610b787301f 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -1007,11 +1007,12 @@ panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, unsigned rt,
 }
 
 static void
-panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
+panfrost_upload_sysvals(struct panfrost_batch *batch,
+                        const struct panfrost_ptr *ptr,
                         struct panfrost_shader_state *ss,
                         enum pipe_shader_type st)
 {
-        struct sysval_uniform *uniforms = (void *)buf;
+        struct sysval_uniform *uniforms = ptr->cpu;
 
         for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
                 int sysval = ss->info.sysvals.sysvals[i];
@@ -1036,6 +1037,10 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
                                                     &uniforms[i]);
                         break;
                 case PAN_SYSVAL_NUM_WORK_GROUPS:
+                        for (unsigned j = 0; j < 3; j++) {
+                                batch->num_wg_sysval[j] =
+                                        ptr->gpu + (i * sizeof(*uniforms)) + (j * 4);
+                        }
                         panfrost_upload_num_work_groups_sysval(batch,
                                                                &uniforms[i]);
                         break;
@@ -1115,7 +1120,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
                 panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
 
         /* Upload sysvals requested by the shader */
-        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
+        panfrost_upload_sysvals(batch, &transfer, ss, stage);
 
         /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
         struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
@@ -1171,6 +1176,15 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
         for (unsigned i = 0; i < ss->info.push.count; ++i) {
                 struct panfrost_ubo_word src = ss->info.push.words[i];
 
+                if (src.ubo == sysval_ubo) {
+                        unsigned sysval_idx = src.offset / 16;
+                        unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
+                        if (sysval_type == PAN_SYSVAL_NUM_WORK_GROUPS) {
+                                unsigned word = (src.offset % 16) / 4;
+
+                                batch->num_wg_sysval[word] = push_transfer.gpu + (4 * i);
+                        }
+                }
                 /* Map the UBO, this should be cheap. However this is reading
                  * from write-combine memory which is _very_ slow. It might pay
                  * off to upload sysvals to a staging buffer on the CPU on the
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 9624b7dec79..a2023ac8e8c 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -30,6 +30,7 @@
 #include "pan_cmdstream.h"
 #include "panfrost-quirks.h"
 #include "pan_bo.h"
+#include "pan_indirect_dispatch.h"
 #include "pan_shader.h"
 #include "util/u_memory.h"
 #include "nir_serialize.h"
@@ -106,9 +107,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
          */
         panfrost_batch_reserve_tls(batch, true);
 
-        /* TODO: Indirect compute dispatch */
-        assert(!info->indirect);
-
         ctx->compute_grid = info;
 
         struct panfrost_ptr t =
@@ -131,9 +129,13 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         void *invocation =
                 pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
+        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
+
+        if (info->indirect)
+                num_wg[0] = num_wg[1] = num_wg[2] = 1;
+
         panfrost_pack_work_groups_compute(invocation,
-                                          info->grid[0], info->grid[1],
-                                          info->grid[2],
+                                          num_wg[0], num_wg[1], num_wg[2],
                                           info->block[0], info->block[1],
                                           info->block[2],
                                           false);
@@ -162,8 +164,27 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
 
+        unsigned indirect_dep = 0;
+        if (info->indirect) {
+                struct pan_indirect_dispatch_info indirect = {
+                        .job = t.gpu,
+                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
+                                        info->indirect_offset,
+                        .num_wg_sysval = {
+                                batch->num_wg_sysval[0],
+                                batch->num_wg_sysval[1],
+                                batch->num_wg_sysval[2],
+                        },
+                };
+
+                indirect_dep = pan_indirect_dispatch_emit(&batch->pool,
+                                                          &batch->scoreboard,
+                                                          &indirect);
+        }
+
         panfrost_add_job(&batch->pool, &batch->scoreboard,
-                         MALI_JOB_TYPE_COMPUTE, true, false, 0, 0, &t, true);
+                         MALI_JOB_TYPE_COMPUTE, true, false,
+                         indirect_dep, 0, &t, false);
         panfrost_flush_all_batches(ctx);
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 0c8e96a4c80..6906c7c8d78 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -127,6 +127,9 @@ struct panfrost_batch {
         /* Indirect draw data */
         struct panfrost_ptr indirect_draw_ctx;
         unsigned indirect_draw_job_id;
+
+        /* Keep the num_work_groups sysval around for indirect dispatch */
+        mali_ptr num_wg_sysval[3];
 };
 
 /* Functions for managing the above */
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index ed1266bbc1d..67d25e89951 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -50,6 +50,7 @@
 #include "pan_resource.h"
 #include "pan_public.h"
 #include "pan_util.h"
+#include "pan_indirect_dispatch.h"
 #include "pan_indirect_draw.h"
 #include "decode.h"
 
@@ -696,6 +697,7 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
 {
         struct panfrost_device *dev = pan_device(pscreen);
 
+        pan_indirect_dispatch_cleanup(dev);
         panfrost_cleanup_indirect_draw_shaders(dev);
         pan_blitter_cleanup(dev);
         pan_blend_shaders_cleanup(dev);
@@ -872,6 +874,7 @@ panfrost_create_screen(int fd, struct renderonly *ro)
         panfrost_resource_screen_init(&screen->base);
         pan_blend_shaders_init(dev);
         panfrost_init_indirect_draw_shaders(dev);
+        pan_indirect_dispatch_init(dev);
         pan_blitter_init(dev);
 
         return &screen->base;
```