diff options
author | Boris Brezillon <boris.brezillon@collabora.com> | 2020-09-08 20:32:41 +0200 |
---|---|---|
committer | Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> | 2020-09-21 07:35:45 -0400 |
commit | 6b923037726beb613179f33986176bc407ca91b0 (patch) | |
tree | 1ee2d0c67caa4402b56d138d52b818fce8f0414c | |
parent | d289209ea68f47411c15a7c46fa2d8c2d1a4a61b (diff) |
panfrost: Avoid copying job descriptors around when we can
Job descriptors are written section by section and are never modified
after they have been emitted. Let's avoid copying things around by allocating
descriptors upfront and letting the scoreboard logic only write the
header section.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6797>
-rw-r--r-- | src/gallium/drivers/panfrost/pan_cmdstream.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_cmdstream.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_compute.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_context.c | 22 | ||||
-rw-r--r-- | src/panfrost/lib/pan_blit.c | 13 | ||||
-rw-r--r-- | src/panfrost/lib/pan_scoreboard.c | 33 | ||||
-rw-r--r-- | src/panfrost/lib/pan_scoreboard.h | 4 |
7 files changed, 57 insertions, 68 deletions
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 499b4a2fb46..b15e6b810c7 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1771,42 +1771,32 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch, void panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch, - void *vertex_job, - void *tiler_job) + const struct panfrost_transfer *vertex_job, + const struct panfrost_transfer *tiler_job) { struct panfrost_context *ctx = batch->ctx; - struct panfrost_device *device = pan_device(ctx->base.screen); bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep; - void *vp = vertex_job + MALI_JOB_HEADER_LENGTH; - size_t vp_size = MALI_COMPUTE_JOB_LENGTH - - MALI_JOB_HEADER_LENGTH; - void *tp = tiler_job + MALI_JOB_HEADER_LENGTH; - bool is_bifrost = device->quirks & IS_BIFROST; - size_t tp_size = (is_bifrost ? - MALI_BIFROST_TILER_JOB_LENGTH : - MALI_MIDGARD_TILER_JOB_LENGTH) - - MALI_JOB_HEADER_LENGTH; if (wallpapering) { /* Inject in reverse order, with "predicted" job indices. 
* THIS IS A HACK XXX */ - panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, - batch->scoreboard.job_index + 2, tp, tp_size, true); - panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0, - vp, vp_size, true); + + panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, + batch->scoreboard.job_index + 2, tiler_job, true); + panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0, + vertex_job, true); return; } /* If rasterizer discard is enable, only submit the vertex */ - unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0, - vp, vp_size, false); + unsigned vertex = panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0, + vertex_job, false); if (ctx->rasterizer->base.rasterizer_discard) return; - panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, vertex, tp, tp_size, - false); + panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, vertex, tiler_job, false); } /* TODO: stop hardcoding this */ diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index ea4729b4316..20abf614b11 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -84,8 +84,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch, void panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch, - void *vertex_job, - void *tiler_job); + const struct panfrost_transfer *vertex_job, + const struct panfrost_transfer *tiler_job); mali_ptr panfrost_emit_sample_locations(struct panfrost_batch *batch); diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index 43907b3e49b..74986992e98 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -104,7 +104,10 @@ 
panfrost_launch_grid(struct pipe_context *pipe, ctx->compute_grid = info; /* TODO: Stub */ - struct mali_compute_job_packed job = { 0 }; + struct panfrost_transfer t = + panfrost_pool_alloc_aligned(&batch->pool, + MALI_COMPUTE_JOB_LENGTH, + 64); /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so * reuse the graphics path for this by lowering to Gallium */ @@ -122,7 +125,7 @@ panfrost_launch_grid(struct pipe_context *pipe, /* Invoke according to the grid info */ void *invocation = - pan_section_ptr(&job, COMPUTE_JOB, INVOCATION); + pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION); panfrost_pack_work_groups_compute(invocation, info->grid[0], info->grid[1], info->grid[2], @@ -130,14 +133,14 @@ panfrost_launch_grid(struct pipe_context *pipe, info->block[2], false); - pan_section_pack(&job, COMPUTE_JOB, PARAMETERS, cfg) { + pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) { cfg.job_task_split = util_logbase2_ceil(info->block[0] + 1) + util_logbase2_ceil(info->block[1] + 1) + util_logbase2_ceil(info->block[2] + 1); } - pan_section_pack(&job, COMPUTE_JOB, DRAW, cfg) { + pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) { cfg.unknown_1 = (dev->quirks & IS_BIFROST) ? 
0x2 : 0x6; cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE); cfg.shared = panfrost_emit_shared_memory(batch, info); @@ -149,12 +152,8 @@ panfrost_launch_grid(struct pipe_context *pipe, PIPE_SHADER_COMPUTE); } - panfrost_new_job(&batch->pool, &batch->scoreboard, - MALI_JOB_TYPE_COMPUTE, true, 0, - ((void *)&job) + MALI_JOB_HEADER_LENGTH, - MALI_COMPUTE_JOB_LENGTH - - MALI_JOB_HEADER_LENGTH, - false); + panfrost_add_job(&batch->pool, &batch->scoreboard, + MALI_JOB_TYPE_COMPUTE, true, 0, &t, true); panfrost_flush_all_batches(ctx, 0); } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index bb79806127e..2e01e6b8f6f 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -454,13 +454,19 @@ panfrost_draw_vbo( ctx->instance_count = info->instance_count; ctx->active_prim = info->mode; - /* bifrost tiler is bigger than midgard's one, so let's use it as a - * generic container for both. - */ - struct mali_bifrost_tiler_job_packed tiler = {}; - struct mali_compute_job_packed vertex = {}; - unsigned vertex_count = ctx->vertex_count; bool is_bifrost = device->quirks & IS_BIFROST; + struct panfrost_transfer tiler = + panfrost_pool_alloc_aligned(&batch->pool, + is_bifrost ? + MALI_BIFROST_TILER_JOB_LENGTH : + MALI_MIDGARD_TILER_JOB_LENGTH, + 64); + struct panfrost_transfer vertex = + panfrost_pool_alloc_aligned(&batch->pool, + MALI_COMPUTE_JOB_LENGTH, + 64); + + unsigned vertex_count = ctx->vertex_count; mali_ptr shared_mem = is_bifrost ? 
panfrost_vt_emit_shared_memory(batch) : @@ -506,9 +512,9 @@ panfrost_draw_vbo( /* Fire off the draw itself */ panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem, - vs_vary, varyings, &vertex); + vs_vary, varyings, vertex.cpu); panfrost_draw_emit_tiler(batch, info, &invocation, shared_mem, indices, - fs_vary, varyings, pos, psiz, &tiler); + fs_vary, varyings, pos, psiz, tiler.cpu); panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler); /* Adjust the batch stack size based on the new shader stack sizes. */ diff --git a/src/panfrost/lib/pan_blit.c b/src/panfrost/lib/pan_blit.c index dd4934deec4..ff1da6fdc05 100644 --- a/src/panfrost/lib/pan_blit.c +++ b/src/panfrost/lib/pan_blit.c @@ -340,9 +340,10 @@ panfrost_load_midg( } } - struct mali_midgard_tiler_job_packed payload = {}; + struct panfrost_transfer t = + panfrost_pool_alloc_aligned(pool, MALI_MIDGARD_TILER_JOB_LENGTH, 64); - pan_section_pack(&payload, MIDGARD_TILER_JOB, DRAW, cfg) { + pan_section_pack(t.cpu, MIDGARD_TILER_JOB, DRAW, cfg) { cfg.unknown_1 = 0x7; cfg.position = coordinates; cfg.textures = panfrost_pool_upload(pool, &texture_t.gpu, sizeof(texture_t.gpu)); @@ -354,16 +355,14 @@ panfrost_load_midg( cfg.shared = fbd; } - pan_section_pack(&payload, MIDGARD_TILER_JOB, PRIMITIVE, cfg) { + pan_section_pack(t.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) { cfg.draw_mode = MALI_DRAW_MODE_TRIANGLES; cfg.index_count = vertex_count; cfg.unknown_3 = 6; } - panfrost_pack_work_groups_compute(pan_section_ptr(&payload, MIDGARD_TILER_JOB, INVOCATION), + panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu, MIDGARD_TILER_JOB, INVOCATION), 1, vertex_count, 1, 1, 1, 1, true); - panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, - pan_section_ptr(&payload, MIDGARD_TILER_JOB, INVOCATION), - MALI_MIDGARD_TILER_JOB_LENGTH - MALI_JOB_HEADER_LENGTH, true); + panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &t, true); } diff --git a/src/panfrost/lib/pan_scoreboard.c 
b/src/panfrost/lib/pan_scoreboard.c index 22f3eaae9f4..b0e2cb3fdd3 100644 --- a/src/panfrost/lib/pan_scoreboard.c +++ b/src/panfrost/lib/pan_scoreboard.c @@ -106,13 +106,13 @@ * not wallpapering and set this, dragons will eat you. */ unsigned -panfrost_new_job( +panfrost_add_job( struct pan_pool *pool, struct pan_scoreboard *scoreboard, enum mali_job_type type, bool barrier, unsigned local_dep, - void *payload, size_t payload_size, + const struct panfrost_transfer *job, bool inject) { unsigned global_dep = 0; @@ -133,24 +133,19 @@ panfrost_new_job( /* Assign the index */ unsigned index = ++scoreboard->job_index; - struct panfrost_transfer transfer = - panfrost_pool_alloc_aligned(pool, MALI_JOB_HEADER_LENGTH + payload_size, 64); - - pan_pack(transfer.cpu, JOB_HEADER, job) { - job.type = type; - job.barrier = barrier; - job.index = index; - job.dependency_1 = local_dep; - job.dependency_2 = global_dep; + pan_pack(job->cpu, JOB_HEADER, header) { + header.type = type; + header.barrier = barrier; + header.index = index; + header.dependency_1 = local_dep; + header.dependency_2 = global_dep; if (inject) - job.next = scoreboard->first_job; + header.next = scoreboard->first_job; } - memcpy(transfer.cpu + MALI_JOB_HEADER_LENGTH, payload, payload_size); - if (inject) { - scoreboard->first_job = transfer.gpu; + scoreboard->first_job = job->gpu; return index; } @@ -164,13 +159,13 @@ panfrost_new_job( * TODO: Find a way to defer last job header emission until we * have a new job to queue or the batch is ready for execution. 
*/ - scoreboard->prev_job->opaque[6] = transfer.gpu; - scoreboard->prev_job->opaque[7] = transfer.gpu >> 32; + scoreboard->prev_job->opaque[6] = job->gpu; + scoreboard->prev_job->opaque[7] = job->gpu >> 32; } else { - scoreboard->first_job = transfer.gpu; + scoreboard->first_job = job->gpu; } - scoreboard->prev_job = (struct mali_job_header_packed *)transfer.cpu; + scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu; return index; } diff --git a/src/panfrost/lib/pan_scoreboard.h b/src/panfrost/lib/pan_scoreboard.h index 53c65d43764..17fbc32549d 100644 --- a/src/panfrost/lib/pan_scoreboard.h +++ b/src/panfrost/lib/pan_scoreboard.h @@ -49,13 +49,13 @@ struct pan_scoreboard { }; unsigned -panfrost_new_job( +panfrost_add_job( struct pan_pool *pool, struct pan_scoreboard *scoreboard, enum mali_job_type type, bool barrier, unsigned local_dep, - void *payload, size_t payload_size, + const struct panfrost_transfer *job, bool inject); void panfrost_scoreboard_initialize_tiler( |