diff options
author | Alyssa Rosenzweig <alyssa@rosenzweig.io> | 2022-11-17 18:10:11 -0500 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-11-19 15:33:16 +0000 |
commit | d7511ad784ce0fc6a5060de2d7c969a1300a0fb9 (patch) | |
tree | a40783e954b8377e9a9de35f51f023683f219083 | |
parent | de1eb9400f1d0d64627630dd28d11073ad1c1a7e (diff) |
asahi: Add batch tracking logic
We already have the notion of an agx_batch, which encapsulates a render
pass. Extend the logic to allow multiple in-flight batches per context, avoiding
a flush in set_framebuffer_state and improving performance for certain
applications designed for immediate-mode renderers (IMRs) that ping-pong unnecessarily between FBOs. I
don't have such an application immediately in mind, but I wanted to get this
flag-day out of the way while the driver is still small and flexible.
The driver was written from day 1 with batch tracking in mind, so this is a
relatively small change to actually wire it up, but there are lots of little
details to get right.
The code itself is mostly a copy/paste of panfrost, which in turn draws
inspiration from freedreno and v3d.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19865>
-rw-r--r-- | src/gallium/drivers/asahi/agx_batch.c | 225 | ||||
-rw-r--r-- | src/gallium/drivers/asahi/agx_pipe.c | 60 | ||||
-rw-r--r-- | src/gallium/drivers/asahi/agx_state.c | 134 | ||||
-rw-r--r-- | src/gallium/drivers/asahi/agx_state.h | 34 |
4 files changed, 315 insertions, 138 deletions
diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c index 549f928c21b..b1aa11ac358 100644 --- a/src/gallium/drivers/asahi/agx_batch.c +++ b/src/gallium/drivers/asahi/agx_batch.c @@ -1,16 +1,202 @@ /* * Copyright 2022 Alyssa Rosenzweig + * Copyright 2019-2020 Collabora, Ltd. * SPDX-License-Identifier: MIT */ #include "agx_state.h" +#define foreach_batch(ctx, idx) \ + BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES) + +static unsigned +agx_batch_idx(struct agx_batch *batch) +{ + return batch - batch->ctx->batches.slots; +} + +bool +agx_batch_is_active(struct agx_batch *batch) +{ + return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch)); +} + +static void +agx_batch_init(struct agx_context *ctx, + const struct pipe_framebuffer_state *key, + struct agx_batch *batch) +{ + struct agx_device *dev = agx_device(ctx->base.screen); + + batch->ctx = ctx; + util_copy_framebuffer_state(&batch->key, key); + batch->seqnum = ++ctx->batches.seqnum; + + agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_SHADER, true); + + /* These allocations can happen only once and will just be zeroed (not freed) + * during batch clean up. The memory is owned by the context. 
+ */ + if (!batch->bo_list.set) { + batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128); + batch->bo_list.word_count = 128; + } else { + memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD)); + } + + if (!batch->encoder) { + batch->encoder = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + batch->encoder_current = batch->encoder->ptr.cpu; + batch->encoder_end = batch->encoder_current + batch->encoder->size; + } else { + batch->encoder_current = batch->encoder->ptr.cpu; + batch->encoder_end = batch->encoder_current + batch->encoder->size; + } + + if (!batch->scissor.bo) { + batch->scissor.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + } + + if (!batch->depth_bias.bo) { + batch->depth_bias.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + } + + batch->clear = 0; + batch->draw = 0; + batch->load = 0; + batch->clear_depth = 0; + batch->clear_stencil = 0; + batch->scissor.count = 0; + batch->depth_bias.count = 0; + batch->varyings = 0; + + /* We need to emit prim state at the start. Max collides with all. 
*/ + batch->reduced_prim = PIPE_PRIM_MAX; + + if (batch->key.zsbuf) { + agx_batch_writes(batch, agx_resource(key->zsbuf->texture)); + } + + for (unsigned i = 0; i < key->nr_cbufs; ++i) { + agx_batch_writes(batch, agx_resource(key->cbufs[i]->texture)); + } + + unsigned batch_idx = agx_batch_idx(batch); + BITSET_SET(ctx->batches.active, batch_idx); + + agx_batch_init_state(batch); +} + void -agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason) +agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch) +{ + struct agx_device *dev = agx_device(ctx->base.screen); + assert(batch->ctx == ctx); + + if (ctx->batch == batch) + ctx->batch = NULL; + + /* There is no more writer for anything we wrote recorded on this context */ + hash_table_foreach(ctx->writer, ent) { + if (ent->data == batch) + _mesa_hash_table_remove(ctx->writer, ent); + } + + int handle; + AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) { + agx_bo_unreference(agx_lookup_bo(dev, handle)); + } + + agx_pool_cleanup(&batch->pool); + agx_pool_cleanup(&batch->pipeline_pool); + util_unreference_framebuffer_state(&batch->key); + + unsigned batch_idx = agx_batch_idx(batch); + BITSET_CLEAR(ctx->batches.active, batch_idx); +} + +static struct agx_batch * +agx_get_batch_for_framebuffer(struct agx_context *ctx, + const struct pipe_framebuffer_state *state) { - /* TODO: Turn into loop when we support multiple batches */ - if (ctx->batch) { - struct agx_batch *batch = ctx->batch; + /* Look if we have a matching batch */ + unsigned i; + foreach_batch(ctx, i) { + struct agx_batch *candidate = &ctx->batches.slots[i]; + + if (util_framebuffer_state_equal(&candidate->key, state)) { + /* We found a match, increase the seqnum for the LRU + * eviction logic. 
+ */ + candidate->seqnum = ++ctx->batches.seqnum; + return candidate; + } + } + + /* Look if we have a free batch */ + struct agx_batch *batch = NULL; + for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) { + if (!BITSET_TEST(ctx->batches.active, i)) { + batch = &ctx->batches.slots[i]; + break; + } + } + + /* Else, evict something */ + if (!batch) { + for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) { + struct agx_batch *candidate = &ctx->batches.slots[i]; + + if (!batch || batch->seqnum > candidate->seqnum) + batch = candidate; + } + + agx_flush_batch(ctx, batch); + } + + /* Batch is now free */ + agx_batch_init(ctx, state, batch); + return batch; +} + +struct agx_batch * +agx_get_batch(struct agx_context *ctx) +{ + if (!ctx->batch) { + ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer); + agx_dirty_all(ctx); + } + + assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key)); + return ctx->batch; +} + +void +agx_flush_all(struct agx_context *ctx, const char *reason) +{ + if (reason) + perf_debug_ctx(ctx, "Flushing due to: %s\n", reason); + + unsigned idx; + foreach_batch(ctx, idx) { + agx_flush_batch(ctx, &ctx->batches.slots[idx]); + } +} + +static void +agx_flush_readers_except(struct agx_context *ctx, + struct agx_resource *rsrc, + struct agx_batch *except, + const char *reason) +{ + unsigned idx; + + foreach_batch(ctx, idx) { + struct agx_batch *batch = &ctx->batches.slots[idx]; + + if (batch == except) + continue; if (agx_batch_uses_bo(batch, rsrc->bo)) { perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason); @@ -19,20 +205,38 @@ agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char } } -void -agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason) +static void +agx_flush_writer_except(struct agx_context *ctx, + struct agx_resource *rsrc, + struct agx_batch *except, + const char *reason) { struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc); - if (ent) { + if 
(ent && ent->data != except) { perf_debug_ctx(ctx, "Flush writer due to: %s\n", reason); agx_flush_batch(ctx, ent->data); } } void +agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason) +{ + agx_flush_readers_except(ctx, rsrc, NULL, reason); +} + +void +agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason) +{ + agx_flush_writer_except(ctx, rsrc, NULL, reason); +} + +void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc) { + /* Hazard: read-after-write */ + agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch"); + agx_batch_add_bo(batch, rsrc->bo); if (rsrc->separate_stencil) @@ -45,12 +249,15 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc) struct agx_context *ctx = batch->ctx; struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc); + agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch"); + /* Nothing to do if we're already writing */ if (ent && ent->data == batch) return; - /* Flush the old writer if there is one */ - agx_flush_writer(ctx, rsrc, "Multiple writers"); + /* Hazard: writer-after-write, write-after-read */ + if (ent) + agx_flush_writer(ctx, rsrc, "Multiple writers"); /* Write is strictly stronger than a read */ agx_batch_reads(batch, rsrc); diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index baea9e3d63e..cacc5bdba4c 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -659,7 +659,7 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor const union pipe_color_union *color, double depth, unsigned stencil) { struct agx_context *ctx = agx_context(pctx); - struct agx_batch *batch = ctx->batch; + struct agx_batch *batch = agx_get_batch(ctx); unsigned fastclear = buffers & ~(batch->draw | batch->load); unsigned slowclear = buffers & ~fastclear; @@ -690,11 +690,11 @@ agx_clear(struct 
pipe_context *pctx, unsigned buffers, const struct pipe_scissor assert((batch->draw & slowclear) == slowclear); } - static void agx_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource) { + agx_flush_writer(agx_context(ctx), agx_resource(resource), "flush_resource"); } /* @@ -710,7 +710,7 @@ agx_flush(struct pipe_context *pctx, if (fence) *fence = NULL; - agx_flush_batch(ctx, ctx->batch); + agx_flush_all(ctx, "Gallium flush"); } void @@ -718,9 +718,13 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) { struct agx_device *dev = agx_device(ctx->base.screen); + assert(agx_batch_is_active(batch)); + /* Nothing to do */ - if (!(batch->draw | batch->clear)) + if (!(batch->draw | batch->clear)) { + agx_batch_cleanup(ctx, batch); return; + } /* Finalize the encoder */ uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 }; @@ -761,7 +765,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) pipeline_store = agx_build_store_pipeline(batch, dev->internal.store, - agx_pool_upload(&batch->pool, ctx->render_target[0], sizeof(ctx->render_target))); + agx_batch_upload_pbe(batch, 0)); } for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { @@ -851,37 +855,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) agxdecode_next_frame(); } - AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) { - agx_bo_unreference(agx_lookup_bo(dev, handle)); - } - - /* There is no more writer for anything we wrote recorded on this context */ - hash_table_foreach(ctx->writer, ent) { - if (ent->data == batch) - _mesa_hash_table_remove(ctx->writer, ent); - } - - memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD)); - agx_pool_cleanup(&batch->pool); - agx_pool_cleanup(&batch->pipeline_pool); - agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true); - agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true); - batch->clear = 0; - batch->draw = 0; - batch->load = 0; - 
batch->encoder_current = batch->encoder->ptr.cpu; - batch->encoder_end = batch->encoder_current + batch->encoder->size; - batch->scissor.count = 0; - - agx_dirty_all(ctx); - agx_batch_init_state(batch); - - /* After resetting the batch, rebind the framebuffer so we update resource - * tracking logic and the BO lists. - * - * XXX: This is a hack to workaround lack of proper batch tracking. - */ - ctx->base.set_framebuffer_state(&ctx->base, &ctx->framebuffer); + agx_batch_cleanup(ctx, batch); } static void @@ -919,20 +893,6 @@ agx_create_context(struct pipe_screen *screen, pctx->screen = screen; pctx->priv = priv; - ctx->batch = rzalloc(ctx, struct agx_batch); - ctx->batch->ctx = ctx; - ctx->batch->bo_list.set = rzalloc_array(ctx->batch, BITSET_WORD, 128); - ctx->batch->bo_list.word_count = 128; - agx_pool_init(&ctx->batch->pool, - agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true); - agx_pool_init(&ctx->batch->pipeline_pool, - agx_device(screen), AGX_MEMORY_TYPE_SHADER, true); - ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); - ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; - ctx->batch->encoder_end = ctx->batch->encoder_current + ctx->batch->encoder->size; - ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); - ctx->batch->depth_bias.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); - ctx->writer = _mesa_pointer_hash_table_create(ctx); /* Upload fixed shaders (TODO: compile them?) 
*/ diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 2c4b3a74cd4..f5c0cdbb559 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -781,63 +781,61 @@ agx_set_framebuffer_state(struct pipe_context *pctx, if (!state) return; - /* XXX: eliminate this flush with batch tracking logic */ - agx_flush_all(ctx, "Framebuffer switch"); - util_copy_framebuffer_state(&ctx->framebuffer, state); - util_copy_framebuffer_state(&ctx->batch->key, state); - ctx->dirty = ~0; - - if (state->zsbuf) - agx_batch_writes(ctx->batch, agx_resource(state->zsbuf->texture)); + ctx->batch = NULL; + agx_dirty_all(ctx); +} +uint64_t +agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt) +{ + struct pipe_surface *surf = batch->key.cbufs[rt]; + struct agx_resource *tex = agx_resource(surf->texture); + const struct util_format_description *desc = + util_format_description(surf->format); + unsigned level = surf->u.tex.level; + unsigned layer = surf->u.tex.first_layer; - for (unsigned i = 0; i < state->nr_cbufs; ++i) { - struct pipe_surface *surf = state->cbufs[i]; - struct agx_resource *tex = agx_resource(surf->texture); - const struct util_format_description *desc = - util_format_description(surf->format); - unsigned level = surf->u.tex.level; - unsigned layer = surf->u.tex.first_layer; + assert(surf->u.tex.last_layer == layer); - agx_batch_writes(ctx->batch, tex); + struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, AGX_RENDER_TARGET_LENGTH, 256); - assert(surf->u.tex.last_layer == layer); + agx_pack(T.cpu, RENDER_TARGET, cfg) { + cfg.layout = agx_translate_layout(tex->layout.tiling); + cfg.channels = agx_pixel_format[surf->format].channels; + cfg.type = agx_pixel_format[surf->format].type; - agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) { - cfg.layout = agx_translate_layout(tex->layout.tiling); - cfg.channels = agx_pixel_format[surf->format].channels; - cfg.type = 
agx_pixel_format[surf->format].type; + assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); + cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3; - assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); - cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3; + if (desc->nr_channels >= 2) + cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3; - if (desc->nr_channels >= 2) - cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3; + if (desc->nr_channels >= 3) + cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3; - if (desc->nr_channels >= 3) - cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3; + if (desc->nr_channels >= 4) + cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3; - if (desc->nr_channels >= 4) - cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3; + cfg.width = batch->key.width; + cfg.height = batch->key.height; + cfg.level = surf->u.tex.level; + cfg.buffer = agx_map_texture_gpu(tex, layer); + cfg.unk_mipmapped = tex->mipmapped; - cfg.width = state->width; - cfg.height = state->height; - cfg.level = surf->u.tex.level; - cfg.buffer = agx_map_texture_gpu(tex, layer); - cfg.unk_mipmapped = tex->mipmapped; + if (tex->layout.tiling == AIL_TILING_LINEAR) { + cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4; + cfg.levels = 1; + } else { + cfg.unk_tiled = true; + cfg.levels = tex->base.last_level + 1; + } + }; - if (tex->layout.tiling == AIL_TILING_LINEAR) { - cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4; - cfg.levels = 1; - } else { - cfg.unk_tiled = true; - cfg.levels = tex->base.last_level + 1; - } - }; - } + return T.gpu; } + /* Likewise constant buffers, textures, and samplers are handled in a common * per-draw path, with dirty tracking to reduce the costs involved. 
*/ @@ -1224,18 +1222,20 @@ agx_update_vs(struct agx_context *ctx) } static bool -agx_update_fs(struct agx_context *ctx) +agx_update_fs(struct agx_batch *batch) { + struct agx_context *ctx = batch->ctx; + struct asahi_shader_key key = { - .nr_cbufs = ctx->batch->key.nr_cbufs, + .nr_cbufs = batch->key.nr_cbufs, .clip_plane_enable = ctx->rast->base.clip_plane_enable, }; - if (ctx->batch->reduced_prim == PIPE_PRIM_POINTS) + if (batch->reduced_prim == PIPE_PRIM_POINTS) key.sprite_coord_enable = ctx->rast->base.sprite_coord_enable; for (unsigned i = 0; i < key.nr_cbufs; ++i) { - struct pipe_surface *surf = ctx->batch->key.cbufs[i]; + struct pipe_surface *surf = batch->key.cbufs[i]; if (surf) { enum pipe_format fmt = surf->format; @@ -1557,9 +1557,6 @@ agx_batch_init_state(struct agx_batch *batch) agx_ppp_fini(&out, &ppp); batch->encoder_current = out; - - /* We need to emit prim state at the start. Max collides with all. */ - batch->reduced_prim = PIPE_PRIM_MAX; } static enum agx_object_type @@ -1586,9 +1583,10 @@ agx_pass_type_for_shader(struct agx_shader_info *info) #define MAX_PPP_UPDATES 2 static uint8_t * -agx_encode_state(struct agx_context *ctx, uint8_t *out, +agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, bool is_points) { + struct agx_context *ctx = batch->ctx; struct agx_rasterizer *rast = ctx->rast; unsigned ppp_updates = 0; @@ -1613,7 +1611,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out, out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH; agx_pack(out, VDM_STATE_VERTEX_SHADER_WORD_1, cfg) { - cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->vs, PIPE_SHADER_VERTEX); + cfg.pipeline = agx_build_pipeline(batch, ctx->vs, PIPE_SHADER_VERTEX); } out += AGX_VDM_STATE_VERTEX_SHADER_WORD_1_LENGTH; @@ -1634,17 +1632,17 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out, out += 4; } - struct agx_pool *pool = &ctx->batch->pool; + struct agx_pool *pool = &batch->pool; struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs; 
unsigned zbias = 0; if (ctx->rast->base.offset_tri) { - zbias = agx_upload_depth_bias(ctx->batch, &ctx->rast->base); + zbias = agx_upload_depth_bias(batch, &ctx->rast->base); ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS; } if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) { - agx_upload_viewport_scissor(pool, ctx->batch, &out, &ctx->viewport, + agx_upload_viewport_scissor(pool, batch, &out, &ctx->viewport, ctx->rast->base.scissor ? &ctx->scissor : NULL, zbias); } @@ -1652,7 +1650,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out, bool varyings_dirty = false; if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS)) { - ctx->batch->varyings = agx_link_varyings_vs_fs(&ctx->batch->pipeline_pool, + batch->varyings = agx_link_varyings_vs_fs(&batch->pipeline_pool, &ctx->vs->info.varyings.vs, &ctx->fs->info.varyings.fs, ctx->rast->base.flatshade_first); @@ -1774,13 +1772,13 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out, if (IS_DIRTY(FS) || varyings_dirty) { unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) { - cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->fs, PIPE_SHADER_FRAGMENT), + cfg.pipeline = agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT), cfg.uniform_register_count = ctx->fs->info.push_count; cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs; cfg.texture_state_register_count = frag_tex_count; cfg.sampler_state_register_count = frag_tex_count; cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings; - cfg.cf_bindings = ctx->batch->varyings; + cfg.cf_bindings = batch->varyings; /* XXX: This is probably wrong */ cfg.unknown_30 = frag_tex_count >= 4; @@ -1883,18 +1881,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } struct agx_context *ctx = agx_context(pctx); - struct agx_batch *batch = ctx->batch; + struct agx_batch *batch = agx_get_batch(ctx); if (agx_scissor_culls_everything(ctx)) return; -#ifndef NDEBUG 
- /* For debugging dirty tracking, mark all state as dirty every draw, forcing - * everything to be re-emitted fresh. - */ - if (unlikely(agx_device(pctx->screen)->debug & AGX_DBG_DIRTY)) - agx_dirty_all(ctx); -#endif + agx_dirty_all(ctx); /* Dirty track the reduced prim: lines vs points vs triangles */ enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode); @@ -1902,8 +1894,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, batch->reduced_prim = reduced_prim; /* TODO: masks */ - ctx->batch->draw |= ~0; - ctx->batch->load |= ~0; + batch->draw |= ~0; + batch->load |= ~0; /* TODO: These are expensive calls, consider finer dirty tracking */ if (agx_update_vs(ctx)) @@ -1911,7 +1903,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, else if (ctx->stage[PIPE_SHADER_VERTEX].dirty) ctx->dirty |= AGX_DIRTY_VS; - if (agx_update_fs(ctx)) + if (agx_update_fs(batch)) ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG; else if (ctx->stage[PIPE_SHADER_FRAGMENT].dirty) ctx->dirty |= AGX_DIRTY_FS; @@ -1939,7 +1931,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, AGX_INDEX_LIST_START_LENGTH + AGX_INDEX_LIST_BUFFER_SIZE_LENGTH); - uint8_t *out = agx_encode_state(ctx, batch->encoder_current, + uint8_t *out = agx_encode_state(batch, batch->encoder_current, reduced_prim == PIPE_PRIM_LINES, reduced_prim == PIPE_PRIM_POINTS); @@ -2008,6 +2000,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, assert(batch->encoder_current <= batch->encoder_end && "Failed to reserve sufficient space in encoder"); ctx->dirty = 0; + + assert(batch == agx_get_batch(ctx) && "batch should not change under us"); } void agx_init_state_functions(struct pipe_context *ctx); diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index a6291264dad..22f9d7b37ce 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -95,6 
+95,7 @@ struct agx_array { struct agx_batch { struct agx_context *ctx; struct pipe_framebuffer_state key; + uint64_t seqnum; /* PIPE_CLEAR_* bitmask */ uint32_t clear, draw, load; @@ -174,11 +175,24 @@ enum agx_dirty { AGX_DIRTY_FS_PROG = BITFIELD_BIT(11), }; +#define AGX_MAX_BATCHES (2) + struct agx_context { struct pipe_context base; struct agx_compiled_shader *vs, *fs; uint32_t dirty; + /* Set of batches. When full, the LRU entry (the batch with the smallest + * seqnum) is flushed to free a slot. + */ + struct { + uint64_t seqnum; + struct agx_batch slots[AGX_MAX_BATCHES]; + + /** Set of active batches for faster traversal */ + BITSET_DECLARE(active, AGX_MAX_BATCHES); + } batches; + struct agx_batch *batch; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; @@ -204,8 +218,6 @@ struct agx_context { struct util_debug_callback debug; bool is_noop; - uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH]; - struct blitter_context *blitter; /* Map of agx_resource to agx_batch that writes that resource */ @@ -350,6 +362,9 @@ uint64_t agx_push_location(struct agx_batch *batch, struct agx_push push, enum pipe_shader_type stage); +bool +agx_batch_is_active(struct agx_batch *batch); + uint64_t agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf); @@ -360,6 +375,9 @@ agx_build_store_pipeline(struct agx_batch *batch, uint32_t code, uint64_t agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf); +uint64_t +agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt); + /* Add a BO to a batch. This needs to be amortized O(1) since it's called in * hot paths. 
To achieve this we model BO lists by bit sets */ @@ -383,7 +401,7 @@ agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo) { /* Double the size of the BO list if we run out, this is amortized O(1) */ if (unlikely(bo->handle > agx_batch_bo_list_bits(batch))) { - batch->bo_list.set = rerzalloc(batch, batch->bo_list.set, BITSET_WORD, + batch->bo_list.set = rerzalloc(batch->ctx, batch->bo_list.set, BITSET_WORD, batch->bo_list.word_count, batch->bo_list.word_count * 2); batch->bo_list.word_count *= 2; @@ -408,6 +426,7 @@ agx_batch_num_bo(struct agx_batch *batch) BITSET_FOREACH_SET(handle, (batch)->bo_list.set, agx_batch_bo_list_bits(batch)) void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch); +void agx_flush_all(struct agx_context *ctx, const char *reason); void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason); void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason); @@ -415,6 +434,9 @@ void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc); void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc); +struct agx_batch *agx_get_batch(struct agx_context *ctx); +void agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch); + /* Blit shaders */ void agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter, @@ -426,12 +448,6 @@ void agx_blit(struct pipe_context *pipe, void agx_internal_shaders(struct agx_device *dev); /* Batch logic */ -static void -agx_flush_all(struct agx_context *ctx, const char *reason) -{ - perf_debug_ctx(ctx, "Flushing due to: %s\n", reason); - ctx->base.flush(&ctx->base, NULL, 0); -} void agx_batch_init_state(struct agx_batch *batch); |