author    Alyssa Rosenzweig <alyssa@rosenzweig.io>    2022-11-17 18:10:11 -0500
committer Marge Bot <emma+marge@anholt.net>           2022-11-19 15:33:16 +0000
commit    d7511ad784ce0fc6a5060de2d7c969a1300a0fb9 (patch)
tree      a40783e954b8377e9a9de35f51f023683f219083
parent    de1eb9400f1d0d64627630dd28d11073ad1c1a7e (diff)
asahi: Add batch tracking logic
We already have the notion of an agx_batch, which encapsulates a render pass.
Extend the logic to allow multiple in-flight batches per context, avoiding a
flush in set_framebuffer_state and improving performance for certain
applications designed for IMRs that ping-pong unnecessarily between FBOs.

I don't have such an application immediately in mind, but I wanted to get this
flag-day out of the way while the driver is still small and flexible. The
driver was written from day 1 with batch tracking in mind, so this is a
relatively small change to actually wire it up, but there are lots of little
details to get right.

The code itself is mostly a copy/paste of panfrost, which in turn draws
inspiration from freedreno and v3d.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19865>
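For orientation, the core of the patch is the lookup/eviction flow in
agx_get_batch_for_framebuffer: reuse a batch whose framebuffer key matches,
otherwise take a free slot, otherwise flush the least-recently-used batch
(the one with the smallest seqnum). Below is a minimal standalone sketch of
that flow; the types fb_key, batch, and context are simplified stand-ins for
illustration, not the driver's actual structures.

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    #define MAX_BATCHES 2

    /* Simplified stand-in for pipe_framebuffer_state used as the batch key */
    struct fb_key { unsigned width, height, nr_cbufs; };

    struct batch {
       struct fb_key key;
       uint64_t seqnum;   /* bumped on every use; smallest value is the LRU entry */
       bool active;
    };

    struct context {
       struct batch slots[MAX_BATCHES];
       uint64_t seqnum;
    };

    /* Illustrative flush: the real driver submits the batch's work here */
    static void flush_batch(struct context *ctx, struct batch *batch)
    {
       (void)ctx;
       batch->active = false;
    }

    static struct batch *get_batch_for_framebuffer(struct context *ctx,
                                                   const struct fb_key *key)
    {
       /* 1. Reuse a batch already rendering to this framebuffer */
       for (unsigned i = 0; i < MAX_BATCHES; ++i) {
          struct batch *b = &ctx->slots[i];
          if (b->active && memcmp(&b->key, key, sizeof(*key)) == 0) {
             b->seqnum = ++ctx->seqnum;
             return b;
          }
       }

       /* 2. Otherwise, take an inactive slot if one exists */
       struct batch *victim = NULL;
       for (unsigned i = 0; i < MAX_BATCHES; ++i) {
          if (!ctx->slots[i].active) {
             victim = &ctx->slots[i];
             break;
          }
       }

       /* 3. Otherwise, evict the batch with the smallest seqnum (LRU) */
       if (!victim) {
          for (unsigned i = 0; i < MAX_BATCHES; ++i) {
             struct batch *b = &ctx->slots[i];
             if (!victim || victim->seqnum > b->seqnum)
                victim = b;
          }
          flush_batch(ctx, victim);
       }

       /* (Re)initialize the slot for the new framebuffer */
       victim->key = *key;
       victim->seqnum = ++ctx->seqnum;
       victim->active = true;
       return victim;
    }

With only two slots (AGX_MAX_BATCHES in the patch), the linear scans are cheap;
the seqnum counter does the LRU bookkeeping without any list manipulation.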
-rw-r--r--   src/gallium/drivers/asahi/agx_batch.c   225
-rw-r--r--   src/gallium/drivers/asahi/agx_pipe.c     60
-rw-r--r--   src/gallium/drivers/asahi/agx_state.c   134
-rw-r--r--   src/gallium/drivers/asahi/agx_state.h    34
4 files changed, 315 insertions(+), 138 deletions(-)
diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c
index 549f928c21b..b1aa11ac358 100644
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -1,16 +1,202 @@
/*
* Copyright 2022 Alyssa Rosenzweig
+ * Copyright 2019-2020 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
#include "agx_state.h"
+#define foreach_batch(ctx, idx) \
+ BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)
+
+static unsigned
+agx_batch_idx(struct agx_batch *batch)
+{
+ return batch - batch->ctx->batches.slots;
+}
+
+bool
+agx_batch_is_active(struct agx_batch *batch)
+{
+ return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch));
+}
+
+static void
+agx_batch_init(struct agx_context *ctx,
+ const struct pipe_framebuffer_state *key,
+ struct agx_batch *batch)
+{
+ struct agx_device *dev = agx_device(ctx->base.screen);
+
+ batch->ctx = ctx;
+ util_copy_framebuffer_state(&batch->key, key);
+ batch->seqnum = ++ctx->batches.seqnum;
+
+ agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
+ agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_SHADER, true);
+
+ /* These allocations can happen only once and will just be zeroed (not freed)
+ * during batch clean up. The memory is owned by the context.
+ */
+ if (!batch->bo_list.set) {
+ batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
+ batch->bo_list.word_count = 128;
+ } else {
+ memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
+ }
+
+ if (!batch->encoder) {
+ batch->encoder = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
+ batch->encoder_current = batch->encoder->ptr.cpu;
+ batch->encoder_end = batch->encoder_current + batch->encoder->size;
+ } else {
+ batch->encoder_current = batch->encoder->ptr.cpu;
+ batch->encoder_end = batch->encoder_current + batch->encoder->size;
+ }
+
+ if (!batch->scissor.bo) {
+ batch->scissor.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
+ }
+
+ if (!batch->depth_bias.bo) {
+ batch->depth_bias.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
+ }
+
+ batch->clear = 0;
+ batch->draw = 0;
+ batch->load = 0;
+ batch->clear_depth = 0;
+ batch->clear_stencil = 0;
+ batch->scissor.count = 0;
+ batch->depth_bias.count = 0;
+ batch->varyings = 0;
+
+ /* We need to emit prim state at the start. Max collides with all. */
+ batch->reduced_prim = PIPE_PRIM_MAX;
+
+ if (batch->key.zsbuf) {
+ agx_batch_writes(batch, agx_resource(key->zsbuf->texture));
+ }
+
+ for (unsigned i = 0; i < key->nr_cbufs; ++i) {
+ agx_batch_writes(batch, agx_resource(key->cbufs[i]->texture));
+ }
+
+ unsigned batch_idx = agx_batch_idx(batch);
+ BITSET_SET(ctx->batches.active, batch_idx);
+
+ agx_batch_init_state(batch);
+}
+
void
-agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
+agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
+{
+ struct agx_device *dev = agx_device(ctx->base.screen);
+ assert(batch->ctx == ctx);
+
+ if (ctx->batch == batch)
+ ctx->batch = NULL;
+
+ /* There is no more writer for anything we wrote recorded on this context */
+ hash_table_foreach(ctx->writer, ent) {
+ if (ent->data == batch)
+ _mesa_hash_table_remove(ctx->writer, ent);
+ }
+
+ int handle;
+ AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
+ agx_bo_unreference(agx_lookup_bo(dev, handle));
+ }
+
+ agx_pool_cleanup(&batch->pool);
+ agx_pool_cleanup(&batch->pipeline_pool);
+ util_unreference_framebuffer_state(&batch->key);
+
+ unsigned batch_idx = agx_batch_idx(batch);
+ BITSET_CLEAR(ctx->batches.active, batch_idx);
+}
+
+static struct agx_batch *
+agx_get_batch_for_framebuffer(struct agx_context *ctx,
+ const struct pipe_framebuffer_state *state)
{
- /* TODO: Turn into loop when we support multiple batches */
- if (ctx->batch) {
- struct agx_batch *batch = ctx->batch;
+ /* Look if we have a matching batch */
+ unsigned i;
+ foreach_batch(ctx, i) {
+ struct agx_batch *candidate = &ctx->batches.slots[i];
+
+ if (util_framebuffer_state_equal(&candidate->key, state)) {
+ /* We found a match, increase the seqnum for the LRU
+ * eviction logic.
+ */
+ candidate->seqnum = ++ctx->batches.seqnum;
+ return candidate;
+ }
+ }
+
+ /* Look if we have a free batch */
+ struct agx_batch *batch = NULL;
+ for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
+ if (!BITSET_TEST(ctx->batches.active, i)) {
+ batch = &ctx->batches.slots[i];
+ break;
+ }
+ }
+
+ /* Else, evict something */
+ if (!batch) {
+ for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
+ struct agx_batch *candidate = &ctx->batches.slots[i];
+
+ if (!batch || batch->seqnum > candidate->seqnum)
+ batch = candidate;
+ }
+
+ agx_flush_batch(ctx, batch);
+ }
+
+ /* Batch is now free */
+ agx_batch_init(ctx, state, batch);
+ return batch;
+}
+
+struct agx_batch *
+agx_get_batch(struct agx_context *ctx)
+{
+ if (!ctx->batch) {
+ ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer);
+ agx_dirty_all(ctx);
+ }
+
+ assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key));
+ return ctx->batch;
+}
+
+void
+agx_flush_all(struct agx_context *ctx, const char *reason)
+{
+ if (reason)
+ perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);
+
+ unsigned idx;
+ foreach_batch(ctx, idx) {
+ agx_flush_batch(ctx, &ctx->batches.slots[idx]);
+ }
+}
+
+static void
+agx_flush_readers_except(struct agx_context *ctx,
+ struct agx_resource *rsrc,
+ struct agx_batch *except,
+ const char *reason)
+{
+ unsigned idx;
+
+ foreach_batch(ctx, idx) {
+ struct agx_batch *batch = &ctx->batches.slots[idx];
+
+ if (batch == except)
+ continue;
if (agx_batch_uses_bo(batch, rsrc->bo)) {
perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason);
@@ -19,20 +205,38 @@ agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char
}
}
-void
-agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
+static void
+agx_flush_writer_except(struct agx_context *ctx,
+ struct agx_resource *rsrc,
+ struct agx_batch *except,
+ const char *reason)
{
struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
- if (ent) {
+ if (ent && ent->data != except) {
perf_debug_ctx(ctx, "Flush writer due to: %s\n", reason);
agx_flush_batch(ctx, ent->data);
}
}
void
+agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
+{
+ agx_flush_readers_except(ctx, rsrc, NULL, reason);
+}
+
+void
+agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
+{
+ agx_flush_writer_except(ctx, rsrc, NULL, reason);
+}
+
+void
agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
{
+ /* Hazard: read-after-write */
+ agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch");
+
agx_batch_add_bo(batch, rsrc->bo);
if (rsrc->separate_stencil)
@@ -45,12 +249,15 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
struct agx_context *ctx = batch->ctx;
struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
+ agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch");
+
/* Nothing to do if we're already writing */
if (ent && ent->data == batch)
return;
- /* Flush the old writer if there is one */
- agx_flush_writer(ctx, rsrc, "Multiple writers");
+ /* Hazard: write-after-write, write-after-read */
+ if (ent)
+ agx_flush_writer(ctx, rsrc, "Multiple writers");
/* Write is strictly stronger than a read */
agx_batch_reads(batch, rsrc);
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index baea9e3d63e..cacc5bdba4c 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -659,7 +659,7 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct agx_context *ctx = agx_context(pctx);
- struct agx_batch *batch = ctx->batch;
+ struct agx_batch *batch = agx_get_batch(ctx);
unsigned fastclear = buffers & ~(batch->draw | batch->load);
unsigned slowclear = buffers & ~fastclear;
@@ -690,11 +690,11 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
assert((batch->draw & slowclear) == slowclear);
}
-
static void
agx_flush_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
{
+ agx_flush_writer(agx_context(ctx), agx_resource(resource), "flush_resource");
}
/*
@@ -710,7 +710,7 @@ agx_flush(struct pipe_context *pctx,
if (fence)
*fence = NULL;
- agx_flush_batch(ctx, ctx->batch);
+ agx_flush_all(ctx, "Gallium flush");
}
void
@@ -718,9 +718,13 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
{
struct agx_device *dev = agx_device(ctx->base.screen);
+ assert(agx_batch_is_active(batch));
+
/* Nothing to do */
- if (!(batch->draw | batch->clear))
+ if (!(batch->draw | batch->clear)) {
+ agx_batch_cleanup(ctx, batch);
return;
+ }
/* Finalize the encoder */
uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
@@ -761,7 +765,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
pipeline_store =
agx_build_store_pipeline(batch,
dev->internal.store,
- agx_pool_upload(&batch->pool, ctx->render_target[0], sizeof(ctx->render_target)));
+ agx_batch_upload_pbe(batch, 0));
}
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
@@ -851,37 +855,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
agxdecode_next_frame();
}
- AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
- agx_bo_unreference(agx_lookup_bo(dev, handle));
- }
-
- /* There is no more writer for anything we wrote recorded on this context */
- hash_table_foreach(ctx->writer, ent) {
- if (ent->data == batch)
- _mesa_hash_table_remove(ctx->writer, ent);
- }
-
- memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
- agx_pool_cleanup(&batch->pool);
- agx_pool_cleanup(&batch->pipeline_pool);
- agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
- agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true);
- batch->clear = 0;
- batch->draw = 0;
- batch->load = 0;
- batch->encoder_current = batch->encoder->ptr.cpu;
- batch->encoder_end = batch->encoder_current + batch->encoder->size;
- batch->scissor.count = 0;
-
- agx_dirty_all(ctx);
- agx_batch_init_state(batch);
-
- /* After resetting the batch, rebind the framebuffer so we update resource
- * tracking logic and the BO lists.
- *
- * XXX: This is a hack to workaround lack of proper batch tracking.
- */
- ctx->base.set_framebuffer_state(&ctx->base, &ctx->framebuffer);
+ agx_batch_cleanup(ctx, batch);
}
static void
@@ -919,20 +893,6 @@ agx_create_context(struct pipe_screen *screen,
pctx->screen = screen;
pctx->priv = priv;
- ctx->batch = rzalloc(ctx, struct agx_batch);
- ctx->batch->ctx = ctx;
- ctx->batch->bo_list.set = rzalloc_array(ctx->batch, BITSET_WORD, 128);
- ctx->batch->bo_list.word_count = 128;
- agx_pool_init(&ctx->batch->pool,
- agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true);
- agx_pool_init(&ctx->batch->pipeline_pool,
- agx_device(screen), AGX_MEMORY_TYPE_SHADER, true);
- ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
- ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu;
- ctx->batch->encoder_end = ctx->batch->encoder_current + ctx->batch->encoder->size;
- ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
- ctx->batch->depth_bias.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
-
ctx->writer = _mesa_pointer_hash_table_create(ctx);
/* Upload fixed shaders (TODO: compile them?) */
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 2c4b3a74cd4..f5c0cdbb559 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -781,63 +781,61 @@ agx_set_framebuffer_state(struct pipe_context *pctx,
if (!state)
return;
- /* XXX: eliminate this flush with batch tracking logic */
- agx_flush_all(ctx, "Framebuffer switch");
-
util_copy_framebuffer_state(&ctx->framebuffer, state);
- util_copy_framebuffer_state(&ctx->batch->key, state);
- ctx->dirty = ~0;
-
- if (state->zsbuf)
- agx_batch_writes(ctx->batch, agx_resource(state->zsbuf->texture));
+ ctx->batch = NULL;
+ agx_dirty_all(ctx);
+}
+uint64_t
+agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt)
+{
+ struct pipe_surface *surf = batch->key.cbufs[rt];
+ struct agx_resource *tex = agx_resource(surf->texture);
+ const struct util_format_description *desc =
+ util_format_description(surf->format);
+ unsigned level = surf->u.tex.level;
+ unsigned layer = surf->u.tex.first_layer;
- for (unsigned i = 0; i < state->nr_cbufs; ++i) {
- struct pipe_surface *surf = state->cbufs[i];
- struct agx_resource *tex = agx_resource(surf->texture);
- const struct util_format_description *desc =
- util_format_description(surf->format);
- unsigned level = surf->u.tex.level;
- unsigned layer = surf->u.tex.first_layer;
+ assert(surf->u.tex.last_layer == layer);
- agx_batch_writes(ctx->batch, tex);
+ struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, AGX_RENDER_TARGET_LENGTH, 256);
- assert(surf->u.tex.last_layer == layer);
+ agx_pack(T.cpu, RENDER_TARGET, cfg) {
+ cfg.layout = agx_translate_layout(tex->layout.tiling);
+ cfg.channels = agx_pixel_format[surf->format].channels;
+ cfg.type = agx_pixel_format[surf->format].type;
- agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) {
- cfg.layout = agx_translate_layout(tex->layout.tiling);
- cfg.channels = agx_pixel_format[surf->format].channels;
- cfg.type = agx_pixel_format[surf->format].type;
+ assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
+ cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3;
- assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
- cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3;
+ if (desc->nr_channels >= 2)
+ cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3;
- if (desc->nr_channels >= 2)
- cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3;
+ if (desc->nr_channels >= 3)
+ cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3;
- if (desc->nr_channels >= 3)
- cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3;
+ if (desc->nr_channels >= 4)
+ cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3;
- if (desc->nr_channels >= 4)
- cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3;
+ cfg.width = batch->key.width;
+ cfg.height = batch->key.height;
+ cfg.level = surf->u.tex.level;
+ cfg.buffer = agx_map_texture_gpu(tex, layer);
+ cfg.unk_mipmapped = tex->mipmapped;
- cfg.width = state->width;
- cfg.height = state->height;
- cfg.level = surf->u.tex.level;
- cfg.buffer = agx_map_texture_gpu(tex, layer);
- cfg.unk_mipmapped = tex->mipmapped;
+ if (tex->layout.tiling == AIL_TILING_LINEAR) {
+ cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4;
+ cfg.levels = 1;
+ } else {
+ cfg.unk_tiled = true;
+ cfg.levels = tex->base.last_level + 1;
+ }
+ };
- if (tex->layout.tiling == AIL_TILING_LINEAR) {
- cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4;
- cfg.levels = 1;
- } else {
- cfg.unk_tiled = true;
- cfg.levels = tex->base.last_level + 1;
- }
- };
- }
+ return T.gpu;
}
+
/* Likewise constant buffers, textures, and samplers are handled in a common
* per-draw path, with dirty tracking to reduce the costs involved.
*/
@@ -1224,18 +1222,20 @@ agx_update_vs(struct agx_context *ctx)
}
static bool
-agx_update_fs(struct agx_context *ctx)
+agx_update_fs(struct agx_batch *batch)
{
+ struct agx_context *ctx = batch->ctx;
+
struct asahi_shader_key key = {
- .nr_cbufs = ctx->batch->key.nr_cbufs,
+ .nr_cbufs = batch->key.nr_cbufs,
.clip_plane_enable = ctx->rast->base.clip_plane_enable,
};
- if (ctx->batch->reduced_prim == PIPE_PRIM_POINTS)
+ if (batch->reduced_prim == PIPE_PRIM_POINTS)
key.sprite_coord_enable = ctx->rast->base.sprite_coord_enable;
for (unsigned i = 0; i < key.nr_cbufs; ++i) {
- struct pipe_surface *surf = ctx->batch->key.cbufs[i];
+ struct pipe_surface *surf = batch->key.cbufs[i];
if (surf) {
enum pipe_format fmt = surf->format;
@@ -1557,9 +1557,6 @@ agx_batch_init_state(struct agx_batch *batch)
agx_ppp_fini(&out, &ppp);
batch->encoder_current = out;
-
- /* We need to emit prim state at the start. Max collides with all. */
- batch->reduced_prim = PIPE_PRIM_MAX;
}
static enum agx_object_type
@@ -1586,9 +1583,10 @@ agx_pass_type_for_shader(struct agx_shader_info *info)
#define MAX_PPP_UPDATES 2
static uint8_t *
-agx_encode_state(struct agx_context *ctx, uint8_t *out,
+agx_encode_state(struct agx_batch *batch, uint8_t *out,
bool is_lines, bool is_points)
{
+ struct agx_context *ctx = batch->ctx;
struct agx_rasterizer *rast = ctx->rast;
unsigned ppp_updates = 0;
@@ -1613,7 +1611,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH;
agx_pack(out, VDM_STATE_VERTEX_SHADER_WORD_1, cfg) {
- cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->vs, PIPE_SHADER_VERTEX);
+ cfg.pipeline = agx_build_pipeline(batch, ctx->vs, PIPE_SHADER_VERTEX);
}
out += AGX_VDM_STATE_VERTEX_SHADER_WORD_1_LENGTH;
@@ -1634,17 +1632,17 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
out += 4;
}
- struct agx_pool *pool = &ctx->batch->pool;
+ struct agx_pool *pool = &batch->pool;
struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs;
unsigned zbias = 0;
if (ctx->rast->base.offset_tri) {
- zbias = agx_upload_depth_bias(ctx->batch, &ctx->rast->base);
+ zbias = agx_upload_depth_bias(batch, &ctx->rast->base);
ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS;
}
if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) {
- agx_upload_viewport_scissor(pool, ctx->batch, &out, &ctx->viewport,
+ agx_upload_viewport_scissor(pool, batch, &out, &ctx->viewport,
ctx->rast->base.scissor ? &ctx->scissor : NULL,
zbias);
}
@@ -1652,7 +1650,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
bool varyings_dirty = false;
if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS)) {
- ctx->batch->varyings = agx_link_varyings_vs_fs(&ctx->batch->pipeline_pool,
+ batch->varyings = agx_link_varyings_vs_fs(&batch->pipeline_pool,
&ctx->vs->info.varyings.vs,
&ctx->fs->info.varyings.fs,
ctx->rast->base.flatshade_first);
@@ -1774,13 +1772,13 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
if (IS_DIRTY(FS) || varyings_dirty) {
unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) {
- cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->fs, PIPE_SHADER_FRAGMENT),
+ cfg.pipeline = agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT),
cfg.uniform_register_count = ctx->fs->info.push_count;
cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs;
cfg.texture_state_register_count = frag_tex_count;
cfg.sampler_state_register_count = frag_tex_count;
cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings;
- cfg.cf_bindings = ctx->batch->varyings;
+ cfg.cf_bindings = batch->varyings;
/* XXX: This is probably wrong */
cfg.unknown_30 = frag_tex_count >= 4;
@@ -1883,18 +1881,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
}
struct agx_context *ctx = agx_context(pctx);
- struct agx_batch *batch = ctx->batch;
+ struct agx_batch *batch = agx_get_batch(ctx);
if (agx_scissor_culls_everything(ctx))
return;
-#ifndef NDEBUG
- /* For debugging dirty tracking, mark all state as dirty every draw, forcing
- * everything to be re-emitted fresh.
- */
- if (unlikely(agx_device(pctx->screen)->debug & AGX_DBG_DIRTY))
- agx_dirty_all(ctx);
-#endif
+ agx_dirty_all(ctx);
/* Dirty track the reduced prim: lines vs points vs triangles */
enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode);
@@ -1902,8 +1894,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
batch->reduced_prim = reduced_prim;
/* TODO: masks */
- ctx->batch->draw |= ~0;
- ctx->batch->load |= ~0;
+ batch->draw |= ~0;
+ batch->load |= ~0;
/* TODO: These are expensive calls, consider finer dirty tracking */
if (agx_update_vs(ctx))
@@ -1911,7 +1903,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
else if (ctx->stage[PIPE_SHADER_VERTEX].dirty)
ctx->dirty |= AGX_DIRTY_VS;
- if (agx_update_fs(ctx))
+ if (agx_update_fs(batch))
ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG;
else if (ctx->stage[PIPE_SHADER_FRAGMENT].dirty)
ctx->dirty |= AGX_DIRTY_FS;
@@ -1939,7 +1931,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
AGX_INDEX_LIST_START_LENGTH +
AGX_INDEX_LIST_BUFFER_SIZE_LENGTH);
- uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
+ uint8_t *out = agx_encode_state(batch, batch->encoder_current,
reduced_prim == PIPE_PRIM_LINES,
reduced_prim == PIPE_PRIM_POINTS);
@@ -2008,6 +2000,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
assert(batch->encoder_current <= batch->encoder_end &&
"Failed to reserve sufficient space in encoder");
ctx->dirty = 0;
+
+ assert(batch == agx_get_batch(ctx) && "batch should not change under us");
}
void agx_init_state_functions(struct pipe_context *ctx);
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index a6291264dad..22f9d7b37ce 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -95,6 +95,7 @@ struct agx_array {
struct agx_batch {
struct agx_context *ctx;
struct pipe_framebuffer_state key;
+ uint64_t seqnum;
/* PIPE_CLEAR_* bitmask */
uint32_t clear, draw, load;
@@ -174,11 +175,24 @@ enum agx_dirty {
AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
};
+#define AGX_MAX_BATCHES (2)
+
struct agx_context {
struct pipe_context base;
struct agx_compiled_shader *vs, *fs;
uint32_t dirty;
+ /* Set of batches. When full, the LRU entry (the batch with the smallest
+ * seqnum) is flushed to free a slot.
+ */
+ struct {
+ uint64_t seqnum;
+ struct agx_batch slots[AGX_MAX_BATCHES];
+
+ /** Set of active batches for faster traversal */
+ BITSET_DECLARE(active, AGX_MAX_BATCHES);
+ } batches;
+
struct agx_batch *batch;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
@@ -204,8 +218,6 @@ struct agx_context {
struct util_debug_callback debug;
bool is_noop;
- uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH];
-
struct blitter_context *blitter;
/* Map of agx_resource to agx_batch that writes that resource */
@@ -350,6 +362,9 @@ uint64_t
agx_push_location(struct agx_batch *batch, struct agx_push push,
enum pipe_shader_type stage);
+bool
+agx_batch_is_active(struct agx_batch *batch);
+
uint64_t
agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf);
@@ -360,6 +375,9 @@ agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
uint64_t
agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf);
+uint64_t
+agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
+
/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
* hot paths. To achieve this we model BO lists by bit sets */
@@ -383,7 +401,7 @@ agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
{
/* Double the size of the BO list if we run out, this is amortized O(1) */
if (unlikely(bo->handle > agx_batch_bo_list_bits(batch))) {
- batch->bo_list.set = rerzalloc(batch, batch->bo_list.set, BITSET_WORD,
+ batch->bo_list.set = rerzalloc(batch->ctx, batch->bo_list.set, BITSET_WORD,
batch->bo_list.word_count,
batch->bo_list.word_count * 2);
batch->bo_list.word_count *= 2;
@@ -408,6 +426,7 @@ agx_batch_num_bo(struct agx_batch *batch)
BITSET_FOREACH_SET(handle, (batch)->bo_list.set, agx_batch_bo_list_bits(batch))
void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch);
+void agx_flush_all(struct agx_context *ctx, const char *reason);
void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
@@ -415,6 +434,9 @@ void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const
void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc);
+struct agx_batch *agx_get_batch(struct agx_context *ctx);
+void agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch);
+
/* Blit shaders */
void
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
@@ -426,12 +448,6 @@ void agx_blit(struct pipe_context *pipe,
void agx_internal_shaders(struct agx_device *dev);
/* Batch logic */
-static void
-agx_flush_all(struct agx_context *ctx, const char *reason)
-{
- perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);
- ctx->base.flush(&ctx->base, NULL, 0);
-}
void
agx_batch_init_state(struct agx_batch *batch);