author     Marek Olšák <marek.olsak@amd.com>  2021-04-21 22:32:06 -0400
committer  Marge Bot <eric+marge@anholt.net>  2021-05-17 10:37:24 +0000
commit     e9c41b321488c23b7d8ec5744f68218bac753505 (patch)
tree       c87bf5dcc2e8cb8483ca5e0b44c1c232f065fbc9
parent     88b97567d413568e956a73cce12daecc6b9cacc7 (diff)
gallium/u_threaded: add buffer lists - tracking of buffers referenced by tc
not used by anything yet

Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10662>
-rw-r--r--  src/gallium/auxiliary/util/u_threaded_context.c  351
-rw-r--r--  src/gallium/auxiliary/util/u_threaded_context.h   37
2 files changed, 365 insertions, 23 deletions
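
The patch gives every buffer a unique ID (threaded_resource::buffer_id_unique) and, for each batch, records the hash of every buffer the batch may reference: id & TC_BUFFER_ID_MASK indexes a bit in a fixed-size bitset (struct tc_buffer_list). Below is a minimal standalone sketch of that hashing scheme; the types and helper names are simplified stand-ins, not the actual tc structures.

/* Minimal sketch of the buffer-list idea: a buffer's unique ID is hashed with
 * a power-of-two mask into a fixed-size bitset. Names here (buffer_list,
 * list_add, ...) are illustrative stand-ins for the tc equivalents. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BUF_ID_MASK  ((1u << 14) - 1)          /* like TC_BUFFER_ID_MASK: must be 2^n - 1 */
#define LIST_WORDS   ((BUF_ID_MASK + 1) / 32)

struct buffer_list {
   uint32_t words[LIST_WORDS];                 /* bit N set => a buffer hashing to N may be referenced */
};

static void list_clear(struct buffer_list *list)
{
   memset(list->words, 0, sizeof(list->words));
}

static void list_add(struct buffer_list *list, uint32_t buffer_id_unique)
{
   uint32_t hash = buffer_id_unique & BUF_ID_MASK;
   list->words[hash / 32] |= 1u << (hash % 32);
}

/* May report a false positive on a hash collision, never a false negative. */
static bool list_may_contain(const struct buffer_list *list, uint32_t buffer_id_unique)
{
   uint32_t hash = buffer_id_unique & BUF_ID_MASK;
   return (list->words[hash / 32] >> (hash % 32)) & 1;
}

int main(void)
{
   static struct buffer_list list;
   list_clear(&list);
   list_add(&list, 42);
   printf("42: %d, 7: %d\n", list_may_contain(&list, 42), list_may_contain(&list, 7));
   return 0;
}

tc_bind_buffer() in the diff below does the equivalent of list_add() while also storing the full unique ID in a per-slot binding array, so that a new batch can re-add currently bound buffers and tc_rebind_buffer() can patch bindings when a buffer's storage is invalidated.
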
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 40f1e9896b9..e9343da1c3c 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -196,6 +196,21 @@ tc_batch_execute(void *job, UNUSED int thread_index)
}
static void
+tc_begin_next_buffer_list(struct threaded_context *tc)
+{
+ tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
+
+ tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
+
+ /* Clear the buffer list in the new empty batch. */
+ struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
+ BITSET_ZERO(buf_list->buffer_list);
+
+ tc->add_all_gfx_bindings_to_buffer_list = true;
+ tc->add_all_compute_bindings_to_buffer_list = true;
+}
+
+static void
tc_batch_flush(struct threaded_context *tc)
{
struct tc_batch *next = &tc->batch_slots[tc->next];
@@ -215,6 +230,7 @@ tc_batch_flush(struct threaded_context *tc)
NULL, 0);
tc->last = tc->next;
tc->next = (tc->next + 1) % TC_MAX_BATCHES;
+ tc_begin_next_buffer_list(tc);
}
/* This is the function that adds variable-sized calls into the current
@@ -298,6 +314,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char
p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
tc->bytes_mapped_estimate = 0;
tc_batch_execute(next, 0);
+ tc_begin_next_buffer_list(tc);
synced = true;
}
@@ -343,6 +360,173 @@ threaded_context_flush(struct pipe_context *_pipe,
}
}
+static void
+tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
+{
+ uint32_t id = threaded_resource(buf)->buffer_id_unique;
+ BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
+}
+
+/* Set a buffer binding and add it to the buffer list. */
+static void
+tc_bind_buffer(uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
+{
+ uint32_t id = threaded_resource(buf)->buffer_id_unique;
+ *binding = id;
+ BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
+}
+
+/* Reset a buffer binding. */
+static void
+tc_unbind_buffer(uint32_t *binding)
+{
+ *binding = 0;
+}
+
+/* Reset a range of buffer binding slots. */
+static void
+tc_unbind_buffers(uint32_t *binding, unsigned count)
+{
+ if (count)
+ memset(binding, 0, sizeof(*binding) * count);
+}
+
+static void
+tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
+ unsigned count)
+{
+ for (unsigned i = 0; i < count; i++) {
+ if (bindings[i])
+ BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
+ }
+}
+
+static bool
+tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
+ unsigned count)
+{
+ bool rebound = false;
+
+ for (unsigned i = 0; i < count; i++) {
+ if (bindings[i] == old_id) {
+ bindings[i] = new_id;
+ rebound = true;
+ }
+ }
+ return rebound;
+}
+
+static void
+tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
+ BITSET_WORD *buffer_list,
+ enum pipe_shader_type shader)
+{
+ tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
+ tc->max_const_buffers);
+ if (tc->seen_shader_buffers[shader]) {
+ tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
+ tc->max_shader_buffers);
+ }
+ if (tc->seen_image_buffers[shader]) {
+ tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
+ tc->max_images);
+ }
+ if (tc->seen_sampler_buffers[shader]) {
+ tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
+ tc->max_samplers);
+ }
+}
+
+static bool
+tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
+ uint32_t new_id, enum pipe_shader_type shader)
+{
+ bool rebound = false;
+
+ rebound |= tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
+ tc->max_const_buffers);
+ if (tc->seen_shader_buffers[shader]) {
+ rebound |= tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
+ tc->max_shader_buffers);
+ }
+ if (tc->seen_image_buffers[shader]) {
+ rebound |= tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
+ tc->max_images);
+ }
+ if (tc->seen_sampler_buffers[shader]) {
+ rebound |= tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
+ tc->max_samplers);
+ }
+ return rebound;
+}
+
+/* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
+ * This is called by the first draw call in a batch when we want to inherit
+ * all bindings set by the previous batch.
+ */
+static void
+tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
+{
+ BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
+
+ tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
+ if (tc->seen_streamout_buffers)
+ tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
+
+ tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
+ tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
+
+ if (tc->seen_tcs)
+ tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
+ if (tc->seen_tes)
+ tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
+ if (tc->seen_gs)
+ tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
+
+ tc->add_all_gfx_bindings_to_buffer_list = false;
+}
+
+/* Add all bound buffers used by compute to the buffer list.
+ * This is called by the first compute call in a batch when we want to inherit
+ * all bindings set by the previous batch.
+ */
+static void
+tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
+{
+ BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
+
+ tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
+ tc->add_all_compute_bindings_to_buffer_list = false;
+}
+
+static void
+tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id)
+{
+ bool rebound = false;
+
+ rebound |= tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
+ tc->max_vertex_buffers);
+ if (tc->seen_streamout_buffers) {
+ rebound |= tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
+ PIPE_MAX_SO_BUFFERS);
+ }
+
+ rebound |= tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX);
+ rebound |= tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT);
+
+ if (tc->seen_tcs)
+ rebound |= tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL);
+ if (tc->seen_tes)
+ rebound |= tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL);
+ if (tc->seen_gs)
+ rebound |= tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY);
+
+ rebound |= tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE);
+
+ if (rebound)
+ BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
+}
+
void
threaded_resource_init(struct pipe_resource *res)
{
@@ -352,6 +536,7 @@ threaded_resource_init(struct pipe_resource *res)
util_range_init(&tres->valid_buffer_range);
tres->is_shared = false;
tres->is_user_ptr = false;
+ tres->buffer_id_unique = 0;
tres->pending_staging_uploads = 0;
util_range_init(&tres->pending_staging_uploads_range);
}
@@ -589,6 +774,7 @@ tc_get_query_result_resource(struct pipe_context *_pipe,
p->result_type = result_type;
p->index = index;
tc_set_resource_reference(&p->resource, resource);
+ tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
p->offset = offset;
}
@@ -849,6 +1035,13 @@ tc_set_constant_buffer(struct pipe_context *_pipe,
p->cb.buffer = buffer;
else
tc_set_resource_reference(&p->cb.buffer, buffer);
+
+ if (buffer) {
+ tc_bind_buffer(&tc->const_buffers[shader][index],
+ &tc->buffer_lists[tc->next_buf_list], buffer);
+ } else {
+ tc_unbind_buffer(&tc->const_buffers[shader][index]);
+ }
}
struct tc_inlinable_constants {
@@ -1038,16 +1231,32 @@ tc_set_sampler_views(struct pipe_context *_pipe,
p->start = start;
if (views) {
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
+
p->count = count;
p->unbind_num_trailing_slots = unbind_num_trailing_slots;
for (unsigned i = 0; i < count; i++) {
p->slot[i] = NULL;
pipe_sampler_view_reference(&p->slot[i], views[i]);
+
+ if (views[i] && views[i]->target == PIPE_BUFFER) {
+ tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
+ views[i]->texture);
+ } else {
+ tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
+ }
}
+
+ tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
+ unbind_num_trailing_slots);
+ tc->seen_sampler_buffers[shader] = true;
} else {
p->count = 0;
p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
+
+ tc_unbind_buffers(&tc->sampler_buffers[shader][start],
+ count + unbind_num_trailing_slots);
}
}
@@ -1101,24 +1310,38 @@ tc_set_shader_images(struct pipe_context *_pipe,
p->count = count;
p->unbind_num_trailing_slots = unbind_num_trailing_slots;
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
+
for (unsigned i = 0; i < count; i++) {
- tc_set_resource_reference(&p->slot[i].resource, images[i].resource);
+ struct pipe_resource *resource = images[i].resource;
- if (images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
- images[i].resource &&
- images[i].resource->target == PIPE_BUFFER) {
- struct threaded_resource *tres =
- threaded_resource(images[i].resource);
+ tc_set_resource_reference(&p->slot[i].resource, resource);
- util_range_add(&tres->b, &tres->valid_buffer_range,
- images[i].u.buf.offset,
- images[i].u.buf.offset + images[i].u.buf.size);
+ if (resource && resource->target == PIPE_BUFFER) {
+ tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
+
+ if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
+ struct threaded_resource *tres = threaded_resource(resource);
+
+ util_range_add(&tres->b, &tres->valid_buffer_range,
+ images[i].u.buf.offset,
+ images[i].u.buf.offset + images[i].u.buf.size);
+ }
+ } else {
+ tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
}
}
memcpy(p->slot, images, count * sizeof(images[0]));
+
+ tc_unbind_buffers(&tc->image_buffers[shader][start + count],
+ unbind_num_trailing_slots);
+ tc->seen_image_buffers[shader] = true;
} else {
p->count = 0;
p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
+
+ tc_unbind_buffers(&tc->image_buffers[shader][start],
+ count + unbind_num_trailing_slots);
}
}
@@ -1172,6 +1395,8 @@ tc_set_shader_buffers(struct pipe_context *_pipe,
p->writable_bitmask = writable_bitmask;
if (buffers) {
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
+
for (unsigned i = 0; i < count; i++) {
struct pipe_shader_buffer *dst = &p->slot[i];
const struct pipe_shader_buffer *src = buffers + i;
@@ -1183,11 +1408,18 @@ tc_set_shader_buffers(struct pipe_context *_pipe,
if (src->buffer) {
struct threaded_resource *tres = threaded_resource(src->buffer);
+ tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
+
util_range_add(&tres->b, &tres->valid_buffer_range,
src->buffer_offset,
src->buffer_offset + src->buffer_size);
+ } else {
+ tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
}
}
+ tc->seen_shader_buffers[shader] = true;
+ } else {
+ tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
}
}
@@ -1237,27 +1469,51 @@ tc_set_vertex_buffers(struct pipe_context *_pipe,
p->count = count;
p->unbind_num_trailing_slots = unbind_num_trailing_slots;
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
+
if (take_ownership) {
memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
+
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_resource *buf = buffers[i].buffer.resource;
+
+ if (buf) {
+ tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
+ } else {
+ tc_unbind_buffer(&tc->vertex_buffers[start + i]);
+ }
+ }
} else {
for (unsigned i = 0; i < count; i++) {
struct pipe_vertex_buffer *dst = &p->slot[i];
const struct pipe_vertex_buffer *src = buffers + i;
+ struct pipe_resource *buf = src->buffer.resource;
tc_assert(!src->is_user_buffer);
dst->stride = src->stride;
dst->is_user_buffer = false;
- tc_set_resource_reference(&dst->buffer.resource,
- src->buffer.resource);
+ tc_set_resource_reference(&dst->buffer.resource, buf);
dst->buffer_offset = src->buffer_offset;
+
+ if (buf) {
+ tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
+ } else {
+ tc_unbind_buffer(&tc->vertex_buffers[start + i]);
+ }
}
}
+
+ tc_unbind_buffers(&tc->vertex_buffers[start + count],
+ unbind_num_trailing_slots);
} else {
struct tc_vertex_buffers *p =
tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
p->start = start;
p->count = 0;
p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
+
+ tc_unbind_buffers(&tc->vertex_buffers[start],
+ count + unbind_num_trailing_slots);
}
}
@@ -1290,13 +1546,23 @@ tc_set_stream_output_targets(struct pipe_context *_pipe,
struct threaded_context *tc = threaded_context(_pipe);
struct tc_stream_outputs *p =
tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
for (unsigned i = 0; i < count; i++) {
p->targets[i] = NULL;
pipe_so_target_reference(&p->targets[i], tgs[i]);
+ if (tgs[i]) {
+ tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
+ } else {
+ tc_unbind_buffer(&tc->streamout_buffers[i]);
+ }
}
p->count = count;
memcpy(p->offsets, offsets, count * sizeof(unsigned));
+
+ tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
+ if (count)
+ tc->seen_streamout_buffers = true;
}
static void
@@ -1540,6 +1806,10 @@ tc_invalidate_buffer(struct threaded_context *tc,
pipe_resource_reference(&tbuf->latest, NULL);
tbuf->latest = new_buf;
+
+ /* Treat the current buffer as the new buffer. */
+ tc_rebind_buffer(tc, tbuf->buffer_id_unique,
+ threaded_resource(new_buf)->buffer_id_unique);
util_range_set_empty(&tbuf->valid_buffer_range);
uint32_t delete_buffer_id = tbuf->buffer_id_unique;
@@ -1981,6 +2251,10 @@ tc_buffer_subdata(struct pipe_context *_pipe,
tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
tc_set_resource_reference(&p->resource, resource);
+ /* This will always be busy because if it wasn't, tc_improve_map_buffer-
+  * _flags would set UNSYNCHRONIZED and we wouldn't get here.
+  */
+ tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
p->usage = usage;
p->offset = offset;
p->size = size;
@@ -2601,15 +2875,23 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
unsigned index_size = info->index_size;
bool has_user_indices = info->has_user_indices;
+ if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
+ tc_add_all_gfx_bindings_to_buffer_list(tc);
+
if (unlikely(indirect)) {
assert(!has_user_indices);
assert(num_draws == 1);
struct tc_draw_indirect *p =
tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
- if (index_size && !info->take_index_buffer_ownership) {
- tc_set_resource_reference(&p->info.index.resource,
- info->index.resource);
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
+
+ if (index_size) {
+ if (!info->take_index_buffer_ownership) {
+ tc_set_resource_reference(&p->info.index.resource,
+ info->index.resource);
+ }
+ tc_add_to_buffer_list(next, info->index.resource);
}
memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
@@ -2619,6 +2901,14 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
p->indirect.count_from_stream_output = NULL;
pipe_so_target_reference(&p->indirect.count_from_stream_output,
indirect->count_from_stream_output);
+
+ if (indirect->buffer)
+ tc_add_to_buffer_list(next, indirect->buffer);
+ if (indirect->indirect_draw_count)
+ tc_add_to_buffer_list(next, indirect->indirect_draw_count);
+ if (indirect->count_from_stream_output)
+ tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
+
memcpy(&p->indirect, indirect, sizeof(*indirect));
p->draw.start = draws[0].start;
return;
@@ -2660,9 +2950,12 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
struct tc_draw_single *p = drawid_offset > 0 ?
&tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
- if (index_size && !info->take_index_buffer_ownership) {
- tc_set_resource_reference(&p->info.index.resource,
- info->index.resource);
+ if (index_size) {
+ if (!info->take_index_buffer_ownership) {
+ tc_set_resource_reference(&p->info.index.resource,
+ info->index.resource);
+ }
+ tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
}
if (drawid_offset > 0)
((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
@@ -2768,9 +3061,12 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
struct tc_draw_multi *p =
tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
dr);
- if (index_size && !take_index_buffer_ownership) {
- tc_set_resource_reference(&p->info.index.resource,
- info->index.resource);
+ if (index_size) {
+ if (!take_index_buffer_ownership) {
+ tc_set_resource_reference(&p->info.index.resource,
+ info->index.resource);
+ }
+ tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
}
take_index_buffer_ownership = false;
memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
@@ -2807,8 +3103,14 @@ tc_launch_grid(struct pipe_context *_pipe,
tc_launch_grid_call);
assert(info->input == NULL);
+ if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
+ tc_add_all_compute_bindings_to_buffer_list(tc);
+
tc_set_resource_reference(&p->info.indirect, info->indirect);
memcpy(&p->info, info, sizeof(*info));
+
+ if (info->indirect)
+ tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
}
static uint16_t
@@ -2845,9 +3147,15 @@ tc_resource_copy_region(struct pipe_context *_pipe,
p->src_level = src_level;
p->src_box = *src_box;
- if (dst->target == PIPE_BUFFER)
+ if (dst->target == PIPE_BUFFER) {
+ struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
+
+ tc_add_to_buffer_list(next, src);
+ tc_add_to_buffer_list(next, dst);
+
util_range_add(&tdst->b, &tdst->valid_buffer_range,
dstx, dstx + src_box->width);
+ }
}
struct tc_blit_call {
@@ -3087,6 +3395,7 @@ tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
tc_set_resource_reference(&p->res, res);
+ tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
p->offset = offset;
p->size = size;
memcpy(p->clear_value, clear_value, clear_value_size);
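
Alongside the batch ring, tc_begin_next_buffer_list() above rotates to the next of TC_MAX_BUFFER_LISTS lists, clears it, and forces the first draw or dispatch of the new batch to re-add all currently bound buffers. A simplified sketch of that rotation follows; the names and sizes are illustrative stand-ins for the tc fields.

/* Simplified rotation of per-batch buffer lists, mirroring
 * tc_begin_next_buffer_list() above. NUM_LISTS and LIST_WORDS stand in for
 * TC_MAX_BUFFER_LISTS and the bitset size derived from TC_BUFFER_ID_MASK. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define NUM_LISTS   32
#define LIST_WORDS  ((1u << 14) / 32)

struct tc_sketch {
   uint32_t lists[NUM_LISTS][LIST_WORDS];
   unsigned next_list;
   bool add_all_gfx_bindings;
   bool add_all_compute_bindings;
};

static void begin_next_buffer_list(struct tc_sketch *tc)
{
   /* Advance to the list owned by the new, empty batch and clear it. */
   tc->next_list = (tc->next_list + 1) % NUM_LISTS;
   memset(tc->lists[tc->next_list], 0, sizeof(tc->lists[tc->next_list]));

   /* The first draw/dispatch recorded into the new batch re-adds all
    * currently bound buffers, since the previous list no longer covers them. */
   tc->add_all_gfx_bindings = true;
   tc->add_all_compute_bindings = true;
}
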
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 72ffe7819ac..8c41a018f74 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -191,6 +191,7 @@
#include "c11/threads.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
+#include "util/bitset.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/u_range.h"
@@ -240,6 +241,13 @@ struct tc_unflushed_batch_token;
*/
#define TC_MAX_BUFFER_LISTS (TC_MAX_BATCHES * 4)
+/* This mask is used to get a hash of a buffer ID. It's also the bit size of
+ * the buffer list - 1. It must be 2^n - 1. The size should be as low as
+ * possible to minimize memory usage, but high enough to minimize hash
+ * collisions.
+ */
+#define TC_BUFFER_ID_MASK BITFIELD_MASK(14)
+
/* Threshold for when to use the queue or sync. */
#define TC_MAX_STRING_MARKER_BYTES 512
@@ -357,12 +365,18 @@ struct tc_batch {
#if !defined(NDEBUG) && TC_DEBUG >= 1
unsigned sentinel;
#endif
- unsigned num_total_slots;
+ uint16_t num_total_slots;
+ uint16_t buffer_list_index;
struct util_queue_fence fence;
struct tc_unflushed_batch_token *token;
uint64_t slots[TC_SLOTS_PER_BATCH];
};
+struct tc_buffer_list {
+ /* Buffer list where bit N means whether ID hash N is in the list. */
+ BITSET_DECLARE(buffer_list, TC_BUFFER_ID_MASK + 1);
+};
+
struct threaded_context {
struct pipe_context base;
struct pipe_context *pipe;
@@ -382,6 +396,8 @@ struct threaded_context {
bool driver_calls_flush_notify;
bool use_forced_staging_uploads;
+ bool add_all_gfx_bindings_to_buffer_list;
+ bool add_all_compute_bindings_to_buffer_list;
/* Estimation of how much vram/gtt bytes are mmap'd in
* the current tc_batch.
@@ -405,13 +421,18 @@ struct threaded_context {
bool seen_tes;
bool seen_gs;
+ bool seen_streamout_buffers;
+ bool seen_shader_buffers[PIPE_SHADER_TYPES];
+ bool seen_image_buffers[PIPE_SHADER_TYPES];
+ bool seen_sampler_buffers[PIPE_SHADER_TYPES];
+
unsigned max_vertex_buffers;
unsigned max_const_buffers;
unsigned max_shader_buffers;
unsigned max_images;
unsigned max_samplers;
- unsigned last, next;
+ unsigned last, next, next_buf_list;
/* The list fences that the driver should signal after the next flush.
* If this is empty, all driver command buffers have been flushed.
@@ -419,7 +440,19 @@ struct threaded_context {
struct util_queue_fence *signal_fences_next_flush[TC_MAX_BUFFER_LISTS];
unsigned num_signal_fences_next_flush;
+ /* Bound buffers are tracked here using threaded_resource::buffer_id_unique.
+ * 0 means unbound.
+ */
+ uint32_t vertex_buffers[PIPE_MAX_ATTRIBS];
+ uint32_t streamout_buffers[PIPE_MAX_SO_BUFFERS];
+ uint32_t const_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+ uint32_t shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
+ uint32_t image_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+ /* Don't use PIPE_MAX_SHADER_SAMPLER_VIEWS because it's too large. */
+ uint32_t sampler_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+
struct tc_batch batch_slots[TC_MAX_BATCHES];
+ struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS];
};
void threaded_resource_init(struct pipe_resource *res);
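
The commit message notes the lists are not used by anything yet. A purely hypothetical consumer (not part of this patch) could treat a buffer as possibly busy if its ID hash is set in the buffer list of any batch that has not finished executing, accepting false positives from hash collisions:

/* Hypothetical, simplified illustration - not code from this patch.
 * Assumes the caller knows which buffer lists still belong to unflushed
 * or unfinished batches. */
#include <stdbool.h>
#include <stdint.h>

#define ID_MASK     ((1u << 14) - 1)   /* stand-in for TC_BUFFER_ID_MASK */
#define LIST_WORDS  ((ID_MASK + 1) / 32)

static bool buffer_possibly_referenced(const uint32_t lists[][LIST_WORDS],
                                       const bool list_pending[],
                                       unsigned num_lists,
                                       uint32_t buffer_id_unique)
{
   uint32_t hash = buffer_id_unique & ID_MASK;

   for (unsigned i = 0; i < num_lists; i++) {
      /* A set bit may be a hash collision (false positive); a clear bit
       * guarantees that batch does not reference the buffer. */
      if (list_pending[i] && (lists[i][hash / 32] & (1u << (hash % 32))))
         return true;
   }
   return false;
}
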