summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2017-03-02 01:15:55 +0100
committer Marek Olšák <marek.olsak@amd.com> 2017-05-15 13:01:33 +0200
commit b8e552424eed58d95671da3191c7199cf171b3f0 (patch)
tree 04b3030072ef8285580f2b31162d0e45a0c92dd1 /src/gallium/auxiliary
parent dca19b1d427f0ecbc0bbd530d1fc3f6c0ce2b5c1 (diff)
gallium/util: add threaded_context as a pipe_context wrapper
v2: - rename num_calls -> num_call_slots (for tc_call)
    - rename num_calls -> num_total_call_slots (for tc_batch)
    - rename num_offloaded/direct_calls -> num_offloaded/direct_slots
    - declare slot[0] instead of slot[1]
    - remove no-op leftover code from tc_draw_vbo
    - use tc_set_resource_reference to fill threaded_transfer
    - fix map flags for sparse buffers
    - cosmetic changes

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/Makefile.sources 3
-rw-r--r-- src/gallium/auxiliary/util/u_threaded_context.c 2305
-rw-r--r-- src/gallium/auxiliary/util/u_threaded_context.h 349
-rw-r--r-- src/gallium/auxiliary/util/u_threaded_context_calls.h 66
4 files changed, 2723 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index dbdb3ca8152..baebee67a60 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -301,6 +301,9 @@ C_SOURCES := \
util/u_time.h \
util/u_transfer.c \
util/u_transfer.h \
+ util/u_threaded_context.c \
+ util/u_threaded_context.h \
+ util/u_threaded_context_calls.h \
util/u_upload_mgr.c \
util/u_upload_mgr.h \
util/u_vbuf.c \
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
new file mode 100644
index 00000000000..b44430fd096
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -0,0 +1,2305 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_threaded_context.h"
+#include "util/u_cpu_detect.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+
+/* Debug level of this file: 0 = disabled, 1 = assertions, 2 = printfs.
+ * It selects the definitions of the tc_* debug helpers below. */
+#define TC_DEBUG 0
+
+/* With TC_DEBUG >= 1 tc_assert is assert; otherwise it expands to nothing,
+ * so its argument is not evaluated. */
+#if TC_DEBUG >= 1
+#define tc_assert assert
+#else
+#define tc_assert(x)
+#endif
+
+/* With TC_DEBUG >= 2 these map to the functions of the same name;
+ * otherwise tc_printf expands to nothing and the other two expand to 0. */
+#if TC_DEBUG >= 2
+#define tc_printf printf
+#define tc_asprintf asprintf
+#define tc_strcmp strcmp
+#else
+#define tc_printf(...)
+#define tc_asprintf(...) 0
+#define tc_strcmp(...) 0
+#endif
+
+/* Marker value that the tc_assert checks in this file compare the sentinel
+ * fields of batches and calls against. */
+#define TC_SENTINEL 0x5ca1ab1e
+
+/* Identifiers of the calls that can be recorded in a batch. One
+ * TC_CALL_<name> enumerator is generated per CALL(name) entry of
+ * u_threaded_context_calls.h; TC_NUM_CALLS follows the last entry. */
+enum tc_call_id {
+#define CALL(name) TC_CALL_##name,
+#include "u_threaded_context_calls.h"
+#undef CALL
+ TC_NUM_CALLS,
+};
+
+/* Signature of the tc_call_* functions: replay one recorded call on "pipe"
+ * using the data stored in "payload". */
+typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload);
+
+/* Table indexed by tc_call_id (see tc_batch_execute). Only declared here;
+ * its initializer is not in this part of the file. */
+static const tc_execute execute_func[TC_NUM_CALLS];
+
+/* Debug-only validation of one batch: both sentinel fields must hold
+ * TC_SENTINEL and the slot count must not exceed TC_CALLS_PER_BATCH.
+ * Every check is a tc_assert, so nothing happens unless TC_DEBUG >= 1.
+ */
+static void
+tc_batch_check(struct tc_batch *batch)
+{
+   /* Sentinels first, then the capacity limit. */
+   tc_assert(batch->sentinel == TC_SENTINEL);
+   tc_assert(batch->sentinel2 == TC_SENTINEL);
+
+   tc_assert(batch->num_total_call_slots <= TC_CALLS_PER_BATCH);
+}
+
+/* Debug-only validation of the whole context: run tc_batch_check on each of
+ * the TC_MAX_BATCHES batch slots and check that every slot refers to the
+ * same pipe_context as the threaded context itself.
+ */
+static void
+tc_debug_check(struct threaded_context *tc)
+{
+   unsigned slot = 0;
+
+   while (slot < TC_MAX_BATCHES) {
+      tc_batch_check(&tc->batch_slots[slot]);
+      tc_assert(tc->batch_slots[slot].pipe == tc->pipe);
+      slot++;
+   }
+}
+
+/* Replay every call recorded in a batch on the batch's pipe_context, then
+ * mark the batch empty.
+ *
+ * Used both as the job function given to util_queue_add_job
+ * (see tc_batch_flush) and called directly from _tc_sync.
+ *
+ * \param job           the struct tc_batch to execute
+ * \param thread_index  not used by this function
+ */
+static void
+tc_batch_execute(void *job, int thread_index)
+{
+   struct tc_batch *batch = job;
+   struct pipe_context *pipe = batch->pipe;
+   struct tc_call *end = &batch->call[batch->num_total_call_slots];
+   struct tc_call *call = batch->call;
+
+   tc_batch_check(batch);
+
+   /* Calls are variable-sized; each one records how many slots it spans. */
+   while (call != end) {
+      tc_assert(call->sentinel == TC_SENTINEL);
+      execute_func[call->call_id](pipe, &call->payload);
+      call += call->num_call_slots;
+   }
+
+   tc_batch_check(batch);
+   batch->num_total_call_slots = 0;
+}
+
+/* Hand the batch at tc->next over to tc->queue with tc_batch_execute as the
+ * job function, and advance to the following batch slot.
+ * The batch is expected to be non-empty (checked by tc_assert only).
+ */
+static void
+tc_batch_flush(struct threaded_context *tc)
+{
+ struct tc_batch *next = &tc->batch_slots[tc->next];
+
+ tc_assert(next->num_total_call_slots != 0);
+ tc_batch_check(next);
+ tc_debug_check(tc);
+ /* Statistics: count the slots that were handed to the queue. */
+ p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
+
+ /* The batch's own fence is passed along; _tc_sync waits on the fence of
+ * the last queued batch. */
+ util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
+ NULL);
+ /* Remember which batch was queued last and move on to the next slot,
+ * wrapping around after TC_MAX_BATCHES. */
+ tc->last = tc->next;
+ tc->next = (tc->next + 1) % TC_MAX_BATCHES;
+}
+
+/* This is the function that adds variable-sized calls into the current
+ * batch. It also flushes the batch if there is not enough space there.
+ * All other higher-level "add" functions use it.
+ *
+ * \param tc            threaded context whose current batch receives the call
+ * \param id            call identifier stored in the call header
+ * \param payload_size  number of payload bytes the caller wants to fill
+ * \return pointer to the payload of the new call; its contents are not
+ *         initialized by this function
+ */
+static union tc_payload *
+tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
+ unsigned payload_size)
+{
+ struct tc_batch *next = &tc->batch_slots[tc->next];
+ /* Header plus payload, rounded up to a whole number of tc_call slots. */
+ unsigned total_size = offsetof(struct tc_call, payload) + payload_size;
+ unsigned num_call_slots = DIV_ROUND_UP(total_size, sizeof(struct tc_call));
+
+ tc_debug_check(tc);
+
+ /* Not enough room left: queue the current batch and use the next one. */
+ if (unlikely(next->num_total_call_slots + num_call_slots > TC_CALLS_PER_BATCH)) {
+ tc_batch_flush(tc);
+ next = &tc->batch_slots[tc->next];
+ tc_assert(next->num_total_call_slots == 0);
+ }
+
+ tc_assert(util_queue_fence_is_signalled(&next->fence));
+
+ /* Reserve the slots at the end of the batch. */
+ struct tc_call *call = &next->call[next->num_total_call_slots];
+ next->num_total_call_slots += num_call_slots;
+
+ /* Fill in the header that tc_batch_execute reads. */
+ call->sentinel = TC_SENTINEL;
+ call->call_id = id;
+ call->num_call_slots = num_call_slots;
+
+ tc_debug_check(tc);
+ return &call->payload;
+}
+
+/* Add a call whose payload is exactly one "struct type" and return the
+ * payload cast to that type. */
+#define tc_add_struct_typed_call(tc, execute, type) \
+ ((struct type*)tc_add_sized_call(tc, execute, sizeof(struct type)))
+
+/* Add a call whose payload is a "struct type" followed by num_slots elements
+ * of its trailing "slot" array, and return the payload cast to that type. */
+#define tc_add_slot_based_call(tc, execute, type, num_slots) \
+ ((struct type*)tc_add_sized_call(tc, execute, \
+ sizeof(struct type) + \
+ sizeof(((struct type*)NULL)->slot[0]) * \
+ num_slots))
+
+/* Add a call that requests no extra payload bytes (payload_size 0).
+ * Callers in this file store a single pointer, such as a query, in the
+ * returned payload.
+ */
+static union tc_payload *
+tc_add_small_call(struct threaded_context *tc, enum tc_call_id id)
+{
+   const unsigned no_extra_payload = 0;
+
+   return tc_add_sized_call(tc, id, no_extra_payload);
+}
+
+/* Make sure every recorded call has been executed before returning.
+ *
+ * Waits for the last queued batch if its fence is not signalled, then runs
+ * the calls of the current (not yet queued) batch directly via
+ * tc_batch_execute. Use the tc_sync / tc_sync_msg macros, which supply
+ * "func".
+ *
+ * \param tc    threaded context to synchronize
+ * \param info  extra text for the debug message
+ * \param func  name of the calling function, for the debug message
+ */
+static void
+_tc_sync(struct threaded_context *tc, const char *info, const char *func)
+{
+ struct tc_batch *last = &tc->batch_slots[tc->last];
+ struct tc_batch *next = &tc->batch_slots[tc->next];
+ bool synced = false;
+
+ tc_debug_check(tc);
+
+ /* Only wait for queued calls... */
+ if (!util_queue_fence_is_signalled(&last->fence)) {
+ util_queue_fence_wait(&last->fence);
+ synced = true;
+ }
+
+ tc_debug_check(tc);
+
+ /* .. and execute unflushed calls directly. */
+ if (next->num_total_call_slots) {
+ p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
+ tc_batch_execute(next, 0);
+ synced = true;
+ }
+
+ /* Only count (and, at TC_DEBUG >= 2, report) syncs that did some work;
+ * syncs requested by "tc_destroy" are counted but not printed. */
+ if (synced) {
+ p_atomic_inc(&tc->num_syncs);
+
+ if (tc_strcmp(func, "tc_destroy") != 0)
+ tc_printf("sync %s %s\n", func, info);
+ }
+
+ tc_debug_check(tc);
+}
+
+/* Convenience wrappers for _tc_sync that pass the calling function's name
+ * (__func__); tc_sync_msg additionally passes an info string. */
+#define tc_sync(tc) _tc_sync(tc, "", __func__)
+#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
+
+/* Make *dst refer to src through pipe_resource_reference, ignoring whatever
+ * value *dst held before.
+ *
+ * *dst is overwritten with NULL first; callers in this file pass fields of
+ * freshly added call payloads, which tc_add_sized_call does not initialize.
+ */
+static void
+tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
+{
+   dst[0] = NULL;
+   pipe_resource_reference(dst, src);
+}
+
+/* Initialize the threaded_resource fields of a resource: "latest" points at
+ * the resource itself, the valid buffer range is initialized and used as
+ * the base range, and the shared / user-pointer flags are cleared.
+ */
+void
+threaded_resource_init(struct pipe_resource *res)
+{
+   struct threaded_resource *tres = threaded_resource(res);
+
+   tres->is_user_ptr = false;
+   tres->is_shared = false;
+
+   tres->latest = &tres->b;
+
+   util_range_init(&tres->valid_buffer_range);
+   tres->base_valid_buffer_range = &tres->valid_buffer_range;
+}
+
+/* Release what threaded_resource_init and later buffer invalidation set up:
+ * drop the reference held in "latest" if it points at a different resource
+ * than this one, and destroy the valid buffer range.
+ */
+void
+threaded_resource_deinit(struct pipe_resource *res)
+{
+   struct threaded_resource *tres = threaded_resource(res);
+   const bool has_replacement = tres->latest != &tres->b;
+
+   if (has_replacement)
+      pipe_resource_reference(&tres->latest, NULL);
+
+   util_range_destroy(&tres->valid_buffer_range);
+}
+
+/* Synchronize the threaded context behind "pipe" and return pipe->priv
+ * cast to a pipe_context.
+ *
+ * If "pipe" is NULL or has no priv pointer, it is returned unchanged and no
+ * synchronization takes place.
+ */
+struct pipe_context *
+threaded_context_unwrap_sync(struct pipe_context *pipe)
+{
+   if (pipe == NULL || pipe->priv == NULL)
+      return pipe;
+
+   tc_sync(threaded_context(pipe));
+
+   return (struct pipe_context*)pipe->priv;
+}
+
+
+/********************************************************************
+ * simple functions
+ */
+
+/* Generates the pair of functions for a pipe_context entry point that takes
+ * exactly one parameter of type "type":
+ *  - tc_call_<func> replays the call from the payload,
+ *  - tc_<func> records the call by copying the parameter into the payload.
+ *
+ * "qualifier" and "deref" form the parameter declaration; "deref" is also
+ * applied when copying the parameter, and "deref2" when reading the payload
+ * back. The m_payload argument is not referenced by the macro body.
+ */
+#define TC_FUNC1(func, m_payload, qualifier, type, deref, deref2) \
+ static void \
+ tc_call_##func(struct pipe_context *pipe, union tc_payload *payload) \
+ { \
+ pipe->func(pipe, deref2((type*)payload)); \
+ } \
+ \
+ static void \
+ tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
+ { \
+ struct threaded_context *tc = threaded_context(_pipe); \
+ type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, sizeof(type)); \
+ *p = deref(param); \
+ }
+
+/* Entry points generated with TC_FUNC1. Scalar parameters are taken by
+ * value (empty deref, "*" as deref2); structures are taken by const pointer
+ * and copied into the payload ("*" as deref, empty deref2). */
+TC_FUNC1(set_active_query_state, flags, , boolean, , *)
+
+TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, )
+TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, *, )
+TC_FUNC1(set_clip_state, clip_state, const, struct pipe_clip_state, *, )
+TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *)
+TC_FUNC1(set_min_samples, min_samples, , unsigned, , *)
+TC_FUNC1(set_polygon_stipple, polygon_stipple, const, struct pipe_poly_stipple, *, )
+
+TC_FUNC1(texture_barrier, flags, , unsigned, , *)
+TC_FUNC1(memory_barrier, flags, , unsigned, , *)
+
+
+/********************************************************************
+ * queries
+ */
+
+/* Create a query by calling the wrapped context's create_query directly;
+ * no call is recorded and no synchronization is performed here.
+ */
+static struct pipe_query *
+tc_create_query(struct pipe_context *_pipe, unsigned query_type,
+                unsigned index)
+{
+   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
+
+   return pipe->create_query(pipe, query_type, index);
+}
+
+/* Create a batch query by calling the wrapped context's create_batch_query
+ * directly; no call is recorded and no synchronization is performed here.
+ */
+static struct pipe_query *
+tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
+                      unsigned *query_types)
+{
+   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
+
+   return pipe->create_batch_query(pipe, num_queries, query_types);
+}
+
+/* Execute a recorded destroy_query: the payload holds the query pointer. */
+static void
+tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_query *query = payload->query;
+
+   pipe->destroy_query(pipe, query);
+}
+
+/* Record a destroy_query call.
+ *
+ * If the query is still linked into a list through head_unflushed, it is
+ * unlinked immediately; the actual destruction is deferred to
+ * tc_call_destroy_query.
+ */
+static void
+tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_query *tq = threaded_query(query);
+   union tc_payload *payload;
+
+   if (tq->head_unflushed.next != NULL)
+      LIST_DEL(&tq->head_unflushed);
+
+   payload = tc_add_small_call(tc, TC_CALL_destroy_query);
+   payload->query = query;
+}
+
+/* Execute a recorded begin_query; its return value is discarded. */
+static void
+tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_query *query = payload->query;
+
+   pipe->begin_query(pipe, query);
+}
+
+/* Record a begin_query call.
+ *
+ * \return always true; the result of the real begin_query is not available
+ *         at this point and is ignored when the call is replayed.
+ */
+static boolean
+tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+
+   tc_add_small_call(tc, TC_CALL_begin_query)->query = query;
+
+   /* we don't care about the return value for this call */
+   return true;
+}
+
+/* Execute a recorded end_query; its return value is discarded. */
+static void
+tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_query *query = payload->query;
+
+   pipe->end_query(pipe, query);
+}
+
+/* Record an end_query call and mark the query as not flushed.
+ *
+ * The query is added to tc->unflushed_queries unless it is already linked
+ * into a list through head_unflushed.
+ *
+ * \return always true; the result of the real end_query is ignored.
+ */
+static bool
+tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_query *tq = threaded_query(query);
+
+   tc_add_small_call(tc, TC_CALL_end_query)->query = query;
+
+   tq->flushed = false;
+
+   if (tq->head_unflushed.next == NULL)
+      LIST_ADD(&tq->head_unflushed, &tc->unflushed_queries);
+
+   /* we don't care about the return value for this call */
+   return true;
+}
+
+/* Fetch a query result from the wrapped context.
+ *
+ * If the query is not marked flushed, all recorded calls are synchronized
+ * first. On success the query is marked flushed and unlinked from the list
+ * it is on, if any.
+ *
+ * \return whether the wrapped context's get_query_result succeeded
+ */
+static boolean
+tc_get_query_result(struct pipe_context *_pipe,
+                    struct pipe_query *query, boolean wait,
+                    union pipe_query_result *result)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_query *tq = threaded_query(query);
+   struct pipe_context *pipe = tc->pipe;
+
+   if (!tq->flushed)
+      tc_sync_msg(tc, wait ? "wait" : "nowait");
+
+   if (!pipe->get_query_result(pipe, query, wait, result))
+      return false;
+
+   tq->flushed = true;
+   if (tq->head_unflushed.next != NULL)
+      LIST_DEL(&tq->head_unflushed);
+
+   return true;
+}
+
+/* Payload of TC_CALL_get_query_result_resource: the arguments of
+ * get_query_result_resource. A reference on "resource" is taken when the
+ * call is recorded and dropped after it has been executed. */
+struct tc_query_result_resource {
+ struct pipe_query *query;
+ boolean wait;
+ enum pipe_query_value_type result_type;
+ int index;
+ struct pipe_resource *resource;
+ unsigned offset;
+};
+
+/* Execute a recorded get_query_result_resource and then drop the resource
+ * reference that was taken when the call was recorded.
+ */
+static void
+tc_call_get_query_result_resource(struct pipe_context *pipe,
+                                  union tc_payload *payload)
+{
+   struct tc_query_result_resource *args =
+      (struct tc_query_result_resource *)payload;
+
+   pipe->get_query_result_resource(pipe, args->query, args->wait,
+                                   args->result_type, args->index,
+                                   args->resource, args->offset);
+
+   pipe_resource_reference(&args->resource, NULL);
+}
+
+/* Record a get_query_result_resource call. All arguments are copied into
+ * the payload and a reference is taken on "resource".
+ */
+static void
+tc_get_query_result_resource(struct pipe_context *_pipe,
+                             struct pipe_query *query, boolean wait,
+                             enum pipe_query_value_type result_type, int index,
+                             struct pipe_resource *resource, unsigned offset)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_query_result_resource *args =
+      tc_add_struct_typed_call(tc, TC_CALL_get_query_result_resource,
+                               tc_query_result_resource);
+
+   tc_set_resource_reference(&args->resource, resource);
+
+   args->query = query;
+   args->wait = wait;
+   args->result_type = result_type;
+   args->index = index;
+   args->offset = offset;
+}
+
+/* Payload of TC_CALL_render_condition: the arguments of render_condition. */
+struct tc_render_condition {
+ struct pipe_query *query;
+ bool condition;
+ unsigned mode;
+};
+
+/* Execute a recorded render_condition call. */
+static void
+tc_call_render_condition(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_render_condition *args = (struct tc_render_condition *)payload;
+
+   pipe->render_condition(pipe, args->query, args->condition, args->mode);
+}
+
+/* Record a render_condition call by copying its arguments into the
+ * payload. No reference is taken on the query.
+ */
+static void
+tc_render_condition(struct pipe_context *_pipe,
+                    struct pipe_query *query, boolean condition,
+                    uint mode)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_render_condition *args =
+      tc_add_struct_typed_call(tc, TC_CALL_render_condition,
+                               tc_render_condition);
+
+   args->mode = mode;
+   args->condition = condition;
+   args->query = query;
+}
+
+
+/********************************************************************
+ * constant (immutable) states
+ */
+
+/* Generates tc_create_<name>_state, which calls the wrapped context's
+ * create_<name>_state directly with a pipe_<sname>_state and returns its
+ * result; nothing is recorded. */
+#define TC_CSO_CREATE(name, sname) \
+ static void * \
+ tc_create_##name##_state(struct pipe_context *_pipe, \
+ const struct pipe_##sname##_state *state) \
+ { \
+ struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
+ return pipe->create_##name##_state(pipe, state); \
+ }
+
+/* Bind and delete are recorded calls that carry the CSO pointer as their
+ * payload (see TC_FUNC1). */
+#define TC_CSO_BIND(name) TC_FUNC1(bind_##name##_state, cso, , void *, , *)
+#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, cso, , void *, , *)
+
+/* Create + bind + delete for a state whose template struct is named
+ * pipe_<sname>_state. */
+#define TC_CSO_WHOLE2(name, sname) \
+ TC_CSO_CREATE(name, sname) \
+ TC_CSO_BIND(name) \
+ TC_CSO_DELETE(name)
+
+/* Same as TC_CSO_WHOLE2 when the state and its template share one name. */
+#define TC_CSO_WHOLE(name) TC_CSO_WHOLE2(name, name)
+
+/* Constant state objects generated with the TC_CSO_* macros. The shader
+ * stages all use the pipe_shader_state template. Samplers get only create
+ * and delete here, vertex elements only bind and delete; their remaining
+ * entry points are written by hand below. */
+TC_CSO_WHOLE(blend)
+TC_CSO_WHOLE(rasterizer)
+TC_CSO_WHOLE(depth_stencil_alpha)
+TC_CSO_WHOLE(compute)
+TC_CSO_WHOLE2(fs, shader)
+TC_CSO_WHOLE2(vs, shader)
+TC_CSO_WHOLE2(gs, shader)
+TC_CSO_WHOLE2(tcs, shader)
+TC_CSO_WHOLE2(tes, shader)
+TC_CSO_CREATE(sampler, sampler)
+TC_CSO_DELETE(sampler)
+TC_CSO_BIND(vertex_elements)
+TC_CSO_DELETE(vertex_elements)
+
+/* Create a vertex elements state by calling the wrapped context directly.
+ * Written by hand because the entry point takes a count and an array, which
+ * does not match the signature generated by TC_CSO_CREATE.
+ */
+static void *
+tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
+                                const struct pipe_vertex_element *elems)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *pipe = tc->pipe;
+
+   return pipe->create_vertex_elements_state(pipe, count, elems);
+}
+
+/* Payload of TC_CALL_bind_sampler_states: shader stage, first slot and
+ * number of states, followed by "count" sampler state pointers. */
+struct tc_sampler_states {
+ ubyte shader, start, count;
+ void *slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded bind_sampler_states call. */
+static void
+tc_call_bind_sampler_states(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_sampler_states *args = (struct tc_sampler_states *)payload;
+
+   pipe->bind_sampler_states(pipe, args->shader, args->start, args->count,
+                             args->slot);
+}
+
+/* Record a bind_sampler_states call, copying the "count" state pointers
+ * into the payload. Nothing is recorded when count is 0.
+ */
+static void
+tc_bind_sampler_states(struct pipe_context *_pipe,
+                       enum pipe_shader_type shader,
+                       unsigned start, unsigned count, void **states)
+{
+   struct threaded_context *tc;
+   struct tc_sampler_states *args;
+
+   if (count == 0)
+      return;
+
+   tc = threaded_context(_pipe);
+   args = tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states,
+                                 tc_sampler_states, count);
+
+   args->shader = shader;
+   args->start = start;
+   args->count = count;
+   memcpy(args->slot, states, count * sizeof(states[0]));
+}
+
+
+/********************************************************************
+ * immediate states
+ */
+
+/* Execute a recorded set_framebuffer_state and then release the surface
+ * references (color buffers and depth/stencil) held by the payload.
+ */
+static void
+tc_call_set_framebuffer_state(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state *)payload;
+
+   pipe->set_framebuffer_state(pipe, fb);
+
+   const unsigned num_cbufs = fb->nr_cbufs;
+   unsigned i = 0;
+
+   while (i < num_cbufs) {
+      pipe_surface_reference(&fb->cbufs[i], NULL);
+      i++;
+   }
+   pipe_surface_reference(&fb->zsbuf, NULL);
+}
+
+/* Record a set_framebuffer_state call.
+ *
+ * The scalar fields are copied, and a reference is taken on the first
+ * nr_cbufs color surfaces and on the depth/stencil surface. Payload entries
+ * of cbufs beyond nr_cbufs are left unwritten.
+ */
+static void
+tc_set_framebuffer_state(struct pipe_context *_pipe,
+                         const struct pipe_framebuffer_state *fb)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_framebuffer_state *copy =
+      tc_add_struct_typed_call(tc, TC_CALL_set_framebuffer_state,
+                               pipe_framebuffer_state);
+   const unsigned num_cbufs = fb->nr_cbufs;
+
+   copy->width = fb->width;
+   copy->height = fb->height;
+   copy->samples = fb->samples;
+   copy->layers = fb->layers;
+   copy->nr_cbufs = num_cbufs;
+
+   /* The payload is uninitialized, so clear each pointer before taking
+    * the reference. */
+   for (unsigned idx = 0; idx < num_cbufs; idx++) {
+      copy->cbufs[idx] = NULL;
+      pipe_surface_reference(&copy->cbufs[idx], fb->cbufs[idx]);
+   }
+
+   copy->zsbuf = NULL;
+   pipe_surface_reference(&copy->zsbuf, fb->zsbuf);
+}
+
+/* Execute a recorded set_tess_state. The payload holds six floats: the four
+ * outer levels followed by the two inner levels.
+ */
+static void
+tc_call_set_tess_state(struct pipe_context *pipe, union tc_payload *payload)
+{
+   float *levels = (float*)payload;
+   float *outer = &levels[0];
+   float *inner = &levels[4];
+
+   pipe->set_tess_state(pipe, outer, inner);
+}
+
+/* Record a set_tess_state call: the four outer and two inner default
+ * levels are stored back to back as six floats in the payload.
+ */
+static void
+tc_set_tess_state(struct pipe_context *_pipe,
+                  const float default_outer_level[4],
+                  const float default_inner_level[2])
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   float *levels = (float*)tc_add_sized_call(tc, TC_CALL_set_tess_state,
+                                             sizeof(float) * 6);
+
+   memcpy(&levels[0], default_outer_level, 4 * sizeof(float));
+   memcpy(&levels[4], default_inner_level, 2 * sizeof(float));
+}
+
+/* Payload of TC_CALL_set_constant_buffer: shader stage, buffer index and
+ * the constant buffer description passed to the driver. The reference in
+ * cb.buffer is dropped after the call has been executed. */
+struct tc_constant_buffer {
+ ubyte shader, index;
+ struct pipe_constant_buffer cb;
+};
+
+/* Execute a recorded set_constant_buffer and then release the buffer
+ * reference held by the payload.
+ */
+static void
+tc_call_set_constant_buffer(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_constant_buffer *args = (struct tc_constant_buffer *)payload;
+
+   pipe->set_constant_buffer(pipe, args->shader, args->index, &args->cb);
+
+   pipe_resource_reference(&args->cb.buffer, NULL);
+}
+
+/* Record a set_constant_buffer call.
+ *
+ * Three cases are handled:
+ *  - cb has a user_buffer: its contents are uploaded through
+ *    tc->base.const_uploader and the payload refers to the uploaded buffer
+ *    and offset instead of the user pointer;
+ *  - cb has no user_buffer: cb is copied and a reference is taken on
+ *    cb->buffer;
+ *  - cb is NULL: the payload's constant buffer is zeroed.
+ */
+static void
+tc_set_constant_buffer(struct pipe_context *_pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *cb)
+{
+ struct threaded_context *tc = threaded_context(_pipe);
+ struct pipe_resource *buffer = NULL;
+ unsigned offset;
+
+ /* This must be done before adding set_constant_buffer, because it could
+ * generate e.g. transfer_unmap and flush partially-uninitialized
+ * set_constant_buffer to the driver if it was done afterwards.
+ */
+ if (cb && cb->user_buffer) {
+ u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64,
+ cb->user_buffer, &offset, &buffer);
+ }
+
+ struct tc_constant_buffer *p =
+ tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer,
+ tc_constant_buffer);
+ p->shader = shader;
+ p->index = index;
+
+ if (cb) {
+ if (cb->user_buffer) {
+ /* "buffer" and "offset" come from the upload above; the pointer is
+ * stored as-is, without taking another reference here. */
+ p->cb.buffer_size = cb->buffer_size;
+ p->cb.user_buffer = NULL;
+ p->cb.buffer_offset = offset;
+ p->cb.buffer = buffer;
+ } else {
+ /* Take the reference first; the memcpy then rewrites p->cb.buffer
+ * with the same pointer value. */
+ tc_set_resource_reference(&p->cb.buffer,
+ cb->buffer);
+ memcpy(&p->cb, cb, sizeof(*cb));
+ }
+ } else {
+ memset(&p->cb, 0, sizeof(*cb));
+ }
+}
+
+/* Payload of TC_CALL_set_scissor_states: first slot and number of states,
+ * followed by "count" scissor states. */
+struct tc_scissors {
+ ubyte start, count;
+ struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_scissor_states call. */
+static void
+tc_call_set_scissor_states(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_scissors *args = (struct tc_scissors *)payload;
+
+   pipe->set_scissor_states(pipe, args->start, args->count, args->slot);
+}
+
+/* Record a set_scissor_states call, copying "count" scissor states into
+ * the payload. Unlike some sibling setters, a call is recorded even when
+ * count is 0.
+ */
+static void
+tc_set_scissor_states(struct pipe_context *_pipe,
+                      unsigned start, unsigned count,
+                      const struct pipe_scissor_state *states)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_scissors *args =
+      tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors,
+                             count);
+
+   args->count = count;
+   args->start = start;
+   memcpy(args->slot, states, count * sizeof(states[0]));
+}
+
+/* Payload of TC_CALL_set_viewport_states: first slot and number of states,
+ * followed by "count" viewport states. */
+struct tc_viewports {
+ ubyte start, count;
+ struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_viewport_states call. */
+static void
+tc_call_set_viewport_states(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_viewports *args = (struct tc_viewports *)payload;
+
+   pipe->set_viewport_states(pipe, args->start, args->count, args->slot);
+}
+
+/* Record a set_viewport_states call, copying "count" viewport states into
+ * the payload. Nothing is recorded when count is 0.
+ */
+static void
+tc_set_viewport_states(struct pipe_context *_pipe,
+                       unsigned start, unsigned count,
+                       const struct pipe_viewport_state *states)
+{
+   struct threaded_context *tc;
+   struct tc_viewports *args;
+
+   if (count == 0)
+      return;
+
+   tc = threaded_context(_pipe);
+   args = tc_add_slot_based_call(tc, TC_CALL_set_viewport_states,
+                                 tc_viewports, count);
+
+   args->start = start;
+   args->count = count;
+   memcpy(args->slot, states, count * sizeof(states[0]));
+}
+
+/* Payload of TC_CALL_set_window_rectangles: the include flag and number of
+ * rectangles, followed by "count" rectangles. */
+struct tc_window_rects {
+ bool include;
+ ubyte count;
+ struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_window_rectangles call. */
+static void
+tc_call_set_window_rectangles(struct pipe_context *pipe,
+                              union tc_payload *payload)
+{
+   struct tc_window_rects *args = (struct tc_window_rects *)payload;
+
+   pipe->set_window_rectangles(pipe, args->include, args->count, args->slot);
+}
+
+/* Record a set_window_rectangles call, copying "count" rectangles into the
+ * payload. A call is recorded even when count is 0.
+ */
+static void
+tc_set_window_rectangles(struct pipe_context *_pipe, boolean include,
+                         unsigned count,
+                         const struct pipe_scissor_state *rects)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_window_rects *args =
+      tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles,
+                             tc_window_rects, count);
+
+   args->count = count;
+   args->include = include;
+   memcpy(args->slot, rects, count * sizeof(rects[0]));
+}
+
+/* Payload of TC_CALL_set_sampler_views: shader stage, first slot and number
+ * of views, followed by "count" view pointers. The payload holds a
+ * reference on each view until the call has been executed. */
+struct tc_sampler_views {
+ ubyte shader, start, count;
+ struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_sampler_views and then release the view references
+ * held by the payload.
+ */
+static void
+tc_call_set_sampler_views(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_sampler_views *args = (struct tc_sampler_views *)payload;
+   const unsigned num_views = args->count;
+
+   pipe->set_sampler_views(pipe, args->shader, args->start, args->count,
+                           args->slot);
+
+   for (unsigned idx = 0; idx < num_views; idx++)
+      pipe_sampler_view_reference(&args->slot[idx], NULL);
+}
+
+/* Record a set_sampler_views call.
+ *
+ * Nothing is recorded when count is 0. When "views" is NULL, "count" NULL
+ * view pointers are recorded; otherwise a reference is taken on each view.
+ */
+static void
+tc_set_sampler_views(struct pipe_context *_pipe,
+                     enum pipe_shader_type shader,
+                     unsigned start, unsigned count,
+                     struct pipe_sampler_view **views)
+{
+   struct threaded_context *tc;
+   struct tc_sampler_views *args;
+
+   if (count == 0)
+      return;
+
+   tc = threaded_context(_pipe);
+   args = tc_add_slot_based_call(tc, TC_CALL_set_sampler_views,
+                                 tc_sampler_views, count);
+
+   args->shader = shader;
+   args->start = start;
+   args->count = count;
+
+   if (views == NULL) {
+      memset(args->slot, 0, count * sizeof(views[0]));
+      return;
+   }
+
+   /* The payload is uninitialized, so clear each pointer before taking
+    * the reference. */
+   for (unsigned idx = 0; idx < count; idx++) {
+      args->slot[idx] = NULL;
+      pipe_sampler_view_reference(&args->slot[idx], views[idx]);
+   }
+}
+
+/* Payload of TC_CALL_set_shader_images: shader stage, first slot and number
+ * of images, followed by "count" image views. The payload holds a reference
+ * on each view's resource until the call has been executed. */
+struct tc_shader_images {
+ ubyte shader, start, count;
+ struct pipe_image_view slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_shader_images and then release the resource
+ * references held by the payload.
+ */
+static void
+tc_call_set_shader_images(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_shader_images *args = (struct tc_shader_images *)payload;
+   const unsigned num_images = args->count;
+
+   pipe->set_shader_images(pipe, args->shader, args->start, args->count,
+                           args->slot);
+
+   for (unsigned idx = 0; idx < num_images; idx++)
+      pipe_resource_reference(&args->slot[idx].resource, NULL);
+}
+
+/* Record a set_shader_images call.
+ *
+ * Nothing is recorded when count is 0. When "images" is NULL, "count"
+ * zeroed image views are recorded. Otherwise the views are copied, a
+ * reference is taken on each resource, and for writable views of
+ * PIPE_BUFFER resources the bound range is added to the resource's valid
+ * buffer range.
+ */
+static void
+tc_set_shader_images(struct pipe_context *_pipe,
+                     enum pipe_shader_type shader,
+                     unsigned start, unsigned count,
+                     const struct pipe_image_view *images)
+{
+   struct threaded_context *tc;
+   struct tc_shader_images *args;
+
+   if (count == 0)
+      return;
+
+   tc = threaded_context(_pipe);
+   args = tc_add_slot_based_call(tc, TC_CALL_set_shader_images,
+                                 tc_shader_images, count);
+
+   args->shader = shader;
+   args->start = start;
+   args->count = count;
+
+   if (images == NULL) {
+      memset(args->slot, 0, count * sizeof(images[0]));
+      return;
+   }
+
+   for (unsigned idx = 0; idx < count; idx++) {
+      const struct pipe_image_view *img = &images[idx];
+
+      tc_set_resource_reference(&args->slot[idx].resource, img->resource);
+
+      if ((img->access & PIPE_IMAGE_ACCESS_WRITE) &&
+          img->resource &&
+          img->resource->target == PIPE_BUFFER) {
+         struct threaded_resource *tres = threaded_resource(img->resource);
+
+         util_range_add(&tres->valid_buffer_range, img->u.buf.offset,
+                        img->u.buf.offset + img->u.buf.size);
+      }
+   }
+
+   /* Copies the remaining fields; the resource pointers are rewritten with
+    * the same values that were referenced above. */
+   memcpy(args->slot, images, count * sizeof(images[0]));
+}
+
+/* Payload of TC_CALL_set_shader_buffers: shader stage, first slot and
+ * number of buffers, followed by "count" shader buffers. The payload holds
+ * a reference on each buffer until the call has been executed. */
+struct tc_shader_buffers {
+ ubyte shader, start, count;
+ struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_shader_buffers and then release the buffer
+ * references held by the payload.
+ */
+static void
+tc_call_set_shader_buffers(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_shader_buffers *args = (struct tc_shader_buffers *)payload;
+   const unsigned num_buffers = args->count;
+
+   pipe->set_shader_buffers(pipe, args->shader, args->start, args->count,
+                            args->slot);
+
+   for (unsigned idx = 0; idx < num_buffers; idx++)
+      pipe_resource_reference(&args->slot[idx].buffer, NULL);
+}
+
+/* Record a set_shader_buffers call.
+ *
+ * Nothing is recorded when count is 0. When "buffers" is NULL, "count"
+ * zeroed entries are recorded. Otherwise each entry is copied field by
+ * field with a reference on its buffer, and the bound range of every
+ * non-NULL buffer is added to that resource's valid buffer range.
+ */
+static void
+tc_set_shader_buffers(struct pipe_context *_pipe, unsigned shader,
+                      unsigned start, unsigned count,
+                      const struct pipe_shader_buffer *buffers)
+{
+   struct threaded_context *tc;
+   struct tc_shader_buffers *args;
+
+   if (count == 0)
+      return;
+
+   tc = threaded_context(_pipe);
+   args = tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers,
+                                 tc_shader_buffers, count);
+
+   args->shader = shader;
+   args->start = start;
+   args->count = count;
+
+   if (buffers == NULL) {
+      memset(args->slot, 0, count * sizeof(buffers[0]));
+      return;
+   }
+
+   for (unsigned idx = 0; idx < count; idx++) {
+      const struct pipe_shader_buffer *in = &buffers[idx];
+      struct pipe_shader_buffer *out = &args->slot[idx];
+
+      tc_set_resource_reference(&out->buffer, in->buffer);
+      out->buffer_offset = in->buffer_offset;
+      out->buffer_size = in->buffer_size;
+
+      if (in->buffer == NULL)
+         continue;
+
+      struct threaded_resource *tres = threaded_resource(in->buffer);
+
+      util_range_add(&tres->valid_buffer_range, in->buffer_offset,
+                     in->buffer_offset + in->buffer_size);
+   }
+}
+
+/* Payload of TC_CALL_set_vertex_buffers: first slot and number of buffers.
+ * When "unbind" is true no slots follow and NULL is passed to the driver;
+ * otherwise "count" vertex buffers follow, each holding a reference on its
+ * resource until the call has been executed. */
+struct tc_vertex_buffers {
+ ubyte start, count;
+ bool unbind;
+ struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
+};
+
+/* Execute a recorded set_vertex_buffers.
+ *
+ * An unbind request passes NULL buffers to the driver. Otherwise the
+ * recorded buffers are passed and their resource references are released
+ * afterwards. User buffers are not expected here (tc_assert only).
+ */
+static void
+tc_call_set_vertex_buffers(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_vertex_buffers *args = (struct tc_vertex_buffers *)payload;
+   const unsigned num_buffers = args->count;
+   unsigned idx;
+
+   if (args->unbind) {
+      pipe->set_vertex_buffers(pipe, args->start, num_buffers, NULL);
+      return;
+   }
+
+   for (idx = 0; idx < num_buffers; idx++)
+      tc_assert(!args->slot[idx].is_user_buffer);
+
+   pipe->set_vertex_buffers(pipe, args->start, num_buffers, args->slot);
+
+   for (idx = 0; idx < num_buffers; idx++)
+      pipe_resource_reference(&args->slot[idx].buffer.resource, NULL);
+}
+
+/* Record a set_vertex_buffers call.
+ *
+ * Nothing is recorded when count is 0. When "buffers" is NULL, an unbind
+ * request without slots is recorded. Otherwise stride, offset and resource
+ * of each buffer are copied with a reference on the resource; user buffers
+ * are not expected (tc_assert only) and is_user_buffer is recorded as false.
+ */
+static void
+tc_set_vertex_buffers(struct pipe_context *_pipe,
+                      unsigned start, unsigned count,
+                      const struct pipe_vertex_buffer *buffers)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_vertex_buffers *args;
+
+   if (count == 0)
+      return;
+
+   if (buffers == NULL) {
+      args = tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers,
+                                    tc_vertex_buffers, 0);
+      args->start = start;
+      args->count = count;
+      args->unbind = true;
+      return;
+   }
+
+   args = tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers,
+                                 tc_vertex_buffers, count);
+   args->start = start;
+   args->count = count;
+   args->unbind = false;
+
+   for (unsigned idx = 0; idx < count; idx++) {
+      const struct pipe_vertex_buffer *in = &buffers[idx];
+      struct pipe_vertex_buffer *out = &args->slot[idx];
+
+      tc_assert(!in->is_user_buffer);
+      out->stride = in->stride;
+      out->is_user_buffer = false;
+      tc_set_resource_reference(&out->buffer.resource, in->buffer.resource);
+      out->buffer_offset = in->buffer_offset;
+   }
+}
+
+/* Payload of TC_CALL_set_stream_output_targets. Only the first "count"
+ * entries of both arrays are written when the call is recorded; the payload
+ * holds a reference on each of those targets until the call has been
+ * executed. */
+struct tc_stream_outputs {
+ unsigned count;
+ struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+ unsigned offsets[PIPE_MAX_SO_BUFFERS];
+};
+
+/* Execute a recorded set_stream_output_targets and then release the target
+ * references held by the payload.
+ */
+static void
+tc_call_set_stream_output_targets(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_stream_outputs *args = (struct tc_stream_outputs *)payload;
+   const unsigned num_targets = args->count;
+
+   pipe->set_stream_output_targets(pipe, num_targets, args->targets,
+                                   args->offsets);
+
+   for (unsigned idx = 0; idx < num_targets; idx++)
+      pipe_so_target_reference(&args->targets[idx], NULL);
+}
+
+/* Record a set_stream_output_targets call.
+ *
+ * A reference is taken on each of the "count" targets in "tgs"; it is
+ * released by tc_call_set_stream_output_targets after the call has been
+ * executed. The first "count" entries of "offsets" are copied.
+ *
+ * With count == 0 neither "tgs" nor "offsets" is dereferenced or passed to
+ * memcpy, so both may be NULL.
+ *
+ * \param count    number of targets; must not exceed PIPE_MAX_SO_BUFFERS
+ * \param tgs      array of "count" targets
+ * \param offsets  array of "count" offsets
+ */
+static void
+tc_set_stream_output_targets(struct pipe_context *_pipe,
+                             unsigned count,
+                             struct pipe_stream_output_target **tgs,
+                             const unsigned *offsets)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_stream_outputs *p =
+      tc_add_struct_typed_call(tc, TC_CALL_set_stream_output_targets,
+                               tc_stream_outputs);
+
+   /* The payload arrays hold at most PIPE_MAX_SO_BUFFERS entries. */
+   tc_assert(count <= PIPE_MAX_SO_BUFFERS);
+
+   for (unsigned i = 0; i < count; i++) {
+      p->targets[i] = NULL;
+      pipe_so_target_reference(&p->targets[i], tgs[i]);
+   }
+   p->count = count;
+
+   /* Passing a null pointer to memcpy is undefined even for a size of 0,
+    * and callers that unbind all targets may pass offsets == NULL. */
+   if (count)
+      memcpy(p->offsets, offsets, count * sizeof(p->offsets[0]));
+}
+
+/* Not recorded: synchronize all pending calls, then call the wrapped
+ * context's set_compute_resources directly.
+ */
+static void
+tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
+                         unsigned count, struct pipe_surface **resources)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+
+   driver->set_compute_resources(driver, start, count, resources);
+}
+
+/* Not recorded: synchronize all pending calls, then call the wrapped
+ * context's set_global_binding directly.
+ */
+static void
+tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
+                      unsigned count, struct pipe_resource **resources,
+                      uint32_t **handles)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+
+   driver->set_global_binding(driver, first, count, resources, handles);
+}
+
+
+/********************************************************************
+ * views
+ */
+
+/* Create a surface through the wrapped context (directly, nothing is
+ * recorded). On success the surface's context pointer is set to the
+ * threaded context rather than the wrapped one.
+ *
+ * \return the new surface, or NULL if the wrapped context returned NULL
+ */
+static struct pipe_surface *
+tc_create_surface(struct pipe_context *_pipe,
+                  struct pipe_resource *resource,
+                  const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
+   struct pipe_surface *surf = pipe->create_surface(pipe, resource, surf_tmpl);
+
+   if (surf != NULL)
+      surf->context = _pipe;
+
+   return surf;
+}
+
+/* Destroy a surface by calling the wrapped context directly; nothing is
+ * recorded and no synchronization is performed here.
+ */
+static void
+tc_surface_destroy(struct pipe_context *_pipe,
+                   struct pipe_surface *surf)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *pipe = tc->pipe;
+
+   pipe->surface_destroy(pipe, surf);
+}
+
+/* Create a sampler view through the wrapped context (directly, nothing is
+ * recorded). On success the view's context pointer is set to the threaded
+ * context rather than the wrapped one.
+ *
+ * \return the new view, or NULL if the wrapped context returned NULL
+ */
+static struct pipe_sampler_view *
+tc_create_sampler_view(struct pipe_context *_pipe,
+                       struct pipe_resource *resource,
+                       const struct pipe_sampler_view *templ)
+{
+   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
+   struct pipe_sampler_view *sview =
+      pipe->create_sampler_view(pipe, resource, templ);
+
+   if (sview != NULL)
+      sview->context = _pipe;
+
+   return sview;
+}
+
+/* Destroy a sampler view by calling the wrapped context directly; nothing
+ * is recorded and no synchronization is performed here.
+ */
+static void
+tc_sampler_view_destroy(struct pipe_context *_pipe,
+                        struct pipe_sampler_view *view)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *pipe = tc->pipe;
+
+   pipe->sampler_view_destroy(pipe, view);
+}
+
+/* Create a stream output target through the wrapped context.
+ *
+ * All pending calls are synchronized first, and the range
+ * [buffer_offset, buffer_offset + buffer_size) is added to the resource's
+ * valid buffer range. On success the target's context pointer is set to the
+ * threaded context rather than the wrapped one.
+ *
+ * \return the new target, or NULL if the wrapped context returned NULL
+ */
+static struct pipe_stream_output_target *
+tc_create_stream_output_target(struct pipe_context *_pipe,
+                               struct pipe_resource *res,
+                               unsigned buffer_offset,
+                               unsigned buffer_size)
+{
+   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
+   struct threaded_resource *tres = threaded_resource(res);
+   struct pipe_stream_output_target *target;
+
+   tc_sync(threaded_context(_pipe));
+
+   util_range_add(&tres->valid_buffer_range, buffer_offset,
+                  buffer_offset + buffer_size);
+
+   target = pipe->create_stream_output_target(pipe, res, buffer_offset,
+                                              buffer_size);
+   if (target != NULL)
+      target->context = _pipe;
+
+   return target;
+}
+
+/* Destroy a stream output target by calling the wrapped context directly;
+ * nothing is recorded and no synchronization is performed here.
+ */
+static void
+tc_stream_output_target_destroy(struct pipe_context *_pipe,
+                                struct pipe_stream_output_target *target)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *pipe = tc->pipe;
+
+   pipe->stream_output_target_destroy(pipe, target);
+}
+
+
+/********************************************************************
+ * transfer
+ */
+
+/* Payload of TC_CALL_replace_buffer_storage. */
+struct tc_replace_buffer_storage {
+   struct pipe_resource *dst;            /* buffer whose storage is replaced */
+   struct pipe_resource *src;            /* buffer providing the new storage */
+   tc_replace_buffer_storage_func func;  /* callback doing the replacement */
+};
+
+/**
+ * Execute an enqueued storage replacement: invoke the stored callback on
+ * (dst, src) and then release both resource references held by the payload.
+ */
+static void
+tc_call_replace_buffer_storage(struct pipe_context *pipe,
+                               union tc_payload *payload)
+{
+   struct tc_replace_buffer_storage *args =
+      (struct tc_replace_buffer_storage *)payload;
+
+   args->func(pipe, args->dst, args->src);
+
+   pipe_resource_reference(&args->dst, NULL);
+   pipe_resource_reference(&args->src, NULL);
+}
+
+/**
+ * Invalidate a buffer by allocating a fresh one and making it the "latest"
+ * storage of \p tbuf.
+ *
+ * Whether the buffer is idle can't be checked here, so the invalidation is
+ * done unconditionally. The replacement of the original buffer's storage is
+ * enqueued as a TC_CALL_replace_buffer_storage call.
+ *
+ * \return true if the buffer was invalidated; false if the buffer can't be
+ *         reallocated (shared, user pointer, sparse) or the allocation
+ *         failed.
+ */
+static bool
+tc_invalidate_buffer(struct threaded_context *tc,
+                     struct threaded_resource *tbuf)
+{
+   struct pipe_screen *screen = tc->base.screen;
+   struct tc_replace_buffer_storage *call;
+   struct pipe_resource *fresh;
+
+   /* Shared, pinned, and sparse buffers can't be reallocated. */
+   if (tbuf->is_shared || tbuf->is_user_ptr)
+      return false;
+   if (tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
+      return false;
+
+   fresh = screen->resource_create(screen, &tbuf->b);
+   if (!fresh)
+      return false;
+
+   /* Drop the previous "latest" buffer unless it is the buffer itself. */
+   if (tbuf->latest != &tbuf->b)
+      pipe_resource_reference(&tbuf->latest, NULL);
+   tbuf->latest = fresh;
+
+   /* Nothing in the new storage is valid yet, and the new buffer tracks its
+    * valid range through the original buffer.
+    */
+   util_range_set_empty(&tbuf->valid_buffer_range);
+   threaded_resource(fresh)->base_valid_buffer_range =
+      &tbuf->valid_buffer_range;
+
+   /* Enqueue storage replacement of the original buffer. */
+   call = tc_add_struct_typed_call(tc, TC_CALL_replace_buffer_storage,
+                                   tc_replace_buffer_storage);
+   call->func = tc->replace_buffer_storage;
+   tc_set_resource_reference(&call->dst, &tbuf->b);
+   tc_set_resource_reference(&call->src, fresh);
+   return true;
+}
+
+/**
+ * Rewrite the transfer usage flags of a buffer mapping so that the threaded
+ * context, not the driver, decides about invalidation and unsynchronized
+ * access.
+ *
+ * \param tc     threaded context
+ * \param tres   buffer being mapped
+ * \param usage  PIPE_TRANSFER_* flags requested by the caller
+ * \param offset start of the mapped range in bytes
+ * \param size   size of the mapped range in bytes
+ * \return the adjusted flags. Except for sparse buffers, the result always
+ *         contains TC_TRANSFER_MAP_NO_INVALIDATE and
+ *         TC_TRANSFER_MAP_IGNORE_VALID_RANGE.
+ *
+ * Side effect: may call tc_invalidate_buffer(), which reallocates the buffer
+ * and enqueues a storage replacement.
+ */
+static unsigned
+tc_improve_map_buffer_flags(struct threaded_context *tc,
+ struct threaded_resource *tres, unsigned usage,
+ unsigned offset, unsigned size)
+{
+ /* Sparse buffers can't be mapped directly and can't be reallocated
+ * (fully invalidated). That may just be a radeonsi limitation, but
+ * the threaded context must obey it with radeonsi.
+ */
+ if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
+ /* We can use DISCARD_RANGE instead of full discard. This is the only
+ * fast path for sparse buffers that doesn't need thread synchronization.
+ */
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+
+ /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
+ * The threaded context doesn't do unsynchronized mappings and invalida-
+ * tions of sparse buffers, therefore a correct driver behavior won't
+ * result in an incorrect behavior with the threaded context.
+ */
+ return usage;
+ }
+
+ /* Handle CPU reads trivially. */
+ if (usage & PIPE_TRANSFER_READ) {
+ /* Drivers aren't allowed to do buffer invalidations. */
+ return (usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) |
+ TC_TRANSFER_MAP_NO_INVALIDATE |
+ TC_TRANSFER_MAP_IGNORE_VALID_RANGE;
+ }
+
+ /* See if the buffer range being mapped has never been initialized,
+ * in which case it can be mapped unsynchronized. */
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+ !tres->is_shared &&
+ !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size))
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ /* If discarding the entire range, discard the whole resource instead. */
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
+ offset == 0 && size == tres->b.width0)
+ usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+
+ /* Discard the whole resource if needed. */
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+ if (tc_invalidate_buffer(tc, tres))
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ else
+ usage |= PIPE_TRANSFER_DISCARD_RANGE; /* fallback */
+ }
+ }
+
+ /* We won't need this flag anymore. */
+ /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
+ usage &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+
+ /* GL_AMD_pinned_memory and persistent mappings can't use staging
+ * buffers. */
+ if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_PERSISTENT) ||
+ tres->is_user_ptr)
+ usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
+
+ /* Unsynchronized buffer mappings don't have to synchronize the thread. */
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+ usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
+
+ /* Never invalidate inside the driver and never infer "unsynchronized". */
+ return usage |
+ TC_TRANSFER_MAP_NO_INVALIDATE |
+ TC_TRANSFER_MAP_IGNORE_VALID_RANGE;
+}
+
+/**
+ * Map a resource for CPU access.
+ *
+ * For buffers, the usage flags are first adjusted by
+ * tc_improve_map_buffer_flags(). If the result contains
+ * PIPE_TRANSFER_DISCARD_RANGE, the mapping is served from a staging
+ * allocation of the stream uploader and the driver's transfer_map is not
+ * called at all. Otherwise the driver is called, after synchronizing the
+ * thread unless TC_TRANSFER_MAP_THREADED_UNSYNC is set.
+ *
+ * \param transfer  receives the transfer object on success
+ * \return pointer to the mapped memory, or NULL on failure (including
+ *         failure to allocate the transfer object or the staging memory)
+ */
+static void *
+tc_transfer_map(struct pipe_context *_pipe,
+                struct pipe_resource *resource, unsigned level,
+                unsigned usage, const struct pipe_box *box,
+                struct pipe_transfer **transfer)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_resource *tres = threaded_resource(resource);
+   struct pipe_context *pipe = tc->pipe;
+
+   if (resource->target == PIPE_BUFFER) {
+      usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
+
+      /* Do a staging transfer within the threaded context. The driver should
+       * only get resource_copy_region.
+       */
+      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
+         struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
+         uint8_t *map;
+
+         /* The slab allocator can fail; don't dereference a NULL transfer. */
+         if (!ttrans)
+            return NULL;
+
+         ttrans->staging = NULL;
+
+         /* Over-allocate by (x % alignment) so that the returned pointer has
+          * the same misalignment as the mapped offset.
+          */
+         u_upload_alloc(tc->base.stream_uploader, 0,
+                        box->width + (box->x % tc->map_buffer_alignment),
+                        64, &ttrans->offset, &ttrans->staging, (void**)&map);
+         if (!map) {
+            slab_free(&tc->pool_transfers, ttrans);
+            return NULL;
+         }
+
+         tc_set_resource_reference(&ttrans->b.resource, resource);
+         ttrans->b.level = 0;
+         ttrans->b.usage = usage;
+         ttrans->b.box = *box;
+         ttrans->b.stride = 0;
+         ttrans->b.layer_stride = 0;
+         *transfer = &ttrans->b;
+         return map + (box->x % tc->map_buffer_alignment);
+      }
+   }
+
+   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
+   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
+      tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" :
+                      usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" :
+                      usage & PIPE_TRANSFER_READ ? " read" : " ??");
+
+   return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource,
+                             level, usage, box, transfer);
+}
+
+/* Payload of TC_CALL_transfer_flush_region. */
+struct tc_transfer_flush_region {
+   struct pipe_transfer *transfer;  /* transfer being flushed */
+   struct pipe_box box;             /* region as given by the caller */
+};
+
+/**
+ * Execute an enqueued transfer_flush_region call on the driver context.
+ */
+static void
+tc_call_transfer_flush_region(struct pipe_context *pipe,
+                              union tc_payload *payload)
+{
+   struct tc_transfer_flush_region *args =
+      (struct tc_transfer_flush_region *)payload;
+
+   pipe->transfer_flush_region(pipe, args->transfer, &args->box);
+}
+
+/* Payload of TC_CALL_resource_copy_region; mirrors the call's parameters. */
+struct tc_resource_copy_region {
+   struct pipe_resource *dst;
+   unsigned dst_level;
+   unsigned dstx;
+   unsigned dsty;
+   unsigned dstz;
+   struct pipe_resource *src;
+   unsigned src_level;
+   struct pipe_box src_box;
+};
+
+/* Forward declaration: the buffer flush code below needs it before the
+ * definition appears.
+ */
+static void
+tc_resource_copy_region(struct pipe_context *_pipe,
+                        struct pipe_resource *dst, unsigned dst_level,
+                        unsigned dstx, unsigned dsty, unsigned dstz,
+                        struct pipe_resource *src, unsigned src_level,
+                        const struct pipe_box *src_box);
+
+/**
+ * Flush a written range of a buffer transfer.
+ *
+ * If the transfer uses a staging buffer, a copy from the staging buffer into
+ * the real buffer is enqueued. In all cases the range is added to the valid
+ * range that the buffer's base_valid_buffer_range points to.
+ *
+ * \param box  range to flush, in buffer coordinates
+ */
+static void
+tc_buffer_do_flush_region(struct threaded_context *tc,
+                          struct threaded_transfer *ttrans,
+                          const struct pipe_box *box)
+{
+   struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
+
+   if (ttrans->staging) {
+      struct pipe_box staging_box;
+
+      /* The staging data starts at the same misalignment as the mapping. */
+      u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
+               box->width, &staging_box);
+
+      tc_resource_copy_region(&tc->base,
+                              ttrans->b.resource, 0, box->x, 0, 0,
+                              ttrans->staging, 0, &staging_box);
+   }
+
+   util_range_add(tres->base_valid_buffer_range,
+                  box->x, box->x + box->width);
+}
+
+/**
+ * Flush a sub-range of a mapped transfer.
+ *
+ * \param rel_box  region relative to the start of the transfer's box
+ *
+ * For buffers mapped with both WRITE and FLUSH_EXPLICIT, the region is
+ * flushed by the threaded context itself. Staging buffer transfers stop
+ * there; everything else is additionally enqueued for the driver.
+ */
+static void
+tc_transfer_flush_region(struct pipe_context *_pipe,
+                         struct pipe_transfer *transfer,
+                         const struct pipe_box *rel_box)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_transfer *ttrans = threaded_transfer(transfer);
+   struct threaded_resource *tres = threaded_resource(transfer->resource);
+   const unsigned explicit_write = PIPE_TRANSFER_WRITE |
+                                   PIPE_TRANSFER_FLUSH_EXPLICIT;
+   struct tc_transfer_flush_region *call;
+
+   if (tres->b.target == PIPE_BUFFER) {
+      if ((transfer->usage & explicit_write) == explicit_write) {
+         struct pipe_box abs_box;
+
+         /* Convert the relative region into buffer coordinates. */
+         u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &abs_box);
+         tc_buffer_do_flush_region(tc, ttrans, &abs_box);
+      }
+
+      /* Staging transfers don't send the call to the driver. */
+      if (ttrans->staging)
+         return;
+   }
+
+   call = tc_add_struct_typed_call(tc, TC_CALL_transfer_flush_region,
+                                   tc_transfer_flush_region);
+   call->transfer = transfer;
+   call->box = *rel_box;
+}
+
+/**
+ * Execute an enqueued transfer_unmap call on the driver context.
+ */
+static void
+tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_transfer *xfer = payload->transfer;
+
+   pipe->transfer_unmap(pipe, xfer);
+}
+
+/**
+ * Unmap a transfer.
+ *
+ * Buffer transfers mapped for writing without FLUSH_EXPLICIT are flushed in
+ * full here. A staging buffer transfer is then released entirely within the
+ * threaded context; any other transfer is unmapped by an enqueued driver
+ * call.
+ */
+static void
+tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_transfer *ttrans = threaded_transfer(transfer);
+   struct threaded_resource *tres = threaded_resource(transfer->resource);
+
+   if (tres->b.target == PIPE_BUFFER) {
+      const bool writes =
+         (transfer->usage & PIPE_TRANSFER_WRITE) != 0;
+      const bool explicit_flush =
+         (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) != 0;
+
+      if (writes && !explicit_flush)
+         tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
+
+      /* Staging transfers don't send the call to the driver. */
+      if (ttrans->staging) {
+         pipe_resource_reference(&ttrans->staging, NULL);
+         pipe_resource_reference(&ttrans->b.resource, NULL);
+         slab_free(&tc->pool_transfers, ttrans);
+         return;
+      }
+   }
+
+   tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
+}
+
+/* Payload of TC_CALL_buffer_subdata; the uploaded bytes follow in "slot". */
+struct tc_buffer_subdata {
+   struct pipe_resource *resource;
+   unsigned usage, offset, size;
+   char slot[0]; /* more will be allocated if needed */
+};
+
+/**
+ * Execute an enqueued buffer_subdata call on the driver context and release
+ * the resource reference held by the payload.
+ */
+static void
+tc_call_buffer_subdata(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_buffer_subdata *args = (struct tc_buffer_subdata *)payload;
+
+   pipe->buffer_subdata(pipe, args->resource, args->usage, args->offset,
+                        args->size, args->slot);
+   pipe_resource_reference(&args->resource, NULL);
+}
+
+/**
+ * Upload \p size bytes of \p data into a buffer at \p offset.
+ *
+ * Zero-sized uploads are ignored. Uploads that end up unsynchronized, that
+ * still carry DISCARD_WHOLE_RESOURCE, or that are larger than
+ * TC_MAX_SUBDATA_BYTES go through tc_transfer_map/tc_transfer_unmap; the
+ * rest is copied into the call queue and executed later.
+ */
+static void
+tc_buffer_subdata(struct pipe_context *_pipe,
+                  struct pipe_resource *resource,
+                  unsigned usage, unsigned offset,
+                  unsigned size, const void *data)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_resource *tres = threaded_resource(resource);
+   struct tc_buffer_subdata *call;
+   bool use_map;
+
+   if (size == 0)
+      return;
+
+   usage |= PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE;
+   usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
+
+   /* Unsynchronized and big transfers should use transfer_map. Also handle
+    * full invalidations, because drivers aren't allowed to do them.
+    */
+   use_map = (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+                       PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) ||
+             size > TC_MAX_SUBDATA_BYTES;
+
+   if (use_map) {
+      struct pipe_transfer *transfer;
+      struct pipe_box box;
+      uint8_t *dst;
+
+      u_box_1d(offset, size, &box);
+
+      dst = tc_transfer_map(_pipe, resource, 0, usage, &box, &transfer);
+      if (dst) {
+         memcpy(dst, data, size);
+         tc_transfer_unmap(_pipe, transfer);
+      }
+      return;
+   }
+
+   util_range_add(&tres->valid_buffer_range, offset, offset + size);
+
+   /* The upload is small. Enqueue it. */
+   call = tc_add_slot_based_call(tc, TC_CALL_buffer_subdata,
+                                 tc_buffer_subdata, size);
+
+   tc_set_resource_reference(&call->resource, resource);
+   call->usage = usage;
+   call->offset = offset;
+   call->size = size;
+   memcpy(call->slot, data, size);
+}
+
+/* Payload of TC_CALL_texture_subdata; the texel data follows in "slot". */
+struct tc_texture_subdata {
+   struct pipe_resource *resource;
+   unsigned level, usage, stride, layer_stride;
+   struct pipe_box box;
+   char slot[0]; /* more will be allocated if needed */
+};
+
+/**
+ * Execute an enqueued texture_subdata call on the driver context and release
+ * the resource reference held by the payload.
+ */
+static void
+tc_call_texture_subdata(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_texture_subdata *args = (struct tc_texture_subdata *)payload;
+
+   pipe->texture_subdata(pipe, args->resource, args->level, args->usage,
+                         &args->box, args->slot, args->stride,
+                         args->layer_stride);
+   pipe_resource_reference(&args->resource, NULL);
+}
+
+/**
+ * Upload texel data into a region of a texture.
+ *
+ * The number of source bytes is derived from the box and the strides.
+ * Uploads of at most TC_MAX_SUBDATA_BYTES are copied into the call queue;
+ * bigger ones synchronize the thread and call the driver directly.
+ */
+static void
+tc_texture_subdata(struct pipe_context *_pipe,
+                   struct pipe_resource *resource,
+                   unsigned level, unsigned usage,
+                   const struct pipe_box *box,
+                   const void *data, unsigned stride,
+                   unsigned layer_stride)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_texture_subdata *call;
+   unsigned size;
+
+   assert(box->height >= 1);
+   assert(box->depth >= 1);
+
+   /* Bytes from the first texel to the end of the last row of the last
+    * layer.
+    */
+   size = (box->depth - 1) * layer_stride +
+          (box->height - 1) * stride +
+          box->width * util_format_get_blocksize(resource->format);
+   if (!size)
+      return;
+
+   /* Big uploads must sync. */
+   if (size > TC_MAX_SUBDATA_BYTES) {
+      struct pipe_context *driver = tc->pipe;
+
+      tc_sync(tc);
+      driver->texture_subdata(driver, resource, level, usage, box, data,
+                              stride, layer_stride);
+      return;
+   }
+
+   /* Small uploads can be enqueued. */
+   call = tc_add_slot_based_call(tc, TC_CALL_texture_subdata,
+                                 tc_texture_subdata, size);
+
+   tc_set_resource_reference(&call->resource, resource);
+   call->level = level;
+   call->usage = usage;
+   call->box = *box;
+   call->stride = stride;
+   call->layer_stride = layer_stride;
+   memcpy(call->slot, data, size);
+}
+
+
+/********************************************************************
+ * miscellaneous
+ */
+
+/* Define tc_<func>(struct pipe_context *) for a driver entry point that takes
+ * no arguments besides the context: it calls tc_sync() and then returns the
+ * result of calling the same entry point on the wrapped driver context.
+ */
+#define TC_FUNC_SYNC_RET0(ret_type, func) \
+ static ret_type \
+ tc_##func(struct pipe_context *_pipe) \
+ { \
+ struct threaded_context *tc = threaded_context(_pipe); \
+ struct pipe_context *pipe = tc->pipe; \
+ tc_sync(tc); \
+ return pipe->func(pipe); \
+ }
+
+/* Instantiates tc_get_device_reset_status and tc_get_timestamp. */
+TC_FUNC_SYNC_RET0(enum pipe_reset_status, get_device_reset_status)
+TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
+
+/**
+ * Query a sample position: synchronize the thread, then ask the driver,
+ * which writes its answer to \p out_value.
+ */
+static void
+tc_get_sample_position(struct pipe_context *_pipe,
+                       unsigned sample_count, unsigned sample_index,
+                       float *out_value)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->get_sample_position(driver, sample_count, sample_index, out_value);
+}
+
+/**
+ * Set the device reset callback: synchronize the thread, then forward the
+ * callback to the driver.
+ */
+static void
+tc_set_device_reset_callback(struct pipe_context *_pipe,
+                             const struct pipe_device_reset_callback *cb)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->set_device_reset_callback(driver, cb);
+}
+
+/* Payload of TC_CALL_emit_string_marker; the string bytes follow in "slot". */
+struct tc_string_marker {
+   int len;
+   char slot[0]; /* more will be allocated if needed */
+};
+
+/**
+ * Execute an enqueued emit_string_marker call on the driver context.
+ */
+static void
+tc_call_emit_string_marker(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_string_marker *marker = (struct tc_string_marker *)payload;
+
+   pipe->emit_string_marker(pipe, marker->slot, marker->len);
+}
+
+/**
+ * Emit a string marker.
+ *
+ * Strings of at most TC_MAX_STRING_MARKER_BYTES are copied into the call
+ * queue; longer ones synchronize the thread and are passed to the driver
+ * directly.
+ */
+static void
+tc_emit_string_marker(struct pipe_context *_pipe,
+                      const char *string, int len)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_string_marker *marker;
+
+   if (len > TC_MAX_STRING_MARKER_BYTES) {
+      struct pipe_context *driver = tc->pipe;
+
+      tc_sync(tc);
+      driver->emit_string_marker(driver, string, len);
+      return;
+   }
+
+   marker = tc_add_slot_based_call(tc, TC_CALL_emit_string_marker,
+                                   tc_string_marker, len);
+   memcpy(marker->slot, string, len);
+   marker->len = len;
+}
+
+/**
+ * Dump driver debug state to \p stream: synchronize the thread, then call
+ * the driver.
+ */
+static void
+tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
+                    unsigned flags)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->dump_debug_state(driver, stream, flags);
+}
+
+/**
+ * Set the debug callback: synchronize the thread, then forward the callback
+ * to the driver.
+ */
+static void
+tc_set_debug_callback(struct pipe_context *_pipe,
+                      const struct pipe_debug_callback *cb)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->set_debug_callback(driver, cb);
+}
+
+/**
+ * Create a fence from a file descriptor: synchronize the thread, then call
+ * the driver, which stores the result in \p fence.
+ */
+static void
+tc_create_fence_fd(struct pipe_context *_pipe,
+                   struct pipe_fence_handle **fence, int fd)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->create_fence_fd(driver, fence, fd);
+}
+
+/**
+ * Insert a server-side wait on \p fence: synchronize the thread, then call
+ * the driver.
+ */
+static void
+tc_fence_server_sync(struct pipe_context *_pipe,
+                     struct pipe_fence_handle *fence)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->fence_server_sync(driver, fence);
+}
+
+/* Placeholder for the video codec entry point. It must never be called: it
+ * hits unreachable(); the "return NULL" only follows that marker.
+ */
+static struct pipe_video_codec *
+tc_create_video_codec(struct pipe_context *_pipe,
+ const struct pipe_video_codec *templ)
+{
+ unreachable("Threaded context should not be enabled for video APIs");
+ return NULL;
+}
+
+/* Placeholder for the video buffer entry point. It must never be called: it
+ * hits unreachable(); the "return NULL" only follows that marker.
+ */
+static struct pipe_video_buffer *
+tc_create_video_buffer(struct pipe_context *_pipe,
+ const struct pipe_video_buffer *templ)
+{
+ unreachable("Threaded context should not be enabled for video APIs");
+ return NULL;
+}
+
+
+/********************************************************************
+ * draw, launch, clear, blit, copy, flush
+ */
+
+/**
+ * Flush the context.
+ *
+ * Every query on the unflushed list is marked as flushed and removed from
+ * the list. Then the thread is synchronized and the driver's flush is called
+ * directly with the caller's \p fence and \p flags.
+ */
+static void
+tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
+         unsigned flags)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+   struct threaded_query *query, *next;
+
+   LIST_FOR_EACH_ENTRY_SAFE(query, next, &tc->unflushed_queries,
+                            head_unflushed) {
+      query->flushed = true;
+      LIST_DEL(&query->head_unflushed);
+   }
+
+   /* TODO: deferred flushes? */
+   tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
+                   flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
+   driver->flush(driver, fence, flags);
+}
+
+/* Payload of TC_CALL_draw_vbo. It is actually variable-sized: "indirect" is
+ * only allocated for indirect draws.
+ */
+struct tc_full_draw_info {
+   struct pipe_draw_info draw;
+   struct pipe_draw_indirect_info indirect;
+};
+
+/**
+ * Execute an enqueued draw on the driver context and release the references
+ * held by the payload: the stream output target, the index buffer of
+ * indexed draws, and both buffers of indirect draws.
+ */
+static void
+tc_call_draw_vbo(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_full_draw_info *full = (struct tc_full_draw_info *)payload;
+   struct pipe_draw_info *draw = &full->draw;
+
+   pipe->draw_vbo(pipe, draw);
+
+   pipe_so_target_reference(&draw->count_from_stream_output, NULL);
+
+   if (draw->index_size)
+      pipe_resource_reference(&draw->index.resource, NULL);
+
+   if (draw->indirect) {
+      pipe_resource_reference(&full->indirect.buffer, NULL);
+      pipe_resource_reference(&full->indirect.indirect_draw_count, NULL);
+   }
+}
+
+/**
+ * Enqueue a TC_CALL_draw_vbo call and return its payload.
+ *
+ * The payload only has room for the "indirect" member when \p indirect is
+ * true; otherwise it is sized for a bare pipe_draw_info.
+ */
+static struct tc_full_draw_info *
+tc_add_draw_vbo(struct pipe_context *_pipe, bool indirect)
+{
+   const size_t payload_size = indirect ? sizeof(struct tc_full_draw_info)
+                                        : sizeof(struct pipe_draw_info);
+
+   return (struct tc_full_draw_info *)
+          tc_add_sized_call(threaded_context(_pipe), TC_CALL_draw_vbo,
+                            payload_size);
+}
+
+/**
+ * Enqueue a draw call.
+ *
+ * User index buffers are uploaded through the stream uploader first, and the
+ * enqueued draw refers to the uploaded copy. All other draws are enqueued
+ * with references to the caller's index and indirect buffers.
+ *
+ * The payload slots are uninitialized memory, so each reference is taken
+ * into the slot first and the following memcpy then stores the same pointer
+ * value again.
+ */
+static void
+tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_draw_indirect_info *indirect = info->indirect;
+   unsigned index_size = info->index_size;
+   bool has_user_indices = info->has_user_indices;
+
+   if (index_size && has_user_indices) {
+      unsigned size = info->count * index_size;
+      struct pipe_resource *buffer = NULL;
+      unsigned offset;
+
+      tc_assert(!indirect);
+
+      /* This must be done before adding draw_vbo, because it could generate
+       * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
+       * to the driver if it was done afterwards.
+       *
+       * Only the "count" indices beginning at "start" are uploaded; the
+       * start of the enqueued draw is rebased onto the uploaded copy below.
+       */
+      u_upload_data(tc->base.stream_uploader, 0, size, 4,
+                    (const uint8_t *)info->index.user +
+                    info->start * index_size,
+                    &offset, &buffer);
+      if (unlikely(!buffer))
+         return;
+
+      struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, false);
+      p->draw.count_from_stream_output = NULL;
+      pipe_so_target_reference(&p->draw.count_from_stream_output,
+                               info->count_from_stream_output);
+      memcpy(&p->draw, info, sizeof(*info));
+      p->draw.has_user_indices = false;
+      p->draw.index.resource = buffer;
+      p->draw.start = offset / index_size;
+   } else {
+      /* Non-indexed call or indexed with a real index buffer. */
+      struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, indirect != NULL);
+      p->draw.count_from_stream_output = NULL;
+      pipe_so_target_reference(&p->draw.count_from_stream_output,
+                               info->count_from_stream_output);
+      if (index_size) {
+         tc_set_resource_reference(&p->draw.index.resource,
+                                   info->index.resource);
+      }
+      memcpy(&p->draw, info, sizeof(*info));
+
+      if (indirect) {
+         /* Take the references into the payload's own copy. After the
+          * memcpy above, p->draw.indirect still points at the caller's
+          * struct, which must not be written.
+          */
+         tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
+         tc_set_resource_reference(&p->indirect.indirect_draw_count,
+                                   indirect->indirect_draw_count);
+         memcpy(&p->indirect, indirect, sizeof(*indirect));
+         p->draw.indirect = &p->indirect;
+      }
+   }
+}
+
+/**
+ * Execute an enqueued compute launch on the driver context and release the
+ * reference to the indirect buffer held by the payload.
+ */
+static void
+tc_call_launch_grid(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_grid_info *grid = (struct pipe_grid_info *)payload;
+
+   pipe->launch_grid(pipe, grid);
+   pipe_resource_reference(&grid->indirect, NULL);
+}
+
+/**
+ * Enqueue a compute launch. The grid info is copied into the call queue
+ * together with a reference to the indirect buffer. Launches with a non-NULL
+ * "input" are not expected (asserted).
+ */
+static void
+tc_launch_grid(struct pipe_context *_pipe,
+               const struct pipe_grid_info *info)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_grid_info *grid =
+      tc_add_struct_typed_call(tc, TC_CALL_launch_grid, pipe_grid_info);
+
+   assert(info->input == NULL);
+
+   /* Take the reference into the uninitialized slot first; the copy below
+    * then stores the same pointer value again.
+    */
+   tc_set_resource_reference(&grid->indirect, info->indirect);
+   memcpy(grid, info, sizeof(*info));
+}
+
+/**
+ * Execute an enqueued resource_copy_region call on the driver context and
+ * release both resource references held by the payload.
+ */
+static void
+tc_call_resource_copy_region(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_resource_copy_region *args =
+      (struct tc_resource_copy_region *)payload;
+
+   pipe->resource_copy_region(pipe,
+                              args->dst, args->dst_level,
+                              args->dstx, args->dsty, args->dstz,
+                              args->src, args->src_level, &args->src_box);
+
+   pipe_resource_reference(&args->dst, NULL);
+   pipe_resource_reference(&args->src, NULL);
+}
+
+/**
+ * Enqueue a copy between two resources.
+ *
+ * Both resources are referenced by the enqueued call. If the destination is
+ * a buffer, the written range [dstx, dstx + src_box->width) is added to its
+ * valid range.
+ */
+static void
+tc_resource_copy_region(struct pipe_context *_pipe,
+                        struct pipe_resource *dst, unsigned dst_level,
+                        unsigned dstx, unsigned dsty, unsigned dstz,
+                        struct pipe_resource *src, unsigned src_level,
+                        const struct pipe_box *src_box)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_resource *tdst = threaded_resource(dst);
+   struct tc_resource_copy_region *call =
+      tc_add_struct_typed_call(tc, TC_CALL_resource_copy_region,
+                               tc_resource_copy_region);
+
+   /* Destination. */
+   tc_set_resource_reference(&call->dst, dst);
+   call->dst_level = dst_level;
+   call->dstx = dstx;
+   call->dsty = dsty;
+   call->dstz = dstz;
+
+   /* Source. */
+   tc_set_resource_reference(&call->src, src);
+   call->src_level = src_level;
+   call->src_box = *src_box;
+
+   if (dst->target == PIPE_BUFFER) {
+      util_range_add(&tdst->valid_buffer_range, dstx,
+                     dstx + src_box->width);
+   }
+}
+
+/**
+ * Execute an enqueued blit on the driver context and release the destination
+ * and source resource references held by the payload.
+ */
+static void
+tc_call_blit(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_blit_info *info = (struct pipe_blit_info *)payload;
+
+   pipe->blit(pipe, info);
+
+   pipe_resource_reference(&info->dst.resource, NULL);
+   pipe_resource_reference(&info->src.resource, NULL);
+}
+
+/**
+ * Enqueue a blit. The blit info is copied into the call queue together with
+ * references to the destination and source resources.
+ */
+static void
+tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_blit_info *queued =
+      tc_add_struct_typed_call(tc, TC_CALL_blit, pipe_blit_info);
+
+   /* Take the references into the uninitialized slot first; the copy below
+    * then stores the same pointer values again.
+    */
+   tc_set_resource_reference(&queued->dst.resource, info->dst.resource);
+   tc_set_resource_reference(&queued->src.resource, info->src.resource);
+   memcpy(queued, info, sizeof(*info));
+}
+
+/* Payload of TC_CALL_generate_mipmap; mirrors the call's parameters. */
+struct tc_generate_mipmap {
+   struct pipe_resource *res;
+   enum pipe_format format;
+   unsigned base_level;
+   unsigned last_level;
+   unsigned first_layer;
+   unsigned last_layer;
+};
+
+/**
+ * Execute an enqueued generate_mipmap call on the driver context and release
+ * the resource reference held by the payload.
+ *
+ * The driver's result can no longer be reported to the caller at this point,
+ * so it is only checked by an assertion.
+ */
+static void
+tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload;
+   bool result = pipe->generate_mipmap(pipe, p->res, p->format, p->base_level,
+                                       p->last_level, p->first_layer,
+                                       p->last_layer);
+   assert(result);
+   (void)result; /* only used by the assertion; unused when NDEBUG is set */
+   pipe_resource_reference(&p->res, NULL);
+}
+
+/**
+ * Enqueue mipmap generation for a resource.
+ *
+ * The format is checked against the screen up front, because the result of
+ * the enqueued driver call can't be returned to the caller.
+ *
+ * \return false if the screen doesn't support \p format for the bind flag
+ *         checked below (nothing is enqueued then); true otherwise.
+ */
+static boolean
+tc_generate_mipmap(struct pipe_context *_pipe,
+                   struct pipe_resource *res,
+                   enum pipe_format format,
+                   unsigned base_level,
+                   unsigned last_level,
+                   unsigned first_layer,
+                   unsigned last_layer)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *pipe = tc->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   /* NOTE(review): this used to be initialized to PIPE_BIND_SAMPLER_VIEW and
+    * then unconditionally overwritten, so only one bind flag has ever been
+    * checked. Confirm whether OR-ing in PIPE_BIND_SAMPLER_VIEW was intended.
+    */
+   unsigned bind = util_format_is_depth_or_stencil(format) ?
+                      PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET;
+
+   if (!screen->is_format_supported(screen, format, res->target,
+                                    res->nr_samples, bind))
+      return false;
+
+   struct tc_generate_mipmap *p =
+      tc_add_struct_typed_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
+
+   tc_set_resource_reference(&p->res, res);
+   p->format = format;
+   p->base_level = base_level;
+   p->last_level = last_level;
+   p->first_layer = first_layer;
+   p->last_layer = last_layer;
+   return true;
+}
+
+/**
+ * Execute an enqueued flush_resource call on the driver context and release
+ * the resource reference held by the payload.
+ */
+static void
+tc_call_flush_resource(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_resource **res = &payload->resource;
+
+   pipe->flush_resource(pipe, *res);
+   pipe_resource_reference(res, NULL);
+}
+
+/**
+ * Enqueue a flush_resource call; the call holds a reference to the resource.
+ */
+static void
+tc_flush_resource(struct pipe_context *_pipe,
+                  struct pipe_resource *resource)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   union tc_payload *slot = tc_add_small_call(tc, TC_CALL_flush_resource);
+
+   tc_set_resource_reference(&slot->resource, resource);
+}
+
+/**
+ * Execute an enqueued invalidate_resource call on the driver context and
+ * release the resource reference held by the payload.
+ */
+static void
+tc_call_invalidate_resource(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct pipe_resource **res = &payload->resource;
+
+   pipe->invalidate_resource(pipe, *res);
+   pipe_resource_reference(res, NULL);
+}
+
+/**
+ * Invalidate a resource.
+ *
+ * Buffers are invalidated by the threaded context itself through
+ * tc_invalidate_buffer() (its result is ignored). For any other resource an
+ * invalidate_resource call is enqueued for the driver.
+ */
+static void
+tc_invalidate_resource(struct pipe_context *_pipe,
+                       struct pipe_resource *resource)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   union tc_payload *slot;
+
+   if (resource->target == PIPE_BUFFER) {
+      tc_invalidate_buffer(tc, threaded_resource(resource));
+      return;
+   }
+
+   slot = tc_add_small_call(tc, TC_CALL_invalidate_resource);
+   tc_set_resource_reference(&slot->resource, resource);
+}
+
+/* Payload of TC_CALL_clear; mirrors the call's parameters. */
+struct tc_clear {
+   unsigned buffers;
+   union pipe_color_union color;
+   double depth;
+   unsigned stencil;
+};
+
+/**
+ * Execute an enqueued clear on the driver context.
+ */
+static void
+tc_call_clear(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_clear *args = (struct tc_clear *)payload;
+
+   pipe->clear(pipe, args->buffers, &args->color, args->depth,
+               args->stencil);
+}
+
+/**
+ * Enqueue a clear. The color is copied by value, so the caller's \p color
+ * doesn't have to stay alive.
+ */
+static void
+tc_clear(struct pipe_context *_pipe, unsigned buffers,
+         const union pipe_color_union *color, double depth,
+         unsigned stencil)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_clear *call =
+      tc_add_struct_typed_call(tc, TC_CALL_clear, tc_clear);
+
+   call->buffers = buffers;
+   call->color = *color;
+   call->depth = depth;
+   call->stencil = stencil;
+}
+
+/**
+ * Clear a rectangle of a render target: synchronize the thread, then call
+ * the driver directly.
+ */
+static void
+tc_clear_render_target(struct pipe_context *_pipe,
+                       struct pipe_surface *dst,
+                       const union pipe_color_union *color,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       bool render_condition_enabled)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->clear_render_target(driver, dst, color,
+                               dstx, dsty, width, height,
+                               render_condition_enabled);
+}
+
+/**
+ * Clear a rectangle of a depth/stencil surface: synchronize the thread, then
+ * call the driver directly.
+ */
+static void
+tc_clear_depth_stencil(struct pipe_context *_pipe,
+                       struct pipe_surface *dst, unsigned clear_flags,
+                       double depth, unsigned stencil, unsigned dstx,
+                       unsigned dsty, unsigned width, unsigned height,
+                       bool render_condition_enabled)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_context *driver = tc->pipe;
+
+   tc_sync(tc);
+   driver->clear_depth_stencil(driver, dst, clear_flags,
+                               depth, stencil,
+                               dstx, dsty, width, height,
+                               render_condition_enabled);
+}
+
+/* Payload of TC_CALL_clear_buffer. The clear value is stored inline, so it
+ * is limited to 16 bytes.
+ */
+struct tc_clear_buffer {
+   struct pipe_resource *res;
+   unsigned offset;
+   unsigned size;
+   char clear_value[16];
+   int clear_value_size;
+};
+
+/**
+ * Execute an enqueued clear_buffer call on the driver context and release
+ * the resource reference held by the payload.
+ */
+static void
+tc_call_clear_buffer(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_clear_buffer *args = (struct tc_clear_buffer *)payload;
+
+   pipe->clear_buffer(pipe, args->res, args->offset, args->size,
+                      args->clear_value, args->clear_value_size);
+   pipe_resource_reference(&args->res, NULL);
+}
+
+/**
+ * Enqueue a buffer clear.
+ *
+ * The clear value is copied into the call, which has room for at most 16
+ * bytes; bigger values are a caller error (asserted). The cleared range
+ * [offset, offset + size) is added to the buffer's valid range.
+ */
+static void
+tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
+                unsigned offset, unsigned size,
+                const void *clear_value, int clear_value_size)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct threaded_resource *tres = threaded_resource(res);
+   struct tc_clear_buffer *p =
+      tc_add_struct_typed_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
+
+   /* The memcpy below must not overflow the inline storage. */
+   assert(clear_value_size >= 0 &&
+          (size_t)clear_value_size <= sizeof(p->clear_value));
+
+   tc_set_resource_reference(&p->res, res);
+   p->offset = offset;
+   p->size = size;
+   memcpy(p->clear_value, clear_value, clear_value_size);
+   p->clear_value_size = clear_value_size;
+
+   util_range_add(&tres->valid_buffer_range, offset, offset + size);
+}
+
+/* Payload of TC_CALL_clear_texture. The clear data is stored inline in a
+ * 16-byte array.
+ */
+struct tc_clear_texture {
+   struct pipe_resource *res;
+   unsigned level;
+   struct pipe_box box;
+   char data[16];
+};
+
+/**
+ * Execute an enqueued clear_texture call on the driver context and release
+ * the resource reference held by the payload.
+ */
+static void
+tc_call_clear_texture(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_clear_texture *args = (struct tc_clear_texture *)payload;
+
+   pipe->clear_texture(pipe, args->res, args->level, &args->box,
+                       args->data);
+   pipe_resource_reference(&args->res, NULL);
+}
+
+/**
+ * Enqueue a texture clear. One block of the resource's format is copied from
+ * \p data into the call.
+ */
+static void
+tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
+                 unsigned level, const struct pipe_box *box, const void *data)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_clear_texture *call =
+      tc_add_struct_typed_call(tc, TC_CALL_clear_texture, tc_clear_texture);
+
+   tc_set_resource_reference(&call->res, res);
+   call->level = level;
+   call->box = *box;
+   memcpy(call->data, data, util_format_get_blocksize(res->format));
+}
+
+/* Payload of TC_CALL_resource_commit; mirrors the call's parameters. */
+struct tc_resource_commit {
+   struct pipe_resource *res;
+   unsigned level;
+   struct pipe_box box;
+   bool commit;
+};
+
+/**
+ * Execute an enqueued resource_commit call on the driver context (its return
+ * value is ignored) and release the resource reference held by the payload.
+ */
+static void
+tc_call_resource_commit(struct pipe_context *pipe, union tc_payload *payload)
+{
+   struct tc_resource_commit *args = (struct tc_resource_commit *)payload;
+
+   pipe->resource_commit(pipe, args->res, args->level, &args->box,
+                         args->commit);
+   pipe_resource_reference(&args->res, NULL);
+}
+
+/**
+ * Enqueue a resource_commit call.
+ *
+ * \return always true; the driver's result isn't available when this
+ *         function returns.
+ */
+static bool
+tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
+                   unsigned level, struct pipe_box *box, bool commit)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct tc_resource_commit *call =
+      tc_add_struct_typed_call(tc, TC_CALL_resource_commit,
+                               tc_resource_commit);
+
+   tc_set_resource_reference(&call->res, res);
+   call->level = level;
+   call->box = *box;
+   call->commit = commit;
+
+   /* we don't care about the return value for this call */
+   return true;
+}
+
+
+/********************************************************************
+ * create & destroy
+ */
+
+/* Destroy the threaded context together with the driver context it wraps.
+ *
+ * Order: synchronize the thread, tear down the queue and the batch fences
+ * (only if the queue was initialized), destroy the uploaders, destroy the
+ * transfer pool, destroy the driver context, and free the wrapper.
+ */
+static void
+tc_destroy(struct pipe_context *_pipe)
+{
+ struct threaded_context *tc = threaded_context(_pipe);
+ struct pipe_context *pipe = tc->pipe;
+
+ tc_sync(tc);
+
+ if (util_queue_is_initialized(&tc->queue)) {
+ util_queue_destroy(&tc->queue);
+
+ for (unsigned i = 0; i < TC_MAX_BATCHES; i++)
+ util_queue_fence_destroy(&tc->batch_slots[i].fence);
+ }
+
+ /* The const uploader may be the same object as the stream uploader; it is
+ * destroyed separately only when it is a distinct object.
+ */
+ if (tc->base.const_uploader &&
+ tc->base.stream_uploader != tc->base.const_uploader)
+ u_upload_destroy(tc->base.const_uploader);
+
+ if (tc->base.stream_uploader)
+ u_upload_destroy(tc->base.stream_uploader);
+
+ slab_destroy_child(&tc->pool_transfers);
+ pipe->destroy(pipe);
+ FREE(tc);
+}
+
+/* Table of call executors with TC_NUM_CALLS slots. Every CALL(name) entry in
+ * u_threaded_context_calls.h expands to tc_call_<name>, in the order listed
+ * in that header.
+ */
+static const tc_execute execute_func[TC_NUM_CALLS] = {
+#define CALL(name) tc_call_##name,
+#include "u_threaded_context_calls.h"
+#undef CALL
+};
+
+/**
+ * Wrap an existing pipe_context into a threaded_context.
+ *
+ * Ownership: once threading is enabled, the wrapper takes over "pipe". If
+ * wrapping fails, "pipe" is destroyed before NULL is returned.
+ *
+ * \param pipe                  pipe_context to wrap; a NULL pipe yields NULL
+ * \param parent_transfer_pool  parent slab pool set up for creating
+ *                              pipe_transfer objects; the driver should have
+ *                              one in pipe_screen.
+ * \param replace_buffer        callback for replacing a pipe_resource's
+ *                              storage with another pipe_resource's storage.
+ * \param out                   if "out" != NULL and a threaded_context was
+ *                              created, it is returned here in addition to
+ *                              the return value. *out is not written when
+ *                              "pipe" is returned unwrapped or on failure.
+ *
+ * \return the wrapping context; "pipe" itself when threading is disabled
+ *         through the GALLIUM_THREAD option (enabled by default when more
+ *         than one CPU is detected); NULL on failure.
+ */
+struct pipe_context *
+threaded_context_create(struct pipe_context *pipe,
+                        struct slab_parent_pool *parent_transfer_pool,
+                        tc_replace_buffer_storage_func replace_buffer,
+                        struct threaded_context **out)
+{
+   struct threaded_context *tc;
+
+   /* A call slot is an 8-byte header plus an 8-byte payload. */
+   STATIC_ASSERT(sizeof(union tc_payload) <= 8);
+   STATIC_ASSERT(sizeof(struct tc_call) <= 16);
+
+   if (!pipe)
+      return NULL;
+
+   util_cpu_detect();
+
+   if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
+      return pipe;
+
+   tc = CALLOC_STRUCT(threaded_context);
+   if (!tc) {
+      pipe->destroy(pipe);
+      return NULL;
+   }
+
+   /* The driver context isn't wrapped, so set its "priv" to NULL. */
+   pipe->priv = NULL;
+
+   tc->pipe = pipe;
+   tc->replace_buffer_storage = replace_buffer;
+   tc->map_buffer_alignment =
+      pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
+   tc->base.priv = pipe; /* priv points to the wrapped driver context */
+   tc->base.screen = pipe->screen;
+   tc->base.destroy = tc_destroy;
+
+   /* Share one clone when the driver uses a single uploader for both. */
+   tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
+   if (pipe->stream_uploader == pipe->const_uploader)
+      tc->base.const_uploader = tc->base.stream_uploader;
+   else
+      tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
+
+   if (!tc->base.stream_uploader || !tc->base.const_uploader)
+      goto fail;
+
+   /* The queue size is the number of batches "waiting". Batches are removed
+    * from the queue before being executed, so keep one tc_batch slot for that
+    * execution. Also, keep one unused slot for an unflushed batch.
+    */
+   if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1))
+      goto fail;
+
+   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
+      tc->batch_slots[i].sentinel = TC_SENTINEL;
+      tc->batch_slots[i].sentinel2 = TC_SENTINEL;
+      tc->batch_slots[i].pipe = pipe;
+      util_queue_fence_init(&tc->batch_slots[i].fence);
+   }
+
+   LIST_INITHEAD(&tc->unflushed_queries);
+
+   slab_create_child(&tc->pool_transfers, parent_transfer_pool);
+
+   /* Only expose the entry points that the driver itself implements. */
+#define CTX_INIT(_member) \
+   tc->base._member = tc->pipe->_member ? tc_##_member : NULL
+
+   CTX_INIT(flush);
+   CTX_INIT(draw_vbo);
+   CTX_INIT(launch_grid);
+   CTX_INIT(resource_copy_region);
+   CTX_INIT(blit);
+   CTX_INIT(clear);
+   CTX_INIT(clear_render_target);
+   CTX_INIT(clear_depth_stencil);
+   CTX_INIT(clear_buffer);
+   CTX_INIT(clear_texture);
+   CTX_INIT(flush_resource);
+   CTX_INIT(generate_mipmap);
+   CTX_INIT(render_condition);
+   CTX_INIT(create_query);
+   CTX_INIT(create_batch_query);
+   CTX_INIT(destroy_query);
+   CTX_INIT(begin_query);
+   CTX_INIT(end_query);
+   CTX_INIT(get_query_result);
+   CTX_INIT(get_query_result_resource);
+   CTX_INIT(set_active_query_state);
+   CTX_INIT(create_blend_state);
+   CTX_INIT(bind_blend_state);
+   CTX_INIT(delete_blend_state);
+   CTX_INIT(create_sampler_state);
+   CTX_INIT(bind_sampler_states);
+   CTX_INIT(delete_sampler_state);
+   CTX_INIT(create_rasterizer_state);
+   CTX_INIT(bind_rasterizer_state);
+   CTX_INIT(delete_rasterizer_state);
+   CTX_INIT(create_depth_stencil_alpha_state);
+   CTX_INIT(bind_depth_stencil_alpha_state);
+   CTX_INIT(delete_depth_stencil_alpha_state);
+   CTX_INIT(create_fs_state);
+   CTX_INIT(bind_fs_state);
+   CTX_INIT(delete_fs_state);
+   CTX_INIT(create_vs_state);
+   CTX_INIT(bind_vs_state);
+   CTX_INIT(delete_vs_state);
+   CTX_INIT(create_gs_state);
+   CTX_INIT(bind_gs_state);
+   CTX_INIT(delete_gs_state);
+   CTX_INIT(create_tcs_state);
+   CTX_INIT(bind_tcs_state);
+   CTX_INIT(delete_tcs_state);
+   CTX_INIT(create_tes_state);
+   CTX_INIT(bind_tes_state);
+   CTX_INIT(delete_tes_state);
+   CTX_INIT(create_compute_state);
+   CTX_INIT(bind_compute_state);
+   CTX_INIT(delete_compute_state);
+   CTX_INIT(create_vertex_elements_state);
+   CTX_INIT(bind_vertex_elements_state);
+   CTX_INIT(delete_vertex_elements_state);
+   CTX_INIT(set_blend_color);
+   CTX_INIT(set_stencil_ref);
+   CTX_INIT(set_sample_mask);
+   CTX_INIT(set_min_samples);
+   CTX_INIT(set_clip_state);
+   CTX_INIT(set_constant_buffer);
+   CTX_INIT(set_framebuffer_state);
+   CTX_INIT(set_polygon_stipple);
+   CTX_INIT(set_scissor_states);
+   CTX_INIT(set_viewport_states);
+   CTX_INIT(set_window_rectangles);
+   CTX_INIT(set_sampler_views);
+   CTX_INIT(set_tess_state);
+   CTX_INIT(set_shader_buffers);
+   CTX_INIT(set_shader_images);
+   CTX_INIT(set_vertex_buffers);
+   CTX_INIT(create_stream_output_target);
+   CTX_INIT(stream_output_target_destroy);
+   CTX_INIT(set_stream_output_targets);
+   CTX_INIT(create_sampler_view);
+   CTX_INIT(sampler_view_destroy);
+   CTX_INIT(create_surface);
+   CTX_INIT(surface_destroy);
+   CTX_INIT(transfer_map);
+   CTX_INIT(transfer_flush_region);
+   CTX_INIT(transfer_unmap);
+   CTX_INIT(buffer_subdata);
+   CTX_INIT(texture_subdata);
+   CTX_INIT(texture_barrier);
+   CTX_INIT(memory_barrier);
+   CTX_INIT(resource_commit);
+   CTX_INIT(create_video_codec);
+   CTX_INIT(create_video_buffer);
+   CTX_INIT(set_compute_resources);
+   CTX_INIT(set_global_binding);
+   CTX_INIT(get_sample_position);
+   CTX_INIT(invalidate_resource);
+   CTX_INIT(get_device_reset_status);
+   CTX_INIT(set_device_reset_callback);
+   CTX_INIT(dump_debug_state);
+   CTX_INIT(emit_string_marker);
+   CTX_INIT(set_debug_callback);
+   CTX_INIT(create_fence_fd);
+   CTX_INIT(fence_server_sync);
+   CTX_INIT(get_timestamp);
+#undef CTX_INIT
+
+   if (out)
+      *out = tc;
+
+   return &tc->base;
+
+fail:
+   /* Both "goto fail" sites are reached before the batch fences and the
+    * transfer slab child are initialized. tc_destroy would sync on and tear
+    * down that never-initialized state, so release only what can have been
+    * acquired so far instead of calling it.
+    */
+   if (util_queue_is_initialized(&tc->queue))
+      util_queue_destroy(&tc->queue);
+
+   if (tc->base.const_uploader &&
+       tc->base.stream_uploader != tc->base.const_uploader)
+      u_upload_destroy(tc->base.const_uploader);
+
+   if (tc->base.stream_uploader)
+      u_upload_destroy(tc->base.stream_uploader);
+
+   /* Same ownership rule as the allocation-failure path above. */
+   pipe->destroy(pipe);
+   FREE(tc);
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
new file mode 100644
index 00000000000..ea58d4ca0cf
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -0,0 +1,349 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* This is a wrapper for pipe_context that executes all pipe_context calls
+ * in another thread.
+ *
+ *
+ * Guidelines for adopters and deviations from Gallium
+ * ---------------------------------------------------
+ *
+ * 1) pipe_context is wrapped. pipe_screen isn't wrapped. All pipe_screen
+ * driver functions that take a context (fence_finish, texture_get_handle)
+ * should manually unwrap pipe_context by doing:
+ * pipe = threaded_context_unwrap_sync(pipe);
+ *
+ * pipe_context::priv is used to unwrap the context, so drivers and state
+ * trackers shouldn't use it.
+ *
+ * No other objects are wrapped.
+ *
+ * 2) Drivers must subclass and initialize these structures:
+ * - threaded_resource for pipe_resource (use threaded_resource_init/deinit)
+ * - threaded_query for pipe_query (zero memory)
+ * - threaded_transfer for pipe_transfer (zero memory)
+ *
+ * 3) The threaded context must not be enabled for contexts that can use video
+ * codecs.
+ *
+ * 4) Changes in driver behavior:
+ * - begin_query and end_query always return true; return values from
+ * the driver are ignored.
+ * - generate_mipmap uses is_format_supported to determine success;
+ * the return value from the driver is ignored.
+ * - resource_commit always returns true; failures are ignored.
+ * - If a non-async debug callback is set, the threaded context keeps using
+ * asynchronous execution. This is OK for shader-db, but the driver
+ * shouldn't use the debug callback in any other way.
+ *
+ *
+ * Thread-safety requirements on context functions
+ * -----------------------------------------------
+ *
+ * These pipe_context functions are executed directly, so they shouldn't use
+ * pipe_context in an unsafe way. They are de-facto screen functions now:
+ * - create_query
+ * - create_batch_query
+ * - create_*_state (all CSOs and shaders)
+ * - Make sure the shader compiler doesn't use any per-context stuff.
+ * (e.g. LLVM target machine)
+ * - Only pipe_context's debug callback for shader dumps is guaranteed to
+ * be up to date, because set_debug_callback synchronizes execution.
+ * - create_surface
+ * - surface_destroy
+ * - create_sampler_view
+ * - sampler_view_destroy
+ * - stream_output_target_destroy
+ * - transfer_map (only unsynchronized buffer mappings)
+ * - get_query_result (when threaded_query::flushed == true)
+ *
+ * Create calls causing a sync that can't be async due to driver limitations:
+ * - create_stream_output_target
+ *
+ *
+ * Transfer_map rules for buffer mappings
+ * --------------------------------------
+ *
+ * 1) If transfer_map has PIPE_TRANSFER_UNSYNCHRONIZED, the call is made
+ * in the non-driver thread without flushing the queue. The driver will
+ * receive TC_TRANSFER_MAP_THREADED_UNSYNC in addition to
+ * PIPE_TRANSFER_UNSYNCHRONIZED to indicate this.
+ * Note that transfer_unmap is always enqueued and called from the driver
+ * thread.
+ *
+ * 2) The driver isn't allowed to infer unsynchronized mappings by tracking
+ * the valid buffer range. The threaded context always sends
+ * TC_TRANSFER_MAP_IGNORE_VALID_RANGE to indicate this. Ignoring the flag
+ * will lead to failures.
+ * The threaded context does its own detection of unsynchronized mappings.
+ *
+ * 3) The driver isn't allowed to do buffer invalidations by itself under any
+ * circumstances. This is necessary for unsynchronized maps to map the latest
+ * version of the buffer, because invalidations can be queued, while
+ * unsynchronized maps are not queued and should return the latest storage
+ * after invalidation. The threaded context always sends
+ * TC_TRANSFER_MAP_NO_INVALIDATE into transfer_map and buffer_subdata to
+ * indicate this. Ignoring the flag will lead to failures.
+ * The threaded context uses its own buffer invalidation mechanism.
+ *
+ *
+ * Additional requirements
+ * -----------------------
+ *
+ * get_query_result:
+ * If threaded_query::flushed == true, get_query_result should assume that
+ * it's called from a non-driver thread, in which case the driver shouldn't
+ * use the context in an unsafe way.
+ *
+ * replace_buffer_storage:
+ * The driver has to implement this callback, which will be called when
+ * the threaded context wants to replace a resource's backing storage with
+ * another resource's backing storage. The threaded context uses it to
+ * implement buffer invalidation. This call is always queued.
+ *
+ *
+ * Performance gotchas
+ * -------------------
+ *
+ * Buffer invalidations are done unconditionally - they don't check whether
+ * the buffer is busy. This can cause drivers to have more live allocations
+ * and CPU mappings than necessary.
+ *
+ *
+ * How it works (queue architecture)
+ * ---------------------------------
+ *
+ * There is a multithreaded queue consisting of batches, each batch consisting
+ * of call slots. Each call slot consists of an 8-byte header (call ID +
+ * call size + constant 32-bit marker for integrity checking) and an 8-byte
+ * body for per-call data. That is 16 bytes per call slot.
+ *
+ * Simple calls such as bind_xx_state(CSO) occupy only one call slot. Bigger
+ * calls occupy multiple call slots depending on the size needed by call
+ * parameters. That means that calls can have a variable size in the batch.
+ * For example, set_vertex_buffers(count = any, buffers = NULL) occupies only
+ * 1 call slot, but set_vertex_buffers(count = 5) occupies 6 call slots.
+ * Even though the first call slot can use only 8 bytes for data, additional
+ * call slots used by the same call can use all 16 bytes for data.
+ * For example, a call using 2 call slots has 24 bytes of space for data.
+ *
+ * Once a batch is full and there is no space for the next call, it's flushed,
+ * meaning that it's added to the queue for execution in the other thread.
+ * The batches are ordered in a ring and reused once they are idle again.
+ * The batching is necessary for low queue/mutex overhead.
+ *
+ */
+
+#ifndef U_THREADED_CONTEXT_H
+#define U_THREADED_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_queue.h"
+#include "util/u_range.h"
+#include "util/slab.h"
+
+/* These are transfer flags sent to drivers by the threaded context. They
+ * use bits 29-31 of the flags word.
+ */
+/* Never infer whether it's safe to use unsynchronized mappings: */
+#define TC_TRANSFER_MAP_IGNORE_VALID_RANGE (1u << 29)
+/* Don't invalidate buffers: */
+#define TC_TRANSFER_MAP_NO_INVALIDATE (1u << 30)
+/* transfer_map is called from a non-driver thread: */
+#define TC_TRANSFER_MAP_THREADED_UNSYNC (1u << 31)
+
+/* Number of batch slots kept in memory (threaded_context::batch_slots).
+ * - 1 batch is always idle and records new commands
+ * - 1 batch is being executed
+ * so the queue size is TC_MAX_BATCHES - 2 = number of waiting batches.
+ *
+ * Use a size as small as possible for low CPU L2 cache usage but large enough
+ * so that the queue isn't stalled too often for not having enough idle batch
+ * slots.
+ */
+#define TC_MAX_BATCHES 10
+
+/* The size of one batch in call slots (tc_batch::call). Non-trivial calls
+ * (i.e. not setting a CSO pointer) can occupy multiple call slots.
+ *
+ * The idea is to have batches as small as possible but large enough so that
+ * the queuing and mutex overhead is negligible.
+ */
+#define TC_CALLS_PER_BATCH 192
+
+/* Threshold for when to use the queue or sync.
+ * NOTE(review): presumably the largest emit_string_marker string that is
+ * still enqueued, with longer strings causing a sync - confirm in
+ * tc_emit_string_marker.
+ */
+#define TC_MAX_STRING_MARKER_BYTES 512
+
+/* Threshold (in bytes) for when to enqueue buffer/texture_subdata as-is.
+ * If the upload size is greater than this, the threaded context instead:
+ * - for buffers: does DISCARD_RANGE itself
+ * - for textures: syncs and calls the driver directly
+ */
+#define TC_MAX_SUBDATA_BYTES 320
+
+/* Driver callback that replaces the backing storage of "dst" with the
+ * backing storage of "src". The threaded context uses it to implement
+ * buffer invalidation; the call is always queued.
+ */
+typedef void (*tc_replace_buffer_storage_func)(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src);
+
+/* Base structure for driver resources. Drivers subclass it for their
+ * pipe_resource objects; "b" must stay the first member because
+ * threaded_resource() casts a pipe_resource pointer directly.
+ */
+struct threaded_resource {
+ struct pipe_resource b;
+ const struct u_resource_vtbl *vtbl;
+
+ /* Since buffer invalidations are queued, we can't use the base resource
+ * for unsynchronized mappings. This points to the latest version of
+ * the buffer after the latest invalidation. It's only used for
+ * unsynchronized mappings in the non-driver thread. Initially it's set
+ * to &b.
+ */
+ struct pipe_resource *latest;
+
+ /* The buffer range which is initialized (with a write transfer, streamout,
+ * or writable shader resources). The remainder of the buffer is considered
+ * invalid and can be mapped unsynchronized.
+ *
+ * This allows unsynchronized mapping of a buffer range which hasn't been
+ * used yet. It's for applications which forget to use the unsynchronized
+ * map flag and expect the driver to figure it out.
+ *
+ * Drivers should set this to the full range for buffers backed by user
+ * memory.
+ */
+ struct util_range valid_buffer_range;
+
+ /* If "this" is not the base instance of the buffer, but it's one of its
+ * reallocations (set in "latest" of the base instance), this points to
+ * the valid range of the base instance. It's used for transfers after
+ * a buffer invalidation, because such transfers operate on "latest", not
+ * the base instance. Initially it's set to &valid_buffer_range.
+ */
+ struct util_range *base_valid_buffer_range;
+
+ /* Drivers are required to update this for shared resources and user
+ * pointers. */
+ bool is_shared;
+ bool is_user_ptr;
+};
+
+/* Base structure for driver transfers. Drivers subclass it for their
+ * pipe_transfer objects and must zero the memory; "b" must stay the first
+ * member because threaded_transfer() casts a pipe_transfer pointer directly.
+ */
+struct threaded_transfer {
+ struct pipe_transfer b;
+
+ /* Staging buffer for DISCARD_RANGE transfers. */
+ struct pipe_resource *staging;
+
+ /* Offset into the staging buffer, because the backing buffer is
+ * sub-allocated. */
+ unsigned offset;
+};
+
+/* Base structure for driver queries. Drivers subclass it for their
+ * pipe_query objects and must zero the memory. threaded_query() casts a
+ * pipe_query pointer directly to this type, so it has to sit at the start
+ * of the driver's query object.
+ */
+struct threaded_query {
+ /* The query is added to the list in end_query and removed in flush. */
+ struct list_head head_unflushed;
+
+ /* Whether pipe->flush has been called after end_query. */
+ bool flushed;
+};
+
+/* This is the second half of tc_call containing call data.
+ * Most calls will typecast this to the type they need, typically larger
+ * than 8 bytes (the extra data lives in the following call slots).
+ * threaded_context_create statically asserts that the union itself is at
+ * most 8 bytes.
+ */
+union tc_payload {
+ struct pipe_query *query;
+ struct pipe_resource *resource;
+ struct pipe_transfer *transfer;
+ /* Member with a fixed 64-bit size. */
+ uint64_t __use_8_bytes;
+};
+
+/* One call slot: an 8-byte header followed by an 8-byte payload.
+ * threaded_context_create statically asserts that it is at most 16 bytes.
+ */
+struct tc_call {
+ /* Constant 32-bit marker for integrity checking. */
+ unsigned sentinel;
+ /* Size of the call in call slots. */
+ ushort num_call_slots;
+ /* Call ID. NOTE(review): presumably the index into execute_func. */
+ ushort call_id;
+ /* Per-call data. */
+ union tc_payload payload;
+};
+
+/* One batch of recorded calls. threaded_context_create initializes every
+ * batch slot with the wrapped driver context, TC_SENTINEL in both sentinel
+ * fields and an initialized fence.
+ */
+struct tc_batch {
+ /* The wrapped driver context. */
+ struct pipe_context *pipe;
+ /* Set to TC_SENTINEL at creation; sits before the call array. */
+ unsigned sentinel;
+ /* NOTE(review): presumably the number of slots of call[] in use. */
+ unsigned num_total_call_slots;
+ /* NOTE(review): presumably signalled once the batch has been executed. */
+ struct util_queue_fence fence;
+ struct tc_call call[TC_CALLS_PER_BATCH];
+ /* Set to TC_SENTINEL at creation; sits after the call array. */
+ unsigned sentinel2;
+};
+
+/* The pipe_context wrapper. "base" must stay the first member because
+ * threaded_context() casts a pipe_context pointer directly.
+ */
+struct threaded_context {
+ /* The context handed out to the caller of threaded_context_create. */
+ struct pipe_context base;
+ /* The wrapped driver context. */
+ struct pipe_context *pipe;
+ /* Child of the parent transfer pool passed to threaded_context_create. */
+ struct slab_child_pool pool_transfers;
+ /* Driver callback passed to threaded_context_create. */
+ tc_replace_buffer_storage_func replace_buffer_storage;
+ /* Value of PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT queried at creation. */
+ unsigned map_buffer_alignment;
+
+ /* List of threaded_query::head_unflushed entries. */
+ struct list_head unflushed_queries;
+
+ /* Counters for the HUD. */
+ unsigned num_offloaded_slots;
+ unsigned num_direct_slots;
+ unsigned num_syncs;
+
+ /* Queue created with room for TC_MAX_BATCHES - 2 waiting batches. */
+ struct util_queue queue;
+ struct util_queue_fence *fence;
+
+ /* NOTE(review): presumably indices into batch_slots of the most recently
+ * flushed batch and of the batch currently recording - confirm in the .c
+ * file. */
+ unsigned last, next;
+ struct tc_batch batch_slots[TC_MAX_BATCHES];
+};
+
+/* Set up / tear down the threaded_resource part of a driver resource.
+ * Drivers are expected to call these for every pipe_resource they subclass
+ * from threaded_resource.
+ */
+void threaded_resource_init(struct pipe_resource *res);
+void threaded_resource_deinit(struct pipe_resource *res);
+/* Unwrap a pipe_context for pipe_screen driver functions that take a
+ * context (fence_finish, texture_get_handle).
+ * NOTE(review): presumably also syncs the threaded context before returning
+ * the driver context - confirm in the .c file.
+ */
+struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe);
+
+/* Wrap "pipe" into a threaded_context; see the definition in
+ * u_threaded_context.c for the parameter documentation.
+ */
+struct pipe_context *
+threaded_context_create(struct pipe_context *pipe,
+ struct slab_parent_pool *parent_transfer_pool,
+ tc_replace_buffer_storage_func replace_buffer,
+ struct threaded_context **out);
+
+/* Downcast a pipe_context to the threaded_context embedding it as "base".
+ * This is a plain cast with no checking.
+ */
+static inline struct threaded_context *
+threaded_context(struct pipe_context *pipe)
+{
+   struct threaded_context *tc = (struct threaded_context *)pipe;
+
+   return tc;
+}
+
+/* Downcast a pipe_resource to the threaded_resource embedding it as "b".
+ * This is a plain cast with no checking.
+ */
+static inline struct threaded_resource *
+threaded_resource(struct pipe_resource *res)
+{
+   struct threaded_resource *tres = (struct threaded_resource *)res;
+
+   return tres;
+}
+
+/* Reinterpret a pipe_query pointer as a threaded_query pointer.
+ * This is a plain cast with no checking.
+ */
+static inline struct threaded_query *
+threaded_query(struct pipe_query *q)
+{
+   struct threaded_query *tq = (struct threaded_query *)q;
+
+   return tq;
+}
+
+/* Downcast a pipe_transfer to the threaded_transfer embedding it as "b".
+ * This is a plain cast with no checking.
+ */
+static inline struct threaded_transfer *
+threaded_transfer(struct pipe_transfer *transfer)
+{
+   struct threaded_transfer *ttrans = (struct threaded_transfer *)transfer;
+
+   return ttrans;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_threaded_context_calls.h b/src/gallium/auxiliary/util/u_threaded_context_calls.h
new file mode 100644
index 00000000000..7dfccb0f0de
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_threaded_context_calls.h
@@ -0,0 +1,66 @@
+/* List of calls known to the threaded context. The includer defines
+ * CALL(name) before including this file and undefines it afterwards;
+ * u_threaded_context.c expands each entry to tc_call_<name> to build its
+ * execute_func table, so the order of the entries determines the order of
+ * that table.
+ */
+CALL(destroy_query)
+CALL(begin_query)
+CALL(end_query)
+CALL(get_query_result_resource)
+CALL(render_condition)
+CALL(bind_sampler_states)
+CALL(set_framebuffer_state)
+CALL(set_tess_state)
+CALL(set_constant_buffer)
+CALL(set_scissor_states)
+CALL(set_viewport_states)
+CALL(set_window_rectangles)
+CALL(set_sampler_views)
+CALL(set_shader_images)
+CALL(set_shader_buffers)
+CALL(set_vertex_buffers)
+CALL(set_stream_output_targets)
+CALL(replace_buffer_storage)
+CALL(transfer_flush_region)
+CALL(transfer_unmap)
+CALL(buffer_subdata)
+CALL(texture_subdata)
+CALL(emit_string_marker)
+CALL(draw_vbo)
+CALL(launch_grid)
+CALL(resource_copy_region)
+CALL(blit)
+CALL(generate_mipmap)
+CALL(flush_resource)
+CALL(invalidate_resource)
+CALL(clear)
+CALL(clear_buffer)
+CALL(clear_texture)
+CALL(resource_commit)
+CALL(set_active_query_state)
+CALL(set_blend_color)
+CALL(set_stencil_ref)
+CALL(set_clip_state)
+CALL(set_sample_mask)
+CALL(set_min_samples)
+CALL(set_polygon_stipple)
+CALL(texture_barrier)
+CALL(memory_barrier)
+
+CALL(bind_blend_state)
+CALL(bind_rasterizer_state)
+CALL(bind_depth_stencil_alpha_state)
+CALL(bind_compute_state)
+CALL(bind_fs_state)
+CALL(bind_vs_state)
+CALL(bind_gs_state)
+CALL(bind_tcs_state)
+CALL(bind_tes_state)
+CALL(bind_vertex_elements_state)
+
+CALL(delete_blend_state)
+CALL(delete_rasterizer_state)
+CALL(delete_depth_stencil_alpha_state)
+CALL(delete_compute_state)
+CALL(delete_fs_state)
+CALL(delete_vs_state)
+CALL(delete_gs_state)
+CALL(delete_tcs_state)
+CALL(delete_tes_state)
+CALL(delete_vertex_elements_state)
+CALL(delete_sampler_state)