summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2019-05-01 12:30:41 +0100
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>2019-05-09 00:49:12 +0100
commitba47599c7984b8d657744a474defe786b4224ac6 (patch)
tree0779e04c54bd59ac0170e1b6e2ea427eadc40de6
parent0ebd8e316d129a48b1c0adf0629a02b552e560bb (diff)
anv: rework query writes to ensure ordering of memory writes
We use a mix of MI & PIPE_CONTROL commands to write our queries' data (results & availability). Those commands' memory write order is not guaranteed with regard to their order in the command stream, unless CS stalls are inserted between them. This is problematic for 2 reasons: 1. We copy results from the device using MI commands even though the values are generated from PIPE_CONTROL, meaning we could copy unlanded values into the results and then copy an availability value that is inconsistent with those values. 2. We allow the user to poll on the availability values of the query pool from the CPU. If the availability lands in memory before the values then we could return invalid values. This change does 2 things to address this problem: - We use either PIPE_CONTROL or MI commands to write both query values and availability, so that the ordering of the memory writes guarantees that if availability is visible, results are also visible. - For the occlusion & timestamp queries we apply a CS stall before copying the results on the device, to ensure copying with MI commands sees the correct values of previous PIPE_CONTROL writes of availability (required by the Vulkan spec). Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reported-by: Iago Toral Quiroga <itoral@igalia.com> Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (cherry picked from commit a07d06f10352fc5fa40db8a723fa5842ebc660db)
-rw-r--r--src/intel/vulkan/genX_query.c103
1 file changed, 85 insertions, 18 deletions
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 587b1b20477..6c1c76aeef0 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -356,14 +356,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
}
static void
-emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
- struct anv_address addr)
+emit_query_mi_availability(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_address addr,
+ bool available)
+{
+ genX(cmd_buffer_mi_memset)(cmd_buffer, addr, available, 8);
+}
+
+static void
+emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_address addr,
+ bool available)
{
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DestinationAddressType = DAT_PPGTT;
pc.PostSyncOperation = WriteImmediateData;
pc.Address = addr;
- pc.ImmediateData = 1;
+ pc.ImmediateData = available;
}
}
@@ -376,12 +385,40 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
struct anv_query_pool *pool,
uint32_t first_index, uint32_t num_queries)
{
- for (uint32_t i = 0; i < num_queries; i++) {
- struct anv_address slot_addr =
- anv_query_address(pool, first_index + i);
- genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
- 0, pool->stride - 8);
- emit_query_availability(cmd_buffer, slot_addr);
+ switch (pool->type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ case VK_QUERY_TYPE_TIMESTAMP:
+ /* These queries are written with a PIPE_CONTROL so clear them using the
+ * PIPE_CONTROL as well so we don't have to synchronize between 2 types
+ * of operations.
+ */
+ assert((pool->stride % 8) == 0);
+ for (uint32_t i = 0; i < num_queries; i++) {
+ struct anv_address slot_addr =
+ anv_query_address(pool, first_index + i);
+
+ for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) {
+ emit_query_pc_availability(cmd_buffer,
+ anv_address_add(slot_addr, qword * 8),
+ false);
+ }
+ emit_query_pc_availability(cmd_buffer, slot_addr, true);
+ }
+ break;
+
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ for (uint32_t i = 0; i < num_queries; i++) {
+ struct anv_address slot_addr =
+ anv_query_address(pool, first_index + i);
+ genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
+ 0, pool->stride - 8);
+ emit_query_mi_availability(cmd_buffer, slot_addr, true);
+ }
+ break;
+
+ default:
+ unreachable("Unsupported query type");
}
}
@@ -394,11 +431,28 @@ void genX(CmdResetQueryPool)(
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
- for (uint32_t i = 0; i < queryCount; i++) {
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
- sdm.Address = anv_query_address(pool, firstQuery + i);
- sdm.ImmediateData = 0;
+ switch (pool->type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ case VK_QUERY_TYPE_TIMESTAMP:
+ for (uint32_t i = 0; i < queryCount; i++) {
+ emit_query_pc_availability(cmd_buffer,
+ anv_query_address(pool, firstQuery + i),
+ false);
}
+ break;
+
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
+ for (uint32_t i = 0; i < queryCount; i++) {
+ emit_query_mi_availability(cmd_buffer,
+ anv_query_address(pool, firstQuery + i),
+ false);
+ }
+ break;
+ }
+
+ default:
+ unreachable("Unsupported query type");
}
}
@@ -529,7 +583,7 @@ void genX(CmdEndQueryIndexedEXT)(
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
- emit_query_availability(cmd_buffer, query_addr);
+ emit_query_pc_availability(cmd_buffer, query_addr, true);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
@@ -548,7 +602,7 @@ void genX(CmdEndQueryIndexedEXT)(
offset += 16;
}
- emit_query_availability(cmd_buffer, query_addr);
+ emit_query_mi_availability(cmd_buffer, query_addr, true);
break;
}
@@ -559,7 +613,7 @@ void genX(CmdEndQueryIndexedEXT)(
}
emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 16));
- emit_query_availability(cmd_buffer, query_addr);
+ emit_query_mi_availability(cmd_buffer, query_addr, true);
break;
default:
@@ -614,7 +668,7 @@ void genX(CmdWriteTimestamp)(
break;
}
- emit_query_availability(cmd_buffer, query_addr);
+ emit_query_pc_availability(cmd_buffer, query_addr, true);
/* When multiview is active the spec requires that N consecutive query
* indices are used, where N is the number of active views in the subpass.
@@ -817,7 +871,20 @@ void genX(CmdCopyQueryPoolResults)(
}
if ((flags & VK_QUERY_RESULT_WAIT_BIT) ||
- (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) {
+ (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) ||
+ /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
+ * because we're about to copy values from MI commands, we need to
+ * stall the command streamer to make sure the PIPE_CONTROL values have
+ * landed, otherwise we could see inconsistent values & availability.
+ *
+ * From the vulkan spec:
+ *
+ * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
+ * previous uses of vkCmdResetQueryPool in the same queue, without
+ * any additional synchronization."
+ */
+ pool->type == VK_QUERY_TYPE_OCCLUSION ||
+ pool->type == VK_QUERY_TYPE_TIMESTAMP) {
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}