diff options
-rw-r--r-- | src/intel/vulkan/genX_query.c | 103 |
1 files changed, 85 insertions, 18 deletions
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 587b1b20477..6c1c76aeef0 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -356,14 +356,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, } static void -emit_query_availability(struct anv_cmd_buffer *cmd_buffer, - struct anv_address addr) +emit_query_mi_availability(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr, + bool available) +{ + genX(cmd_buffer_mi_memset)(cmd_buffer, addr, available, 8); +} + +static void +emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr, + bool available) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = addr; - pc.ImmediateData = 1; + pc.ImmediateData = available; } } @@ -376,12 +385,40 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, struct anv_query_pool *pool, uint32_t first_index, uint32_t num_queries) { - for (uint32_t i = 0; i < num_queries; i++) { - struct anv_address slot_addr = - anv_query_address(pool, first_index + i); - genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8), - 0, pool->stride - 8); - emit_query_availability(cmd_buffer, slot_addr); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + /* These queries are written with a PIPE_CONTROL so clear them using the + * PIPE_CONTROL as well so we don't have to synchronize between 2 types + * of operations. + */ + assert((pool->stride % 8) == 0); + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + + for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) { + emit_query_pc_availability(cmd_buffer, + anv_address_add(slot_addr, qword * 8), + false); + } + emit_query_pc_availability(cmd_buffer, slot_addr, true); + } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8), + 0, pool->stride - 8); + emit_query_mi_availability(cmd_buffer, slot_addr, true); + } + break; + + default: + unreachable("Unsupported query type"); } } @@ -394,11 +431,28 @@ void genX(CmdResetQueryPool)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - for (uint32_t i = 0; i < queryCount; i++) { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) { - sdm.Address = anv_query_address(pool, firstQuery + i); - sdm.ImmediateData = 0; + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + for (uint32_t i = 0; i < queryCount; i++) { + emit_query_pc_availability(cmd_buffer, + anv_query_address(pool, firstQuery + i), + false); } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: { + for (uint32_t i = 0; i < queryCount; i++) { + emit_query_mi_availability(cmd_buffer, + anv_query_address(pool, firstQuery + i), + false); + } + break; + } + + default: + unreachable("Unsupported query type"); } } @@ -529,7 +583,7 @@ void genX(CmdEndQueryIndexedEXT)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { @@ -548,7 +602,7 @@ void genX(CmdEndQueryIndexedEXT)( offset += 16; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(cmd_buffer, query_addr, true); break; } @@ -559,7 +613,7 @@ void genX(CmdEndQueryIndexedEXT)( } emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(cmd_buffer, query_addr, true); break; default: @@ -614,7 +668,7 @@ void genX(CmdWriteTimestamp)( break; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); /* When multiview is active the spec requires that N consecutive query * indices are used, where N is the number of active views in the subpass. @@ -817,7 +871,20 @@ void genX(CmdCopyQueryPoolResults)( } if ((flags & VK_QUERY_RESULT_WAIT_BIT) || - (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) || + /* Occlusion & timestamp queries are written using a PIPE_CONTROL and + * because we're about to copy values from MI commands, we need to + * stall the command streamer to make sure the PIPE_CONTROL values have + * landed, otherwise we could see inconsistent values & availability. + * + * From the vulkan spec: + * + * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of + * previous uses of vkCmdResetQueryPool in the same queue, without + * any additional synchronization." + */ + pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } |