summaryrefslogtreecommitdiff
path: root/src/intel/vulkan/genX_query.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/vulkan/genX_query.c')
-rw-r--r-- src/intel/vulkan/genX_query.c 176
1 files changed, 120 insertions, 56 deletions
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 2cb492afcf9..aaf3ca962b2 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -37,17 +37,14 @@
#include "ds/intel_tracepoints.h"
#include "anv_internal_kernels.h"
+#include "genX_mi_builder.h"
+
+#if GFX_VERx10 >= 125
+#define ANV_PIPELINE_STATISTICS_MASK 0x00001fff
+#else
+#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
+#endif
-/* We reserve :
- * - GPR 14 for perf queries
- * - GPR 15 for conditional rendering
- */
-#define MI_BUILDER_NUM_ALLOC_GPRS 14
-#define MI_BUILDER_CAN_WRITE_BATCH true
-#define __gen_get_batch_dwords anv_batch_emit_dwords
-#define __gen_address_offset anv_address_add
-#define __gen_get_batch_address(b, a) anv_batch_address(b, a)
-#include "common/mi_builder.h"
#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"
#include "perf/intel_perf_regs.h"
@@ -186,6 +183,11 @@ VkResult genX(CreateQueryPool)(
uint64s_per_slot = 1 + 2 /* availability + size (PostbuildInfoSerializationDesc) */;
break;
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+ /* Query has two values: begin and end. */
+ uint64s_per_slot = 1 + 2;
+ break;
+
#endif
case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
uint64s_per_slot = 1;
@@ -484,6 +486,7 @@ VkResult genX(GetQueryPoolResults)(
pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR ||
+ pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
#endif
pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
@@ -535,7 +538,11 @@ VkResult genX(GetQueryPoolResults)(
uint32_t idx = 0;
switch (pool->vk.query_type) {
case VK_QUERY_TYPE_OCCLUSION:
- case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
+ case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+#endif
+ {
uint64_t *slot = query_slot(pool, firstQuery + i);
if (write_results) {
/* From the Vulkan 1.2.132 spec:
@@ -558,7 +565,8 @@ VkResult genX(GetQueryPoolResults)(
while (statistics) {
UNUSED uint32_t stat = u_bit_scan(&statistics);
if (write_results) {
- uint64_t result = slot[idx * 2 + 2] - slot[idx * 2 + 1];
+ /* If a query is not available but VK_QUERY_RESULT_PARTIAL_BIT is set, write 0. */
+ uint64_t result = available ? slot[idx * 2 + 2] - slot[idx * 2 + 1] : 0;
cpu_write_query_result(pData, flags, idx, result);
}
idx++;
@@ -569,11 +577,17 @@ VkResult genX(GetQueryPoolResults)(
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
uint64_t *slot = query_slot(pool, firstQuery + i);
- if (write_results)
- cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
+ if (write_results) {
+ /* If a query is not available but VK_QUERY_RESULT_PARTIAL_BIT is set, write 0. */
+ uint64_t result = available ? slot[2] - slot[1] : 0;
+ cpu_write_query_result(pData, flags, idx, result);
+ }
idx++;
- if (write_results)
- cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]);
+ if (write_results) {
+ /* If a query is not available but VK_QUERY_RESULT_PARTIAL_BIT is set, write 0. */
+ uint64_t result = available ? slot[4] - slot[3] : 0;
+ cpu_write_query_result(pData, flags, idx, result);
+ }
idx++;
break;
}
@@ -737,6 +751,9 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+#endif
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
@@ -844,7 +861,11 @@ void genX(CmdResetQueryPool)(
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
+ case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+#endif
+ {
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
@@ -921,6 +942,10 @@ static const uint32_t vk_pipeline_stat_to_reg[] = {
GENX(HS_INVOCATION_COUNT_num),
GENX(DS_INVOCATION_COUNT_num),
GENX(CS_INVOCATION_COUNT_num),
+#if GFX_VERx10 >= 125
+ GENX(TASK_INVOCATION_COUNT_num),
+ GENX(MESH_INVOCATION_COUNT_num)
+#endif
};
static void
@@ -1042,6 +1067,18 @@ void genX(CmdBeginQueryIndexedEXT)(
mi_reg64(GENX(CL_INVOCATION_COUNT_num)));
break;
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+ genx_batch_emit_pipe_control(&cmd_buffer->batch,
+ cmd_buffer->device->info,
+ cmd_buffer->state.current_pipeline,
+ ANV_PIPE_CS_STALL_BIT |
+ ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
+ mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
+ mi_reg64(GENX(MESH_PRIMITIVE_COUNT_num)));
+ break;
+#endif
+
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
/* TODO: This might only be necessary for certain stats */
genx_batch_emit_pipe_control(&cmd_buffer->batch,
@@ -1088,7 +1125,8 @@ void genX(CmdBeginQueryIndexedEXT)(
khr_perf_query_data_offset(pool, query, 0, end) +
field->location)),
mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
- cmd_buffer->self_mod_locations[reloc_idx++] = mi_store_address(&b, reg_addr);
+ cmd_buffer->self_mod_locations[reloc_idx++] =
+ mi_store_relocated_address_reg64(&b, reg_addr);
if (field->type != INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC &&
field->size == 8) {
@@ -1099,7 +1137,8 @@ void genX(CmdBeginQueryIndexedEXT)(
khr_perf_query_data_offset(pool, query, 0, end) +
field->location + 4)),
mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
- cmd_buffer->self_mod_locations[reloc_idx++] = mi_store_address(&b, reg_addr);
+ cmd_buffer->self_mod_locations[reloc_idx++] =
+ mi_store_relocated_address_reg64(&b, reg_addr);
}
}
}
@@ -1113,7 +1152,7 @@ void genX(CmdBeginQueryIndexedEXT)(
khr_perf_query_availability_offset(pool, query, 0 /* pass */))),
mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
cmd_buffer->self_mod_locations[reloc_idx++] =
- mi_store_address(&b, availability_write_offset);
+ mi_store_relocated_address_reg64(&b, availability_write_offset);
assert(reloc_idx == pdevice->n_perf_query_commands);
@@ -1140,10 +1179,10 @@ void genX(CmdBeginQueryIndexedEXT)(
GENX(MI_REPORT_PERF_COUNT_length),
GENX(MI_REPORT_PERF_COUNT),
.MemoryAddress = query_addr /* Will be overwritten */);
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
break;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
@@ -1157,10 +1196,10 @@ void genX(CmdBeginQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset,
.MemoryAddress = query_addr /* Will be overwritten */ );
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
if (field->size == 8) {
dws =
anv_batch_emitn(&cmd_buffer->batch,
@@ -1168,10 +1207,10 @@ void genX(CmdBeginQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset + 4,
.MemoryAddress = query_addr /* Will be overwritten */ );
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
}
break;
@@ -1236,6 +1275,19 @@ void genX(CmdEndQueryIndexedEXT)(
emit_query_mi_availability(&b, query_addr, true);
break;
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+ genx_batch_emit_pipe_control(&cmd_buffer->batch,
+ cmd_buffer->device->info,
+ cmd_buffer->state.current_pipeline,
+ ANV_PIPE_CS_STALL_BIT |
+ ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
+ mi_store(&b, mi_mem64(anv_address_add(query_addr, 16)),
+ mi_reg64(GENX(MESH_PRIMITIVE_COUNT_num)));
+ emit_query_mi_availability(&b, query_addr, true);
+ break;
+#endif
+
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
/* TODO: This might only be necessary for certain stats */
genx_batch_emit_pipe_control(&cmd_buffer->batch,
@@ -1290,10 +1342,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_REPORT_PERF_COUNT_length),
GENX(MI_REPORT_PERF_COUNT),
.MemoryAddress = query_addr /* Will be overwritten */);
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
break;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
@@ -1307,10 +1359,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset,
.MemoryAddress = query_addr /* Will be overwritten */ );
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
if (field->size == 8) {
dws =
anv_batch_emitn(&cmd_buffer->batch,
@@ -1318,10 +1370,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset + 4,
.MemoryAddress = query_addr /* Will be overwritten */ );
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
}
break;
@@ -1336,10 +1388,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_STORE_DATA_IMM_length),
GENX(MI_STORE_DATA_IMM),
.ImmediateData = true);
- _mi_resolve_address_token(&b,
- cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
- dws +
- GENX(MI_STORE_DATA_IMM_Address_start) / 8);
+ mi_resolve_relocated_address_token(
+ &b,
+ cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
+ dws + GENX(MI_STORE_DATA_IMM_Address_start) / 8);
assert(cmd_buffer->perf_reloc_idx == pdevice->n_perf_query_commands);
break;
@@ -1576,13 +1628,13 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
- struct mi_value result;
+ mi_builder_set_mocs(&b, anv_mocs_for_address(
+ cmd_buffer->device,
+ &(struct anv_address) { .bo = pool->bo }));
for (uint32_t i = 0; i < query_count; i++) {
struct anv_address query_addr = anv_query_address(pool, first_query + i);
- const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &query_addr);
-
- mi_builder_set_mocs(&b, mocs);
+ struct mi_value result;
/* Wait for the availability write to land before we go read the data */
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@@ -1598,6 +1650,9 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
switch (pool->vk.query_type) {
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+#endif
result = compute_query_result(&b, anv_address_add(query_addr, 8));
/* Like in the case of vkGetQueryPoolResults, if the query is
* unavailable and the VK_QUERY_RESULT_PARTIAL_BIT flag is set,
@@ -1780,9 +1835,8 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
uint32_t data_offset = 8 /* behind availability */;
switch (pool->vk.query_type) {
case VK_QUERY_TYPE_OCCLUSION:
- case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
- /* These 2 queries are the only ones where we would have partial data
+ /* Occlusion and timestamp queries are the only ones where we would have partial data
* because they are captured with a PIPE_CONTROL post sync operation. The
* other ones are captured with MI_STORE_REGISTER_MEM so they are always
* available by the time we reach the copy command.
@@ -1790,6 +1844,17 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
copy_flags |= (flags & VK_QUERY_RESULT_PARTIAL_BIT) ? ANV_COPY_QUERY_FLAG_PARTIAL : 0;
break;
+ case VK_QUERY_TYPE_TIMESTAMP:
+ copy_flags |= (flags & VK_QUERY_RESULT_PARTIAL_BIT) ? ANV_COPY_QUERY_FLAG_PARTIAL : 0;
+ break;
+
+ case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
+#if GFX_VERx10 >= 125
+ case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
+#endif
+ copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
+ break;
+
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
num_items = util_bitcount(pool->vk.pipeline_statistics);
copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
@@ -1800,7 +1865,6 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
break;
- case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
@@ -1875,7 +1939,7 @@ void genX(CmdCopyQueryPoolResults)(
}
}
-#if GFX_VERx10 == 125 && ANV_SUPPORT_RT
+#if GFX_VERx10 >= 125 && ANV_SUPPORT_RT
#include "grl/include/GRLRTASCommon.h"
#include "grl/grl_metakernel_postbuild_info.h"