summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2018-06-07 18:02:03 +0100
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>2019-10-23 05:41:15 +0000
commit2b5f30b1d91b98ab27ba21439cd8a40a0d1ece36 (patch)
treeca4dadd22784f705e489f55ad32103c0dfdb8d79
parent5ba6d9941b5dda95b88b924ac51133f36bd0f653 (diff)
anv: implement VK_INTEL_performance_query
v2: Introduce the appropriate pipe controls Properly deal with changes in metric sets (using execbuf parameter) Record marker at query end v3: Fill out PerfCntr1&2 v4: Introduce vkUninitializePerformanceApiINTEL v5: Use new execbuf extension mechanism v6: Fix comments in genX_query.c (Rafael) Use PIPE_CONTROL workarounds (Rafael) Refactor on the last kernel series update (Lionel) v7: Only I915_PERF_IOCTL_CONFIG when perf stream is already opened (Lionel) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
-rw-r--r--src/intel/Android.vulkan.mk1
-rw-r--r--src/intel/Makefile.sources1
-rw-r--r--src/intel/vulkan/anv_device.c5
-rw-r--r--src/intel/vulkan/anv_extensions.py1
-rw-r--r--src/intel/vulkan/anv_perf.c224
-rw-r--r--src/intel/vulkan/anv_private.h11
-rw-r--r--src/intel/vulkan/genX_cmd_buffer.c54
-rw-r--r--src/intel/vulkan/genX_query.c252
-rw-r--r--src/intel/vulkan/meson.build4
9 files changed, 535 insertions, 18 deletions
diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk
index 468ddfc65f8..134f4183eec 100644
--- a/src/intel/Android.vulkan.mk
+++ b/src/intel/Android.vulkan.mk
@@ -305,6 +305,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
libmesa_compiler \
libmesa_intel_common \
libmesa_intel_dev \
+ libmesa_intel_perf \
libmesa_vulkan_common \
libmesa_vulkan_util \
libmesa_anv_gen7 \
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index b65cc934350..4900dd56bd2 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -259,6 +259,7 @@ VULKAN_FILES := \
vulkan/anv_nir_lower_push_constants.c \
vulkan/anv_nir_lower_ycbcr_textures.c \
vulkan/anv_pass.c \
+ vulkan/anv_perf.c \
vulkan/anv_pipeline.c \
vulkan/anv_pipeline_cache.c \
vulkan/anv_private.h \
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8934957e39b..9730e027392 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -604,6 +604,8 @@ anv_physical_device_init(struct anv_physical_device *device,
goto fail;
}
+ device->perf = anv_get_perf(&device->info, fd);
+
anv_physical_device_get_supported_extensions(device,
&device->supported_extensions);
@@ -625,6 +627,7 @@ anv_physical_device_finish(struct anv_physical_device *device)
anv_finish_wsi(device);
anv_physical_device_free_disk_cache(device);
ralloc_free(device->compiler);
+ ralloc_free(device->perf);
close(device->local_fd);
if (device->master_fd >= 0)
close(device->master_fd);
@@ -2657,6 +2660,8 @@ VkResult anv_CreateDevice(
anv_device_init_border_colors(device);
+ anv_device_perf_init(device);
+
*pDevice = anv_device_to_handle(device);
return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index 84284398b6a..c72c23530f8 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -165,6 +165,7 @@ EXTENSIONS = [
Extension('VK_ANDROID_native_buffer', 7, 'ANDROID'),
Extension('VK_GOOGLE_decorate_string', 1, True),
Extension('VK_GOOGLE_hlsl_functionality1', 1, True),
+ Extension('VK_INTEL_performance_query', 1, 'device->perf'),
Extension('VK_NV_compute_shader_derivatives', 1, True),
]
diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c
new file mode 100644
index 00000000000..6a9fb4f6f11
--- /dev/null
+++ b/src/intel/vulkan/anv_perf.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "anv_private.h"
+
+#include "perf/gen_perf.h"
+#include "perf/gen_perf_mdapi.h"
+
+/**
+ * Create the performance-query configuration for a physical device.
+ *
+ * Allocates a gen_perf_config with gen_perf_new(), initializes it with
+ * gen_perf_init_metrics() for devinfo/fd, and checks that the kernel reports
+ * a recent enough i915-perf revision.
+ *
+ * Returns the new configuration (a ralloc allocation the caller releases
+ * with ralloc_free()), or NULL if the allocation failed or the kernel's perf
+ * revision is lower than 3.
+ */
+struct gen_perf_config *
+anv_get_perf(const struct gen_device_info *devinfo, int fd)
+{
+   struct gen_perf_config *perf = gen_perf_new(NULL);
+   if (!perf)
+      return NULL;
+
+   gen_perf_init_metrics(perf, devinfo, fd);
+
+   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, which is only
+    * available starting with perf revision 3.
+    */
+   if (anv_gem_get_param(fd, I915_PARAM_PERF_REVISION) < 3)
+      goto err;
+
+   return perf;
+
+ err:
+   ralloc_free(perf);
+   return NULL;
+}
+
+/* Put the per-device perf state into its "no stream opened" state: perf_fd
+ * is -1 until a stream gets opened.
+ */
+void
+anv_device_perf_init(struct anv_device *device)
+{
+   const int no_stream = -1;
+
+   device->perf_fd = no_stream;
+}
+
+/* Open an i915-perf stream on device->fd for the metric set metric_id.
+ *
+ * The stream samples OA reports for the device's context (device->context_id)
+ * and asks the kernel to hold preemption. The file descriptor is created
+ * close-on-exec and non-blocking.
+ *
+ * Returns whatever DRM_IOCTL_I915_PERF_OPEN returns: the stream fd on
+ * success, a negative value on failure.
+ */
+static int
+anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
+{
+   const uint64_t oa_format = device->info.gen >= 8 ?
+      I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
+      I915_OA_FORMAT_A45_B8_C8;
+
+   /* Flat list of (property, value) pairs handed to the kernel. */
+   uint64_t properties[] = {
+      DRM_I915_PERF_PROP_SAMPLE_OA,        true,
+      DRM_I915_PERF_PROP_OA_METRICS_SET,   metric_id,
+      DRM_I915_PERF_PROP_OA_FORMAT,        oa_format,
+      DRM_I915_PERF_PROP_OA_EXPONENT,      31, /* slowest sampling period */
+      DRM_I915_PERF_PROP_CTX_HANDLE,       device->context_id,
+      DRM_I915_PERF_PROP_HOLD_PREEMPTION,  true,
+   };
+
+   struct drm_i915_perf_open_param param;
+   memset(&param, 0, sizeof(param));
+   param.flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
+   param.properties_ptr = (uintptr_t)properties;
+   param.num_properties = sizeof(properties) / sizeof(properties[0]) / 2;
+
+   return gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
+}
+
+/* vkInitializePerformanceApiINTEL entry point.
+ *
+ * There is nothing to set up at this point: the call only reports whether
+ * the physical device has a perf configuration. pInitializeInfo is not read.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_EXTENSION_NOT_PRESENT when the physical
+ * device has no perf configuration.
+ */
+VkResult anv_InitializePerformanceApiINTEL(
+    VkDevice                                    _device,
+    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+
+   return pdevice->perf ? VK_SUCCESS : VK_ERROR_EXTENSION_NOT_PRESENT;
+}
+
+/* vkGetPerformanceParameterINTEL entry point.
+ *
+ * Fills *pValue for the two parameters this driver answers:
+ *  - HW_COUNTERS_SUPPORTED: boolean, always VK_TRUE
+ *  - STREAM_MARKER_VALID_BITS: 32-bit value, always 25
+ *
+ * Returns VK_SUCCESS for those, VK_ERROR_FEATURE_NOT_PRESENT for any other
+ * parameter (pValue is left untouched), and VK_ERROR_EXTENSION_NOT_PRESENT
+ * when the physical device has no perf configuration.
+ */
+VkResult anv_GetPerformanceParameterINTEL(
+    VkDevice                                    _device,
+    VkPerformanceParameterTypeINTEL             parameter,
+    VkPerformanceValueINTEL*                    pValue)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+
+   if (pdevice->perf == NULL)
+      return VK_ERROR_EXTENSION_NOT_PRESENT;
+
+   switch (parameter) {
+   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
+      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
+      pValue->data.valueBool = VK_TRUE;
+      return VK_SUCCESS;
+
+   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
+      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
+      pValue->data.value32 = 25;
+      return VK_SUCCESS;
+
+   default:
+      return VK_ERROR_FEATURE_NOT_PRESENT;
+   }
+}
+
+/* vkCmdSetPerformanceMarkerINTEL entry point.
+ *
+ * Nothing is emitted into the batch here; the marker value is only recorded
+ * on the command buffer (intel_perf_marker). Always returns VK_SUCCESS.
+ */
+VkResult anv_CmdSetPerformanceMarkerINTEL(
+    VkCommandBuffer                             commandBuffer,
+    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   const uint64_t marker = pMarkerInfo->marker;
+
+   cmd_buffer->intel_perf_marker = marker;
+   return VK_SUCCESS;
+}
+
+/* vkAcquirePerformanceConfigurationINTEL entry point.
+ *
+ * Loads the GEN_PERF_QUERY_GUID_MDAPI register configuration, stores it
+ * through gen_perf_store_configuration() and hands the resulting integer id
+ * back as the VkPerformanceConfigurationINTEL handle. pAcquireInfo is not
+ * read.
+ *
+ * Returns VK_SUCCESS, or VK_INCOMPLETE when the configuration cannot be
+ * loaded or stored (in which case *pConfiguration is not written).
+ */
+VkResult anv_AcquirePerformanceConfigurationINTEL(
+    VkDevice                                    _device,
+    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
+    VkPerformanceConfigurationINTEL*            pConfiguration)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+   struct gen_perf_config *perf = pdevice->perf;
+
+   struct gen_perf_registers *registers =
+      gen_perf_load_configuration(perf, device->fd,
+                                  GEN_PERF_QUERY_GUID_MDAPI);
+   if (registers == NULL)
+      return VK_INCOMPLETE;
+
+   const int config_id =
+      gen_perf_store_configuration(perf, device->fd,
+                                   registers, NULL /* guid */);
+   if (config_id < 0) {
+      /* Storing failed: drop the registers we just loaded. */
+      ralloc_free(registers);
+      return VK_INCOMPLETE;
+   }
+
+   *pConfiguration = (VkPerformanceConfigurationINTEL) (uint64_t) config_id;
+   return VK_SUCCESS;
+}
+
+/* vkReleasePerformanceConfigurationINTEL entry point.
+ *
+ * Treats the handle as the integer configuration id and asks the kernel to
+ * remove it (DRM_IOCTL_I915_PERF_REMOVE_CONFIG). The ioctl result is not
+ * checked; the function always returns VK_SUCCESS.
+ */
+VkResult anv_ReleasePerformanceConfigurationINTEL(
+    VkDevice                                    _device,
+    VkPerformanceConfigurationINTEL             _configuration)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   uint64_t config_id = (uint64_t) _configuration;
+
+   gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
+   return VK_SUCCESS;
+}
+
+/* vkQueueSetPerformanceConfigurationINTEL entry point.
+ *
+ * The first call opens the device's i915-perf stream with the requested
+ * configuration. Once a stream is open, later calls switch its configuration
+ * with I915_PERF_IOCTL_CONFIG.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_INITIALIZATION_FAILED when the stream cannot
+ * be opened, or the result of anv_device_set_lost() when reconfiguring an
+ * already opened stream fails.
+ */
+VkResult anv_QueueSetPerformanceConfigurationINTEL(
+    VkQueue                                     _queue,
+    VkPerformanceConfigurationINTEL             _configuration)
+{
+   ANV_FROM_HANDLE(anv_queue, queue, _queue);
+   struct anv_device *device = queue->device;
+   uint64_t configuration = (uint64_t) _configuration;
+
+   if (device->perf_fd < 0) {
+      device->perf_fd = anv_device_perf_open(device, configuration);
+      if (device->perf_fd < 0)
+         return VK_ERROR_INITIALIZATION_FAILED;
+   } else {
+      int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
+                          (void *)(uintptr_t) _configuration);
+      if (ret < 0) {
+         /* The ioctl reports failure as a negative return with the reason
+          * in errno; the return value itself is not an error number.
+          */
+         return anv_device_set_lost(device,
+                                    "i915-perf config failed: %s",
+                                    strerror(errno));
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* vkUninitializePerformanceApiINTEL entry point.
+ *
+ * Closes the device's perf stream if one is open and marks it as closed
+ * again (perf_fd = -1). Does nothing when no stream is open.
+ */
+void anv_UninitializePerformanceApiINTEL(
+    VkDevice                                    _device)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+
+   if (device->perf_fd < 0)
+      return;
+
+   close(device->perf_fd);
+   device->perf_fd = -1;
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 3aa6d1922f9..aa1f2cbea87 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -74,6 +74,7 @@ struct anv_image_view;
struct anv_instance;
struct gen_l3_config;
+struct gen_perf_config;
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_intel.h>
@@ -948,6 +949,7 @@ struct anv_physical_device {
bool supports_48bit_addresses;
struct brw_compiler * compiler;
struct isl_device isl_dev;
+ struct gen_perf_config * perf;
int cmd_parser_version;
bool has_exec_async;
bool has_exec_capture;
@@ -1169,6 +1171,9 @@ struct anv_device {
* the cmd_buffer's list.
*/
struct anv_cmd_buffer *cmd_buffer_being_decoded;
+
+ int perf_fd; /* -1 if not opened */
+ uint64_t perf_metric; /* 0 if unset */
};
static inline struct anv_state_pool *
@@ -2530,6 +2535,9 @@ struct anv_cmd_buffer {
VkCommandBufferLevel level;
struct anv_cmd_state state;
+
+ /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
+ uint64_t intel_perf_marker;
};
VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
@@ -3750,6 +3758,9 @@ anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
return subpass_id;
}
+struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd);
+void anv_device_perf_init(struct anv_device *device);
+
#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \
\
static inline struct __anv_type * \
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index dd0f6e20681..ff9c6c79eb9 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -5091,3 +5091,57 @@ void genX(CmdWaitEvents)(
bufferMemoryBarrierCount, pBufferMemoryBarriers,
imageMemoryBarrierCount, pImageMemoryBarriers);
}
+
+/* vkCmdSetPerformanceOverrideINTEL entry point.
+ *
+ * NULL_HARDWARE: emits a load-register-immediate that sets or clears the
+ * "3D rendering instruction disable" and "media instruction disable" bits
+ * (CS_DEBUG_MODE2 on gen9+, INSTPM before that), following
+ * pOverrideInfo->enable.
+ *
+ * FLUSH_GPU_CACHES: when enabled, requests every flush and invalidate bit
+ * and applies the pending pipe flushes right away; when disabled, nothing is
+ * emitted.
+ *
+ * Always returns VK_SUCCESS; any other override type is treated as
+ * unreachable.
+ */
+VkResult genX(CmdSetPerformanceOverrideINTEL)(
+    VkCommandBuffer                             commandBuffer,
+    const VkPerformanceOverrideInfoINTEL*       pOverrideInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   switch (pOverrideInfo->type) {
+   case VK_PERFORMANCE_OVERRIDE_TYPE_NULL_HARDWARE_INTEL: {
+      uint32_t reg_value;
+
+#if GEN_GEN >= 9
+      anv_pack_struct(&reg_value, GENX(CS_DEBUG_MODE2),
+                      ._3DRenderingInstructionDisable = pOverrideInfo->enable,
+                      .MediaInstructionDisable = pOverrideInfo->enable,
+                      ._3DRenderingInstructionDisableMask = true,
+                      .MediaInstructionDisableMask = true);
+      emit_lri(&cmd_buffer->batch, GENX(CS_DEBUG_MODE2_num), reg_value);
+#else
+      anv_pack_struct(&reg_value, GENX(INSTPM),
+                      ._3DRenderingInstructionDisable = pOverrideInfo->enable,
+                      .MediaInstructionDisable = pOverrideInfo->enable,
+                      ._3DRenderingInstructionDisableMask = true,
+                      .MediaInstructionDisableMask = true);
+      emit_lri(&cmd_buffer->batch, GENX(INSTPM_num), reg_value);
+#endif
+      break;
+   }
+
+   case VK_PERFORMANCE_OVERRIDE_TYPE_FLUSH_GPU_CACHES_INTEL:
+      if (!pOverrideInfo->enable)
+         break;
+
+      /* FLUSH ALL THE THINGS! As requested by the MDAPI team. */
+      cmd_buffer->state.pending_pipe_bits |=
+         ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+      break;
+
+   default:
+      unreachable("Invalid override");
+   }
+
+   return VK_SUCCESS;
+}
+
+/* vkCmdSetPerformanceStreamMarkerINTEL entry point.
+ *
+ * Currently a stub: neither argument is read, nothing is emitted and the
+ * call always reports VK_SUCCESS.
+ */
+VkResult genX(CmdSetPerformanceStreamMarkerINTEL)(
+    VkCommandBuffer                             commandBuffer,
+    const VkPerformanceStreamMarkerInfoINTEL*   pMarkerInfo)
+{
+   /* TODO: Waiting on the register to write, might depend on generation. */
+   return VK_SUCCESS;
+}
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index aa0cf8b9471..b3090f20545 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -37,6 +37,10 @@
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
#include "common/gen_mi_builder.h"
+#include "perf/gen_perf.h"
+#include "perf/gen_perf_mdapi.h"
+
+#define OA_REPORT_N_UINT64 (256 / sizeof(uint64_t))
VkResult genX(CreateQueryPool)(
VkDevice _device,
@@ -52,9 +56,14 @@ VkResult genX(CreateQueryPool)(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
/* Query pool slots are made up of some number of 64-bit values packed
- * tightly together. The first 64-bit value is always the "available" bit
- * which is 0 when the query is unavailable and 1 when it is available.
- * The 64-bit values that follow are determined by the type of query.
+ * tightly together. For most query types, the first 64-bit value is the
+ * "available" bit, which is 0 when the query is unavailable and 1 when it
+ * is available. The 64-bit values that follow are determined by the type
+ * of query.
+ *
+ * For performance queries, OA reports are required to be aligned to
+ * 64 bytes, so we put those first and place the "available" bit after
+ * them, together with some other counters.
*/
uint32_t uint64s_per_slot = 1;
@@ -84,6 +93,15 @@ VkResult genX(CreateQueryPool)(
*/
uint64s_per_slot += 4;
break;
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
+ uint64s_per_slot = 2 * OA_REPORT_N_UINT64; /* begin & end OA reports */
+ uint64s_per_slot += 4; /* PerfCounter 1 & 2 */
+ uint64s_per_slot++; /* 2 * 32bit RPSTAT register */
+ uint64s_per_slot++; /* 64bit marker */
+ uint64s_per_slot++; /* availability */
+ uint64s_per_slot = align_u32(uint64s_per_slot, 8); /* OA reports must be aligned to 64 bytes */
+ break;
+ }
default:
assert(!"Invalid query type");
}
@@ -160,6 +178,57 @@ anv_query_address(struct anv_query_pool *pool, uint32_t query)
};
}
+/**
+ * VK_INTEL_performance_query layout:
+ *
+ * ------------------------------
+ * | end MI_RPC (256b) |
+ * |----------------------------|
+ * | begin MI_RPC (256b) |
+ * |----------------------------|
+ * | begin perfcntr 1 & 2 (16b) |
+ * |----------------------------|
+ * | end perfcntr 1 & 2 (16b) |
+ * |----------------------------|
+ * | begin RPSTAT register (4b) |
+ * |----------------------------|
+ * | end RPSTAT register (4b) |
+ * |----------------------------|
+ * | marker (8b) |
+ * |----------------------------|
+ * | availability (8b) |
+ * ------------------------------
+ */
+
+/* Byte offset, inside a performance query slot, of an MI_REPORT_PERF_COUNT
+ * snapshot: the end report sits at offset 0, the begin report at offset 256.
+ */
+static uint32_t
+intel_perf_mi_rpc_offset(bool end)
+{
+   if (end)
+      return 0;
+
+   return 256;
+}
+
+/* Byte offset, inside a performance query slot, of the perfcntr 1 & 2 pair:
+ * the begin pair starts at offset 512 and the end pair follows it, two
+ * 64-bit values later.
+ */
+static uint32_t
+intel_perf_counter(bool end)
+{
+   const uint32_t begin_pair = 512;
+
+   if (!end)
+      return begin_pair;
+
+   return begin_pair + 2 * sizeof(uint64_t);
+}
+
+/* Byte offset, inside a performance query slot, of an RPSTAT register
+ * snapshot. The two 32-bit snapshots come right after the four 64-bit
+ * perfcntr values: begin first, end immediately after it.
+ */
+static uint32_t
+intel_perf_rpstart_offset(bool end)
+{
+   const uint32_t begin_snapshot =
+      intel_perf_counter(false) + 4 * sizeof(uint64_t);
+
+   return end ? begin_snapshot + sizeof(uint32_t) : begin_snapshot;
+}
+
+/* Byte offset, inside a performance query slot, of the 64-bit marker: one
+ * 64-bit value past the begin RPSTAT snapshot, i.e. right after the pair of
+ * 32-bit RPSTAT snapshots.
+ */
+static uint32_t
+intel_perf_marker_offset(void)
+{
+   const uint32_t rpstat_begin = intel_perf_rpstart_offset(false);
+
+   return rpstat_begin + sizeof(uint64_t);
+}
+
static void
cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
uint32_t value_index, uint64_t result)
@@ -173,18 +242,28 @@ cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
}
}
+/* CPU address of the slot of query number `query` in the pool's mapped
+ * buffer: slots are pool->stride bytes apart, starting at pool->bo.map.
+ */
+static void *
+query_slot(struct anv_query_pool *pool, uint32_t query)
+{
+   return pool->bo.map + (query * pool->stride);
+}
+
static bool
-query_is_available(uint64_t *slot)
+query_is_available(struct anv_query_pool *pool, uint32_t query)
{
- return *(volatile uint64_t *)slot;
+ if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
+ return *(volatile uint64_t *)((uint8_t *)query_slot(pool, query) +
+ pool->stride - 8);
+ } else
+ return *(volatile uint64_t *)query_slot(pool, query);
}
static VkResult
wait_for_available(struct anv_device *device,
- struct anv_query_pool *pool, uint64_t *slot)
+ struct anv_query_pool *pool, uint32_t query)
{
while (true) {
- if (query_is_available(slot))
+ if (query_is_available(pool, query))
return VK_SUCCESS;
int ret = anv_gem_busy(device, pool->bo.gem_handle);
@@ -197,7 +276,7 @@ wait_for_available(struct anv_device *device,
} else {
assert(ret == 0);
/* The BO is no longer busy. */
- if (query_is_available(slot)) {
+ if (query_is_available(pool, query)) {
return VK_SUCCESS;
} else {
VkResult status = anv_device_query_status(device);
@@ -233,7 +312,8 @@ VkResult genX(GetQueryPoolResults)(
assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
pool->type == VK_QUERY_TYPE_TIMESTAMP ||
- pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT);
+ pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
+ pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL);
if (anv_device_is_lost(device))
return VK_ERROR_DEVICE_LOST;
@@ -245,13 +325,10 @@ VkResult genX(GetQueryPoolResults)(
VkResult status = VK_SUCCESS;
for (uint32_t i = 0; i < queryCount; i++) {
- uint64_t *slot = pool->bo.map + (firstQuery + i) * pool->stride;
-
- /* Availability is always at the start of the slot */
- bool available = slot[0];
+ bool available = query_is_available(pool, firstQuery + i);
if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
- status = wait_for_available(device, pool, slot);
+ status = wait_for_available(device, pool, firstQuery + i);
if (status != VK_SUCCESS)
return status;
@@ -271,13 +348,16 @@ VkResult genX(GetQueryPoolResults)(
uint32_t idx = 0;
switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION:
+ case VK_QUERY_TYPE_OCCLUSION: {
+ uint64_t *slot = query_slot(pool, firstQuery + i);
if (write_results)
cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
idx++;
break;
+ }
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
+ uint64_t *slot = query_slot(pool, firstQuery + i);
uint32_t statistics = pool->pipeline_statistics;
while (statistics) {
uint32_t stat = u_bit_scan(&statistics);
@@ -297,7 +377,8 @@ VkResult genX(GetQueryPoolResults)(
break;
}
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
+ uint64_t *slot = query_slot(pool, firstQuery + i);
if (write_results)
cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
idx++;
@@ -305,12 +386,54 @@ VkResult genX(GetQueryPoolResults)(
cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]);
idx++;
break;
+ }
- case VK_QUERY_TYPE_TIMESTAMP:
+ case VK_QUERY_TYPE_TIMESTAMP: {
+ uint64_t *slot = query_slot(pool, firstQuery + i);
if (write_results)
cpu_write_query_result(pData, flags, idx, slot[1]);
idx++;
break;
+ }
+
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
+    /* Nothing to copy out unless the results are being written. */
+    if (!write_results)
+       break;
+
+    /* Locate the individual pieces of the slot (see the layout helpers). */
+    const void *query_data = query_slot(pool, firstQuery + i);
+    const uint32_t *oa_begin = query_data + intel_perf_mi_rpc_offset(false);
+    const uint32_t *oa_end = query_data + intel_perf_mi_rpc_offset(true);
+    const uint32_t *rpstat_begin = query_data + intel_perf_rpstart_offset(false);
+    /* The end RPSTAT snapshot has its own location in the slot; it must not
+     * be read from the end OA report, whose first dword is the report ID.
+     */
+    const uint32_t *rpstat_end = query_data + intel_perf_rpstart_offset(true);
+    struct gen_perf_query_result result;
+    struct gen_perf_query_info metric = {
+       .oa_format = (GEN_GEN >= 8 ?
+                     I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
+                     I915_OA_FORMAT_A45_B8_C8),
+    };
+
+    /* Extract the frequency field of each RPSTAT snapshot and scale it by
+     * 1000000 (presumably MHz to Hz -- confirm against the register
+     * definition). The field position differs before gen9.
+     */
+    uint32_t core_freq[2];
+#if GEN_GEN < 9
+    core_freq[0] = ((*rpstat_begin >> 7) & 0x7f) * 1000000ULL;
+    core_freq[1] = ((*rpstat_end >> 7) & 0x7f) * 1000000ULL;
+#else
+    core_freq[0] = ((*rpstat_begin >> 23) & 0x1ff) * 1000000ULL;
+    core_freq[1] = ((*rpstat_end >> 23) & 0x1ff) * 1000000ULL;
+#endif
+
+    /* Accumulate the begin/end OA reports, then write the MDAPI record. */
+    gen_perf_query_result_clear(&result);
+    gen_perf_query_result_accumulate(&result, &metric,
+                                     oa_begin, oa_end);
+    gen_perf_query_result_read_frequencies(&result, &device->info,
+                                           oa_begin, oa_end);
+    gen_perf_query_result_write_mdapi(pData, stride,
+                                      &device->info,
+                                      &result,
+                                      core_freq[0], core_freq[1]);
+    gen_perf_query_mdapi_write_perfcntr(pData, stride, &device->info,
+                                        query_data + intel_perf_counter(false),
+                                        query_data + intel_perf_counter(true));
+    const uint64_t *marker = query_data + intel_perf_marker_offset();
+    gen_perf_query_mdapi_write_marker(pData, stride, &device->info, *marker);
+    break;
+ }
default:
unreachable("invalid pool type");
@@ -406,6 +529,16 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
}
break;
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL:
+ for (uint32_t i = 0; i < num_queries; i++) {
+ struct anv_address slot_addr =
+ anv_query_address(pool, first_index + i);
+ gen_mi_memset(b, slot_addr, 0, pool->stride - 8);
+ emit_query_mi_availability(b, anv_address_add(slot_addr,
+ pool->stride - 8), true);
+ }
+ break;
+
default:
unreachable("Unsupported query type");
}
@@ -440,6 +573,21 @@ void genX(CmdResetQueryPool)(
break;
}
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
+ struct gen_mi_builder b;
+ gen_mi_builder_init(&b, &cmd_buffer->batch);
+
+ for (uint32_t i = 0; i < queryCount; i++) {
+ emit_query_mi_availability(
+ &b,
+ anv_address_add(
+ anv_query_address(pool, firstQuery + i),
+ pool->stride - 8),
+ false);
+ }
+ break;
+ }
+
default:
unreachable("Unsupported query type");
}
@@ -550,6 +698,37 @@ void genX(CmdBeginQueryIndexedEXT)(
emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
break;
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.CommandStreamerStallEnable = true;
+ pc.StallAtPixelScoreboard = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_REPORT_PERF_COUNT), rpc) {
+ rpc.MemoryAddress =
+ anv_address_add(query_addr, intel_perf_mi_rpc_offset(false));
+ }
+#if GEN_GEN < 9
+ gen_mi_store(&b,
+ gen_mi_mem32(anv_address_add(query_addr,
+ intel_perf_rpstart_offset(false))),
+ gen_mi_reg32(GENX(RPSTAT1_num)));
+#else
+ gen_mi_store(&b,
+ gen_mi_mem32(anv_address_add(query_addr,
+ intel_perf_rpstart_offset(false))),
+ gen_mi_reg32(GENX(RPSTAT0_num)));
+#endif
+#if GEN_GEN >= 8 && GEN_GEN <= 11
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr,
+ intel_perf_counter(false))),
+ gen_mi_reg64(GENX(PERFCNT1_num)));
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr,
+ intel_perf_counter(false) + 8)),
+ gen_mi_reg64(GENX(PERFCNT2_num)));
+#endif
+ break;
+ }
+
default:
unreachable("");
}
@@ -611,6 +790,45 @@ void genX(CmdEndQueryIndexedEXT)(
emit_query_mi_availability(&b, query_addr, true);
break;
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.CommandStreamerStallEnable = true;
+ pc.StallAtPixelScoreboard = true;
+ }
+ uint32_t marker_offset = intel_perf_marker_offset();
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr, marker_offset)),
+ gen_mi_imm(cmd_buffer->intel_perf_marker));
+#if GEN_GEN >= 8 && GEN_GEN <= 11
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr, intel_perf_counter(true))),
+ gen_mi_reg64(GENX(PERFCNT1_num)));
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr, intel_perf_counter(true) + 8)),
+ gen_mi_reg64(GENX(PERFCNT2_num)));
+#endif
+#if GEN_GEN < 9
+ gen_mi_store(&b,
+ gen_mi_mem32(anv_address_add(query_addr,
+ intel_perf_rpstart_offset(true))),
+ gen_mi_reg32(GENX(RPSTAT1_num)));
+#else
+ gen_mi_store(&b,
+ gen_mi_mem32(anv_address_add(query_addr,
+ intel_perf_rpstart_offset(true))),
+ gen_mi_reg32(GENX(RPSTAT0_num)));
+#endif
+ /* Position the last OA snapshot at the beginning of the query so that
+ * we can tell whether it's ready.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_REPORT_PERF_COUNT), rpc) {
+ rpc.MemoryAddress = anv_address_add(query_addr,
+ intel_perf_mi_rpc_offset(true));
+ rpc.ReportID = 0xdeadbeef; /* This goes in the first dword */
+ }
+ emit_query_mi_availability(&b,
+ anv_address_add(query_addr, pool->stride - 8),
+ true);
+ break;
+ }
+
default:
unreachable("");
}
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index e8db8f44de0..69e472f719b 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -118,6 +118,7 @@ libanv_files = files(
'anv_nir_lower_push_constants.c',
'anv_nir_lower_ycbcr_textures.c',
'anv_pass.c',
+ 'anv_perf.c',
'anv_pipeline.c',
'anv_pipeline_cache.c',
'anv_private.h',
@@ -194,6 +195,7 @@ libvulkan_intel = shared_library(
link_whole : [libanv_common, libanv_gen_libs],
link_with : [
libintel_compiler, libintel_dev, libisl, libblorp, libvulkan_wsi,
+ libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
@@ -227,7 +229,7 @@ if with_tests
link_whole : libanv_common,
link_with : [
libanv_gen_libs, libintel_compiler, libintel_common, libintel_dev,
- libisl, libblorp, libvulkan_wsi,
+ libisl, libblorp, libvulkan_wsi, libintel_perf,
],
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,