summary | refs | log | tree | commit | diff
path: root/src/intel/vulkan/anv_perf.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/vulkan/anv_perf.c')
-rw-r--r-- src/intel/vulkan/anv_perf.c | 81
1 files changed, 43 insertions, 38 deletions
diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c
index 560da6a7c31..3b23067ab23 100644
--- a/src/intel/vulkan/anv_perf.c
+++ b/src/intel/vulkan/anv_perf.c
@@ -36,39 +36,21 @@
void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
- const struct intel_device_info *devinfo = &device->info;
-
device->perf = NULL;
- /* We need self modifying batches. The i915 parser prevents it on
- * Gfx7.5 :( maybe one day.
- */
- if (devinfo->ver < 8)
- return;
-
struct intel_perf_config *perf = intel_perf_new(NULL);
intel_perf_init_metrics(perf, &device->info, fd,
false /* pipeline statistics */,
true /* register snapshots */);
- if (!perf->n_queries) {
- if (perf->platform_supported) {
- static bool warned_once = false;
-
- if (!warned_once) {
- mesa_logw("Performance support disabled, "
- "consider sysctl dev.i915.perf_stream_paranoid=0\n");
- warned_once = true;
- }
- }
+ if (!perf->n_queries)
goto err;
- }
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
* perf revision 2.
*/
- if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
if (!intel_perf_has_hold_preemption(perf))
goto err;
}
@@ -89,10 +71,13 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
break;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
+ case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
device->n_perf_query_commands += field->size / 4;
break;
+ default:
+ unreachable("Unhandled register type");
}
}
device->n_perf_query_commands *= 2; /* Begin & End */
@@ -124,9 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
- properties[p++] = device->info.ver >= 8 ?
- I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
- I915_OA_FORMAT_A45_B8_C8;
+ properties[p++] =
+ device->info->verx10 >= 125 ?
+ I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
+ I915_OA_FORMAT_A32u40_A4u32_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */
@@ -141,8 +127,12 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
* Gfx11 for instance we use the full EU array. Initially when perf was
* enabled we would use only half on Gfx11 because of functional
* requirements.
+ *
+ * Temporarily disable this option on Gfx12.5+, kernel doesn't appear to
+ * support it.
*/
- if (intel_perf_has_global_sseu(device->physical->perf)) {
+ if (intel_perf_has_global_sseu(device->physical->perf) &&
+ device->info->verx10 < 125) {
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
}
@@ -223,9 +213,9 @@ VkResult anv_AcquirePerformanceConfigurationINTEL(
config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
if (!config)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
config->register_config =
intel_perf_load_configuration(device->physical->perf, device->fd,
INTEL_PERF_QUERY_GUID_MDAPI);
@@ -258,7 +248,7 @@ VkResult anv_ReleasePerformanceConfigurationINTEL(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
- if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG))
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
ralloc_free(config->register_config);
@@ -276,7 +266,7 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
struct anv_device *device = queue->device;
- if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
if (device->perf_fd < 0) {
device->perf_fd = anv_device_perf_open(device, config->config_id);
if (device->perf_fd < 0)
@@ -285,7 +275,7 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
(void *)(uintptr_t) config->config_id);
if (ret < 0)
- return anv_device_set_lost(device, "i915-perf config failed: %m");
+ return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
}
}
@@ -346,15 +336,25 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
uint32_t desc_count = *pCounterCount;
- VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
- VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
+ VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
+ VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
+ pCounterDescriptions, &desc_count);
+
+ /* We cannot support performance queries on anything other than RCS,
+ * because the MI_REPORT_PERF_COUNT command is not available on other
+ * engines.
+ */
+ struct anv_queue_family *queue_family =
+ &pdevice->queue.families[queueFamilyIndex];
+ if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER)
+ return vk_outarray_status(&out);
for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
- vk_outarray_append(&out, counter) {
+ vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
- counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
+ counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
unsigned char sha1_result[20];
@@ -364,9 +364,12 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
}
- vk_outarray_append(&out_desc, desc) {
+ vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
desc->flags = 0; /* None so far. */
- snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
+ snprintf(desc->name, sizeof(desc->name), "%s",
+ INTEL_DEBUG(DEBUG_PERF_SYMBOL_NAMES) ?
+ intel_counter->symbol_name :
+ intel_counter->name);
snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
}
@@ -405,7 +408,7 @@ VkResult anv_AcquireProfilingLockKHR(
assert(device->perf_fd == -1);
- if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
if (fd < 0)
return VK_TIMEOUT;
@@ -420,7 +423,7 @@ void anv_ReleaseProfilingLockKHR(
{
ANV_FROM_HANDLE(anv_device, device, _device);
- if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
+ if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
assert(device->perf_fd >= 0);
close(device->perf_fd);
}
@@ -433,10 +436,12 @@ anv_perf_write_pass_results(struct intel_perf_config *perf,
const struct intel_perf_query_result *accumulated_results,
union VkPerformanceCounterResultKHR *results)
{
+ const struct intel_perf_query_info *query = pool->pass_query[pass];
+
for (uint32_t c = 0; c < pool->n_counters; c++) {
const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
- if (counter_pass->pass != pass)
+ if (counter_pass->query != query)
continue;
switch (pool->pass_query[pass]->kind) {