diff options
Diffstat (limited to 'src/intel/vulkan/anv_perf.c')
-rw-r--r-- | src/intel/vulkan/anv_perf.c | 81 |
1 files changed, 43 insertions, 38 deletions
diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c index 560da6a7c31..3b23067ab23 100644 --- a/src/intel/vulkan/anv_perf.c +++ b/src/intel/vulkan/anv_perf.c @@ -36,39 +36,21 @@ void anv_physical_device_init_perf(struct anv_physical_device *device, int fd) { - const struct intel_device_info *devinfo = &device->info; - device->perf = NULL; - /* We need self modifying batches. The i915 parser prevents it on - * Gfx7.5 :( maybe one day. - */ - if (devinfo->ver < 8) - return; - struct intel_perf_config *perf = intel_perf_new(NULL); intel_perf_init_metrics(perf, &device->info, fd, false /* pipeline statistics */, true /* register snapshots */); - if (!perf->n_queries) { - if (perf->platform_supported) { - static bool warned_once = false; - - if (!warned_once) { - mesa_logw("Performance support disabled, " - "consider sysctl dev.i915.perf_stream_paranoid=0\n"); - warned_once = true; - } - } + if (!perf->n_queries) goto err; - } /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in * perf revision 2. */ - if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) { + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { if (!intel_perf_has_hold_preemption(perf)) goto err; } @@ -89,10 +71,13 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd) break; case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT: + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: device->n_perf_query_commands += field->size / 4; break; + default: + unreachable("Unhandled register type"); } } device->n_perf_query_commands *= 2; /* Begin & End */ @@ -124,9 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id) properties[p++] = metric_id; properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = device->info.ver >= 8 ? - I915_OA_FORMAT_A32u40_A4u32_B8_C8 : - I915_OA_FORMAT_A45_B8_C8; + properties[p++] = + device->info->verx10 >= 125 ? + I915_OA_FORMAT_A24u40_A14u32_B8_C8 : + I915_OA_FORMAT_A32u40_A4u32_B8_C8; properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; properties[p++] = 31; /* slowest sampling period */ @@ -141,8 +127,12 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id) * Gfx11 for instance we use the full EU array. Initially when perf was * enabled we would use only half on Gfx11 because of functional * requirements. + * + * Temporary disable this option on Gfx12.5+, kernel doesn't appear to + * support it. */ - if (intel_perf_has_global_sseu(device->physical->perf)) { + if (intel_perf_has_global_sseu(device->physical->perf) && + device->info->verx10 < 125) { properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; properties[p++] = (uintptr_t) &device->physical->perf->sseu; } @@ -223,9 +213,9 @@ VkResult anv_AcquirePerformanceConfigurationINTEL( config = vk_object_alloc(&device->vk, NULL, sizeof(*config), VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL); if (!config) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) { + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { config->register_config = intel_perf_load_configuration(device->physical->perf, device->fd, INTEL_PERF_QUERY_GUID_MDAPI); @@ -258,7 +248,7 @@ VkResult anv_ReleasePerformanceConfigurationINTEL( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration); - if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id); ralloc_free(config->register_config); @@ -276,7 +266,7 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL( ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration); struct anv_device *device = queue->device; - if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) { + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { if (device->perf_fd < 0) { device->perf_fd = anv_device_perf_open(device, config->config_id); if (device->perf_fd < 0) @@ -285,7 +275,7 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL( int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG, (void *)(uintptr_t) config->config_id); if (ret < 0) - return anv_device_set_lost(device, "i915-perf config failed: %m"); + return vk_device_set_lost(&device->vk, "i915-perf config failed: %m"); } } @@ -346,15 +336,25 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( uint32_t desc_count = *pCounterCount; - VK_OUTARRAY_MAKE(out, pCounters, pCounterCount); - VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count); + VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount); + VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc, + pCounterDescriptions, &desc_count); + + /* We cannot support performance queries on anything other than RCS, + * because the MI_REPORT_PERF_COUNT command is not available on other + * engines. + */ + struct anv_queue_family *queue_family = + &pdevice->queue.families[queueFamilyIndex]; + if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER) + return vk_outarray_status(&out); for (int c = 0; c < (perf ? perf->n_counters : 0); c++) { const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter; - vk_outarray_append(&out, counter) { + vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) { counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units]; - counter->scope = VK_QUERY_SCOPE_COMMAND_KHR; + counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR; counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type]; unsigned char sha1_result[20]; @@ -364,9 +364,12 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( memcpy(counter->uuid, sha1_result, sizeof(counter->uuid)); } - vk_outarray_append(&out_desc, desc) { + vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) { desc->flags = 0; /* None so far. */ - snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name); + snprintf(desc->name, sizeof(desc->name), "%s", + INTEL_DEBUG(DEBUG_PERF_SYMBOL_NAMES) ? + intel_counter->symbol_name : + intel_counter->name); snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category); snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc); } @@ -405,7 +408,7 @@ VkResult anv_AcquireProfilingLockKHR( assert(device->perf_fd == -1); - if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) { + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id); if (fd < 0) return VK_TIMEOUT; @@ -420,7 +423,7 @@ void anv_ReleaseProfilingLockKHR( { ANV_FROM_HANDLE(anv_device, device, _device); - if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) { + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { assert(device->perf_fd >= 0); close(device->perf_fd); } @@ -433,10 +436,12 @@ anv_perf_write_pass_results(struct intel_perf_config *perf, const struct intel_perf_query_result *accumulated_results, union VkPerformanceCounterResultKHR *results) { + const struct intel_perf_query_info *query = pool->pass_query[pass]; + for (uint32_t c = 0; c < pool->n_counters; c++) { const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c]; - if (counter_pass->pass != pass) + if (counter_pass->query != query) continue; switch (pool->pass_query[pass]->kind) { |