diff options
author | Lionel Landwerlin <lionel.g.landwerlin@intel.com> | 2021-11-22 16:24:43 +0200 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-01-14 20:17:44 +0000 |
commit | 6eb554a9c7ea71ebcb4a4933179c994c2b07c814 (patch) | |
tree | e88329e213d2cf797100e846dcca2357eb09cad9 | |
parent | 69df00b33b3ed71c984de35f7a09baf47901f8cb (diff) |
intel/ds: allow user to select metric set at start time
Rather than always using the same metric set, let the user choose one
when starting the producer, for example:
INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
-rw-r--r-- | docs/perfetto.rst | 7 | ||||
-rw-r--r-- | src/intel/ds/intel_pps_driver.cc | 148 | ||||
-rw-r--r-- | src/intel/ds/intel_pps_driver.h | 7 | ||||
-rw-r--r-- | src/intel/ds/intel_pps_perf.cc | 24 | ||||
-rw-r--r-- | src/intel/ds/intel_pps_perf.h | 6 |
5 files changed, 94 insertions, 98 deletions
diff --git a/docs/perfetto.rst b/docs/perfetto.rst index 17379e6f770..689031d73e5 100644 --- a/docs/perfetto.rst +++ b/docs/perfetto.rst @@ -154,6 +154,13 @@ Another option to enable access wide data without root permissions would be runn Alternatively using the ``CAP_PERFMON`` permission on the binary should work too. +A particular metric set can also be selected to capture a different +set of HW counters : + +.. code-block:: console + + INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer + Panfrost ^^^^^^^^ diff --git a/src/intel/ds/intel_pps_driver.cc b/src/intel/ds/intel_pps_driver.cc index 2cd399a12b6..56f08dd64fb 100644 --- a/src/intel/ds/intel_pps_driver.cc +++ b/src/intel/ds/intel_pps_driver.cc @@ -58,38 +58,17 @@ IntelDriver::~IntelDriver() void IntelDriver::enable_counter(uint32_t counter_id) { auto &counter = counters[counter_id]; - auto &group = groups[counter.group]; - if (perf->query) { - if (perf->query->symbol_name != group.name) { - PPS_LOG_ERROR( - "Unable to enable metrics from different sets: %u " - "belongs to %s but %s is currently in use.", - counter_id, - perf->query->symbol_name, - group.name.c_str()); - return; - } - } enabled_counters.emplace_back(counter); - if (!perf->query) { - perf->query = perf->find_query_by_name(group.name); - } } void IntelDriver::enable_all_counters() { - // We can only enable one metric set at a time so at least enable one. 
- for (auto &group : groups) { - if (group.name == "RenderBasic") { - for (uint32_t counter_id : group.counters) { - auto &counter = counters[counter_id]; - enabled_counters.emplace_back(counter); - } - - perf->query = perf->find_query_by_name(group.name); - break; - } + // We should only have one group + assert(groups.size() == 1); + for (uint32_t counter_id : groups[0].counters) { + auto &counter = counters[counter_id]; + enabled_counters.emplace_back(counter); } } @@ -99,49 +78,76 @@ bool IntelDriver::init_perfcnt() perf = std::make_unique<IntelPerf>(drm_device.fd); + const char *metric_set_name = getenv("INTEL_PERFETTO_METRIC_SET"); + + struct intel_perf_query_info *default_query = nullptr; + selected_query = nullptr; for (auto &query : perf->get_queries()) { - // Create group - CounterGroup group = {}; - group.id = groups.size(); - group.name = query->symbol_name; - - for (int i = 0; i < query->n_counters; ++i) { - intel_perf_query_counter &counter = query->counters[i]; - - // Create counter - Counter counter_desc = {}; - counter_desc.id = counters.size(); - counter_desc.name = counter.symbol_name; - counter_desc.group = group.id; - counter_desc.getter = [counter, query, this]( - const Counter &c, const Driver &dri) -> Counter::Value { - switch (counter.data_type) { - case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: - case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: - case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: - return (int64_t)counter.oa_counter_read_uint64(perf->cfg, query, &perf->result); - break; - case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: - case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: - return counter.oa_counter_read_float(perf->cfg, query, &perf->result); - break; - } - - return {}; - }; - - // Add counter id to the group - group.counters.emplace_back(counter_desc.id); - - // Store counter - counters.emplace_back(std::move(counter_desc)); + if (!strcmp(query->symbol_name, "RenderBasic")) + default_query = query; + if (metric_set_name && !strcmp(query->symbol_name, 
metric_set_name)) + selected_query = query; + } + + assert(default_query); + + if (!selected_query) { + if (metric_set_name) { + PPS_LOG_ERROR("Available metric sets:"); + for (auto &query : perf->get_queries()) + PPS_LOG_ERROR(" %s", query->symbol_name); + PPS_LOG_FATAL("Metric set '%s' not available.", metric_set_name); } + selected_query = default_query; + } + + PPS_LOG("Using metric set '%s': %s", + selected_query->symbol_name, selected_query->name); + + // Create group + CounterGroup group = {}; + group.id = groups.size(); + group.name = selected_query->symbol_name; + + for (int i = 0; i < selected_query->n_counters; ++i) { + intel_perf_query_counter &counter = selected_query->counters[i]; + + // Create counter + Counter counter_desc = {}; + counter_desc.id = counters.size(); + counter_desc.name = counter.symbol_name; + counter_desc.group = group.id; + counter_desc.getter = [counter, this]( + const Counter &c, const Driver &dri) -> Counter::Value { + switch (counter.data_type) { + case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: + case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: + case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: + return (int64_t)counter.oa_counter_read_uint64(perf->cfg, + selected_query, + &perf->result); + break; + case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: + case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: + return counter.oa_counter_read_float(perf->cfg, + selected_query, + &perf->result); + break; + } + + return {}; + }; - // Store group - groups.emplace_back(std::move(group)); + // Add counter id to the group + group.counters.emplace_back(counter_desc.id); + + // Store counter + counters.emplace_back(std::move(counter_desc)); } - assert(groups.size() && "Failed to query groups"); + // Store group + groups.emplace_back(std::move(group)); + assert(counters.size() && "Failed to query counters"); // Clear accumulations @@ -154,7 +160,7 @@ void IntelDriver::enable_perfcnt(uint64_t sampling_period_ns) { this->sampling_period_ns = sampling_period_ns; - if 
(!perf->open(sampling_period_ns)) { + if (!perf->open(sampling_period_ns, selected_query)) { PPS_LOG_FATAL("Failed to open intel perf"); } } @@ -197,7 +203,7 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_ // Report is next to the header const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1); uint64_t gpu_timestamp_ldw = - intel_perf_report_timestamp(&perf->query.value(), report); + intel_perf_report_timestamp(selected_query, report); /* Our HW only provides us with the lower 32 bits of the 36bits * timestamp counter value. If we haven't captured the top bits yet, @@ -292,11 +298,11 @@ uint64_t IntelDriver::gpu_next() auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data.data()); intel_perf_query_result_accumulate_fields(&perf->result, - &perf->query.value(), - &perf->devinfo, - record_a + 1, - record_b + 1, - false /* no_oa_accumulate */); + selected_query, + &perf->devinfo, + record_a + 1, + record_b + 1, + false /* no_oa_accumulate */); // Get last timestamp auto gpu_timestamp = records[1].timestamp; diff --git a/src/intel/ds/intel_pps_driver.h b/src/intel/ds/intel_pps_driver.h index 404fc651588..fb02327c39f 100644 --- a/src/intel/ds/intel_pps_driver.h +++ b/src/intel/ds/intel_pps_driver.h @@ -9,6 +9,10 @@ #include <pps/pps_driver.h> +extern "C" { +struct intel_perf_query_info; +}; + namespace pps { @@ -82,6 +86,9 @@ class IntelDriver : public Driver // Gpu clock ID used to correlate GPU/CPU timestamps uint32_t clock_id = 0; + + // Selected query + intel_perf_query_info *selected_query = nullptr; }; } // namespace pps diff --git a/src/intel/ds/intel_pps_perf.cc b/src/intel/ds/intel_pps_perf.cc index 441d06cd584..f72b80a9c6b 100644 --- a/src/intel/ds/intel_pps_perf.cc +++ b/src/intel/ds/intel_pps_perf.cc @@ -36,13 +36,6 @@ IntelPerf::IntelPerf(const int drm_fd) false, // no pipeline statistics false // no register snapshots ); - - // Enable RenderBasic counters - auto query_name 
= "RenderBasic"; - query = find_query_by_name(query_name); - if (!query) { - PPS_LOG_FATAL("Failed to find %s query", query_name); - } } IntelPerf::~IntelPerf() @@ -58,20 +51,6 @@ IntelPerf::~IntelPerf() } } -/// @return A query info, which is something like a group of counters -std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name( - const std::string &name) const -{ - for (int i = 0; i < cfg->n_queries; ++i) { - struct intel_perf_query_info query = cfg->queries[i]; - if (name == query.symbol_name) { - return query; - } - } - - return std::nullopt; -} - std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const { assert(cfg && "Intel perf config should be valid"); @@ -98,7 +77,8 @@ static uint32_t get_oa_exponent(const intel_device_info *devinfo, const uint64_t return static_cast<uint32_t>(log2(sampling_period_ns * devinfo->timestamp_frequency / 1000000000ull)) - 1; } -bool IntelPerf::open(const uint64_t sampling_period_ns) +bool IntelPerf::open(const uint64_t sampling_period_ns, + struct intel_perf_query_info *query) { assert(!ctx && "Perf context should not be initialized at this point"); diff --git a/src/intel/ds/intel_pps_perf.h b/src/intel/ds/intel_pps_perf.h index 0c2831577a5..2db622ae466 100644 --- a/src/intel/ds/intel_pps_perf.h +++ b/src/intel/ds/intel_pps_perf.h @@ -23,11 +23,9 @@ class IntelPerf IntelPerf(int drm_fd); ~IntelPerf(); - std::optional<struct intel_perf_query_info> find_query_by_name(const std::string &name) const; - std::vector<struct intel_perf_query_info*> get_queries() const; - bool open(uint64_t sampling_period_ns); + bool open(uint64_t sampling_period_ns, struct intel_perf_query_info *query); void close(); bool oa_stream_ready() const; @@ -45,8 +43,6 @@ class IntelPerf struct intel_perf_query_result result = {}; struct intel_device_info devinfo = {}; - - std::optional<struct intel_perf_query_info> query = std::nullopt; }; } // namespace pps |