summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lionel Landwerlin <lionel.g.landwerlin@intel.com> 2021-11-22 16:24:43 +0200
committer Marge Bot <emma+marge@anholt.net> 2022-01-14 20:17:44 +0000
commit 6eb554a9c7ea71ebcb4a4933179c994c2b07c814 (patch)
tree e88329e213d2cf797100e846dcca2357eb09cad9
parent 69df00b33b3ed71c984de35f7a09baf47901f8cb (diff)
intel/ds: allow user to select metric set at start time
Rather than always using the same metric set, let the user choose one when starting the producer with: INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
-rw-r--r-- docs/perfetto.rst 7
-rw-r--r-- src/intel/ds/intel_pps_driver.cc 148
-rw-r--r-- src/intel/ds/intel_pps_driver.h 7
-rw-r--r-- src/intel/ds/intel_pps_perf.cc 24
-rw-r--r-- src/intel/ds/intel_pps_perf.h 6
5 files changed, 94 insertions, 98 deletions
diff --git a/docs/perfetto.rst b/docs/perfetto.rst
index 17379e6f770..689031d73e5 100644
--- a/docs/perfetto.rst
+++ b/docs/perfetto.rst
@@ -154,6 +154,13 @@ Another option to enable access wide data without root permissions would be runn
Alternatively using the ``CAP_PERFMON`` permission on the binary should work too.
+A particular metric set can also be selected to capture a different
+set of HW counters:
+
+.. code-block:: console
+
+ INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer
+
Panfrost
^^^^^^^^
diff --git a/src/intel/ds/intel_pps_driver.cc b/src/intel/ds/intel_pps_driver.cc
index 2cd399a12b6..56f08dd64fb 100644
--- a/src/intel/ds/intel_pps_driver.cc
+++ b/src/intel/ds/intel_pps_driver.cc
@@ -58,38 +58,17 @@ IntelDriver::~IntelDriver()
void IntelDriver::enable_counter(uint32_t counter_id)
{
auto &counter = counters[counter_id];
- auto &group = groups[counter.group];
- if (perf->query) {
- if (perf->query->symbol_name != group.name) {
- PPS_LOG_ERROR(
- "Unable to enable metrics from different sets: %u "
- "belongs to %s but %s is currently in use.",
- counter_id,
- perf->query->symbol_name,
- group.name.c_str());
- return;
- }
- }
enabled_counters.emplace_back(counter);
- if (!perf->query) {
- perf->query = perf->find_query_by_name(group.name);
- }
}
void IntelDriver::enable_all_counters()
{
- // We can only enable one metric set at a time so at least enable one.
- for (auto &group : groups) {
- if (group.name == "RenderBasic") {
- for (uint32_t counter_id : group.counters) {
- auto &counter = counters[counter_id];
- enabled_counters.emplace_back(counter);
- }
-
- perf->query = perf->find_query_by_name(group.name);
- break;
- }
+ // We should only have one group
+ assert(groups.size() == 1);
+ for (uint32_t counter_id : groups[0].counters) {
+ auto &counter = counters[counter_id];
+ enabled_counters.emplace_back(counter);
}
}
@@ -99,49 +78,76 @@ bool IntelDriver::init_perfcnt()
perf = std::make_unique<IntelPerf>(drm_device.fd);
+ const char *metric_set_name = getenv("INTEL_PERFETTO_METRIC_SET");
+
+ struct intel_perf_query_info *default_query = nullptr;
+ selected_query = nullptr;
for (auto &query : perf->get_queries()) {
- // Create group
- CounterGroup group = {};
- group.id = groups.size();
- group.name = query->symbol_name;
-
- for (int i = 0; i < query->n_counters; ++i) {
- intel_perf_query_counter &counter = query->counters[i];
-
- // Create counter
- Counter counter_desc = {};
- counter_desc.id = counters.size();
- counter_desc.name = counter.symbol_name;
- counter_desc.group = group.id;
- counter_desc.getter = [counter, query, this](
- const Counter &c, const Driver &dri) -> Counter::Value {
- switch (counter.data_type) {
- case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
- case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
- case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
- return (int64_t)counter.oa_counter_read_uint64(perf->cfg, query, &perf->result);
- break;
- case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
- case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
- return counter.oa_counter_read_float(perf->cfg, query, &perf->result);
- break;
- }
-
- return {};
- };
-
- // Add counter id to the group
- group.counters.emplace_back(counter_desc.id);
-
- // Store counter
- counters.emplace_back(std::move(counter_desc));
+ if (!strcmp(query->symbol_name, "RenderBasic"))
+ default_query = query;
+ if (metric_set_name && !strcmp(query->symbol_name, metric_set_name))
+ selected_query = query;
+ }
+
+ assert(default_query);
+
+ if (!selected_query) {
+ if (metric_set_name) {
+ PPS_LOG_ERROR("Available metric sets:");
+ for (auto &query : perf->get_queries())
+ PPS_LOG_ERROR(" %s", query->symbol_name);
+ PPS_LOG_FATAL("Metric set '%s' not available.", metric_set_name);
}
+ selected_query = default_query;
+ }
+
+ PPS_LOG("Using metric set '%s': %s",
+ selected_query->symbol_name, selected_query->name);
+
+ // Create group
+ CounterGroup group = {};
+ group.id = groups.size();
+ group.name = selected_query->symbol_name;
+
+ for (int i = 0; i < selected_query->n_counters; ++i) {
+ intel_perf_query_counter &counter = selected_query->counters[i];
+
+ // Create counter
+ Counter counter_desc = {};
+ counter_desc.id = counters.size();
+ counter_desc.name = counter.symbol_name;
+ counter_desc.group = group.id;
+ counter_desc.getter = [counter, this](
+ const Counter &c, const Driver &dri) -> Counter::Value {
+ switch (counter.data_type) {
+ case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
+ case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
+ case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
+ return (int64_t)counter.oa_counter_read_uint64(perf->cfg,
+ selected_query,
+ &perf->result);
+ break;
+ case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
+ case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
+ return counter.oa_counter_read_float(perf->cfg,
+ selected_query,
+ &perf->result);
+ break;
+ }
+
+ return {};
+ };
- // Store group
- groups.emplace_back(std::move(group));
+ // Add counter id to the group
+ group.counters.emplace_back(counter_desc.id);
+
+ // Store counter
+ counters.emplace_back(std::move(counter_desc));
}
- assert(groups.size() && "Failed to query groups");
+ // Store group
+ groups.emplace_back(std::move(group));
+
assert(counters.size() && "Failed to query counters");
// Clear accumulations
@@ -154,7 +160,7 @@ void IntelDriver::enable_perfcnt(uint64_t sampling_period_ns)
{
this->sampling_period_ns = sampling_period_ns;
- if (!perf->open(sampling_period_ns)) {
+ if (!perf->open(sampling_period_ns, selected_query)) {
PPS_LOG_FATAL("Failed to open intel perf");
}
}
@@ -197,7 +203,7 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
// Report is next to the header
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
uint64_t gpu_timestamp_ldw =
- intel_perf_report_timestamp(&perf->query.value(), report);
+ intel_perf_report_timestamp(selected_query, report);
/* Our HW only provides us with the lower 32 bits of the 36bits
* timestamp counter value. If we haven't captured the top bits yet,
@@ -292,11 +298,11 @@ uint64_t IntelDriver::gpu_next()
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data.data());
intel_perf_query_result_accumulate_fields(&perf->result,
- &perf->query.value(),
- &perf->devinfo,
- record_a + 1,
- record_b + 1,
- false /* no_oa_accumulate */);
+ selected_query,
+ &perf->devinfo,
+ record_a + 1,
+ record_b + 1,
+ false /* no_oa_accumulate */);
// Get last timestamp
auto gpu_timestamp = records[1].timestamp;
diff --git a/src/intel/ds/intel_pps_driver.h b/src/intel/ds/intel_pps_driver.h
index 404fc651588..fb02327c39f 100644
--- a/src/intel/ds/intel_pps_driver.h
+++ b/src/intel/ds/intel_pps_driver.h
@@ -9,6 +9,10 @@
#include <pps/pps_driver.h>
+extern "C" {
+struct intel_perf_query_info;
+};
+
namespace pps
{
@@ -82,6 +86,9 @@ class IntelDriver : public Driver
// Gpu clock ID used to correlate GPU/CPU timestamps
uint32_t clock_id = 0;
+
+ // Selected query
+ intel_perf_query_info *selected_query = nullptr;
};
} // namespace pps
diff --git a/src/intel/ds/intel_pps_perf.cc b/src/intel/ds/intel_pps_perf.cc
index 441d06cd584..f72b80a9c6b 100644
--- a/src/intel/ds/intel_pps_perf.cc
+++ b/src/intel/ds/intel_pps_perf.cc
@@ -36,13 +36,6 @@ IntelPerf::IntelPerf(const int drm_fd)
false, // no pipeline statistics
false // no register snapshots
);
-
- // Enable RenderBasic counters
- auto query_name = "RenderBasic";
- query = find_query_by_name(query_name);
- if (!query) {
- PPS_LOG_FATAL("Failed to find %s query", query_name);
- }
}
IntelPerf::~IntelPerf()
@@ -58,20 +51,6 @@ IntelPerf::~IntelPerf()
}
}
-/// @return A query info, which is something like a group of counters
-std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
- const std::string &name) const
-{
- for (int i = 0; i < cfg->n_queries; ++i) {
- struct intel_perf_query_info query = cfg->queries[i];
- if (name == query.symbol_name) {
- return query;
- }
- }
-
- return std::nullopt;
-}
-
std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
{
assert(cfg && "Intel perf config should be valid");
@@ -98,7 +77,8 @@ static uint32_t get_oa_exponent(const intel_device_info *devinfo, const uint64_t
return static_cast<uint32_t>(log2(sampling_period_ns * devinfo->timestamp_frequency / 1000000000ull)) - 1;
}
-bool IntelPerf::open(const uint64_t sampling_period_ns)
+bool IntelPerf::open(const uint64_t sampling_period_ns,
+ struct intel_perf_query_info *query)
{
assert(!ctx && "Perf context should not be initialized at this point");
diff --git a/src/intel/ds/intel_pps_perf.h b/src/intel/ds/intel_pps_perf.h
index 0c2831577a5..2db622ae466 100644
--- a/src/intel/ds/intel_pps_perf.h
+++ b/src/intel/ds/intel_pps_perf.h
@@ -23,11 +23,9 @@ class IntelPerf
IntelPerf(int drm_fd);
~IntelPerf();
- std::optional<struct intel_perf_query_info> find_query_by_name(const std::string &name) const;
-
std::vector<struct intel_perf_query_info*> get_queries() const;
- bool open(uint64_t sampling_period_ns);
+ bool open(uint64_t sampling_period_ns, struct intel_perf_query_info *query);
void close();
bool oa_stream_ready() const;
@@ -45,8 +43,6 @@ class IntelPerf
struct intel_perf_query_result result = {};
struct intel_device_info devinfo = {};
-
- std::optional<struct intel_perf_query_info> query = std::nullopt;
};
} // namespace pps