summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--meson_options.txt2
-rw-r--r--src/freedreno/ds/fd_pps_driver.cc365
-rw-r--r--src/freedreno/ds/fd_pps_driver.h131
-rw-r--r--src/freedreno/ds/meson.build49
-rw-r--r--src/freedreno/meson.build4
-rw-r--r--src/meson.build1
-rw-r--r--src/tool/pps/meson.build2
-rw-r--r--src/tool/pps/pps_driver.cc9
8 files changed, 561 insertions, 2 deletions
diff --git a/meson_options.txt b/meson_options.txt
index 8ab9309806a..37e5015d748 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -475,6 +475,6 @@ option(
'datasources',
type : 'array',
value : ['auto'],
- choices : ['auto', 'panfrost', 'intel'],
+ choices : ['auto', 'panfrost', 'intel', 'freedreno'],
description: 'List of Perfetto datasources to build. If this is set to `auto`, datasources that can not be build are skipped. Default: [`auto`]'
)
diff --git a/src/freedreno/ds/fd_pps_driver.cc b/src/freedreno/ds/fd_pps_driver.cc
new file mode 100644
index 00000000000..ebcba2d5e79
--- /dev/null
+++ b/src/freedreno/ds/fd_pps_driver.cc
@@ -0,0 +1,365 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "fd_pps_driver.h"
+
+#include <cstring>
+#include <iostream>
+#include <perfetto.h>
+
+#include "pps/pps.h"
+#include "pps/pps_algorithm.h"
+
+namespace pps
+{
+
+/* Smallest sampling interval advertised by this driver, in nanoseconds. */
+uint64_t
+FreedrenoDriver::get_min_sampling_period_ns()
+{
+   constexpr uint64_t min_period_ns = 100000;
+   return min_period_ns;
+}
+
+/**
+ * Declare the countables sampled on a6xx and the derived counters that are
+ * computed from them.  Previously registered groups, counters, countables
+ * and enabled counters are discarded first.
+ *
+ * TODO this seems like it would be largely the same for a5xx as well
+ * (ie. same countable names)..
+ */
+void
+FreedrenoDriver::setup_a6xx_counters()
+{
+   /* TODO is there a reason to want more than one group? */
+   CounterGroup group = {};
+   group.name = "counters";
+   groups.clear();
+   counters.clear();
+   countables.clear();
+   enabled_counters.clear();
+   groups.emplace_back(std::move(group));
+
+   /* Countable ids index into `state` and counter ids are handed out in
+    * step with entries appended to `counters`, so restart the numbering
+    * whenever the containers themselves are emptied.
+    */
+   next_countable_id = 0;
+   next_counter_id = 0;
+
+   /*
+    * Create the countables that we'll be using.
+    */
+
+   auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
+   auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
+   auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
+   auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
+   auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
+   auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
+   auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
+
+   /*
+    * And then setup the derived counters that we are exporting to
+    * pps based on the captured countable values.  Each lambda captures
+    * the Countable handles by copy; converting a handle to an integer
+    * yields the delta between the two most recent samples.
+    */
+
+   counter("GPU Frequency", Counter::Units::Hertz, [=]() {
+         return PERF_CP_ALWAYS_COUNT / time;
+      }
+   );
+
+   counter("GPU % Utilization", Counter::Units::Percent, [=]() {
+         return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
+      }
+   );
+
+   // This one is a bit of a guess, but seems plausible..
+   counter("ALU / Fragment", Counter::Units::None, [=]() -> int64_t {
+         /* This is an integer division: when no pixels were counted in
+          * the interval report 0 instead of dividing by zero.
+          */
+         const int64_t pixels = PERF_RB_3D_PIXELS;
+         if (pixels == 0)
+            return 0;
+
+         return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
+                 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / pixels;
+      }
+   );
+
+   counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
+         return PERF_TP_L1_CACHELINE_MISSES / time;
+      }
+   );
+
+   counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
+         return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores);
+      }
+   );
+
+   // TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
+   // for what blob exposes
+}
+
+/**
+ * Generate and submit the cmdstream to configure the counter/countable
+ * muxing.
+ *
+ * @param reset  forwarded to Countable::configure(); when true the
+ *               enable/clear register writes are emitted as well as the
+ *               selector write
+ * @param wait   when true, block in fd_pipe_wait() on the submit's fence
+ *               before returning
+ */
+void
+FreedrenoDriver::configure_counters(bool reset, bool wait)
+{
+   struct fd_submit *submit = fd_submit_new(pipe);
+   enum fd_ringbuffer_flags flags =
+      (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
+   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
+
+   /* Iterate by reference: copying a Countable also copies its name
+    * string, and this runs on every sample.
+    */
+   for (auto &countable : countables)
+      countable.configure(ring, reset);
+
+   struct fd_submit_fence fence = {};
+   util_queue_fence_init(&fence.ready);
+
+   fd_submit_flush(submit, -1, &fence);
+
+   /* Wait until the flush has filled in fence.fence. */
+   util_queue_fence_wait(&fence.ready);
+
+   fd_ringbuffer_del(ring);
+   fd_submit_del(submit);
+
+   if (wait)
+      fd_pipe_wait(pipe, &fence.fence);
+
+   /* Pair the init above; the queue fence has been waited on already. */
+   util_queue_fence_destroy(&fence.ready);
+}
+
+/**
+ * Read the current counter values and record the time.
+ *
+ * The timestamp is stored in last_dump_ts before any register is read.
+ */
+void
+FreedrenoDriver::collect_countables()
+{
+   last_dump_ts = perfetto::base::GetBootTimeNs().count();
+
+   /* By reference: avoids copying every Countable (and its name string)
+    * on each sample.
+    */
+   for (auto &countable : countables)
+      countable.collect();
+}
+
+/**
+ * Open the device and a 3D pipe, query GPU id and max frequency, look up
+ * the perfcntr tables for that GPU, register the counters for the GPU
+ * generation, resolve each countable to a hardware counter, map the GPU
+ * I/O space and take an initial sample.
+ *
+ * @return true on success.  Every failure is reported via PERFETTO_FATAL
+ *         before `false` is returned.
+ */
+bool
+FreedrenoDriver::init_perfcnt()
+{
+   uint64_t val;
+
+   dev = fd_device_new(drm_device.fd);
+   if (!dev) {
+      PERFETTO_FATAL("Could not open freedreno device");
+      return false;
+   }
+
+   pipe = fd_pipe_new(dev, FD_PIPE_3D);
+   if (!pipe) {
+      PERFETTO_FATAL("Could not create 3D pipe");
+      return false;
+   }
+
+   if (fd_pipe_get_param(pipe, FD_GPU_ID, &val)) {
+      PERFETTO_FATAL("Could not get GPU_ID");
+      return false;
+   }
+   gpu_id = val;
+
+   if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
+      PERFETTO_FATAL("Could not get MAX_FREQ");
+      return false;
+   }
+   max_freq = val;
+
+   perfcntrs = fd_perfcntrs(gpu_id, &num_perfcntrs);
+   if (num_perfcntrs == 0) {
+      PERFETTO_FATAL("No hw counters available");
+      return false;
+   }
+
+   /* One "counters handed out so far" slot per perfcntr group. */
+   assigned_counters.assign(num_perfcntrs, 0);
+
+   switch (gpu_id) {
+   case 600 ... 699:
+      setup_a6xx_counters();
+      break;
+   default:
+      PERFETTO_FATAL("Unsupported GPU: a%03u", gpu_id);
+      return false;
+   }
+
+   /* Countable ids index into `state`, so size it to the ids issued. */
+   state.resize(next_countable_id);
+
+   /* By reference: no need to copy each Countable just to resolve it. */
+   for (auto &countable : countables)
+      countable.resolve();
+
+   freedreno_dev_info_init(&info, gpu_id);
+
+   io = fd_dt_find_io();
+   if (!io) {
+      PERFETTO_FATAL("Could not map GPU I/O space");
+      return false;
+   }
+
+   /* Program the muxing with reset, wait for it, then take a first sample
+    * so the next dump has a baseline to diff against.
+    */
+   configure_counters(true, true);
+   collect_countables();
+
+   return true;
+}
+
+/* Append the counter stored at index counter_id to the enabled list. */
+void
+FreedrenoDriver::enable_counter(const uint32_t counter_id)
+{
+   auto &selected = counters[counter_id];
+   enabled_counters.push_back(selected);
+}
+
+/* Append every registered counter, in order, to the enabled list. */
+void
+FreedrenoDriver::enable_all_counters()
+{
+   const size_t total = counters.size();
+
+   enabled_counters.reserve(total);
+   for (size_t idx = 0; idx < total; idx++)
+      enabled_counters.push_back(counters[idx]);
+}
+
+/*
+ * Nothing is done here and the requested sampling period is ignored; the
+ * counter muxing is programmed from init_perfcnt() and dump_perfcnt() via
+ * configure_counters() instead.
+ */
+void
+FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
+{
+}
+
+/**
+ * Take a new sample of all countables, update `time` with the seconds
+ * elapsed since the previous sample, re-submit the counter configuration
+ * and publish the sample timestamp for next().
+ *
+ * Always returns true.
+ */
+bool
+FreedrenoDriver::dump_perfcnt()
+{
+   const auto prev_ts = last_dump_ts;
+
+   /* Refreshes last_dump_ts along with the countable values. */
+   collect_countables();
+
+   const auto delta_ns = last_dump_ts - prev_ts;
+   time = (float)delta_ns / 1000000000.0;
+
+   // TODO we want to do this periodically to keep the GPU awake
+   // (and to ensure we don't lose counter configuration due to
+   // suspend/resume cycle), but we don't need to do this every
+   // time.. we probably just want to do this every 30-60ms..
+   configure_counters(false, false);
+
+   last_capture_ts = last_dump_ts;
+
+   return true;
+}
+
+/*
+ * Hand out the timestamp stored by the last dump_perfcnt() and zero it, so
+ * a second call without a new dump returns 0.
+ */
+uint64_t FreedrenoDriver::next()
+{
+   const uint64_t pending_ts = last_capture_ts;
+
+   last_capture_ts = 0;
+
+   return pending_ts;
+}
+
+/* Intentionally a no-op, see the comment in the body. */
+void FreedrenoDriver::disable_perfcnt()
+{
+ /* There isn't really any disable, only reconfiguring which countables
+ * get muxed to which counters
+ */
+}
+
+/*
+ * Countable
+ */
+
+/*
+ * Register a new countable with the given name and return a copy of the
+ * handle; the registered entry itself lives in `countables`.
+ */
+FreedrenoDriver::Countable
+FreedrenoDriver::countable(std::string name)
+{
+   countables.emplace_back(this, name);
+   return countables.back();
+}
+
+/* Takes the next free countable id from the driver and remembers the name. */
+FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
+   : id(d->next_countable_id++),
+     d(d),
+     name(name)
+{
+}
+
+/*
+ * Emit register writes on ring to configure counter/countable muxing.
+ *
+ * The selector write is always emitted.  The enable and clear register
+ * writes are only emitted when `reset` is set and the counter provides the
+ * corresponding (non-zero) register.
+ */
+void
+FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
+{
+   const auto &slot = d->state[id];
+   const struct fd_perfcntr_countable *selected = slot.countable;
+   const struct fd_perfcntr_counter *hw = slot.counter;
+
+   const bool toggle_enable = reset && hw->enable;
+   const bool pulse_clear = reset && hw->clear;
+
+   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+
+   /* Write 0 to the enable register before touching the counter. */
+   if (toggle_enable) {
+      OUT_PKT4(ring, hw->enable, 1);
+      OUT_RING(ring, 0);
+   }
+
+   /* Write 1 and then 0 to the clear register. */
+   if (pulse_clear) {
+      const uint32_t clear_sequence[] = {1, 0};
+
+      for (const uint32_t clear_value : clear_sequence) {
+         OUT_PKT4(ring, hw->clear, 1);
+         OUT_RING(ring, clear_value);
+      }
+   }
+
+   /* Route the chosen countable to this counter. */
+   OUT_PKT4(ring, hw->select_reg, 1);
+   OUT_RING(ring, selected->selector);
+
+   /* Write 1 to the enable register again. */
+   if (toggle_enable) {
+      OUT_PKT4(ring, hw->enable, 1);
+      OUT_RING(ring, 1);
+   }
+}
+
+/*
+ * Collect the current counter value.  The previous value is kept in
+ * last_value so that get_value() can report the delta since last sample.
+ */
+void
+FreedrenoDriver::Countable::collect()
+{
+   auto &slot = d->state[id];
+   const struct fd_perfcntr_counter *hw = slot.counter;
+
+   /* Register offsets are applied in units of 32-bit words. */
+   uint32_t *regs = (uint32_t *)d->io;
+
+   slot.last_value = slot.value;
+
+   /* Two separate 32-bit reads: low word first, then high word. */
+   const uint32_t lo = regs[hw->counter_reg_lo];
+   const uint32_t hi = regs[hw->counter_reg_hi];
+
+   slot.value = ((uint64_t)hi << 32) | lo;
+}
+
+/*
+ * Resolve the countable and assign next counter from its group.
+ *
+ * Searches every perfcntr group for a countable whose name matches; the
+ * first match wins.  Hitting the end without a match is treated as
+ * unreachable.
+ */
+void
+FreedrenoDriver::Countable::resolve()
+{
+   for (unsigned grp = 0; grp < d->num_perfcntrs; grp++) {
+      const struct fd_perfcntr_group *group = &d->perfcntrs[grp];
+
+      for (unsigned idx = 0; idx < group->num_countables; idx++) {
+         const struct fd_perfcntr_countable *candidate = &group->countables[idx];
+
+         if (name != candidate->name)
+            continue;
+
+         d->state[id].countable = candidate;
+
+         /* Assign a counter from the same group: */
+         assert(d->assigned_counters[grp] < group->num_counters);
+         const auto slot = d->assigned_counters[grp]++;
+         d->state[id].counter = &group->counters[slot];
+
+         std::cout << "Countable: " << name << ", group=" << group->name <<
+            ", counter=" << slot << "\n";
+
+         return;
+      }
+   }
+
+   unreachable("no such countable!");
+}
+
+/* Difference between the two most recently collected raw values. */
+uint64_t
+FreedrenoDriver::Countable::get_value() const
+{
+   const auto &slot = d->state[id];
+
+   return slot.value - slot.last_value;
+}
+
+/*
+ * DerivedCounter
+ */
+
+/*
+ * Build a pps Counter with the next free counter id, whose getter ignores
+ * its arguments and simply evaluates the supplied `derive` function.
+ */
+FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
+                                                Counter::Units units,
+                                                std::function<int64_t()> derive)
+   : Counter(d->next_counter_id++, name, 0)
+{
+   std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
+
+   this->units = units;
+
+   set_getter([derive](const Counter &, const Driver &) {
+         return derive();
+      }
+   );
+}
+
+/*
+ * Create a derived counter, store a copy of it in `counters` and hand the
+ * newly built object back to the caller.
+ */
+FreedrenoDriver::DerivedCounter
+FreedrenoDriver::counter(std::string name, Counter::Units units,
+                         std::function<int64_t()> derive)
+{
+   auto derived = DerivedCounter(this, name, units, derive);
+
+   counters.push_back(derived);
+
+   return derived;
+}
+
+} // namespace pps
diff --git a/src/freedreno/ds/fd_pps_driver.h b/src/freedreno/ds/fd_pps_driver.h
new file mode 100644
index 00000000000..dc3e1aedcc0
--- /dev/null
+++ b/src/freedreno/ds/fd_pps_driver.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include "pps/pps_driver.h"
+
+#include "common/freedreno_dev_info.h"
+#include "drm/freedreno_drmif.h"
+#include "drm/freedreno_ringbuffer.h"
+#include "perfcntrs/freedreno_dt.h"
+#include "perfcntrs/freedreno_perfcntr.h"
+
+namespace pps
+{
+
+/**
+ * pps Driver implementation for freedreno.
+ *
+ * Hardware countables are sampled by reading the memory mapped counter
+ * registers, and the exported pps counters are derived from the sampled
+ * deltas by per-counter formulas.
+ *
+ * All scalar and pointer members carry default initialisers so that the
+ * id allocators start at zero and nothing is read uninitialised,
+ * regardless of how the object is constructed.
+ */
+class FreedrenoDriver : public Driver
+{
+public:
+   uint64_t get_min_sampling_period_ns() override;
+   bool init_perfcnt() override;
+   void enable_counter(uint32_t counter_id) override;
+   void enable_all_counters() override;
+   void enable_perfcnt(uint64_t sampling_period_ns) override;
+   void disable_perfcnt() override;
+   bool dump_perfcnt() override;
+   uint64_t next() override;
+
+private:
+   struct fd_device *dev = nullptr;
+   struct fd_pipe *pipe = nullptr;
+   uint32_t gpu_id = 0;
+   uint32_t max_freq = 0;
+   /* Next ids to hand out to DerivedCounter / Countable respectively: */
+   uint32_t next_counter_id = 0;
+   uint32_t next_countable_id = 0;
+   /* Timestamp of the most recent collect_countables(): */
+   uint64_t last_dump_ts = 0;
+   /* Timestamp returned (and then zeroed) by next(): */
+   uint64_t last_capture_ts = 0;
+
+   struct freedreno_dev_info info = {};
+
+   /**
+    * The memory mapped i/o space for counter readback:
+    */
+   void *io = nullptr;
+
+   const struct fd_perfcntr_group *perfcntrs = nullptr;
+   unsigned num_perfcntrs = 0;
+
+   /**
+    * The number of counters assigned per perfcntr group, the index
+    * into this matches the index into perfcntrs
+    */
+   std::vector<int> assigned_counters;
+
+   /*
+    * Values that can be used by derived counters evaluation
+    */
+   float time = 0.0f;  /* time since last sample in fraction of second */
+//   uint32_t cycles;  /* the number of clock cycles since last sample */
+
+   void setup_a6xx_counters();
+
+   void configure_counters(bool reset, bool wait);
+   void collect_countables();
+
+   /**
+    * Split out countable mutable state from the class so that copy-
+    * constructor does something sane when lambda derive function
+    * tries to get the countable value.
+    */
+   struct CountableState {
+      uint64_t last_value, value;
+      const struct fd_perfcntr_countable *countable;
+      const struct fd_perfcntr_counter *counter;
+   };
+
+   /* Indexed by Countable id: */
+   std::vector<struct CountableState> state;
+
+   /**
+    * Performance counters on adreno consist of sets of counters in various
+    * blocks of the GPU, where each counter can be muxed to collect
+    * one of a set of countables.
+    *
+    * But the countables tend to be too low level to be directly useful to
+    * visualize.  Instead various combinations of countables are combined
+    * with various formulas to derive the high level "Counter" value exposed
+    * via gfx-pps.
+    *
+    * This class serves to decouple the logic of those formulas from the
+    * details of collecting countable values.
+    */
+   class Countable {
+   public:
+      Countable(FreedrenoDriver *d, std::string name);
+
+      /* Implicit conversion so formulas can use a Countable like a number;
+       * yields the delta between the two most recent samples.
+       */
+      operator int64_t() const { return get_value(); }
+
+      void configure(struct fd_ringbuffer *ring, bool reset);
+      void collect();
+      void resolve();
+
+   private:
+
+      uint64_t get_value() const;
+
+      uint32_t id;
+      FreedrenoDriver *d;
+      std::string name;
+   };
+
+   Countable countable(std::string name);
+
+   std::vector<Countable> countables;
+
+   /**
+    * A derived "Counter" (from pps's perspective)
+    */
+   class DerivedCounter : public Counter {
+   public:
+      DerivedCounter(FreedrenoDriver *d, std::string name, Counter::Units units,
+                     std::function<int64_t()> derive);
+   };
+
+   DerivedCounter counter(std::string name, Counter::Units units,
+                          std::function<int64_t()> derive);
+};
+
+} // namespace pps
diff --git a/src/freedreno/ds/meson.build b/src/freedreno/ds/meson.build
new file mode 100644
index 00000000000..89d2b7e6eb5
--- /dev/null
+++ b/src/freedreno/ds/meson.build
@@ -0,0 +1,49 @@
+# Copyright © 2021 Collabora, Ltd.
+# Copyright © 2021 Google, Inc
+#
+# SPDX-License-Identifier: MIT
+
+# Static library holding the freedreno pps driver sources.  It is built
+# as C++17 against libdrm and perfetto.
+pps_freedreno_lib = static_library(
+ 'freedreno-gpu',
+ sources: [
+ 'fd_pps_driver.cc',
+ 'fd_pps_driver.h',
+ freedreno_xml_header_files,
+ ],
+ include_directories: [
+ inc_tool,
+ inc_src,
+ inc_freedreno,
+ inc_include,
+ ],
+ dependencies: [
+ dep_libdrm,
+ dep_perfetto,
+ ],
+ cpp_args: '-std=c++17'
+)
+
+# Dependency object for users of the freedreno datasource: links the driver
+# library together with the freedreno helper libraries it uses and defines
+# PPS_FREEDRENO, which guards the freedreno code in pps_driver.cc.
+pps_freedreno_dep = declare_dependency(
+ link_with: [
+ pps_freedreno_lib,
+ libfreedreno_common,
+ libfreedreno_drm,
+ libfreedreno_perfcntrs,
+ ],
+ dependencies: [
+ idep_mesautil,
+ ],
+ include_directories: [
+ inc_tool,
+ inc_src,
+ ],
+ compile_args: [
+ '-DPPS_FREEDRENO',
+ ],
+)
+
+# Register the datasource and the extra include paths in the lists that
+# src/tool/pps/meson.build consumes.
+pps_datasources += pps_freedreno_dep
+pps_includes += [
+ inc_include,
+ inc_freedreno,
+]
diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build
index c29b8a3b871..8d1f12b63cf 100644
--- a/src/freedreno/meson.build
+++ b/src/freedreno/meson.build
@@ -50,6 +50,10 @@ subdir('fdl')
subdir('perfcntrs')
subdir('computerator')
+if with_perfetto and (with_datasources.contains('freedreno') or with_datasources.contains('auto'))
+ subdir('ds')
+endif
+
# Everything that depends on rnn requires (indirectly) libxml2:
if dep_libxml2.found()
subdir('rnn')
diff --git a/src/meson.build b/src/meson.build
index a36db2fe18d..190041207bf 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -27,6 +27,7 @@ inc_amd_common = include_directories('amd/common')
inc_amd_common_llvm = include_directories('amd/llvm')
inc_tool = include_directories('tool')
pps_datasources = []
+pps_includes = []
libglsl_util = static_library(
'glsl_util',
diff --git a/src/tool/pps/meson.build b/src/tool/pps/meson.build
index 9ca5e57e003..e0e0361d6f2 100644
--- a/src/tool/pps/meson.build
+++ b/src/tool/pps/meson.build
@@ -19,7 +19,7 @@ pps_deps += pps_datasources
lib_pps = static_library(
'pps',
sources: pps_sources,
- include_directories: [include_pps, inc_src],
+ include_directories: [include_pps, inc_src, pps_includes],
dependencies: pps_deps,
cpp_args: '-std=c++17'
)
diff --git a/src/tool/pps/pps_driver.cc b/src/tool/pps/pps_driver.cc
index 6c7c340a941..b32dcd26ea5 100644
--- a/src/tool/pps/pps_driver.cc
+++ b/src/tool/pps/pps_driver.cc
@@ -13,6 +13,10 @@
#include <iterator>
#include <sstream>
+#ifdef PPS_FREEDRENO
+#include "freedreno/ds/fd_pps_driver.h"
+#endif // PPS_FREEDRENO
+
#include "pps.h"
#include "pps_algorithm.h"
@@ -21,6 +25,11 @@ namespace pps
std::unordered_map<std::string, std::unique_ptr<Driver>> create_supported_drivers()
{
std::unordered_map<std::string, std::unique_ptr<Driver>> map;
+
+#ifdef PPS_FREEDRENO
+ map.emplace("msm", std::make_unique<FreedrenoDriver>());
+#endif // PPS_FREEDRENO
+
return map;
}