diff options
-rw-r--r-- | meson_options.txt | 2 | ||||
-rw-r--r-- | src/freedreno/ds/fd_pps_driver.cc | 365 | ||||
-rw-r--r-- | src/freedreno/ds/fd_pps_driver.h | 131 | ||||
-rw-r--r-- | src/freedreno/ds/meson.build | 49 | ||||
-rw-r--r-- | src/freedreno/meson.build | 4 | ||||
-rw-r--r-- | src/meson.build | 1 | ||||
-rw-r--r-- | src/tool/pps/meson.build | 2 | ||||
-rw-r--r-- | src/tool/pps/pps_driver.cc | 9 |
8 files changed, 561 insertions, 2 deletions
diff --git a/meson_options.txt b/meson_options.txt index 8ab9309806a..37e5015d748 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -475,6 +475,6 @@ option( 'datasources', type : 'array', value : ['auto'], - choices : ['auto', 'panfrost', 'intel'], + choices : ['auto', 'panfrost', 'intel', 'freedreno'], description: 'List of Perfetto datasources to build. If this is set to `auto`, datasources that can not be build are skipped. Default: [`auto`]' ) diff --git a/src/freedreno/ds/fd_pps_driver.cc b/src/freedreno/ds/fd_pps_driver.cc new file mode 100644 index 00000000000..ebcba2d5e79 --- /dev/null +++ b/src/freedreno/ds/fd_pps_driver.cc @@ -0,0 +1,365 @@ +/* + * Copyright © 2021 Google, Inc. + * + * SPDX-License-Identifier: MIT + */ + +#include "fd_pps_driver.h" + +#include <cstring> +#include <iostream> +#include <perfetto.h> + +#include "pps/pps.h" +#include "pps/pps_algorithm.h" + +namespace pps +{ + +uint64_t +FreedrenoDriver::get_min_sampling_period_ns() +{ + return 100000; +} + +/* +TODO this seems like it would be largely the same for a5xx as well +(ie. same countable names).. + */ +void +FreedrenoDriver::setup_a6xx_counters() +{ + /* TODO is there a reason to want more than one group? */ + CounterGroup group = {}; + group.name = "counters"; + groups.clear(); + counters.clear(); + countables.clear(); + enabled_counters.clear(); + groups.emplace_back(std::move(group)); + + /* + * Create the countables that we'll be using. 
+ */ + + auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT"); + auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES"); + auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS"); + auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS"); + auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS"); + auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES"); + auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES"); + + /* + * And then setup the derived counters that we are exporting to + * pps based on the captured countable values + */ + + counter("GPU Frequency", Counter::Units::Hertz, [=]() { + return PERF_CP_ALWAYS_COUNT / time; + } + ); + + counter("GPU % Utilization", Counter::Units::Percent, [=]() { + return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq; + } + ); + + // This one is a bit of a guess, but seems plausible.. + counter("ALU / Fragment", Counter::Units::None, [=]() { + return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS + + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS; + } + ); + + counter("TP L1 Cache Misses", Counter::Units::None, [=]() { + return PERF_TP_L1_CACHELINE_MISSES / time; + } + ); + + counter("Shader Core Utilization", Counter::Units::Percent, [=]() { + return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores); + } + ); + + // TODO add more.. 
see https://gpuinspector.dev/docs/gpu-counters/qualcomm + // for what blob exposes +} + +/** + * Generate and submit the cmdstream to configure the counter/countable + * muxing + */ +void +FreedrenoDriver::configure_counters(bool reset, bool wait) +{ + struct fd_submit *submit = fd_submit_new(pipe); + enum fd_ringbuffer_flags flags = + (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); + struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags); + + for (auto countable : countables) + countable.configure(ring, reset); + + struct fd_submit_fence fence = {}; + util_queue_fence_init(&fence.ready); + + fd_submit_flush(submit, -1, &fence); + + util_queue_fence_wait(&fence.ready); + + fd_ringbuffer_del(ring); + fd_submit_del(submit); + + if (wait) + fd_pipe_wait(pipe, &fence.fence); +} + +/** + * Read the current counter values and record the time. + */ +void +FreedrenoDriver::collect_countables() +{ + last_dump_ts = perfetto::base::GetBootTimeNs().count(); + + for (auto countable : countables) + countable.collect(); +} + +bool +FreedrenoDriver::init_perfcnt() +{ + uint64_t val; + + dev = fd_device_new(drm_device.fd); + pipe = fd_pipe_new(dev, FD_PIPE_3D); + + if (fd_pipe_get_param(pipe, FD_GPU_ID, &val)) { + PERFETTO_FATAL("Could not get GPU_ID"); + return false; + } + gpu_id = val; + + if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) { + PERFETTO_FATAL("Could not get MAX_FREQ"); + return false; + } + max_freq = val; + + perfcntrs = fd_perfcntrs(gpu_id, &num_perfcntrs); + if (num_perfcntrs == 0) { + PERFETTO_FATAL("No hw counters available"); + return false; + } + + assigned_counters.resize(num_perfcntrs); + assigned_counters.assign(assigned_counters.size(), 0); + + switch (gpu_id) { + case 600 ... 
699: + setup_a6xx_counters(); + break; + default: + PERFETTO_FATAL("Unsupported GPU: a%03u", gpu_id); + return false; + } + + state.resize(next_countable_id); + + for (auto countable : countables) + countable.resolve(); + + freedreno_dev_info_init(&info, gpu_id); + + io = fd_dt_find_io(); + if (!io) { + PERFETTO_FATAL("Could not map GPU I/O space"); + return false; + } + + configure_counters(true, true); + collect_countables(); + + return true; +} + +void +FreedrenoDriver::enable_counter(const uint32_t counter_id) +{ + enabled_counters.push_back(counters[counter_id]); +} + +void +FreedrenoDriver::enable_all_counters() +{ + enabled_counters.reserve(counters.size()); + for (auto &counter : counters) { + enabled_counters.push_back(counter); + } +} + +void +FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) +{ +} + +bool +FreedrenoDriver::dump_perfcnt() +{ + auto last_ts = last_dump_ts; + + collect_countables(); + + auto elapsed_time_ns = last_dump_ts - last_ts; + + time = (float)elapsed_time_ns / 1000000000.0; + + // TODO we want to do this periodically to keep the GPU awake + // (and to ensure we don't lose counter configuration due to + // suspend/resume cycle), but we don't need to do this every + // time.. we probably just want to do this every 30-60ms.. 
+ configure_counters(false, false); + + last_capture_ts = last_dump_ts; + + return true; +} + +uint64_t FreedrenoDriver::next() +{ + auto ret = last_capture_ts; + last_capture_ts = 0; + return ret; +} + +void FreedrenoDriver::disable_perfcnt() +{ + /* There isn't really any disable, only reconfiguring which countables + * get muxed to which counters + */ +} + +/* + * Countable + */ + +FreedrenoDriver::Countable +FreedrenoDriver::countable(std::string name) +{ + auto countable = Countable(this, name); + countables.emplace_back(countable); + return countable; +} + +FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name) + : id {d->next_countable_id++}, d {d}, name {name} +{ +} + +/* Emit register writes on ring to configure counter/countable muxing: */ +void +FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) +{ + const struct fd_perfcntr_countable *countable = d->state[id].countable; + const struct fd_perfcntr_counter *counter = d->state[id].counter; + + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); + + if (counter->enable && reset) { + OUT_PKT4(ring, counter->enable, 1); + OUT_RING(ring, 0); + } + + if (counter->clear && reset) { + OUT_PKT4(ring, counter->clear, 1); + OUT_RING(ring, 1); + + OUT_PKT4(ring, counter->clear, 1); + OUT_RING(ring, 0); + } + + OUT_PKT4(ring, counter->select_reg, 1); + OUT_RING(ring, countable->selector); + + if (counter->enable && reset) { + OUT_PKT4(ring, counter->enable, 1); + OUT_RING(ring, 1); + } +} + +/* Collect current counter value and calculate delta since last sample: */ +void +FreedrenoDriver::Countable::collect() +{ + const struct fd_perfcntr_counter *counter = d->state[id].counter; + + d->state[id].last_value = d->state[id].value; + + uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo; + uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi; + + uint32_t lo = *reg_lo; + uint32_t hi = *reg_hi; + + d->state[id].value = lo | ((uint64_t)hi << 32); +} + +/* Resolve the 
countable and assign next counter from its group: */ +void +FreedrenoDriver::Countable::resolve() +{ + for (unsigned i = 0; i < d->num_perfcntrs; i++) { + const struct fd_perfcntr_group *g = &d->perfcntrs[i]; + for (unsigned j = 0; j < g->num_countables; j++) { + const struct fd_perfcntr_countable *c = &g->countables[j]; + if (name == c->name) { + d->state[id].countable = c; + + /* Assign a counter from the same group: */ + assert(d->assigned_counters[i] < g->num_counters); + d->state[id].counter = &g->counters[d->assigned_counters[i]++]; + + std::cout << "Countable: " << name << ", group=" << g->name << + ", counter=" << d->assigned_counters[i] - 1 << "\n"; + + return; + } + } + } + unreachable("no such countable!"); +} + +uint64_t +FreedrenoDriver::Countable::get_value() const +{ + return d->state[id].value - d->state[id].last_value; +} + +/* + * DerivedCounter + */ + +FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name, + Counter::Units units, + std::function<int64_t()> derive) + : Counter(d->next_counter_id++, name, 0) +{ + std::cout << "DerivedCounter: " << name << ", id=" << id << "\n"; + this->units = units; + set_getter([=](const Counter &c, const Driver &d) { + return derive(); + } + ); +} + +FreedrenoDriver::DerivedCounter +FreedrenoDriver::counter(std::string name, Counter::Units units, + std::function<int64_t()> derive) +{ + auto counter = DerivedCounter(this, name, units, derive); + counters.emplace_back(counter); + return counter; +} + +} // namespace pps diff --git a/src/freedreno/ds/fd_pps_driver.h b/src/freedreno/ds/fd_pps_driver.h new file mode 100644 index 00000000000..dc3e1aedcc0 --- /dev/null +++ b/src/freedreno/ds/fd_pps_driver.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2021 Google, Inc. 
+ * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "pps/pps_driver.h" + +#include "common/freedreno_dev_info.h" +#include "drm/freedreno_drmif.h" +#include "drm/freedreno_ringbuffer.h" +#include "perfcntrs/freedreno_dt.h" +#include "perfcntrs/freedreno_perfcntr.h" + +namespace pps +{ + +class FreedrenoDriver : public Driver +{ +public: + uint64_t get_min_sampling_period_ns() override; + bool init_perfcnt() override; + void enable_counter(uint32_t counter_id) override; + void enable_all_counters() override; + void enable_perfcnt(uint64_t sampling_period_ns) override; + void disable_perfcnt() override; + bool dump_perfcnt() override; + uint64_t next() override; + +private: + struct fd_device *dev; + struct fd_pipe *pipe; + uint32_t gpu_id; + uint32_t max_freq; + uint32_t next_counter_id; + uint32_t next_countable_id; + uint64_t last_dump_ts = 0; + uint64_t last_capture_ts; + + struct freedreno_dev_info info; + + /** + * The memory mapped i/o space for counter readback: + */ + void *io; + + const struct fd_perfcntr_group *perfcntrs; + unsigned num_perfcntrs; + + /** + * The number of counters assigned per perfcntr group, the index + * into this matches the index into perfcntrs + */ + std::vector<int> assigned_counters; + + /* + * Values that can be used by derived counters evaluation + */ + float time; /* time since last sample in fraction of second */ +// uint32_t cycles; /* the number of clock cycles since last sample */ + + void setup_a6xx_counters(); + + void configure_counters(bool reset, bool wait); + void collect_countables(); + + /** + * Split out countable mutable state from the class so that copy- + * constructor does something sane when lambda derive function + * tries to get the countable value. 
+ */ + struct CountableState { + uint64_t last_value, value; + const struct fd_perfcntr_countable *countable; + const struct fd_perfcntr_counter *counter; + }; + + std::vector<struct CountableState> state; + + /** + * Performance counters on adreno consist of sets of counters in various + * blocks of the GPU, where each counter can be muxed to collect + * one of a set of countables. + * + * But the countables tend to be too low level to be directly useful to + * visualize. Instead various combinations of countables are combined + * with various formulas to derive the high level "Counter" value exposed + * via gfx-pps. + * + * This class serves to decouple the logic of those formulas from the + * details of collecting countable values. + */ + class Countable { + public: + Countable(FreedrenoDriver *d, std::string name); + + operator int64_t() const { return get_value(); }; + + void configure(struct fd_ringbuffer *ring, bool reset); + void collect(); + void resolve(); + + private: + + uint64_t get_value() const; + + uint32_t id; + FreedrenoDriver *d; + std::string name; + }; + + Countable countable(std::string name); + + std::vector<Countable> countables; + + /** + * A derived "Counter" (from pps's perspective) + */ + class DerivedCounter : public Counter { + public: + DerivedCounter(FreedrenoDriver *d, std::string name, Counter::Units units, + std::function<int64_t()> derive); + }; + + DerivedCounter counter(std::string name, Counter::Units units, + std::function<int64_t()> derive); +}; + +} // namespace pps diff --git a/src/freedreno/ds/meson.build b/src/freedreno/ds/meson.build new file mode 100644 index 00000000000..89d2b7e6eb5 --- /dev/null +++ b/src/freedreno/ds/meson.build @@ -0,0 +1,49 @@ +# Copyright © 2021 Collabora, Ltd. 
+# Copyright © 2021 Google, Inc +# +# SPDX-License-Identifier: MIT + +pps_freedreno_lib = static_library( + 'freedreno-gpu', + sources: [ + 'fd_pps_driver.cc', + 'fd_pps_driver.h', + freedreno_xml_header_files, + ], + include_directories: [ + inc_tool, + inc_src, + inc_freedreno, + inc_include, + ], + dependencies: [ + dep_libdrm, + dep_perfetto, + ], + cpp_args: '-std=c++17' +) + +pps_freedreno_dep = declare_dependency( + link_with: [ + pps_freedreno_lib, + libfreedreno_common, + libfreedreno_drm, + libfreedreno_perfcntrs, + ], + dependencies: [ + idep_mesautil, + ], + include_directories: [ + inc_tool, + inc_src, + ], + compile_args: [ + '-DPPS_FREEDRENO', + ], +) + +pps_datasources += pps_freedreno_dep +pps_includes += [ + inc_include, + inc_freedreno, +] diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build index c29b8a3b871..8d1f12b63cf 100644 --- a/src/freedreno/meson.build +++ b/src/freedreno/meson.build @@ -50,6 +50,10 @@ subdir('fdl') subdir('perfcntrs') subdir('computerator') +if with_perfetto and (with_datasources.contains('freedreno') or with_datasources.contains('auto')) + subdir('ds') +endif + # Everything that depends on rnn requires (indirectly) libxml2: if dep_libxml2.found() subdir('rnn') diff --git a/src/meson.build b/src/meson.build index a36db2fe18d..190041207bf 100644 --- a/src/meson.build +++ b/src/meson.build @@ -27,6 +27,7 @@ inc_amd_common = include_directories('amd/common') inc_amd_common_llvm = include_directories('amd/llvm') inc_tool = include_directories('tool') pps_datasources = [] +pps_includes = [] libglsl_util = static_library( 'glsl_util', diff --git a/src/tool/pps/meson.build b/src/tool/pps/meson.build index 9ca5e57e003..e0e0361d6f2 100644 --- a/src/tool/pps/meson.build +++ b/src/tool/pps/meson.build @@ -19,7 +19,7 @@ pps_deps += pps_datasources lib_pps = static_library( 'pps', sources: pps_sources, - include_directories: [include_pps, inc_src], + include_directories: [include_pps, inc_src, pps_includes], 
dependencies: pps_deps, cpp_args: '-std=c++17' ) diff --git a/src/tool/pps/pps_driver.cc b/src/tool/pps/pps_driver.cc index 6c7c340a941..b32dcd26ea5 100644 --- a/src/tool/pps/pps_driver.cc +++ b/src/tool/pps/pps_driver.cc @@ -13,6 +13,10 @@ #include <iterator> #include <sstream> +#ifdef PPS_FREEDRENO +#include "freedreno/ds/fd_pps_driver.h" +#endif // PPS_FREEDRENO + #include "pps.h" #include "pps_algorithm.h" @@ -21,6 +25,11 @@ namespace pps std::unordered_map<std::string, std::unique_ptr<Driver>> create_supported_drivers() { std::unordered_map<std::string, std::unique_ptr<Driver>> map; + +#ifdef PPS_FREEDRENO + map.emplace("msm", std::make_unique<FreedrenoDriver>()); +#endif // PPS_FREEDRENO + return map; } |