diff options
author | Lionel Landwerlin <lionel.g.landwerlin@intel.com> | 2020-09-03 10:52:34 +0300 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-02 13:25:54 +0000 |
commit | f32d1bf5295ce420779b324c6935e68ac6ad8be4 (patch) | |
tree | e0ccd966b10abe0be7e87c4182bd78edea63c52d /src/intel | |
parent | a6e980e9bf6c33f4166b423ead0d221c76c2bcde (diff) |
intel/perf: query register descriptions
This will be useful when we implement queries using a series of MI_SRM
instead of MI_RPC.
Unfortunately on Gen12, the MI_RPC command sources values from the OAR
unit, which has a similar series of registers to the OAG unit, but some
of the HW configuration doesn't reach OAR, so we have to snapshot
OAG manually instead.
v2: Fix comments
Use const
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6518>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/perf/gen_perf.c | 142 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.h | 61 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.py | 2 |
3 files changed, 203 insertions, 2 deletions
diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 85cc4ec1338..d50ede2b97f 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -1130,6 +1130,74 @@ gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result, } } +static uint32_t +query_accumulator_offset(const struct gen_perf_query_info *query, + enum gen_perf_query_field_type type, + uint8_t index) +{ + switch (type) { + case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT: + return query->perfcnt_offset + index; + default: + unreachable("Invalid register type"); + return 0; + } +} + +void +gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result, + const struct gen_perf_query_info *query, + const struct gen_device_info *devinfo, + const void *start, + const void *end, + bool no_oa_accumulate) +{ + struct gen_perf_query_field_layout *layout = &query->perf->query_layout; + + for (uint32_t r = 0; r < layout->n_fields; r++) { + struct gen_perf_query_field *field = &layout->fields[r]; + + if (field->type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) { + gen_perf_query_result_read_frequencies(result, devinfo, + start + field->location, + end + field->location); + /* no_oa_accumulate=true is used when doing GL perf queries, we + * manually parse the OA reports from the OA buffer and subtract + * unrelated deltas, so don't accumulate the begin/end reports here. 
+ */ + if (!no_oa_accumulate) { + gen_perf_query_result_accumulate(result, query, + start + field->location, + end + field->location); + } + } else { + uint64_t v0, v1; + + if (field->size == 4) { + v0 = *(const uint32_t *)(start + field->location); + v1 = *(const uint32_t *)(end + field->location); + } else { + assert(field->size == 8); + v0 = *(const uint64_t *)(start + field->location); + v1 = *(const uint64_t *)(end + field->location); + } + + if (field->mask) { + v0 = field->mask & v0; + v1 = field->mask & v1; + } + + /* RPSTAT is a bit of a special case because its begin/end values + * represent frequencies. We store it in a separate location. + */ + if (field->type == GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT) + gen_perf_query_result_read_gt_frequency(result, devinfo, v0, v1); + else + result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0; + } + } +} + void gen_perf_query_result_clear(struct gen_perf_query_result *result) { @@ -1146,12 +1214,86 @@ gen_perf_compare_query_names(const void *v1, const void *v2) return strcmp(q1->name, q2->name); } +static inline struct gen_perf_query_field * +add_query_register(struct gen_perf_query_field_layout *layout, + enum gen_perf_query_field_type type, + uint16_t offset, + uint16_t size, + uint8_t index) +{ + /* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes + * (shows up nicely in the debugger). 
+ */ + if (type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) + layout->size = align(layout->size, 64); + else if (size % 8 == 0) + layout->size = align(layout->size, 8); + + layout->fields[layout->n_fields++] = (struct gen_perf_query_field) { + .mmio_offset = offset, + .location = layout->size, + .type = type, + .index = index, + .size = size, + }; + layout->size += size; + + return &layout->fields[layout->n_fields - 1]; +} + +static void +gen_perf_init_query_fields(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo) +{ + struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout; + + layout->n_fields = 0; + + /* MI_RPC requires a 64byte alignment. */ + layout->alignment = 64; + + add_query_register(layout, GEN_PERF_QUERY_FIELD_TYPE_MI_RPC, + 0, 256, 0); + + if (devinfo->gen <= 11) { + struct gen_perf_query_field *field = + add_query_register(layout, + GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, + PERF_CNT_1_DW0, 8, 0); + field->mask = PERF_CNT_VALUE_MASK; + + field = add_query_register(layout, + GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, + PERF_CNT_2_DW0, 8, 1); + field->mask = PERF_CNT_VALUE_MASK; + } + + if (devinfo->gen == 8 && !devinfo->is_cherryview) { + add_query_register(layout, + GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, + GEN7_RPSTAT1, 4, 0); + } + + if (devinfo->gen >= 9) { + add_query_register(layout, + GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, + GEN9_RPSTAT0, 4, 0); + } + + /* Align the whole package to 64bytes so that 2 snapshots can be put + * together without extra alignment for the user. 
+ */ + layout->size = align(layout->size, 64); +} + void gen_perf_init_metrics(struct gen_perf_config *perf_cfg, const struct gen_device_info *devinfo, int drm_fd, bool include_pipeline_statistics) { + gen_perf_init_query_fields(perf_cfg, devinfo); + if (include_pipeline_statistics) { load_pipeline_statistic_metrics(perf_cfg, devinfo); gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo); diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index bbc87495821..b5e751b8d62 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -109,9 +109,9 @@ struct gen_pipeline_stat { * For Gen8+ * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters * - * Plus 2 PERF_CNT registers. + * Plus 2 PERF_CNT registers and 1 RPSTAT register. */ -#define MAX_OA_REPORT_COUNTERS (62 + 2) +#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1) /* * When currently allocate only one page for pipeline statistics queries. Here @@ -234,10 +234,54 @@ struct gen_perf_query_info { int b_offset; int c_offset; int perfcnt_offset; + int rpstat_offset; struct gen_perf_registers config; }; +/* When not using the MI_RPC command, this structure describes the list of + * register offsets as well as their storage location so that they can be + * stored through a series of MI_SRM commands and accumulated with + * gen_perf_query_result_accumulate_fields(). 
+ */ +struct gen_perf_query_field_layout { + /* Alignment for the layout */ + uint32_t alignment; + + /* Size of the whole layout */ + uint32_t size; + + uint32_t n_fields; + + struct gen_perf_query_field { + /* MMIO location of this register */ + uint16_t mmio_offset; + + /* Location of this register in the storage */ + uint16_t location; + + /* Type of register, for accumulation (see gen_perf_query_info:*_offset + * fields) + */ + enum gen_perf_query_field_type { + GEN_PERF_QUERY_FIELD_TYPE_MI_RPC, + GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, + GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT + } type; + + /* Index of register in the given type (for instance A31 or B2, + * etc...) + */ + uint8_t index; + + /* 4, 8 or 256 */ + uint16_t size; + + /* If not 0, mask to apply to the register value. */ + uint64_t mask; + } *fields; +}; + struct gen_perf_query_counter_info { struct gen_perf_query_counter *counter; @@ -269,6 +313,8 @@ struct gen_perf_config { struct gen_perf_query_counter_info *counter_infos; int n_counters; + struct gen_perf_query_field_layout query_layout; + /* Variables referenced in the XML meta data for OA performance * counters, e.g in the normalization equations. * @@ -387,6 +433,17 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result, const struct gen_perf_query_info *query, const uint32_t *start, const uint32_t *end); + +/** Accumulate the delta between 2 snapshots of OA perf registers (layout + * should match description specified through struct gen_perf_query_field_layout). 
+ */ +void gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result, + const struct gen_perf_query_info *query, + const struct gen_device_info *devinfo, + const void *start, + const void *end, + bool no_oa_accumulate); + void gen_perf_query_result_clear(struct gen_perf_query_result *result); static inline size_t diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py index b35b6a0482c..6fc23601a35 100644 --- a/src/intel/perf/gen_perf.py +++ b/src/intel/perf/gen_perf.py @@ -743,6 +743,7 @@ def main(): query->b_offset = query->a_offset + 45; query->c_offset = query->b_offset + 8; query->perfcnt_offset = query->c_offset + 8; + query->rpstat_offset = query->perfcnt_offset + 2; """)) else: c(textwrap.dedent("""\ @@ -754,6 +755,7 @@ def main(): query->b_offset = query->a_offset + 36; query->c_offset = query->b_offset + 8; query->perfcnt_offset = query->c_offset + 8; + query->rpstat_offset = query->perfcnt_offset + 2; """)) |