summaryrefslogtreecommitdiff
path: root/src/intel
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2020-09-03 10:52:34 +0300
committerMarge Bot <eric+marge@anholt.net>2021-02-02 13:25:54 +0000
commitf32d1bf5295ce420779b324c6935e68ac6ad8be4 (patch)
treee0ccd966b10abe0be7e87c4182bd78edea63c52d /src/intel
parenta6e980e9bf6c33f4166b423ead0d221c76c2bcde (diff)
intel/perf: query register descriptions
This will be useful when we implement queries using a series of MI_SRM instead of MI_RPC. Unfortunately on Gen12, the MI_RPC command sources values from the OAR unit, which has a similar series of registers to the OAG unit, but some of the HW configuration doesn't reach OAR, so we have to snapshot OAG manually instead. v2: Fix comments Use const Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6518>
Diffstat (limited to 'src/intel')
-rw-r--r--src/intel/perf/gen_perf.c142
-rw-r--r--src/intel/perf/gen_perf.h61
-rw-r--r--src/intel/perf/gen_perf.py2
3 files changed, 203 insertions, 2 deletions
diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c
index 85cc4ec1338..d50ede2b97f 100644
--- a/src/intel/perf/gen_perf.c
+++ b/src/intel/perf/gen_perf.c
@@ -1130,6 +1130,74 @@ gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
}
}
+static uint32_t
+query_accumulator_offset(const struct gen_perf_query_info *query,
+ enum gen_perf_query_field_type type,
+ uint8_t index)
+{
+ switch (type) {
+ case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
+ return query->perfcnt_offset + index;
+ default:
+ unreachable("Invalid register type");
+ return 0;
+ }
+}
+
+void
+gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
+ const struct gen_perf_query_info *query,
+ const struct gen_device_info *devinfo,
+ const void *start,
+ const void *end,
+ bool no_oa_accumulate)
+{
+ struct gen_perf_query_field_layout *layout = &query->perf->query_layout;
+
+ for (uint32_t r = 0; r < layout->n_fields; r++) {
+ struct gen_perf_query_field *field = &layout->fields[r];
+
+ if (field->type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) {
+ gen_perf_query_result_read_frequencies(result, devinfo,
+ start + field->location,
+ end + field->location);
+ /* no_oa_accumulate=true is used when doing GL perf queries, we
+ * manually parse the OA reports from the OA buffer and subtract
+ * unrelated deltas, so don't accumulate the begin/end reports here.
+ */
+ if (!no_oa_accumulate) {
+ gen_perf_query_result_accumulate(result, query,
+ start + field->location,
+ end + field->location);
+ }
+ } else {
+ uint64_t v0, v1;
+
+ if (field->size == 4) {
+ v0 = *(const uint32_t *)(start + field->location);
+ v1 = *(const uint32_t *)(end + field->location);
+ } else {
+ assert(field->size == 8);
+ v0 = *(const uint64_t *)(start + field->location);
+ v1 = *(const uint64_t *)(end + field->location);
+ }
+
+ if (field->mask) {
+ v0 = field->mask & v0;
+ v1 = field->mask & v1;
+ }
+
+ /* RPSTAT is a bit of a special case because its begin/end values
+ * represent frequencies. We store it in a separate location.
+ */
+ if (field->type == GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
+ gen_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
+ else
+ result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
+ }
+ }
+}
+
void
gen_perf_query_result_clear(struct gen_perf_query_result *result)
{
@@ -1146,12 +1214,86 @@ gen_perf_compare_query_names(const void *v1, const void *v2)
return strcmp(q1->name, q2->name);
}
+static inline struct gen_perf_query_field *
+add_query_register(struct gen_perf_query_field_layout *layout,
+ enum gen_perf_query_field_type type,
+ uint16_t offset,
+ uint16_t size,
+ uint8_t index)
+{
+ /* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes
+ * (shows up nicely in the debugger).
+ */
+ if (type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC)
+ layout->size = align(layout->size, 64);
+ else if (size % 8 == 0)
+ layout->size = align(layout->size, 8);
+
+ layout->fields[layout->n_fields++] = (struct gen_perf_query_field) {
+ .mmio_offset = offset,
+ .location = layout->size,
+ .type = type,
+ .index = index,
+ .size = size,
+ };
+ layout->size += size;
+
+ return &layout->fields[layout->n_fields - 1];
+}
+
+static void
+gen_perf_init_query_fields(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo)
+{
+ struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;
+
+ layout->n_fields = 0;
+
+ /* MI_RPC requires a 64byte alignment. */
+ layout->alignment = 64;
+
+ add_query_register(layout, GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
+ 0, 256, 0);
+
+ if (devinfo->gen <= 11) {
+ struct gen_perf_query_field *field =
+ add_query_register(layout,
+ GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
+ PERF_CNT_1_DW0, 8, 0);
+ field->mask = PERF_CNT_VALUE_MASK;
+
+ field = add_query_register(layout,
+ GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
+ PERF_CNT_2_DW0, 8, 1);
+ field->mask = PERF_CNT_VALUE_MASK;
+ }
+
+ if (devinfo->gen == 8 && !devinfo->is_cherryview) {
+ add_query_register(layout,
+ GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
+ GEN7_RPSTAT1, 4, 0);
+ }
+
+ if (devinfo->gen >= 9) {
+ add_query_register(layout,
+ GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
+ GEN9_RPSTAT0, 4, 0);
+ }
+
+ /* Align the whole package to 64bytes so that 2 snapshots can be put
+ * together without extra alignment for the user.
+ */
+ layout->size = align(layout->size, 64);
+}
+
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
const struct gen_device_info *devinfo,
int drm_fd,
bool include_pipeline_statistics)
{
+ gen_perf_init_query_fields(perf_cfg, devinfo);
+
if (include_pipeline_statistics) {
load_pipeline_statistic_metrics(perf_cfg, devinfo);
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h
index bbc87495821..b5e751b8d62 100644
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -109,9 +109,9 @@ struct gen_pipeline_stat {
* For Gen8+
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
*
- * Plus 2 PERF_CNT registers.
+ * Plus 2 PERF_CNT registers and 1 RPSTAT register.
*/
-#define MAX_OA_REPORT_COUNTERS (62 + 2)
+#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
/*
* When currently allocate only one page for pipeline statistics queries. Here
@@ -234,10 +234,54 @@ struct gen_perf_query_info {
int b_offset;
int c_offset;
int perfcnt_offset;
+ int rpstat_offset;
struct gen_perf_registers config;
};
+/* When not using the MI_RPC command, this structure describes the list of
+ * register offsets as well as their storage location so that they can be
+ * stored through a series of MI_SRM commands and accumulated with
+ * gen_perf_query_result_accumulate_fields().
+ */
+struct gen_perf_query_field_layout {
+ /* Alignment for the layout */
+ uint32_t alignment;
+
+ /* Size of the whole layout */
+ uint32_t size;
+
+ uint32_t n_fields;
+
+ struct gen_perf_query_field {
+ /* MMIO location of this register */
+ uint16_t mmio_offset;
+
+ /* Location of this register in the storage */
+ uint16_t location;
+
+ /* Type of register, for accumulation (see gen_perf_query_info:*_offset
+ * fields)
+ */
+ enum gen_perf_query_field_type {
+ GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
+ GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
+ GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT
+ } type;
+
+ /* Index of register in the given type (for instance A31 or B2,
+ * etc...)
+ */
+ uint8_t index;
+
+ /* 4, 8 or 256 */
+ uint16_t size;
+
+ /* If not 0, mask to apply to the register value. */
+ uint64_t mask;
+ } *fields;
+};
+
struct gen_perf_query_counter_info {
struct gen_perf_query_counter *counter;
@@ -269,6 +313,8 @@ struct gen_perf_config {
struct gen_perf_query_counter_info *counter_infos;
int n_counters;
+ struct gen_perf_query_field_layout query_layout;
+
/* Variables referenced in the XML meta data for OA performance
* counters, e.g in the normalization equations.
*
@@ -387,6 +433,17 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
const struct gen_perf_query_info *query,
const uint32_t *start,
const uint32_t *end);
+
+/** Accumulate the delta between 2 snapshots of OA perf registers (layout
+ * should match description specified through gen_perf_query_field_layout).
+ */
+void gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
+ const struct gen_perf_query_info *query,
+ const struct gen_device_info *devinfo,
+ const void *start,
+ const void *end,
+ bool no_oa_accumulate);
+
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
static inline size_t
diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py
index b35b6a0482c..6fc23601a35 100644
--- a/src/intel/perf/gen_perf.py
+++ b/src/intel/perf/gen_perf.py
@@ -743,6 +743,7 @@ def main():
query->b_offset = query->a_offset + 45;
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
+ query->rpstat_offset = query->perfcnt_offset + 2;
"""))
else:
c(textwrap.dedent("""\
@@ -754,6 +755,7 @@ def main():
query->b_offset = query->a_offset + 36;
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
+ query->rpstat_offset = query->perfcnt_offset + 2;
"""))