author     Jonathan Marek <jonathan@marek.ca>    2019-01-28 10:09:39 -0500
committer  Rob Clark <robdclark@gmail.com>       2019-01-28 18:21:16 -0500
commit     7d458c0c69ad80e3d01c0ba79e57ed3981764823 (patch)
tree       71182c6b5189cc72dce9a8cb9b927fcadbe77cdc
parent     cccec0b4573fd69530ec67fc808d41163059631c (diff)
freedreno: a2xx: add perfcntrs
Based on a5xx perfcntrs implementation.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
 src/gallium/drivers/freedreno/Makefile.sources    |   3
 src/gallium/drivers/freedreno/a2xx/fd2_context.c  |   3
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c     |   9
 src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c | 813
 src/gallium/drivers/freedreno/a2xx/fd2_query.c    | 244
 src/gallium/drivers/freedreno/a2xx/fd2_query.h    |  35
 src/gallium/drivers/freedreno/a2xx/fd2_screen.c   |   8
 src/gallium/drivers/freedreno/meson.build         |   3
 8 files changed, 1118 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index d2eddb989d7..e11c42517c9 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -50,8 +50,11 @@ a2xx_SOURCES := \
a2xx/fd2_emit.h \
a2xx/fd2_gmem.c \
a2xx/fd2_gmem.h \
+ a2xx/fd2_perfcntr.c \
a2xx/fd2_program.c \
a2xx/fd2_program.h \
+ a2xx/fd2_query.c \
+ a2xx/fd2_query.h \
a2xx/fd2_rasterizer.c \
a2xx/fd2_rasterizer.h \
a2xx/fd2_resource.c \
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.c b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
index 6d9dce2ffbe..9353e37420f 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -31,6 +31,7 @@
#include "fd2_emit.h"
#include "fd2_gmem.h"
#include "fd2_program.h"
+#include "fd2_query.h"
#include "fd2_rasterizer.h"
#include "fd2_texture.h"
#include "fd2_zsa.h"
@@ -120,5 +121,7 @@ fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
/* construct vertex state used for solid ops (clear, and gmem<->mem) */
fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
+ fd2_query_context_init(pctx);
+
return pctx;
}
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index 805a4cf032a..76fd7dee9d7 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -424,6 +424,15 @@ fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
OUT_RING(ring, 0x0000003b);
}
+ /* enable perfcntrs */
+ OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1);
+ OUT_RING(ring, COND(fd_mesa_debug & FD_DBG_PERFC, 1));
+
+ /* note: perfcntrs don't work without the PM_OVERRIDE bit */
+ OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2);
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0x00000fff);
+
OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
OUT_RING(ring, 0x00000002);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c b/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c
new file mode 100644
index 00000000000..35f084e1fa9
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c
@@ -0,0 +1,813 @@
+/*
+ * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Jonathan Marek <jonathan@marek.ca>
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_perfcntr.h"
+#include "freedreno_util.h"
+#include "a2xx.xml.h"
+
+#define REG(_x) REG_A2XX_ ## _x
+
+#define COUNTER(_sel, _lo, _hi) { \
+ .select_reg = REG(_sel), \
+ .counter_reg_lo = REG(_lo), \
+ .counter_reg_hi = REG(_hi), \
+}
+
+#define COUNTABLE(_selector, _query_type, _result_type) { \
+ .name = #_selector, \
+ .selector = _selector, \
+ .query_type = PIPE_DRIVER_QUERY_TYPE_ ## _query_type, \
+ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \
+}
+
+#define GROUP(_name, _counters, _countables) { \
+ .name = _name, \
+ .num_counters = ARRAY_SIZE(_counters), \
+ .counters = _counters, \
+ .num_countables = ARRAY_SIZE(_countables), \
+ .countables = _countables, \
+}
+
+static const struct fd_perfcntr_countable pa_su_countables[] = {
+ COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE),
+ COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable pa_sc_countables[] = {
+ COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE),
+ COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE),
+ COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE),
+ COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE),
+ COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE),
+ COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE),
+ COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE),
+ COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE),
+ COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE),
+ COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE),
+ COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE),
+ COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE),
+ COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE),
+ COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE),
+ COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE),
+ COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE),
+ COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE),
+ COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable vgt_countables[] = {
+ COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+ COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE),
+ COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE),
+ COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE),
+ COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE),
+ COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE),
+ COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE),
+ COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+ COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE),
+ COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE),
+ COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE),
+ COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE),
+ COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE),
+ COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE),
+ COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE),
+ COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE),
+ COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE),
+ COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE),
+ COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE),
+ COUNTABLE(SPARE34, UINT64, AVERAGE),
+ COUNTABLE(SPARE35, UINT64, AVERAGE),
+ COUNTABLE(SPARE36, UINT64, AVERAGE),
+ COUNTABLE(SPARE37, UINT64, AVERAGE),
+ COUNTABLE(SPARE38, UINT64, AVERAGE),
+ COUNTABLE(SPARE39, UINT64, AVERAGE),
+ COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE),
+ COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE),
+ COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE),
+ COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE),
+ COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE),
+ COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE),
+ COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE),
+ COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE),
+ COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable tcr_countables[] = {
+ COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE),
+ COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE),
+ COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable tp0_countables[] = {
+ COUNTABLE(POINT_QUADS, UINT64, AVERAGE),
+ COUNTABLE(BILIN_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_QUADS, UINT64, AVERAGE),
+ COUNTABLE(MIP_QUADS, UINT64, AVERAGE),
+ COUNTABLE(VOL_QUADS, UINT64, AVERAGE),
+ COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE),
+ COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE),
+ COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE),
+ COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ALIGN_2_QUADS, UINT64, AVERAGE),
+ COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE),
+ COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE),
+ COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE),
+ COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE),
+ COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE),
+ COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE),
+ COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable tcm_countables[] = {
+ COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE),
+ COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE),
+ COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE),
+ COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE),
+ COUNTABLE(READ_STARVED, UINT64, AVERAGE),
+ COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE),
+ COUNTABLE(READ_STALLED, UINT64, AVERAGE),
+ COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE),
+ COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE),
+ COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable tcf_countables[] = {
+ COUNTABLE(VALID_CYCLES, UINT64, AVERAGE),
+ COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_PHASES, UINT64, AVERAGE),
+ COUNTABLE(MIP_PHASES, UINT64, AVERAGE),
+ COUNTABLE(VOL_PHASES, UINT64, AVERAGE),
+ COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE),
+ COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE),
+ COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE),
+ COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE),
+ COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE),
+ COUNTABLE(TPC_BUSY, UINT64, AVERAGE),
+ COUNTABLE(TPC_STALLED, UINT64, AVERAGE),
+ COUNTABLE(TPC_STARVED, UINT64, AVERAGE),
+ COUNTABLE(TPC_WORKING, UINT64, AVERAGE),
+ COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE),
+ COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE),
+ COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE),
+ COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE),
+ COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE),
+ COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE),
+ COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE),
+ COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE),
+ COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE),
+ COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE),
+ COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE),
+ COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x00, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x01, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x04, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x10, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x11, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x12, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x13, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x18, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x19, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE),
+ COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE),
+ COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE),
+ COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE),
+ COUNTABLE(TC_STALL, UINT64, AVERAGE),
+ COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE),
+ COUNTABLE(QUADS, UINT64, AVERAGE),
+ COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE),
+ COUNTABLE(TAG_STALL, UINT64, AVERAGE),
+ COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE),
+ COUNTABLE(TCA_VALID, UINT64, AVERAGE),
+ COUNTABLE(PROBES_VALID, UINT64, AVERAGE),
+ COUNTABLE(MISS_STALL, UINT64, AVERAGE),
+ COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE),
+ COUNTABLE(TCO_STALL, UINT64, AVERAGE),
+ COUNTABLE(ANY_STALL, UINT64, AVERAGE),
+ COUNTABLE(TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(TAG_HITS, UINT64, AVERAGE),
+ COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE),
+ COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE),
+ COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE),
+ COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE),
+ COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE),
+ COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE),
+ COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE),
+ COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE),
+ COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT1, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT2, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT3, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT4, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT5, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT6, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT7, UINT64, AVERAGE),
+ COUNTABLE(SET0_EVICT8, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT1, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT2, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT3, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT4, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT5, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT6, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT7, UINT64, AVERAGE),
+ COUNTABLE(SET1_EVICT8, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT1, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT2, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT3, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT4, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT5, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT6, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT7, UINT64, AVERAGE),
+ COUNTABLE(SET2_EVICT8, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT1, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT2, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT3, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT4, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT5, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT6, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT7, UINT64, AVERAGE),
+ COUNTABLE(SET3_EVICT8, UINT64, AVERAGE),
+ COUNTABLE(FF_EMPTY, UINT64, AVERAGE),
+ COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE),
+ COUNTABLE(FF_FULL, UINT64, AVERAGE),
+ COUNTABLE(FF_XFC, UINT64, AVERAGE),
+ COUNTABLE(FF_STALLED, UINT64, AVERAGE),
+ COUNTABLE(FG_MASKS, UINT64, AVERAGE),
+ COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE),
+ COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE),
+ COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE),
+ COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE),
+ COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE),
+ COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE),
+ COUNTABLE(FG0_STALLED, UINT64, AVERAGE),
+ COUNTABLE(MEM_REQ512, UINT64, AVERAGE),
+ COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE),
+ COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE),
+ COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable sq_countables[] = {
+ COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE),
+ COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE),
+ COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE),
+ COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE),
+ COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE),
+ COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE),
+ COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE),
+ COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE),
+ COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE),
+ COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE),
+ COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE),
+ COUNTABLE(SQ_SX_PC_FULL, UINT64, AVERAGE),
+ COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE),
+ COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE),
+ COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE),
+ COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE),
+ COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE),
+ COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE),
+ COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE),
+ COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE),
+ COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE),
+ COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE),
+ COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE),
+ COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE),
+ COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE),
+ COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE),
+ COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE),
+ COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE),
+ COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE),
+ COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE),
+ COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE),
+ COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE),
+ COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE),
+ COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE),
+ COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE),
+ COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE),
+ COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE),
+ COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE),
+ COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE),
+ COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE),
+ COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE),
+ COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE),
+ COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE),
+ COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE),
+ COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE),
+ COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE),
+ COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE),
+ COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE),
+ COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE),
+ COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE),
+ COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE),
+ COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE),
+ COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_VTX_POP_THREAD_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE),
+ COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable sx_countables[] = {
+ COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE),
+ COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE),
+ COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE),
+ COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE),
+ COUNTABLE(SX_RB_COLOR_BUSY, UINT64, AVERAGE),
+ COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE),
+ COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable rb_countables[] = {
+ COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE),
+ COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_counter pa_su_counters[] = {
+ COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI),
+ COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI),
+ COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI),
+ COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI),
+};
+
+static const struct fd_perfcntr_counter pa_sc_counters[] = {
+ COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI),
+};
+
+static const struct fd_perfcntr_counter vgt_counters[] = {
+ COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, VGT_PERFCOUNTER0_HI),
+ COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI),
+ COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI),
+ COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI),
+};
+
+static const struct fd_perfcntr_counter tcr_counters[] = {
+ COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI),
+ COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI),
+};
+
+static const struct fd_perfcntr_counter tp0_counters[] = {
+ COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI),
+ COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI),
+};
+
+static const struct fd_perfcntr_counter tcm_counters[] = {
+ COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI),
+ COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI),
+};
+
+static const struct fd_perfcntr_counter tcf_counters[] = {
+ COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI),
+ COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI),
+ COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI),
+ COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI),
+ COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI),
+ COUNTER(TCF_PERFCOUNTER5_SELECT, TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI),
+ COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI),
+ COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI),
+ COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI),
+ COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI),
+ COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI),
+ COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI),
+};
+
+static const struct fd_perfcntr_counter sq_counters[] = {
+ COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI),
+ COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI),
+ COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI),
+ COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI),
+};
+
+static const struct fd_perfcntr_countable rbbm_countables[] = {
+ COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE),
+ COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_countable cp_countables[] = {
+ COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE),
+ COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE),
+ COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE),
+ COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE),
+ COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE),
+ COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE),
+ COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE),
+ COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE),
+ COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE),
+ COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE),
+ COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE),
+ COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE),
+ COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE),
+ COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE),
+ COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE),
+ COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE),
+ COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE),
+ COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE),
+ COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE),
+ COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE),
+ COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE),
+ COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE),
+ COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE),
+ COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE),
+ COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE),
+ COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE),
+ COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE),
+};
+
+static const struct fd_perfcntr_counter sx_counters[] = {
+ COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI),
+};
+
+// We don't have the enums for MH perfcntrs
+#if 0
+static const struct fd_perfcntr_counter mh_counters[] = {
+ COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI),
+ COUNTER(MH_PERFCOUNTER1_SELECT, MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI),
+};
+#endif
+
+static const struct fd_perfcntr_counter rbbm_counters[] = {
+ COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI),
+};
+
+static const struct fd_perfcntr_counter cp_counters[] = {
+ COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI),
+};
+
+static const struct fd_perfcntr_counter rb_counters[] = {
+ COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI),
+};
+
+const struct fd_perfcntr_group a2xx_perfcntr_groups[] = {
+ GROUP("PA_SU", pa_su_counters, pa_su_countables),
+ GROUP("PA_SC", pa_sc_counters, pa_sc_countables),
+ GROUP("VGT", vgt_counters, vgt_countables),
+ GROUP("TCR", tcr_counters, tcr_countables),
+ GROUP("TP0", tp0_counters, tp0_countables),
+ GROUP("TCM", tcm_counters, tcm_countables),
+ GROUP("TCF", tcf_counters, tcf_countables),
+ GROUP("SQ", sq_counters, sq_countables),
+ GROUP("SX", sx_counters, sx_countables),
+// GROUP("MH", mh_counters, mh_countables),
+ GROUP("RBBM", rbbm_counters, rbbm_countables),
+ GROUP("CP", cp_counters, cp_countables),
+ GROUP("RB", rb_counters, rb_countables),
+};
+
+const unsigned a2xx_num_perfcntr_groups = ARRAY_SIZE(a2xx_perfcntr_groups);
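
For reference, the COUNTABLE/COUNTER/GROUP helpers above are plain designated-initializer macros; hand-expanding one of each (illustration only, using the first PA_SU entries from this file) gives roughly:

    /* COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE) */
    {
        .name        = "PERF_PAPC_PASX_REQ",
        .selector    = PERF_PAPC_PASX_REQ,
        .query_type  = PIPE_DRIVER_QUERY_TYPE_UINT64,
        .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
    }

    /* COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI) */
    {
        .select_reg     = REG_A2XX_PA_SU_PERFCOUNTER0_SELECT,
        .counter_reg_lo = REG_A2XX_PA_SU_PERFCOUNTER0_LOW,
        .counter_reg_hi = REG_A2XX_PA_SU_PERFCOUNTER0_HI,
    }

    /* GROUP("PA_SU", pa_su_counters, pa_su_countables) */
    {
        .name           = "PA_SU",
        .num_counters   = ARRAY_SIZE(pa_su_counters),
        .counters       = pa_su_counters,
        .num_countables = ARRAY_SIZE(pa_su_countables),
        .countables     = pa_su_countables,
    }
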
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.c b/src/gallium/drivers/freedreno/a2xx/fd2_query.c
new file mode 100644
index 00000000000..9e5bb450cd4
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_query.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Jonathan Marek <jonathan@marek.ca>
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+/* NOTE: perfcntrs are 48 bits, but we only have a 32-bit accumulate (?),
+ * so we work with 32 bits only. We accumulate start/stop separately,
+ * which differs from a5xx but works with accumulate only (no add/neg).
+ */
+
+#include "freedreno_query_acc.h"
+#include "freedreno_resource.h"
+
+#include "fd2_context.h"
+#include "fd2_query.h"
+
+struct PACKED fd2_query_sample {
+ uint32_t start;
+ uint32_t stop;
+};
+
+/* offset of a single field of an array of fd2_query_sample: */
+#define query_sample_idx(aq, idx, field) \
+ fd_resource((aq)->prsc)->bo, \
+ (idx * sizeof(struct fd2_query_sample)) + \
+ offsetof(struct fd2_query_sample, field), \
+ 0, 0
+
+/* offset of a single field of fd2_query_sample: */
+#define query_sample(aq, field) \
+ query_sample_idx(aq, 0, field)
+
+/*
+ * Performance Counter (batch) queries:
+ *
+ * Only one of these is active at a time, per design of the gallium
+ * batch_query API. One perfcntr query tracks N query_types,
+ * each of which has a 'fd_batch_query_entry' that maps it back to
+ * the associated group and counter.
+ */
+
+struct fd_batch_query_entry {
+ uint8_t gid; /* group-id */
+ uint8_t cid; /* countable-id within the group */
+};
+
+struct fd_batch_query_data {
+ struct fd_screen *screen;
+ unsigned num_query_entries;
+ struct fd_batch_query_entry query_entries[];
+};
+
+static void
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
+
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ fd_wfi(batch, ring);
+
+ /* configure performance counters for the requested queries: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+
+ debug_assert(counter_idx < g->num_counters);
+
+ OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
+ OUT_RING(ring, g->countables[entry->cid].selector);
+ }
+
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ /* and snapshot the start values */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE);
+ OUT_RELOCW(ring, query_sample_idx(aq, i, start));
+ }
+}
+
+static void
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
+
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ fd_wfi(batch, ring);
+
+ /* TODO do we need to bother to turn anything off? */
+
+ /* snapshot the end values: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE);
+ OUT_RELOCW(ring, query_sample_idx(aq, i, stop));
+ }
+}
+
+static void
+perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
+ union pipe_query_result *result)
+{
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd2_query_sample *sp = buf;
+
+ for (unsigned i = 0; i < data->num_query_entries; i++)
+ result->batch[i].u64 = sp[i].stop - sp[i].start;
+}
+
+static const struct fd_acc_sample_provider perfcntr = {
+ .query_type = FD_QUERY_FIRST_PERFCNTR,
+ .active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
+ .resume = perfcntr_resume,
+ .pause = perfcntr_pause,
+ .result = perfcntr_accumulate_result,
+};
+
+static struct pipe_query *
+fd2_create_batch_query(struct pipe_context *pctx,
+ unsigned num_queries, unsigned *query_types)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_screen *screen = ctx->screen;
+ struct fd_query *q;
+ struct fd_acc_query *aq;
+ struct fd_batch_query_data *data;
+
+ data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
+ num_queries * sizeof(data->query_entries[0]));
+
+ data->screen = screen;
+ data->num_query_entries = num_queries;
+
+ /* validate the requested query_types and ensure we don't try
+ * to request more query_types of a given group than we have
+ * counters:
+ */
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ for (unsigned i = 0; i < num_queries; i++) {
+ unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+ /* verify valid query_type, ie. is it actually a perfcntr? */
+ if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+ (idx >= screen->num_perfcntr_queries)) {
+ debug_printf("invalid batch query query_type: %u\n", query_types[i]);
+ goto error;
+ }
+
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+ entry->gid = pq->group_id;
+
+ /* the perfcntr_queries[] table flattens all the countables
+ * for each group in series, ie:
+ *
+ * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+ *
+ * So to find the countable index just step back through the
+ * table to find the first entry with the same group-id.
+ */
+ while (pq > screen->perfcntr_queries) {
+ pq--;
+ if (pq->group_id == entry->gid)
+ entry->cid++;
+ }
+
+ if (counters_per_group[entry->gid] >=
+ screen->perfcntr_groups[entry->gid].num_counters) {
+ debug_printf("too many counters for group %u\n", entry->gid);
+ goto error;
+ }
+
+ counters_per_group[entry->gid]++;
+ }
+
+ q = fd_acc_create_query2(ctx, 0, &perfcntr);
+ aq = fd_acc_query(q);
+
+ /* sample buffer size is based on # of queries: */
+ aq->size = num_queries * sizeof(struct fd2_query_sample);
+ aq->query_data = data;
+
+ return (struct pipe_query *)q;
+
+error:
+ free(data);
+ return NULL;
+}
+
+void
+fd2_query_context_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->create_query = fd_acc_create_query;
+ ctx->query_set_stage = fd_acc_query_set_stage;
+
+ pctx->create_batch_query = fd2_create_batch_query;
+}
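
The countable-index recovery in fd2_create_batch_query() relies on the screen's perfcntr_queries[] table listing every (group, countable) pair back to back, so stepping backwards until the group id changes yields the countable id. A standalone sketch of that same walk, over a hypothetical flattened table (not the real a2xx list):

    #include <stdio.h>

    struct pq { unsigned group_id; const char *name; };

    /* hypothetical flattened table: (G0,C0)..(G0,C2), (G1,C0), (G1,C1) */
    static const struct pq queries[] = {
        {0, "G0_C0"}, {0, "G0_C1"}, {0, "G0_C2"},
        {1, "G1_C0"}, {1, "G1_C1"},
    };

    int main(void)
    {
        /* recover (gid, cid) for flat index 4, same walk as the driver:
         * step back and count the entries with a matching group id */
        const struct pq *pq = &queries[4];
        unsigned gid = pq->group_id, cid = 0;

        while (pq > queries) {
            pq--;
            if (pq->group_id == gid)
                cid++;
        }

        printf("flat index 4 -> gid=%u cid=%u\n", gid, cid); /* gid=1 cid=1 */
        return 0;
    }
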
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.h b/src/gallium/drivers/freedreno/a2xx/fd2_query.h
new file mode 100644
index 00000000000..4d9f3e39f81
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_query.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2019 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Jonathan Marek <jonathan@marek.ca>
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD2_QUERY_H_
+#define FD2_QUERY_H_
+
+#include "pipe/p_context.h"
+
+void fd2_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD2_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
index 3b44147cb3d..f0253238123 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
@@ -105,6 +105,9 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
return retval == usage;
}
+extern const struct fd_perfcntr_group a2xx_perfcntr_groups[];
+extern const unsigned a2xx_num_perfcntr_groups;
+
void
fd2_screen_init(struct pipe_screen *pscreen)
{
@@ -114,4 +117,9 @@ fd2_screen_init(struct pipe_screen *pscreen)
pscreen->context_create = fd2_context_create;
pscreen->is_format_supported = fd2_screen_is_format_supported;
screen->setup_slices = fd2_setup_slices;
+
+ if (fd_mesa_debug & FD_DBG_PERFC) {
+ screen->perfcntr_groups = a2xx_perfcntr_groups;
+ screen->num_perfcntr_groups = a2xx_num_perfcntr_groups;
+ }
}
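
With the groups registered on the screen (and only when FD_DBG_PERFC is set, per the hunk above), the counters are reachable through the regular gallium driver-query interface. A rough consumer-side sketch, assuming valid pscreen/pctx pointers, at least two exposed driver queries, and with error handling omitted:

    /* needs <stdio.h>, <inttypes.h> and the gallium p_screen.h/p_context.h headers */
    struct pipe_driver_query_info info[2];
    unsigned query_types[2];

    pscreen->get_driver_query_info(pscreen, 0, &info[0]);
    pscreen->get_driver_query_info(pscreen, 1, &info[1]);
    query_types[0] = info[0].query_type;
    query_types[1] = info[1].query_type;

    struct pipe_query *q = pctx->create_batch_query(pctx, 2, query_types);

    pctx->begin_query(pctx, q);
    /* ... draws to be profiled ... */
    pctx->end_query(pctx, q);

    union pipe_query_result result;
    if (pctx->get_query_result(pctx, q, true, &result)) {
        printf("%s: %" PRIu64 "\n", info[0].name, result.batch[0].u64);
        printf("%s: %" PRIu64 "\n", info[1].name, result.batch[1].u64);
    }
    pctx->destroy_query(pctx, q);
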
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build
index 1e3a3037014..90c0b62aeb6 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -68,8 +68,11 @@ files_libfreedreno = files(
'a2xx/fd2_emit.h',
'a2xx/fd2_gmem.c',
'a2xx/fd2_gmem.h',
+ 'a2xx/fd2_perfcntr.c',
'a2xx/fd2_program.c',
'a2xx/fd2_program.h',
+ 'a2xx/fd2_query.c',
+ 'a2xx/fd2_query.h',
'a2xx/fd2_rasterizer.c',
'a2xx/fd2_rasterizer.h',
'a2xx/fd2_resource.c',