summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDanylo Piliaiev <dpiliaiev@igalia.com>2021-12-28 21:44:55 +0200
committerMarge Bot <emma+marge@anholt.net>2022-02-10 15:15:33 +0000
commitb84f0596808574bb0d37355a896eaaf1aafe277f (patch)
tree01168f649739157bcf639223c47ed281cb686729
parent03ab9d895e86668f7b2f8e118cefb7341b8b78b9 (diff)
freedreno/pps: Expose same counters as blob
Expose most of the counters exposed by blob. By faking the value of counters returned from kgsl I found the exact underlying counters and constant coefficients being used. Note, coefficients for counters that depend on time are NOT verified. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14323>
-rw-r--r--src/freedreno/ds/fd_pps_driver.cc331
1 files changed, 318 insertions, 13 deletions
diff --git a/src/freedreno/ds/fd_pps_driver.cc b/src/freedreno/ds/fd_pps_driver.cc
index 97305c911ae..deadeae11c9 100644
--- a/src/freedreno/ds/fd_pps_driver.cc
+++ b/src/freedreno/ds/fd_pps_driver.cc
@@ -16,6 +16,27 @@
namespace pps
{
+double
+safe_div(uint64_t a, uint64_t b)
+{ /* Divide a by b in double precision; a zero divisor yields 0. */
+ if (b == 0)
+ return 0;
+
+ return a / static_cast<double>(b); /* the cast makes this a floating-point division */
+}
+
+float
+percent(uint64_t a, uint64_t b)
+{
+ /* Express a as a percentage of b.
+ *
+ * Sometimes we get bogus values, but we want the timeline to look
+ * nice without values higher than 100%, so a > b is reported as 0.
+ * A zero b is reported as 0 as well, which also avoids dividing by
+ * zero below.
+ */
+ if (b == 0 || a > b)
+ return 0;
+
+ return 100.f * (a / static_cast<double>(b)); /* ratio is computed in double, then narrowed to the float return type */
+}
+
uint64_t
FreedrenoDriver::get_min_sampling_period_ns()
{
@@ -45,14 +66,58 @@ FreedrenoDriver::setup_a6xx_counters()
auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
- auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
- auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
+ auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
+
+ auto PERF_TP_OUTPUT_PIXELS = countable("PERF_TP_OUTPUT_PIXELS");
+ auto PERF_TP_OUTPUT_PIXELS_ANISO = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
+ auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
+ auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
+ auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
+
+ auto PERF_TSE_INPUT_PRIM = countable("PERF_TSE_INPUT_PRIM");
+ auto PERF_TSE_CLIPPED_PRIM = countable("PERF_TSE_CLIPPED_PRIM");
+ auto PERF_TSE_TRIVAL_REJ_PRIM = countable("PERF_TSE_TRIVAL_REJ_PRIM");
+ auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
+
auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
+ auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
+ auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
+ auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
+ auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
+ auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
+ auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
+ auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
+ auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
+ auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
+ auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
+ auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
+ auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
+
+ auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
+ auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
+ auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
+ auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
+ auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
+
+ auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
+ auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
+ auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
+
+ auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
+
+ auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
+ auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
+
+ /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
+ // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
/*
* And then setup the derived counters that we are exporting to
- * pps based on the captured countable values
+ * pps based on the captured countable values.
+ *
+ * We try to expose the same counters as blob:
+ * https://gpuinspector.dev/docs/gpu-counters/qualcomm
*/
counter("GPU Frequency", Counter::Units::Hertz, [=]() {
@@ -61,29 +126,269 @@ FreedrenoDriver::setup_a6xx_counters()
);
counter("GPU % Utilization", Counter::Units::Percent, [=]() {
- return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
+ return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
}
);
- // This one is a bit of a guess, but seems plausible..
- counter("ALU / Fragment", Counter::Units::None, [=]() {
+ counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
+ return PERF_TP_L1_CACHELINE_MISSES / time;
+ }
+ );
+
+ counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
+ }
+ );
+
+ /* TODO: verify */
+ counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
+ }
+ );
+
+ /* TODO: verify */
+ counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
+ return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
+ }
+ );
+
+ counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
+ return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
+ }
+ );
+
+ counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
+ return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
+ }
+ );
+
+ counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
+ return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
+ }
+ );
+
+ /* TODO: verify */
+ counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
+ return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
+ }
+ );
+
+ counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
+ return PERF_TSE_INPUT_PRIM * (1.f / time);
+ }
+ );
+
+ counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
+ return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
+ }
+ );
+
+ counter("% Prims Clipped", Counter::Units::Percent, [=]() {
+ return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
+ }
+ );
+
+ /* Uses safe_div(), like the other per-vertex and per-fragment ratios,
+ * so a sample with PERF_TSE_INPUT_PRIM == 0 reports 0 rather than
+ * dividing by zero, and the average keeps its fractional part.
+ */
+ counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
+ return safe_div(PERF_PC_VS_INVOCATIONS, PERF_TSE_INPUT_PRIM);
+ }
+ );
+
+ counter("Reused Vertices / Second", Counter::Units::None, [=]() {
+ return PERF_PC_VERTEX_HITS * (1.f / time);
+ }
+ );
+
+ counter("Average Polygon Area", Counter::Units::None, [=]() {
+ return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
+ }
+ );
+
+ /* TODO: find formula */
+ // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
+ // return 100.0 * 0;
+ // }
+ // );
+
+ counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
+ return PERF_PC_VS_INVOCATIONS * (1.f / time);
+ }
+ );
+
+ counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
+ return PERF_HLSQ_QUADS * 4 * (1.f / time);
+ }
+ );
+
+ counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
+ return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
+ PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
+ }
+ );
+
+ counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
- PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS;
+ PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
+ PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
}
);
- counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
- return PERF_TP_L1_CACHELINE_MISSES / time;
+ counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
+ return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
}
);
- counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
- return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);
+ counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
+ return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
+ }
+ );
+
+ counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
+ return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
+ }
+ );
+
+ counter("Textures / Vertex", Counter::Units::None, [=]() {
+ return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
+ }
+ );
+
+ counter("Textures / Fragment", Counter::Units::None, [=]() {
+ return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
+ }
+ );
+
+ counter("ALU / Vertex", Counter::Units::None, [=]() {
+ return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
+ }
+ );
+
+ counter("EFU / Vertex", Counter::Units::None, [=]() {
+ return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
+ }
+ );
+
+ counter("ALU / Fragment", Counter::Units::None, [=]() {
+ return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
+ PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
+ }
+ );
+
+ counter("EFU / Fragment", Counter::Units::None, [=]() {
+ return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
+ }
+ );
+
+ counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
+ (PERF_SP_ANY_EU_WORKING_VS_STAGE +
+ PERF_SP_ANY_EU_WORKING_FS_STAGE +
+ PERF_SP_ANY_EU_WORKING_CS_STAGE));
+ }
+ );
+
+ counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
+ (PERF_SP_ANY_EU_WORKING_VS_STAGE +
+ PERF_SP_ANY_EU_WORKING_FS_STAGE +
+ PERF_SP_ANY_EU_WORKING_CS_STAGE));
+ }
+ );
+
+ counter("% Time Compute", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
+ (PERF_SP_ANY_EU_WORKING_VS_STAGE +
+ PERF_SP_ANY_EU_WORKING_FS_STAGE +
+ PERF_SP_ANY_EU_WORKING_CS_STAGE));
+ }
+ );
+
+ counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
+ return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
+ PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
+ PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
+ PERF_SP_BUSY_CYCLES);
+ }
+ );
+
+ counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
}
);
- // TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
- // for what blob exposes
+ counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
+ return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
+ }
+ );
+
+ counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
+ return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
+ }
+ );
+
+ counter("% Linear Filtered", Counter::Units::Percent, [=]() {
+ return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
+ }
+ );
+
+ counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
+ return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
+ }
+ );
+
+ counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
+ return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
+ }
+ );
+
+ /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
+ // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
+ // return * (1.f / time);
+ // }
+ // );
+
+ /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
+ // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
+ // return * (1.f / time);
+ // }
+ // );
+
+ /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
+ // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
+ // return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
+ // }
+ // );
+
+ /* TODO: verify */
+ counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
+ return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
+ }
+ );
+
+ /* TODO: verify */
+ counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
+ return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
+ }
+ );
+
+ counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
+ return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
+ }
+ );
+
+ counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
+ return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
+ }
+ );
+
+ counter("Preemptions / second", Counter::Units::None, [=]() {
+ return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
+ }
+ );
+
+ counter("Avg Preemption Delay", Counter::Units::None, [=]() {
+ return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
+ }
+ );
}
/**