/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Expose V3D HW perf counters.
 *
 * We also have code to fake support for occlusion queries.
 * Since we expose support for GL 2.0, we have to expose occlusion queries,
 * but the spec allows you to expose 0 query counter bits, so we just return 0
 * as the result of all our queries.
 */
#include "vc4_context.h"

/**
 * Driver-side object behind the opaque struct pipe_query handle.
 *
 * hwperfmon is NULL for non-HW (faked) queries, whose result is always
 * reported as 0.  For HW queries, num_queries is the number of counters in
 * the batch.
 */
struct vc4_query
{
        unsigned num_queries;
        struct vc4_hwperfmon *hwperfmon;
};

/*
 * Names of the HW counters.  The position in this table is the event number:
 * a counter's query type is PIPE_QUERY_DRIVER_SPECIFIC + index, and that
 * offset is what gets stored in the perfmon event list.
 *
 * The spelling of "discared" is intentionally left untouched: these strings
 * are handed to callers through info->name.
 */
static const char *v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};

/**
 * Describe the single "V3D counters" query group.
 *
 * Returns 0 when the screen has no perfmon ioctl or when index is not 0.
 * With a NULL info, returns 1 (only one group is exposed).  Otherwise fills
 * in info and returns 1.
 */
int
vc4_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
                                struct pipe_driver_query_group_info *info)
{
        struct vc4_screen *screen = vc4_screen(pscreen);

        if (!screen->has_perfmon_ioctl)
                return 0;

        if (!info)
                return 1;

        if (index > 0)
                return 0;

        info->name = "V3D counters";
        info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
        info->num_queries = ARRAY_SIZE(v3d_counter_names);

        return 1;
}

/**
 * Describe the HW counter at position index of v3d_counter_names.
 *
 * Returns 0 when the screen has no perfmon ioctl or when index is out of
 * range.  With a NULL info, returns the number of counters.  Otherwise fills
 * in info and returns 1.
 */
int
vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                          struct pipe_driver_query_info *info)
{
        struct vc4_screen *screen = vc4_screen(pscreen);

        if (!screen->has_perfmon_ioctl)
                return 0;

        if (!info)
                return ARRAY_SIZE(v3d_counter_names);

        if (index >= ARRAY_SIZE(v3d_counter_names))
                return 0;

        info->group_id = 0;
        info->name = v3d_counter_names[index];
        info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
        info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;

        return 1;
}

/**
 * Allocate a query covering num_queries query types.
 *
 * A batch made only of non-HW types yields a query without a perfmon.  A
 * batch made only of HW types (>= PIPE_QUERY_DRIVER_SPECIFIC) additionally
 * gets a vc4_hwperfmon holding the event numbers.  The kernel perfmon itself
 * is only created in vc4_begin_query().
 *
 * Returns NULL on allocation failure, when HW and non-HW types are mixed, or
 * when more HW counters are requested than DRM_VC4_MAX_PERF_COUNTERS.
 */
static struct pipe_query *
vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
                       unsigned *query_types)
{
        struct vc4_query *query = calloc(1, sizeof(*query));
        struct vc4_hwperfmon *hwperfmon;
        unsigned i, nhwqueries = 0;

        if (!query)
                return NULL;

        for (i = 0; i < num_queries; i++) {
                if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
                        nhwqueries++;
        }

        /* We can't mix HW and non-HW queries. */
        if (nhwqueries && nhwqueries != num_queries)
                goto err_free_query;

        if (!nhwqueries)
                return (struct pipe_query *)query;

        /* Never write past the per-perfmon event list.
         * NOTE(review): assumes hwperfmon->events holds
         * DRM_VC4_MAX_PERF_COUNTERS entries, matching the
         * max_active_queries value advertised for the group -- confirm
         * against vc4_context.h.
         */
        if (num_queries > DRM_VC4_MAX_PERF_COUNTERS)
                goto err_free_query;

        hwperfmon = calloc(1, sizeof(*hwperfmon));
        if (!hwperfmon)
                goto err_free_query;

        for (i = 0; i < num_queries; i++)
                hwperfmon->events[i] = query_types[i] -
                                       PIPE_QUERY_DRIVER_SPECIFIC;

        query->hwperfmon = hwperfmon;
        query->num_queries = num_queries;

        /* Note that struct pipe_query isn't actually defined anywhere. */
        return (struct pipe_query *)query;

err_free_query:
        free(query);

        return NULL;
}

/**
 * Allocate a query for a single query type; index is ignored.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        return vc4_create_batch_query(ctx, 1, &query_type);
}

/**
 * Release a query, along with its perfmon state if it has any.
 */
static void
vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
        struct vc4_context *ctx = vc4_context(pctx);
        struct vc4_query *query = (struct vc4_query *)pquery;

        if (query->hwperfmon) {
                /* A kernel perfmon only exists once the query has been
                 * begun (non-zero id); the host-side struct must be freed
                 * either way.
                 */
                if (query->hwperfmon->id) {
                        struct drm_vc4_perfmon_destroy req = { };

                        req.id = query->hwperfmon->id;
                        vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
                                  &req);
                }

                free(query->hwperfmon);
        }

        free(query);
}

/**
 * Start a query.
 *
 * Non-HW queries succeed immediately.  For HW queries a fresh kernel perfmon
 * is created (destroying the one left from a previous run, if any), pending
 * jobs are flushed and the perfmon becomes the context's active one.
 *
 * Returns false if another perfmon is already active on the context or if
 * the kernel perfmon could not be created.
 */
static boolean
vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
        struct vc4_query *query = (struct vc4_query *)pquery;
        struct vc4_context *ctx = vc4_context(pctx);
        struct drm_vc4_perfmon_create req = { };
        unsigned i;
        int ret;

        if (!query->hwperfmon)
                return true;

        /* Only one perfmon can be activated per context. */
        if (ctx->perfmon)
                return false;

        /* Reset the counters by destroying the previously allocated perfmon */
        if (query->hwperfmon->id) {
                struct drm_vc4_perfmon_destroy destroyreq = { };

                destroyreq.id = query->hwperfmon->id;
                vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);

                /* Forget the id so that it is not destroyed a second time
                 * if the creation below fails.
                 */
                query->hwperfmon->id = 0;
        }

        for (i = 0; i < query->num_queries; i++)
                req.events[i] = query->hwperfmon->events[i];

        req.ncounters = query->num_queries;
        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
        if (ret)
                return false;

        query->hwperfmon->id = req.id;

        /* Make sure all pending jobs are flushed before activating the
         * perfmon.
         */
        vc4_flush(pctx);
        ctx->perfmon = query->hwperfmon;

        return true;
}

/**
 * Stop a query.
 *
 * Non-HW queries succeed immediately.  For HW queries, returns false unless
 * this query's perfmon is the one currently active on the context.
 */
static bool
vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
        struct vc4_query *query = (struct vc4_query *)pquery;
        struct vc4_context *ctx = vc4_context(pctx);

        if (!query->hwperfmon)
                return true;

        if (ctx->perfmon != query->hwperfmon)
                return false;

        /* Make sure all pending jobs are flushed before deactivating the
         * perfmon.
         */
        vc4_flush(pctx);
        ctx->perfmon = NULL;

        return true;
}

/**
 * Fetch the result of a query.
 *
 * Non-HW queries always report 0 in vresult->u64.  For HW queries, waits on
 * the perfmon's last seqno (forever if wait is set, otherwise with a zero
 * timeout), reads the counter values from the kernel and stores one value
 * per counter in vresult->batch[].
 *
 * Returns false if the wait or the ioctl fails.
 */
static boolean
vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
                     boolean wait, union pipe_query_result *vresult)
{
        struct vc4_context *ctx = vc4_context(pctx);
        struct vc4_query *query = (struct vc4_query *)pquery;
        /* Zero-initialized like every other ioctl argument in this file. */
        struct drm_vc4_perfmon_get_values req = { };
        unsigned i;
        int ret;

        if (!query->hwperfmon) {
                vresult->u64 = 0;
                return true;
        }

        if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
                            wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
                return false;

        req.id = query->hwperfmon->id;
        req.values_ptr = (uintptr_t)query->hwperfmon->counters;
        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
        if (ret)
                return false;

        for (i = 0; i < query->num_queries; i++)
                vresult->batch[i].u64 = query->hwperfmon->counters[i];

        return true;
}

/**
 * Intentionally empty: the enable flag is not acted upon.
 */
static void
vc4_set_active_query_state(struct pipe_context *pctx, boolean enable)
{
}

/**
 * Install the query entry points on the context.
 */
void
vc4_query_init(struct pipe_context *pctx)
{
        pctx->create_query = vc4_create_query;
        pctx->create_batch_query = vc4_create_batch_query;
        pctx->destroy_query = vc4_destroy_query;
        pctx->begin_query = vc4_begin_query;
        pctx->end_query = vc4_end_query;
        pctx->get_query_result = vc4_get_query_result;
        pctx->set_active_query_state = vc4_set_active_query_state;
}