Diffstat (limited to 'src/gallium/drivers/radeonsi/si_perfcounter.c')
-rw-r--r--  src/gallium/drivers/radeonsi/si_perfcounter.c | 357
1 file changed, 298 insertions(+), 59 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index b553a36b424..c54854bd9a7 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -1,25 +1,7 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
- * All Rights Reserved.
*
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * SPDX-License-Identifier: MIT
*/
#include "si_build_pm4.h"
@@ -69,7 +51,7 @@ static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
value |= S_030800_SE_BROADCAST_WRITES(1);
}
- if (sctx->chip_class >= GFX10) {
+ if (sctx->gfx_level >= GFX10) {
/* TODO: Expose counters from each shader array separately if needed. */
value |= S_030800_SA_BROADCAST_WRITES(1);
}
@@ -81,18 +63,16 @@ static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
}
radeon_begin(cs);
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value);
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value);
radeon_end();
}
-static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
+void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders)
{
- struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
radeon_begin(cs);
- radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
- radeon_emit(cs, shaders & 0x7f);
- radeon_emit(cs, 0xffffffff);
+ radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2);
+ radeon_emit(shaders & 0x7f);
+ radeon_emit(0xffffffff);
radeon_end();
}
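The mechanical churn across these hunks comes from the radeon_begin()/radeon_end() macro family: radeon_begin() now captures the command buffer in block-local state, so radeon_emit() and the radeon_set_*_reg() helpers no longer take an explicit cs argument. A minimal sketch of the idea, simplified from si_build_pm4.h (the real macros also track the initial dword count for validation):

#include <stdint.h>

/* Simplified sketch of the cs-less emit macros (assumed; see
 * si_build_pm4.h for the real, checked definitions). */
#define radeon_begin(cs)                        \
   struct radeon_cmdbuf *__cs = (cs);           \
   unsigned __cs_num = __cs->current.cdw;       \
   uint32_t *__cs_buf = __cs->current.buf

#define radeon_emit(value) ((void)(__cs_buf[__cs_num++] = (value)))

#define radeon_end() ((void)(__cs->current.cdw = __cs_num))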
@@ -112,13 +92,13 @@ static void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block
radeon_begin(cs);
for (idx = 0; idx < count; ++idx) {
- radeon_set_uconfig_reg_seq(cs, regs->select0[idx], 1, false);
- radeon_emit(cs, selectors[idx] | regs->select_or);
+ radeon_set_uconfig_reg_seq(regs->select0[idx], 1);
+ radeon_emit(selectors[idx] | regs->select_or);
}
for (idx = 0; idx < regs->num_spm_counters; idx++) {
- radeon_set_uconfig_reg_seq(cs, regs->select1[idx], 1, false);
- radeon_emit(cs, 0);
+ radeon_set_uconfig_reg_seq(regs->select1[idx], 1);
+ radeon_emit(0);
}
radeon_end();
@@ -132,11 +112,11 @@ static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer
COPY_DATA_IMM, NULL, 1);
radeon_begin(cs);
- radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
+ radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
- radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
+ radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
radeon_end();
}
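The EVENT_WRITE pairs above are ordinary PM4 type-3 packets. The header uses the standard PM4 encoding (packet type in bits 30-31, body dword count minus one in bits 16-29, IT opcode in bits 8-15, predicate in bit 0), so PKT3(PKT3_EVENT_WRITE, 0, 0) is one header dword followed by a single EVENT_TYPE dword. A self-contained sketch of that encoding (EVENT_WRITE's opcode is 0x46):

#include <stdint.h>

/* Standard PM4 type-3 packet header; `count` is body dwords minus one. */
static uint32_t pm4_pkt3(uint32_t opcode, uint32_t count, uint32_t predicate)
{
   return (3u << 30) | ((count & 0x3fffu) << 16) |
          ((opcode & 0xffu) << 8) | (predicate & 1u);
}
/* pm4_pkt3(0x46, 0, 0) == PKT3(PKT3_EVENT_WRITE, 0, 0) */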
@@ -152,16 +132,72 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
radeon_begin(cs);
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
+
+ if (!sctx->screen->info.never_send_perfcounter_stop) {
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
+ }
+
radeon_set_uconfig_reg(
- cs, R_036020_CP_PERFMON_CNTL,
- S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
+ R_036020_CP_PERFMON_CNTL,
+ S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ?
+ V_036020_CP_PERFMON_STATE_START_COUNTING :
+ V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
+ S_036020_PERFMON_SAMPLE_ENABLE(1));
+ radeon_end();
+}
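Start and stop both drive the CP_PERFMON_CNTL state machine. The two radeon_info flags consulted here read as hardware workarounds: on the affected chips the driver skips the PERFCOUNTER_STOP event, or keeps PERFMON_STATE at START_COUNTING while still setting SAMPLE_ENABLE, presumably to avoid hangs. A standalone sketch of the register encoding assumed by the S_036020_* macros (PERFMON_STATE in bits 0-3, SPM_PERFMON_STATE in bits 4-7, SAMPLE_ENABLE in bit 10):

#include <stdbool.h>
#include <stdint.h>

enum perfmon_state {
   PERFMON_DISABLE_AND_RESET = 0,
   PERFMON_START_COUNTING    = 1,
   PERFMON_STOP_COUNTING     = 2,
};

/* Sketch of the CP_PERFMON_CNTL value built by the S_036020_* macros. */
static uint32_t cp_perfmon_cntl(enum perfmon_state state,
                                enum perfmon_state spm_state,
                                bool sample_enable)
{
   return (uint32_t)state | ((uint32_t)spm_state << 4) |
          ((sample_enable ? 1u : 0u) << 10);
}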
+
+void si_pc_emit_spm_start(struct radeon_cmdbuf *cs)
+{
+ radeon_begin(cs);
+
+ /* Start SPM counters. */
+ radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+ S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
+ /* Start windowed performance counters. */
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
+ radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
+
+ radeon_end();
+}
+
+void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
+ bool never_send_perfcounter_stop)
+{
+ radeon_begin(cs);
+
+ /* Stop windowed performance counters. */
+ if (!never_send_perfcounter_stop) {
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
+ }
+
+ radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));
+
+ /* Stop SPM counters. */
+ radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+ S_036020_SPM_PERFMON_STATE(never_stop_sq_perf_counters ?
+ V_036020_STRM_PERFMON_STATE_START_COUNTING :
+ V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
+
radeon_end();
}
+void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs)
+{
+ radeon_begin(cs);
+ radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+ S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
+ radeon_end();
+}
+
static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
uint64_t va)
{
@@ -178,26 +214,26 @@ static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block,
if (regs->counters)
reg = regs->counters[idx];
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_COUNT_SEL); /* 64 bits */
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ radeon_emit(reg >> 2);
+ radeon_emit(0); /* unused */
+ radeon_emit(va);
+ radeon_emit(va >> 32);
va += sizeof(uint64_t);
reg += reg_delta;
}
} else {
/* Fake counters. */
for (idx = 0; idx < count; ++idx) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_COUNT_SEL);
- radeon_emit(cs, 0); /* immediate */
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_COUNT_SEL);
+ radeon_emit(0); /* immediate */
+ radeon_emit(0);
+ radeon_emit(va);
+ radeon_emit(va >> 32);
va += sizeof(uint64_t);
}
}
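Each COPY_DATA packet above moves one 64-bit value (COPY_DATA_COUNT_SEL selects the 64-bit copy) from the perf-register space, or an immediate zero for fake counters, into the results buffer, with va stepping by sizeof(uint64_t). The readback side is therefore a dense array of little-endian 64-bit slots in emission order; an illustrative reader:

#include <stdint.h>

/* Fetch counter `idx` from the mapped results buffer written above. */
static uint64_t pc_read_result(const void *mapped, unsigned idx)
{
   return ((const uint64_t *)mapped)[idx];
}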
@@ -222,13 +258,16 @@ static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery
void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
{
+ if (sctx->gfx_level >= GFX11)
+ return;
+
radeon_begin(&sctx->gfx_cs);
- if (sctx->chip_class >= GFX10) {
- radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
+ if (sctx->gfx_level >= GFX10) {
+ radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
S_037390_PERFMON_CLOCK_STATE(inhibit));
- } else if (sctx->chip_class >= GFX8) {
- radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
+ } else if (sctx->gfx_level >= GFX8) {
+ radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
S_0372FC_PERFMON_CLOCK_STATE(inhibit));
}
radeon_end();
@@ -248,7 +287,7 @@ static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
si_need_gfx_cs_space(sctx, 0);
if (query->shaders)
- si_pc_emit_shaders(sctx, query->shaders);
+ si_pc_emit_shaders(&sctx->gfx_cs, query->shaders);
si_inhibit_clockgating(sctx, &sctx->gfx_cs, true);
@@ -643,6 +682,7 @@ void si_destroy_perfcounters(struct si_screen *screen)
return;
ac_destroy_perfcounters(&pc->base);
+ FREE(pc);
screen->perfcounters = NULL;
}
@@ -665,3 +705,202 @@ void si_init_perfcounters(struct si_screen *screen)
si_destroy_perfcounters(screen);
}
}
+
+static bool
+si_spm_init_bo(struct si_context *sctx)
+{
+ struct radeon_winsys *ws = sctx->ws;
+ uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */
+
+ sctx->spm.buffer_size = size;
+ sctx->spm.sample_interval = 4096; /* Default to 4096 clk. */
+
+ sctx->spm.bo = ws->buffer_create(
+ ws, size, 4096,
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_GTT_WC |
+ RADEON_FLAG_NO_SUBALLOC);
+
+ return sctx->spm.bo != NULL;
+}
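The 32 MiB default trades memory against capture length before the ring wraps. Assuming each enabled muxsel line contributes one 256-bit (32-byte) data line per sample segment — an assumption about the RLC segment layout, not something this patch states — a rough capacity estimate looks like:

#include <stdint.h>

/* Back-of-envelope: samples that fit before the SPM ring wraps, assuming
 * 32 bytes of counter data per muxsel line per sample (assumed layout). */
static uint64_t spm_samples_before_wrap(uint64_t ring_size,
                                        uint32_t total_muxsel_lines)
{
   uint64_t bytes_per_sample = (uint64_t)total_muxsel_lines * 32;
   return bytes_per_sample ? ring_size / bytes_per_sample : 0;
}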
+
+static void
+si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
+{
+ struct ac_spm *spm = &sctx->spm;
+
+ radeon_begin(cs);
+
+ for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
+ uint32_t num_counters = spm->sqg[instance].num_counters;
+
+ if (!num_counters)
+ continue;
+
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+ S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1) |
+ S_030800_SE_INDEX(instance));
+
+ for (uint32_t b = 0; b < num_counters; b++) {
+ const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
+ uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+
+ radeon_set_uconfig_reg_seq(reg_base + b * 4, 1);
+ radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK is GFX10-only */
+ }
+ }
+
+ for (uint32_t b = 0; b < spm->num_block_sel; b++) {
+ struct ac_spm_block_select *block_sel = &spm->block_sel[b];
+ struct ac_pc_block_base *regs = block_sel->b->b->b;
+
+ for (unsigned i = 0; i < block_sel->num_instances; i++) {
+ struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
+
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
+
+ for (unsigned c = 0; c < block_instance->num_counters; c++) {
+ const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
+
+ if (!cntr_sel->active)
+ continue;
+
+ radeon_set_uconfig_reg_seq(regs->select0[c], 1);
+ radeon_emit(cntr_sel->sel0);
+
+ radeon_set_uconfig_reg_seq(regs->select1[c], 1);
+ radeon_emit(cntr_sel->sel1);
+ }
+ }
+ }
+
+ /* Restore global broadcasting. */
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ radeon_end();
+}
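All of the per-instance selects depend on GRBM_GFX_INDEX to steer register writes to one SE/SA/instance or to broadcast them. A standalone sketch of the packing used by the S_030800_* macros (INSTANCE_INDEX in bits 0-7, SH_INDEX in bits 8-15, SE_INDEX in bits 16-23, broadcast enables in bits 29-31):

#include <stdbool.h>
#include <stdint.h>

/* Sketch of GRBM_GFX_INDEX packing (assumed layout, per S_030800_*). */
static uint32_t grbm_gfx_index(uint32_t se, uint32_t sh, uint32_t instance,
                               bool se_bcast, bool sh_bcast, bool inst_bcast)
{
   return (instance & 0xffu) | ((sh & 0xffu) << 8) | ((se & 0xffu) << 16) |
          ((sh_bcast ? 1u : 0u) << 29) |
          ((inst_bcast ? 1u : 0u) << 30) |
          ((se_bcast ? 1u : 0u) << 31);
}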
+
+#define SPM_RING_BASE_ALIGN 32
+
+void
+si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
+{
+ struct ac_spm *spm = &sctx->spm;
+ uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo);
+ uint64_t ring_size = spm->buffer_size;
+
+ /* It's required that the ring VA and the size are correctly aligned. */
+ assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
+ assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
+ assert(spm->sample_interval >= 32);
+
+ radeon_begin(cs);
+
+ /* Configure the SPM ring buffer. */
+ radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
+ S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
+ S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
+ radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
+ radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
+ S_037208_RING_BASE_HI(va >> 32));
+ radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
+
+ /* Configure the muxsel. */
+ uint32_t total_muxsel_lines = 0;
+ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+ total_muxsel_lines += spm->num_muxsel_lines[s];
+ }
+
+ radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
+ radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
+ radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
+ S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
+ S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
+ S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
+ S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
+ radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
+ S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
+ S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
+
+ /* Upload each muxsel ram to the RLC. */
+ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+ unsigned rlc_muxsel_addr, rlc_muxsel_data;
+ unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1);
+
+ if (!spm->num_muxsel_lines[s])
+ continue;
+
+ if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
+ grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
+
+ rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
+ rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
+ } else {
+ grbm_gfx_index |= S_030800_SE_INDEX(s);
+
+ rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
+ rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
+ }
+
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
+
+ for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
+ uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
+
+ /* Select MUXSEL_ADDR to point to the next muxsel. */
+ radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
+
+ /* Write the muxsel line configuration with MUXSEL_DATA. */
+ radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
+ radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME) |
+ S_370_WR_ONE_ADDR(1));
+ radeon_emit(rlc_muxsel_data >> 2);
+ radeon_emit(0);
+ radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
+ }
+ }
+ radeon_end();
+
+ /* Select SPM counters. */
+ si_emit_spm_counters(sctx, cs);
+}
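The asserts at the top of si_emit_spm_setup() state the RLC's contract for the ring: base address and size 32-byte aligned, sample interval at least 32 clocks. The same checks as a standalone predicate:

#include <stdbool.h>
#include <stdint.h>

#define SPM_RING_BASE_ALIGN 32

/* Mirrors the asserts in si_emit_spm_setup(). */
static bool spm_ring_params_ok(uint64_t va, uint64_t ring_size,
                               uint32_t sample_interval)
{
   return !(va & (SPM_RING_BASE_ALIGN - 1)) &&
          !(ring_size & (SPM_RING_BASE_ALIGN - 1)) &&
          sample_interval >= 32;
}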
+
+bool
+si_spm_init(struct si_context *sctx)
+{
+ const struct radeon_info *info = &sctx->screen->info;
+
+ sctx->screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
+ sctx->screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(sctx->screen);
+ sctx->screen->perfcounters->num_instance_cs_dwords = 3;
+
+ struct ac_perfcounters *pc = &sctx->screen->perfcounters->base;
+
+ if (!ac_init_perfcounters(info, false, false, pc))
+ return false;
+
+ if (!ac_init_spm(info, pc, &sctx->spm))
+ return false;
+
+ if (!si_spm_init_bo(sctx))
+ return false;
+
+ return true;
+}
+
+void
+si_spm_finish(struct si_context *sctx)
+{
+ struct pb_buffer_lean *bo = sctx->spm.bo;
+ radeon_bo_reference(sctx->screen->ws, &bo, NULL);
+
+ ac_destroy_spm(&sctx->spm);
+}
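Taken together, the new entry points imply a session shape along these lines (an assumed usage sketch, not code from this patch; error handling omitted):

/* Hypothetical capture session using the entry points added above. */
void spm_session_sketch(struct si_context *sctx, struct radeon_cmdbuf *cs)
{
   si_spm_init(sctx);            /* counter layout + ring buffer object */
   si_emit_spm_setup(sctx, cs);  /* RLC ring, muxsel RAMs, counter selects */
   si_pc_emit_spm_start(cs);     /* SPM -> START_COUNTING, windowed start */

   /* ... submit the workload to be profiled ... */

   si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
                       sctx->screen->info.never_send_perfcounter_stop);
   si_pc_emit_spm_reset(cs);     /* back to DISABLE_AND_RESET */
   si_spm_finish(sctx);          /* release ring BO and ac_spm state */
}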