summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2019-12-04 15:19:56 +0000
committerMarge Bot <eric+marge@anholt.net>2020-04-03 12:12:08 +0000
commitb1544352c022953febcc2c2c448ba21551e6b215 (patch)
treed1a3ce23dcae47bdeba40cfac3e2a65cf49db09e
parentad2703653f306f0fa751ddfd546d1d93ce348630 (diff)
aco: add various compiler statistics
Adds these statistics: - hash of code and constant data - number of instructions - number of copies from pseudo-instructions - number of branches - estimate of cycles spent not waiting in s_waitcnt - number of vmem/smem "clauses" - sgpr/vgpr usage before scheduling Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2965>
-rw-r--r--src/amd/compiler/aco_interface.cpp44
-rw-r--r--src/amd/compiler/aco_ir.h20
-rw-r--r--src/amd/compiler/aco_lower_to_hw_instr.cpp5
-rw-r--r--src/amd/compiler/aco_statistics.cpp88
-rw-r--r--src/amd/compiler/meson.build3
5 files changed, 155 insertions, 5 deletions
diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp
index 378a138d245..104436d33c6 100644
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@@ -54,6 +54,18 @@ static void init()
}
}
+/* Display name and description of every compiler statistic.
+ *
+ * Entries are positional and must be listed in the same order as the
+ * enumerators of aco::statistic, since that enum is what indexes the
+ * per-program statistic values this table is published alongside.
+ *
+ * C99 array designators ("[index] = value") are deliberately not used: they
+ * are not part of standard C++, g++ only accepts them as an extension when
+ * they already appear in order, and other compilers warn about or reject
+ * them. The static_assert below catches a missing or extra entry instead.
+ */
+static radv_compiler_statistic_info statistic_infos[] = {
+   /* aco::statistic_hash */          {"Hash", "CRC32 hash of code and constant data"},
+   /* aco::statistic_instructions */  {"Instructions", "Instruction count"},
+   /* aco::statistic_copies */        {"Copies", "Copy instructions created for pseudo-instructions"},
+   /* aco::statistic_branches */      {"Branches", "Branch instructions"},
+   /* aco::statistic_cycles */        {"Busy Cycles", "Estimate of busy cycles"},
+   /* aco::statistic_vmem_clauses */  {"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"},
+   /* aco::statistic_smem_clauses */  {"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"},
+   /* aco::statistic_sgpr_presched */ {"Pre-Sched SGPRs", "SGPR usage before scheduling"},
+   /* aco::statistic_vgpr_presched */ {"Pre-Sched VGPRs", "VGPR usage before scheduling"},
+};
+static_assert(sizeof(statistic_infos) / sizeof(statistic_infos[0]) == aco::num_statistics,
+              "statistic_infos needs exactly one entry per aco::statistic enumerator");
+
void aco_compile_shader(unsigned shader_count,
struct nir_shader *const *shaders,
struct radv_shader_binary **binary,
@@ -64,6 +76,10 @@ void aco_compile_shader(unsigned shader_count,
ac_shader_config config = {0};
std::unique_ptr<aco::Program> program{new aco::Program};
+ program->collect_statistics = args->options->record_ir;
+ if (program->collect_statistics)
+ memset(program->statistics, 0, sizeof(program->statistics));
+
/* Instruction Selection */
if (args->is_gs_copy_shader)
aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
@@ -94,6 +110,9 @@ void aco_compile_shader(unsigned shader_count,
aco::live live_vars = aco::live_var_analysis(program.get(), args->options);
aco::spill(program.get(), live_vars, args->options);
+ if (program->collect_statistics)
+ aco::collect_presched_stats(program.get());
+
//std::cerr << "Before Schedule:\n";
//aco_print_program(program.get(), stderr);
aco::schedule_program(program.get(), live_vars);
@@ -139,10 +158,16 @@ void aco_compile_shader(unsigned shader_count,
//std::cerr << "After Insert-Waitcnt:\n";
//aco_print_program(program.get(), stderr);
+ if (program->collect_statistics)
+ aco::collect_preasm_stats(program.get());
+
/* Assembly */
std::vector<uint32_t> code;
unsigned exec_size = aco::emit_program(program.get(), code);
+ if (program->collect_statistics)
+ aco::collect_postasm_stats(program.get(), code);
+
bool get_disasm = args->options->dump_shader || args->options->record_ir;
size_t size = llvm_ir.size();
@@ -156,6 +181,11 @@ void aco_compile_shader(unsigned shader_count,
size += disasm.size();
}
+ size_t stats_size = 0;
+ if (program->collect_statistics)
+ stats_size = sizeof(radv_compiler_statistics) + aco::num_statistics * sizeof(uint32_t);
+ size += stats_size;
+
size += code.size() * sizeof(uint32_t) + sizeof(radv_shader_binary_legacy);
/* We need to calloc to prevent unintialized data because this will be used
* directly for the disk cache. Uninitialized data can appear because of
@@ -168,9 +198,15 @@ void aco_compile_shader(unsigned shader_count,
legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
legacy_binary->base.total_size = size;
- legacy_binary->stats_size = 0;
+ if (program->collect_statistics) {
+ radv_compiler_statistics *statistics = (radv_compiler_statistics *)legacy_binary->data;
+ statistics->count = aco::num_statistics;
+ statistics->infos = statistic_infos;
+ memcpy(statistics->values, program->statistics, aco::num_statistics * sizeof(uint32_t));
+ }
+ legacy_binary->stats_size = stats_size;
- memcpy(legacy_binary->data, code.data(), code.size() * sizeof(uint32_t));
+ memcpy(legacy_binary->data + legacy_binary->stats_size, code.data(), code.size() * sizeof(uint32_t));
legacy_binary->exec_size = exec_size;
legacy_binary->code_size = code.size() * sizeof(uint32_t);
@@ -178,10 +214,10 @@ void aco_compile_shader(unsigned shader_count,
legacy_binary->disasm_size = 0;
legacy_binary->ir_size = llvm_ir.size();
- llvm_ir.copy((char*) legacy_binary->data + legacy_binary->code_size, llvm_ir.size());
+ llvm_ir.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, llvm_ir.size());
if (get_disasm) {
- disasm.copy((char*) legacy_binary->data + legacy_binary->code_size + llvm_ir.size(), disasm.size());
+ disasm.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size + llvm_ir.size(), disasm.size());
legacy_binary->disasm_size = disasm.size();
}
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index ace84db1018..c6213e0c04e 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1219,6 +1219,19 @@ static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
+enum statistic { /* one index per compiler statistic; used to index Program::statistics */
+ statistic_hash, /* CRC32 hash of code and constant data */
+ statistic_instructions, /* instruction count */
+ statistic_copies, /* copy instructions created for pseudo-instructions */
+ statistic_branches, /* branch instructions */
+ statistic_cycles, /* estimate of busy cycles */
+ statistic_vmem_clauses, /* number of VMEM clauses (includes 1-sized clauses) */
+ statistic_smem_clauses, /* number of SMEM clauses (includes 1-sized clauses) */
+ statistic_sgpr_presched, /* SGPR usage before scheduling */
+ statistic_vgpr_presched, /* VGPR usage before scheduling */
+ num_statistics /* count of the statistics above, not a statistic itself; sizes Program::statistics */
+};
+
class Program final {
public:
float_mode next_fp_mode;
@@ -1257,6 +1270,9 @@ public:
bool needs_vcc = false;
bool needs_flat_scr = false;
+ bool collect_statistics = false;
+ uint32_t statistics[num_statistics];
+
uint32_t allocateId()
{
assert(allocationID <= 16777215);
@@ -1337,6 +1353,10 @@ void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif
+void collect_presched_stats(Program *program);
+void collect_preasm_stats(Program *program);
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
+
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 6f2f54f6992..606f2fde65c 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -784,6 +784,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
copy_map.erase(it);
it = copy_map.begin();
+ ctx->program->statistics[statistic_copies]++;
continue;
} else {
/* the target reg is used as operand, check the next entry */
@@ -813,6 +814,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
Definition op_as_def = Definition(swap.op.physReg(), swap.op.regClass());
if (chip_class >= GFX9 && swap.def.getTemp().type() == RegType::vgpr) {
bld.vop1(aco_opcode::v_swap_b32, swap.def, op_as_def, swap.op, def_as_op);
+ ctx->program->statistics[statistic_copies]++;
} else if (swap.op.physReg() == scc || swap.def.physReg() == scc) {
/* we need to swap scc and another sgpr */
assert(!preserve_scc);
@@ -822,6 +824,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1), Operand(0u));
bld.sop1(aco_opcode::s_mov_b32, Definition(other, s1), Operand(pi->scratch_sgpr, s1));
+ ctx->program->statistics[statistic_copies] += 3;
} else if (swap.def.getTemp().type() == RegType::sgpr) {
if (preserve_scc) {
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), swap.op);
@@ -832,10 +835,12 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
bld.sop2(aco_opcode::s_xor_b32, swap.def, Definition(scc, s1), swap.op, def_as_op);
bld.sop2(aco_opcode::s_xor_b32, op_as_def, Definition(scc, s1), swap.op, def_as_op);
}
+ ctx->program->statistics[statistic_copies] += 3;
} else {
bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
bld.vop2(aco_opcode::v_xor_b32, swap.def, swap.op, def_as_op);
bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
+ ctx->program->statistics[statistic_copies] += 3;
}
/* change the operand reg of the target's use */
diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
new file mode 100644
index 00000000000..2e78ab63cb1
--- /dev/null
+++ b/src/amd/compiler/aco_statistics.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2020 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include "aco_ir.h"
+#include "util/crc32.h"
+
+namespace aco {
+
+/* Fills in the sgpr_presched and vgpr_presched statistics.
+ *
+ * The register demand of every block of the program is folded into a single
+ * RegisterDemand with RegisterDemand::update(); its sgpr and vgpr members
+ * are then stored as the two statistics.
+ */
+void collect_presched_stats(Program *program)
+{
+   RegisterDemand demand;
+   for (Block& blk : program->blocks) {
+      demand.update(blk.register_demand);
+   }
+
+   uint32_t *const stats = program->statistics;
+   stats[statistic_sgpr_presched] = demand.sgpr;
+   stats[statistic_vgpr_presched] = demand.vgpr;
+}
+
+/* Fills in the instructions, branches, vmem_clauses, smem_clauses and cycles
+ * statistics by walking every instruction of every block.
+ *
+ * Clauses are counted per block: each run of consecutive VMEM (respectively
+ * SMEM) instructions that have operands contributes the number of distinct
+ * first operands collected during that run.
+ */
+void collect_preasm_stats(Program *program)
+{
+   uint32_t *const stats = program->statistics;
+
+   for (Block& block : program->blocks) {
+      std::set<Temp> vmem_res;
+      std::set<Temp> smem_res;
+
+      /* assume loops execute 4 times (TODO: it would be nice to be able to consider loop unrolling) */
+      const unsigned iter = 1 << (block.loop_nest_depth * 2);
+
+      stats[statistic_instructions] += block.instructions.size();
+
+      for (aco_ptr<Instruction>& instr : block.instructions) {
+         /* a SOPP instruction whose block field is not -1 counts as a branch */
+         const bool is_branch = instr->format == Format::SOPP &&
+                                static_cast<SOPP_instruction*>(instr.get())->block != -1;
+         if (is_branch)
+            stats[statistic_branches]++;
+
+         /* p_constaddr is accounted as two additional instructions */
+         if (instr->opcode == aco_opcode::p_constaddr)
+            stats[statistic_instructions] += 2;
+
+         if (!instr->isVMEM() || instr->operands.empty()) {
+            /* the current run of VMEM instructions ends here */
+            stats[statistic_vmem_clauses] += vmem_res.size();
+            vmem_res.clear();
+         } else {
+            vmem_res.insert(instr->operands[0].getTemp());
+         }
+
+         if (instr->format != Format::SMEM || instr->operands.empty()) {
+            /* the current run of SMEM instructions ends here */
+            stats[statistic_smem_clauses] += smem_res.size();
+            smem_res.clear();
+         } else if (instr->operands[0].size() == 2) {
+            /* every first operand of size 2 shares this single placeholder entry */
+            smem_res.insert(Temp(0, s2));
+         } else {
+            smem_res.insert(instr->operands[0].getTemp());
+         }
+
+         /* TODO: this incorrectly assumes instructions always take 4 cycles */
+         stats[statistic_cycles] += 4 * iter;
+      }
+
+      /* account for runs that were still open at the end of the block */
+      stats[statistic_vmem_clauses] += vmem_res.size();
+      stats[statistic_smem_clauses] += smem_res.size();
+   }
+}
+
+/* Fills in the hash statistic: the CRC32 (util_hash_crc32) computed over all
+ * of the assembled dwords in `code`, i.e. code.size() * 4 bytes.
+ */
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code)
+{
+   const void *const bytes = code.data();
+   const size_t num_bytes = code.size() * 4;
+
+   program->statistics[aco::statistic_hash] = util_hash_crc32(bytes, num_bytes);
+}
+
+}
diff --git a/src/amd/compiler/meson.build b/src/amd/compiler/meson.build
index 8a0ebb0befc..44b56baab4c 100644
--- a/src/amd/compiler/meson.build
+++ b/src/amd/compiler/meson.build
@@ -76,8 +76,9 @@ libaco_files = files(
'aco_print_asm.cpp',
'aco_print_ir.cpp',
'aco_scheduler.cpp',
- 'aco_ssa_elimination.cpp',
'aco_spill.cpp',
+ 'aco_ssa_elimination.cpp',
+ 'aco_statistics.cpp',
'aco_util.h',
'aco_validate.cpp',
)