From 9bbb9f110482bb25d05ae1e07bf9bc25a30ef7a3 Mon Sep 17 00:00:00 2001 From: Daniel Schürmann Date: Mon, 18 Dec 2023 11:21:08 +0100 Subject: aco: use small_vec as Block::edge_vec for predecessors and successors Part-of: --- src/amd/compiler/aco_insert_exec_mask.cpp | 4 ++-- src/amd/compiler/aco_instruction_selection.cpp | 2 +- src/amd/compiler/aco_ir.h | 10 ++++++---- src/amd/compiler/aco_live_var_analysis.cpp | 4 ++-- src/amd/compiler/aco_lower_to_cssa.cpp | 8 ++++---- src/amd/compiler/aco_optimizer_postRA.cpp | 5 ++--- src/amd/compiler/aco_register_allocation.cpp | 8 ++++---- src/amd/compiler/aco_spill.cpp | 17 +++++++---------- src/amd/compiler/aco_ssa_elimination.cpp | 2 +- 9 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 5038d5da680..7ab20c443ef 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -174,7 +174,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> { unsigned idx = block->index; Builder bld(ctx.program, &instructions); - std::vector& preds = block->linear_preds; + Block::edge_vec& preds = block->linear_preds; bool restore_exec = false; /* start block */ @@ -270,7 +270,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> assert(ctx.info[pred].exec.size() >= info.num_exec_masks); /* fill the loop header phis */ - std::vector& header_preds = header->linear_preds; + Block::edge_vec& header_preds = header->linear_preds; int instr_idx = 0; if (info.has_discard && header_preds.size() > 1) { while (instr_idx < info.num_exec_masks - 1) { diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0ec8389aa05..9c52e67d41f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10021,7 +10021,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr) 
nir_foreach_phi_src (src, instr) phi_src[src->pred->index] = src->src.ssa; - std::vector& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds; + Block::edge_vec& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds; unsigned num_operands = 0; Operand* const operands = (Operand*)alloca( (std::max(exec_list_length(&instr->srcs), (unsigned)preds.size()) + 1) * sizeof(Operand)); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 8ee4caeed47..d6f83822278 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1930,14 +1930,16 @@ struct RegisterDemand { /* CFG */ struct Block { + using edge_vec = small_vec; + float_mode fp_mode; unsigned index; unsigned offset = 0; std::vector> instructions; - std::vector logical_preds; - std::vector linear_preds; - std::vector logical_succs; - std::vector linear_succs; + edge_vec logical_preds; + edge_vec linear_preds; + edge_vec logical_succs; + edge_vec linear_succs; RegisterDemand register_demand = RegisterDemand(); uint32_t kind = 0; int32_t logical_idom = -1; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index f894dd31b74..35eeeb2eb8f 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -263,7 +263,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, unsign } else { for (unsigned t : live) { RegClass rc = program->temp_rc[t]; - std::vector& preds = rc.is_linear() ? block->linear_preds : block->logical_preds; + Block::edge_vec& preds = rc.is_linear() ? 
block->linear_preds : block->logical_preds; #ifndef NDEBUG if (preds.empty()) @@ -285,7 +285,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, unsign Instruction* insn = block->instructions[phi_idx].get(); assert(is_phi(insn)); /* directly insert into the predecessors live-out set */ - std::vector& preds = + Block::edge_vec& preds = insn->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; for (unsigned i = 0; i < preds.size(); ++i) { Operand& operand = insn->operands[i]; diff --git a/src/amd/compiler/aco_lower_to_cssa.cpp b/src/amd/compiler/aco_lower_to_cssa.cpp index 3c509ee2f81..7b9eb3be15a 100644 --- a/src/amd/compiler/aco_lower_to_cssa.cpp +++ b/src/amd/compiler/aco_lower_to_cssa.cpp @@ -89,7 +89,7 @@ collect_parallelcopies(cssa_ctx& ctx) if (!def.isTemp()) continue; - std::vector& preds = + Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; uint32_t index = ctx.merge_sets.size(); merge_set set; @@ -194,9 +194,9 @@ intersects(cssa_ctx& ctx, Temp var, Temp parent) /* parent is defined in a different block than var */ if (node_parent.defined_at < node_var.defined_at) { /* if the parent is not live-in, they don't interfere */ - std::vector& preds = var.type() == RegType::vgpr - ? ctx.program->blocks[block_idx].logical_preds - : ctx.program->blocks[block_idx].linear_preds; + Block::edge_vec& preds = var.type() == RegType::vgpr + ? 
ctx.program->blocks[block_idx].logical_preds + : ctx.program->blocks[block_idx].linear_preds; for (uint32_t pred : preds) { if (!ctx.live_out[pred].count(parent.id())) return false; diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index 39bbff4a660..e67f56e056d 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -74,9 +74,8 @@ struct pr_opt_ctx { instr_idx_by_regs(std::unique_ptr{new Idx_array[p->blocks.size()]}) {} - ALWAYS_INLINE void reset_block_regs(const std::vector& preds, - const unsigned block_index, const unsigned min_reg, - const unsigned num_regs) + ALWAYS_INLINE void reset_block_regs(const Block::edge_vec& preds, const unsigned block_index, + const unsigned min_reg, const unsigned num_regs) { const unsigned num_preds = preds.size(); const unsigned first_pred = preds[0]; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 12f8f005817..84ba242ff86 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2235,7 +2235,7 @@ get_reg_phi(ra_ctx& ctx, IDSet& live_in, RegisterFile& register_file, * to move it in this block's predecessors */ aco_opcode opcode = pc.first.getTemp().is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi; - std::vector& preds = + Block::edge_vec& preds = pc.first.getTemp().is_linear() ? block.linear_preds : block.logical_preds; aco_ptr new_phi{ create_instruction(opcode, Format::PSEUDO, preds.size(), 1)}; @@ -2354,7 +2354,7 @@ read_variable(ra_ctx& ctx, Temp val, unsigned block_idx) Temp handle_live_in(ra_ctx& ctx, Temp val, Block* block) { - std::vector& preds = val.is_linear() ? block->linear_preds : block->logical_preds; + Block::edge_vec& preds = val.is_linear() ? 
block->linear_preds : block->logical_preds; if (preds.size() == 0) return val; @@ -2444,7 +2444,7 @@ handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx, aco_ptr& phi = loop_header.instructions[i]; if (!is_phi(phi)) break; - const std::vector& preds = + const Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? loop_header.logical_preds : loop_header.linear_preds; for (unsigned j = 1; j < phi->operands.size(); j++) { Operand& op = phi->operands[j]; @@ -2535,7 +2535,7 @@ init_reg_file(ra_ctx& ctx, const std::vector& live_out_per_block, Block& for (aco_ptr& instr : block.instructions) { if (!is_phi(instr)) break; - const std::vector& preds = + const Block::edge_vec& preds = instr->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; for (unsigned i = 0; i < instr->operands.size(); i++) { diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 2f9e4b0421f..56858da266e 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -259,7 +259,7 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist) } uint32_t distance = pair.second.second; uint32_t dom = pair.second.first; - std::vector& preds = temp.is_linear() ? block->linear_preds : block->logical_preds; + Block::edge_vec& preds = temp.is_linear() ? block->linear_preds : block->logical_preds; for (unsigned pred_idx : preds) { if (ctx.program->blocks[pred_idx].loop_nest_depth > block->loop_nest_depth) distance += 0xFFFF; @@ -664,8 +664,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) /* keep variables spilled on all incoming paths */ for (const std::pair>& pair : next_use_distances) { - std::vector& preds = - pair.first.is_linear() ? block->linear_preds : block->logical_preds; + Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds; /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload * it. 
Otherwise, if any predecessor reloads it, ensure it's reloaded on all other * predecessors. The idea is that it's better in practice to rematerialize redundantly than to @@ -708,7 +707,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) if (!phi->definitions[0].isTemp()) continue; - std::vector& preds = + Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; bool is_all_spilled = true; for (unsigned i = 0; i < phi->operands.size(); i++) { @@ -877,7 +876,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) continue; } - std::vector& preds = + Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()]; @@ -945,8 +944,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) /* iterate all (other) spilled variables for which to spill at the predecessor */ // TODO: would be better to have them sorted: first vgprs and first with longest distance for (std::pair pair : ctx.spills_entry[block_idx]) { - std::vector preds = - pair.first.is_linear() ? block->linear_preds : block->logical_preds; + Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds; for (unsigned pred_idx : preds) { /* variable is already spilled at predecessor */ @@ -1000,7 +998,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) assert(!phi->definitions[0].isTemp() || !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp())); - std::vector& preds = + Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? 
block->logical_preds : block->linear_preds; for (unsigned i = 0; i < phi->operands.size(); i++) { if (!phi->operands[i].isTemp()) @@ -1060,8 +1058,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) /* skip spilled variables */ if (ctx.spills_entry[block_idx].count(pair.first)) continue; - std::vector<unsigned> preds = - pair.first.is_linear() ? block->linear_preds : block->logical_preds; + Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds; /* variable is dead at predecessor, it must be from a phi */ bool is_dead = false; diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 477b59ba6be..43ccb5c250e 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -68,7 +68,7 @@ collect_phi_info(ssa_elimination_ctx& ctx) assert(phi->definitions[0].size() == phi->operands[i].size()); - std::vector<unsigned>& preds = + Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; uint32_t pred_idx = preds[i]; auto& info_vec = phi->opcode == aco_opcode::p_phi ? ctx.logical_phi_info[pred_idx] -- cgit v1.2.3