diff options
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_builder_h.py | 4 | ||||
-rw-r--r-- | src/amd/compiler/aco_dead_code_analysis.cpp | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_insert_exec_mask.cpp | 10 | ||||
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 64 | ||||
-rw-r--r-- | src/amd/compiler/aco_lower_to_hw_instr.cpp | 16 |
6 files changed, 58 insertions, 40 deletions
diff --git a/.pick_status.json b/.pick_status.json index bc03cb7e3c2..50bd84d8c66 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -706,7 +706,7 @@ "description": "aco: reserve 2 sgprs for each branch", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 8d541cbd72f..432b39d7070 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -537,7 +537,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod ("sop1", [Format.SOP1], 'SOP1_instruction', [(1, 1), (2, 1), (3, 2)]), ("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])), ("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])), - ("sopp", [Format.SOPP], 'SOPP_instruction', [(0, 0), (0, 1)]), + ("sopp", [Format.SOPP], 'SOPP_instruction', itertools.product([0, 1], [0, 1])), ("sopc", [Format.SOPC], 'SOPC_instruction', [(1, 2)]), ("smem", [Format.SMEM], 'SMEM_instruction', [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (0, 0)]), ("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (0, 3), (0, 4)]), @@ -545,7 +545,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod ("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]), ("mimg", [Format.MIMG], 'MIMG_instruction', [(0, 3), (1, 3)]), ("exp", [Format.EXP], 'Export_instruction', [(0, 4)]), - ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([0], [0, 1])), + ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])), ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]), ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 2)]), ("vop1", [Format.VOP1], 'VOP1_instruction', [(1, 1), (2, 2)]), diff --git a/src/amd/compiler/aco_dead_code_analysis.cpp b/src/amd/compiler/aco_dead_code_analysis.cpp index f43d784c55f..b77a3dd89b2 100644 --- a/src/amd/compiler/aco_dead_code_analysis.cpp +++ b/src/amd/compiler/aco_dead_code_analysis.cpp @@ -79,7 +79,7 @@ void process_block(dce_ctx& ctx, Block& block) bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr) { - if (instr->definitions.empty()) + if (instr->definitions.empty() || instr->format == Format::PSEUDO_BRANCH) return false; if (std::any_of(instr->definitions.begin(), instr->definitions.end(), [&uses] (const Definition& def) { return uses[def.tempId()];})) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index b682ac77efb..ebd44ade4cd 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -985,7 +985,7 @@ void add_branch_code(exec_ctx& ctx, Block* block) if (need_parallelcopy) ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), ctx.info[idx].exec.back().first); - bld.branch(aco_opcode::p_cbranch_nz, bld.exec(ctx.info[idx].exec.back().first), block->linear_succs[1], block->linear_succs[0]); + bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.exec(ctx.info[idx].exec.back().first), block->linear_succs[1], block->linear_succs[0]); return; } @@ -1032,7 +1032,7 @@ void add_branch_code(exec_ctx& ctx, Block* block) /* add next current exec to the stack */ ctx.info[idx].exec.emplace_back(then_mask, mask_type); - bld.branch(aco_opcode::p_cbranch_z, bld.exec(then_mask), block->linear_succs[1], block->linear_succs[0]); + bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), bld.exec(then_mask), block->linear_succs[1], block->linear_succs[0]); return; } @@ -1050,7 +1050,7 @@ void add_branch_code(exec_ctx& ctx, Block* block) /* add next current exec to the stack */ ctx.info[idx].exec.emplace_back(else_mask, mask_type); - bld.branch(aco_opcode::p_cbranch_z, bld.exec(else_mask), block->linear_succs[1], block->linear_succs[0]); + bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), bld.exec(else_mask), block->linear_succs[1], block->linear_succs[0]); return; } @@ -1078,7 +1078,7 @@ void add_branch_code(exec_ctx& ctx, Block* block) ctx.info[idx].exec.back().first = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand(0u)); } - bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); + bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); return; } @@ -1107,7 +1107,7 @@ void add_branch_code(exec_ctx& ctx, Block* block) ctx.info[idx].exec.back().first = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand(0u)); } - bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); + bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); return; } } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 49c111012cc..dc278fc3e70 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5482,7 +5482,7 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr) assert(nir_instr_is_last(&instr->instr)); ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(ctx->block->index, linear_target); return; } @@ -5495,14 +5495,14 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr) ctx->cf_info.nir_to_aco[instr->instr.block->index] = idx; /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* break_block = ctx->program->create_and_insert_block(); break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind |= block_kind_uniform; add_linear_edge(idx, break_block); add_linear_edge(break_block->index, linear_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; @@ -9303,7 +9303,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) /* uniform break - directly jump out of the loop */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(idx, logical_target); return; } @@ -9325,7 +9325,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) /* uniform continue - directly jump to the loop header */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(idx, logical_target); return; } @@ -9343,7 +9343,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) } /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* break_block = ctx->program->create_and_insert_block(); break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind |= block_kind_uniform; @@ -9353,7 +9353,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; add_linear_edge(break_block->index, logical_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; @@ -9455,7 +9455,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) append_logical_end(ctx->block); ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform; Builder bld(ctx->program, ctx->block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); unsigned loop_preheader_idx = ctx->block->index; Block loop_exit = Block(); @@ -9490,7 +9490,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind = block_kind_uniform; bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(block_idx, break_block); add_linear_edge(break_block->index, &loop_exit); @@ -9498,7 +9498,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; continue_block->kind = block_kind_uniform; bld.reset(continue_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(block_idx, continue_block); add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]); @@ -9514,7 +9514,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) } bld.reset(ctx->block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); } /* Fixup phis in loop header from unreachable blocks. @@ -9592,7 +9592,9 @@ static void begin_divergent_if_then(isel_context *ctx, if_context *ic, Temp cond /* branch to linear then block */ assert(cond.regClass() == ctx->program->lane_mask); aco_ptr<Pseudo_branch_instruction> branch; - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(cond); ctx->block->instructions.push_back(std::move(branch)); @@ -9631,7 +9633,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) append_logical_end(BB_then_logical); /* branch from logical then block to invert block */ aco_ptr<Pseudo_branch_instruction> branch; - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_logical->index, &ic->BB_invert); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -9647,7 +9651,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) BB_then_linear->kind |= block_kind_uniform; add_linear_edge(ic->BB_if_idx, BB_then_linear); /* branch from linear then block to invert block */ - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_linear->index, &ic->BB_invert); @@ -9656,7 +9662,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) ic->invert_idx = ctx->block->index; /* branch to linear else block (skip else) */ - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(ic->cond); ctx->block->instructions.push_back(std::move(branch)); @@ -9685,7 +9693,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic) /* branch from logical else block to endif block */ aco_ptr<Pseudo_branch_instruction> branch; - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_logical->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -9703,7 +9713,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic) add_linear_edge(ic->invert_idx, BB_else_linear); /* branch from linear else block to endif block */ - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_linear->index, &ic->BB_endif); @@ -9740,7 +9752,9 @@ static void begin_uniform_if_then(isel_context *ctx, if_context *ic, Temp cond) aco_ptr<Pseudo_branch_instruction> branch; aco_opcode branch_opcode = aco_opcode::p_cbranch_z; - branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(cond); branch->operands[0].setFixed(scc); ctx->block->instructions.emplace_back(std::move(branch)); @@ -9772,7 +9786,9 @@ static void begin_uniform_if_else(isel_context *ctx, if_context *ic) append_logical_end(BB_then); /* branch from then block to endif block */ aco_ptr<Pseudo_branch_instruction> branch; - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then->index, &ic->BB_endif); if (!ic->then_branch_divergent) @@ -9799,7 +9815,9 @@ static void end_uniform_if(isel_context *ctx, if_context *ic) append_logical_end(BB_else); /* branch from then block to endif block */ aco_ptr<Pseudo_branch_instruction> branch; - branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -11160,7 +11178,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream)); append_logical_end(ctx.block); ctx.block->kind |= block_kind_uniform; - bld.branch(aco_opcode::p_cbranch_z, cond); + bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), cond); BB_endif.kind |= ctx.block->kind & block_kind_top_level; @@ -11222,7 +11240,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, append_logical_end(ctx.block); /* branch from then block to endif block */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_edge(ctx.block->index, &BB_endif); ctx.block->kind |= block_kind_uniform; @@ -11244,7 +11262,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, append_logical_end(BB_else); /* branch from else block to endif block */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_edge(BB_else->index, &BB_endif); BB_else->kind |= block_kind_uniform; diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index e8c72485772..1962c17032f 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1803,7 +1803,7 @@ void lower_to_hw_instr(Program* program) //TODO: exec can be zero here with block_kind_discard assert(instr->operands[0].physReg() == scc); - bld.sopp(aco_opcode::s_cbranch_scc0, instr->operands[0], discard_block->index); + bld.sopp(aco_opcode::s_cbranch_scc0, Definition(exec, s2), instr->operands[0], discard_block->index); discard_block->linear_preds.push_back(block->index); block->linear_succs.push_back(discard_block->index); @@ -1873,28 +1873,28 @@ void lower_to_hw_instr(Program* program) switch (instr->opcode) { case aco_opcode::p_branch: assert(block->linear_succs[0] == branch->target[0]); - bld.sopp(aco_opcode::s_branch, branch->target[0]); + bld.sopp(aco_opcode::s_branch, branch->definitions[0], branch->target[0]); break; case aco_opcode::p_cbranch_nz: assert(block->linear_succs[1] == branch->target[0]); if (branch->operands[0].physReg() == exec) - bld.sopp(aco_opcode::s_cbranch_execnz, branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_execnz, branch->definitions[0], branch->target[0]); else if (branch->operands[0].physReg() == vcc) - bld.sopp(aco_opcode::s_cbranch_vccnz, branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_vccnz, branch->definitions[0], branch->target[0]); else { assert(branch->operands[0].physReg() == scc); - bld.sopp(aco_opcode::s_cbranch_scc1, branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_scc1, branch->definitions[0], branch->target[0]); } break; case aco_opcode::p_cbranch_z: assert(block->linear_succs[1] == branch->target[0]); if (branch->operands[0].physReg() == exec) - bld.sopp(aco_opcode::s_cbranch_execz, branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_execz, branch->definitions[0], branch->target[0]); else if (branch->operands[0].physReg() == vcc) - bld.sopp(aco_opcode::s_cbranch_vccz, branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_vccz, branch->definitions[0], branch->target[0]); else { assert(branch->operands[0].physReg() == scc); - bld.sopp(aco_opcode::s_cbranch_scc0, branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_scc0, branch->definitions[0], branch->target[0]); } break; default: |