diff options
author | Arcady Goldmints-Orlov <agoldmints@igalia.com> | 2020-10-26 00:03:04 -0400 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2020-11-09 20:45:58 +0000 |
commit | a1a365e8188bd72be27404dd669cad90468bc09f (patch) | |
tree | 40779e41d74967a7dbc031de59a15dcbc2759d5e | |
parent | 1c5271346af77724f0462d1acafaa49142569006 (diff) |
broadcom/compiler: Allow spills of temporaries from TMU reads

Since spills and fills use the TMU, special care has to be taken to
avoid putting one between a TMU setup instruction and the corresponding
reads or writes. This change adds logic to move fills up and move spills
down to avoid interrupting such sequences.
This allows compiling 6 more programs from shader-db. Other stats:
total spills in shared programs: 446 -> 446 (0.00%)
spills in affected programs: 0 -> 0
helped: 0
HURT: 0
total fills in shared programs: 606 -> 610 (0.66%)
fills in affected programs: 38 -> 42 (10.53%)
helped: 0
HURT: 2
total instructions in shared programs: 19330 -> 19363 (0.17%)
instructions in affected programs: 3299 -> 3332 (1.00%)
helped: 0
HURT: 5
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6606>
-rw-r--r-- | src/broadcom/compiler/vir_register_allocate.c | 180 |
1 file changed, 115 insertions, 65 deletions
```diff
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index 7c857cd5698..3e20faf94cc 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -37,12 +37,20 @@ static inline bool
 qinst_writes_tmu(struct qinst *inst)
 {
         return (inst->dst.file == QFILE_MAGIC &&
-                v3d_qpu_magic_waddr_is_tmu(inst->dst.index));
+                v3d_qpu_magic_waddr_is_tmu(inst->dst.index)) ||
+                inst->qpu.sig.wrtmuc;
 }
 
 static bool
-is_last_ldtmu(struct qinst *inst, struct qblock *block)
+is_end_of_tmu_sequence(struct qinst *inst, struct qblock *block)
 {
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
+            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT)
+                return true;
+
+        if (!inst->qpu.sig.ldtmu)
+                return false;
+
         list_for_each_entry_from(struct qinst, scan_inst, inst->link.next,
                                  &block->instructions, link) {
                 if (scan_inst->qpu.sig.ldtmu)
@@ -78,14 +86,13 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
         /* XXX: Scale the cost up when inside of a loop. */
         vir_for_each_block(block, c) {
                 vir_for_each_inst(inst, block) {
-                        /* We can't insert a new TMU operation while currently
-                         * in a TMU operation, and we can't insert new thread
-                         * switches after starting output writes.
+                        /* We can't insert new thread switches after
+                         * starting output writes.
                          */
                         bool no_spilling =
-                                (in_tmu_operation ||
-                                 (c->threads > 1 && started_last_seg));
+                                c->threads > 1 && started_last_seg;
 
+                        /* Discourage spilling of TMU operations */
                         for (int i = 0; i < vir_get_nsrc(inst); i++) {
                                 if (inst->src[i].file != QFILE_TEMP)
                                         continue;
@@ -94,8 +101,11 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                                 if (vir_is_mov_uniform(c, temp)) {
                                         spill_costs[temp] += block_scale;
                                 } else if (!no_spilling) {
+                                        float tmu_op_scale = in_tmu_operation ?
+                                                3.0 : 1.0;
                                         spill_costs[temp] += (block_scale *
-                                                              tmu_scale);
+                                                              tmu_scale *
+                                                              tmu_op_scale);
                                 } else {
                                         BITSET_CLEAR(c->spillable, temp);
                                 }
@@ -133,16 +143,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                                 started_last_seg = true;
 
                         /* Track when we're in between a TMU setup and the
-                         * final LDTMU or TMUWT from that TMU setup. We can't
-                         * spill/fill any temps during that time, because that
-                         * involves inserting a new TMU setup/LDTMU sequence.
+                         * final LDTMU or TMUWT from that TMU setup. We
+                         * penalize spills during that time.
                          */
-                        if (inst->qpu.sig.ldtmu &&
-                            is_last_ldtmu(inst, block))
-                                in_tmu_operation = false;
-
-                        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
-                            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT)
+                        if (is_end_of_tmu_sequence(inst, block))
                                 in_tmu_operation = false;
 
                         if (qinst_writes_tmu(inst))
@@ -205,6 +209,23 @@ v3d_emit_spill_tmua(struct v3d_compile *c, uint32_t spill_offset)
                      vir_uniform_ui(c, spill_offset));
 }
 
+
+static void
+v3d_emit_tmu_spill(struct v3d_compile *c, struct qinst *inst,
+                   struct qinst *position, uint32_t spill_offset)
+{
+        c->cursor = vir_after_inst(position);
+        inst->dst.index = c->num_temps++;
+        vir_MOV_dest(c, vir_reg(QFILE_MAGIC,
+                                V3D_QPU_WADDR_TMUD),
+                     inst->dst);
+        v3d_emit_spill_tmua(c, spill_offset);
+        vir_emit_thrsw(c);
+        vir_TMUWT(c);
+        c->spills++;
+        c->tmu_dirty_rcl = true;
+}
+
 static void
 v3d_spill_reg(struct v3d_compile *c, int spill_temp)
 {
@@ -233,62 +254,91 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                 uniform_index = orig_unif->uniform;
         }
 
-        vir_for_each_inst_inorder_safe(inst, c) {
-                for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                        if (inst->src[i].file != QFILE_TEMP ||
-                            inst->src[i].index != spill_temp) {
-                                continue;
+        struct qinst *start_of_tmu_sequence = NULL;
+        struct qinst *postponed_spill = NULL;
+        vir_for_each_block(block, c) {
+                vir_for_each_inst_safe(inst, block) {
+                        /* Track when we're in between a TMU setup and the final
+                         * LDTMU or TMUWT from that TMU setup. We can't spill/fill any
+                         * temps during that time, because that involves inserting a
+                         * new TMU setup/LDTMU sequence, so we postpone the spill or
+                         * move the fill up to not intrude in the middle of the TMU
+                         * sequence.
+                         */
+                        if (is_end_of_tmu_sequence(inst, block)) {
+                                if (postponed_spill) {
+                                        v3d_emit_tmu_spill(c, postponed_spill,
+                                                           inst, spill_offset);
+                                }
+
+                                start_of_tmu_sequence = NULL;
+                                postponed_spill = NULL;
                         }
 
-                        c->cursor = vir_before_inst(inst);
+                        if (!start_of_tmu_sequence && qinst_writes_tmu(inst))
+                                start_of_tmu_sequence = inst;
 
-                        if (is_uniform) {
-                                struct qreg unif =
-                                        vir_uniform(c,
-                                                    c->uniform_contents[uniform_index],
-                                                    c->uniform_data[uniform_index]);
-                                inst->src[i] = unif;
-                        } else {
-                                v3d_emit_spill_tmua(c, spill_offset);
-                                vir_emit_thrsw(c);
-                                inst->src[i] = vir_LDTMU(c);
-                                c->fills++;
+                        /* fills */
+                        for (int i = 0; i < vir_get_nsrc(inst); i++) {
+                                if (inst->src[i].file != QFILE_TEMP ||
+                                    inst->src[i].index != spill_temp) {
+                                        continue;
+                                }
+
+                                c->cursor = vir_before_inst(inst);
+
+                                if (is_uniform) {
+                                        struct qreg unif =
+                                                vir_uniform(c,
+                                                            c->uniform_contents[uniform_index],
+                                                            c->uniform_data[uniform_index]);
+                                        inst->src[i] = unif;
+                                } else {
+                                        /* If we have a postponed spill, we don't need
+                                         * a fill as the temp would not have been
+                                         * spilled yet.
+                                         */
+                                        if (postponed_spill)
+                                                continue;
+                                        if (start_of_tmu_sequence)
+                                                c->cursor = vir_before_inst(start_of_tmu_sequence);
+
+                                        v3d_emit_spill_tmua(c, spill_offset);
+                                        vir_emit_thrsw(c);
+                                        inst->src[i] = vir_LDTMU(c);
+                                        c->fills++;
+                                }
                         }
-                }
 
-                if (inst->dst.file == QFILE_TEMP &&
-                    inst->dst.index == spill_temp) {
-                        if (is_uniform) {
-                                c->cursor.link = NULL;
-                                vir_remove_instruction(c, inst);
-                        } else {
-                                c->cursor = vir_after_inst(inst);
+                        /* spills */
+                        if (inst->dst.file == QFILE_TEMP &&
+                            inst->dst.index == spill_temp) {
+                                if (is_uniform) {
+                                        c->cursor.link = NULL;
+                                        vir_remove_instruction(c, inst);
+                                } else {
+                                        if (start_of_tmu_sequence)
+                                                postponed_spill = inst;
+                                        else
+                                                v3d_emit_tmu_spill(c, inst, inst,
+                                                                   spill_offset);
+                                }
+                        }
 
-                                inst->dst.index = c->num_temps++;
-                                vir_MOV_dest(c, vir_reg(QFILE_MAGIC,
-                                                        V3D_QPU_WADDR_TMUD),
-                                             inst->dst);
-                                v3d_emit_spill_tmua(c, spill_offset);
+                        /* If we didn't have a last-thrsw inserted by nir_to_vir and
+                         * we've been inserting thrsws, then insert a new last_thrsw
+                         * right before we start the vpm/tlb sequence for the last
+                         * thread segment.
+                         */
+                        if (!is_uniform && !last_thrsw && c->last_thrsw &&
+                            (v3d_qpu_writes_vpm(&inst->qpu) ||
+                             v3d_qpu_uses_tlb(&inst->qpu))) {
+                                c->cursor = vir_before_inst(inst);
                                 vir_emit_thrsw(c);
-                                vir_TMUWT(c);
-                                c->spills++;
-                                c->tmu_dirty_rcl = true;
-                        }
-                }
 
-                /* If we didn't have a last-thrsw inserted by nir_to_vir and
-                 * we've been inserting thrsws, then insert a new last_thrsw
-                 * right before we start the vpm/tlb sequence for the last
-                 * thread segment.
-                 */
-                if (!is_uniform && !last_thrsw && c->last_thrsw &&
-                    (v3d_qpu_writes_vpm(&inst->qpu) ||
-                     v3d_qpu_uses_tlb(&inst->qpu))) {
-                        c->cursor = vir_before_inst(inst);
-                        vir_emit_thrsw(c);
-
-                        last_thrsw = c->last_thrsw;
-                        last_thrsw->is_last_thrsw = true;
+                                last_thrsw = c->last_thrsw;
+                                last_thrsw->is_last_thrsw = true;
+                        }
                 }
         }
 
```