diff options
author | Connor Abbott <cwabbott0@gmail.com> | 2021-06-16 15:55:30 +0200 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-06-16 22:45:13 +0000 |
commit | e19f1124353bf0c76072d8687bb93b78fd2970cf (patch) | |
tree | e4e8c673a464426edaf36c6dca8df1ab5afeab39 /src/freedreno | |
parent | 2c21dab36e55eaa3f6b083bc5c3bc0573321fdb5 (diff) |
ir3/ra: Fix array parallelcopy confusion
With array registers, there are two nums we care about:
1. The base num that the whole array starts at (->array.base)
2. The num that the instruction uses, plus possibly an indirect offset
(->num or ->array.offset)
For parallel copies we always copy the whole array, so (2) is irrelevant
here. For phis and parallel copies inserted for phis, we used
assign_reg() which assigned ->array.base, but we forgot about this when
constructing our own parallel copies for live range splitting, just
setting ->num instead. The parallel copy lowering was also inconsistent
here, using ra_reg_get_num() (which looks at ->array.base for arrays)
for sources but looking at ->num directly for destinations. This makes
everything use ->array.base consistently.
While we're here, also remove IR3_REG_SSA from liveout copies so that
printing works correctly.
Fixes: 0ffcb19 ("ir3: Rewrite register allocation")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11422>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- | src/freedreno/ir3/ir3_lower_parallelcopy.c | 3 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_ra.c | 16 |
2 files changed, 12 insertions, 7 deletions
diff --git a/src/freedreno/ir3/ir3_lower_parallelcopy.c b/src/freedreno/ir3/ir3_lower_parallelcopy.c index 68f1fefffe3..0fa5e765210 100644 --- a/src/freedreno/ir3/ir3_lower_parallelcopy.c +++ b/src/freedreno/ir3/ir3_lower_parallelcopy.c @@ -466,9 +466,10 @@ ir3_lower_copies(struct ir3_shader_variant *v) struct ir3_register *dst = instr->regs[i]; struct ir3_register *src = instr->regs[i + instr->regs_count / 2]; unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED); + unsigned dst_physreg = ra_reg_get_physreg(dst); for (unsigned j = 0; j < reg_elems(dst); j++) { array_insert(NULL, copies, (struct copy_entry) { - .dst = ra_num_to_physreg(dst->num + j, flags), + .dst = dst_physreg + j * reg_elem_size(dst), .src = get_copy_src(src, j * reg_elem_size(dst)), .flags = flags, }); diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 34f828f2f4b..afbb8172089 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1177,20 +1177,21 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) for (unsigned i = 0; i < ctx->parallel_copies_count; i++) { struct ra_parallel_copy *entry = &ctx->parallel_copies[i]; struct ir3_register *reg = - ir3_reg_create(pcopy, ra_interval_get_num(entry->interval), + ir3_reg_create(pcopy, INVALID_REG, entry->interval->interval.reg->flags & ~IR3_REG_SSA); reg->size = entry->interval->interval.reg->size; reg->wrmask = entry->interval->interval.reg->wrmask; + assign_reg(pcopy, reg, ra_interval_get_num(entry->interval)); } for (unsigned i = 0; i < ctx->parallel_copies_count; i++) { struct ra_parallel_copy *entry = &ctx->parallel_copies[i]; struct ir3_register *reg = - ir3_reg_create(pcopy, - ra_physreg_to_num(entry->src, entry->interval->interval.reg->flags), + ir3_reg_create(pcopy, INVALID_REG, entry->interval->interval.reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA)); reg->size = entry->interval->interval.reg->size; reg->wrmask = entry->interval->interval.reg->wrmask; + 
assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags)); } list_del(&pcopy->node); @@ -1620,23 +1621,26 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src, 2 + old_pcopy_regs); for (unsigned i = 0; i < old_pcopy_regs / 2; i++) { + old_pcopy->regs[i]->instr = pcopy; pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i]; } struct ir3_register *dst_reg = - ir3_reg_create(pcopy, ra_physreg_to_num(dst, reg->flags), reg->flags); + ir3_reg_create(pcopy, INVALID_REG, + reg->flags & ~IR3_REG_SSA); dst_reg->wrmask = reg->wrmask; dst_reg->size = reg->size; + assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags)); for (unsigned i = old_pcopy_regs / 2; i < old_pcopy_regs; i++) { pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i]; } struct ir3_register *src_reg = - ir3_reg_create(pcopy, ra_physreg_to_num(src, reg->flags), - reg->flags & ~IR3_REG_DEST); + ir3_reg_create(pcopy, INVALID_REG, reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA)); src_reg->wrmask = reg->wrmask; src_reg->size = reg->size; + assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags)); if (old_pcopy) list_del(&old_pcopy->node); |