summaryrefslogtreecommitdiff
path: root/src/freedreno
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2021-06-16 15:55:30 +0200
committerMarge Bot <eric+marge@anholt.net>2021-06-16 22:45:13 +0000
commite19f1124353bf0c76072d8687bb93b78fd2970cf (patch)
treee4e8c673a464426edaf36c6dca8df1ab5afeab39 /src/freedreno
parent2c21dab36e55eaa3f6b083bc5c3bc0573321fdb5 (diff)
ir3/ra: Fix array parallelcopy confusion
With array registers, there are two nums we care about:

1. The base num that the whole array starts at (->array.base).
2. The num that the instruction uses, plus possibly an indirect offset (->num or ->array.offset).

For parallel copies we always copy the whole array, so (2) is irrelevant here. For phis and parallel copies inserted for phis, we used assign_reg(), which assigned ->array.base, but we forgot about this when constructing our own parallel copies for live range splitting, just setting ->num instead. The parallel copy lowering was also inconsistent here, using ra_reg_get_num() (which looks at ->array.base for arrays) for sources but looking at ->num directly for destinations. This makes everything use ->array.base consistently.

While we're here, make sure to remove IR3_REG_SSA from liveout copies to make sure printing works correctly.

Fixes: 0ffcb19 ("ir3: Rewrite register allocation")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11422>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/ir3/ir3_lower_parallelcopy.c3
-rw-r--r--src/freedreno/ir3/ir3_ra.c16
2 files changed, 12 insertions, 7 deletions
diff --git a/src/freedreno/ir3/ir3_lower_parallelcopy.c b/src/freedreno/ir3/ir3_lower_parallelcopy.c
index 68f1fefffe3..0fa5e765210 100644
--- a/src/freedreno/ir3/ir3_lower_parallelcopy.c
+++ b/src/freedreno/ir3/ir3_lower_parallelcopy.c
@@ -466,9 +466,10 @@ ir3_lower_copies(struct ir3_shader_variant *v)
struct ir3_register *dst = instr->regs[i];
struct ir3_register *src = instr->regs[i + instr->regs_count / 2];
unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
+ unsigned dst_physreg = ra_reg_get_physreg(dst);
for (unsigned j = 0; j < reg_elems(dst); j++) {
array_insert(NULL, copies, (struct copy_entry) {
- .dst = ra_num_to_physreg(dst->num + j, flags),
+ .dst = dst_physreg + j * reg_elem_size(dst),
.src = get_copy_src(src, j * reg_elem_size(dst)),
.flags = flags,
});
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 34f828f2f4b..afbb8172089 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -1177,20 +1177,21 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
struct ir3_register *reg =
- ir3_reg_create(pcopy, ra_interval_get_num(entry->interval),
+ ir3_reg_create(pcopy, INVALID_REG,
entry->interval->interval.reg->flags & ~IR3_REG_SSA);
reg->size = entry->interval->interval.reg->size;
reg->wrmask = entry->interval->interval.reg->wrmask;
+ assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
}
for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
struct ir3_register *reg =
- ir3_reg_create(pcopy,
- ra_physreg_to_num(entry->src, entry->interval->interval.reg->flags),
+ ir3_reg_create(pcopy, INVALID_REG,
entry->interval->interval.reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA));
reg->size = entry->interval->interval.reg->size;
reg->wrmask = entry->interval->interval.reg->wrmask;
+ assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
}
list_del(&pcopy->node);
@@ -1620,23 +1621,26 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
2 + old_pcopy_regs);
for (unsigned i = 0; i < old_pcopy_regs / 2; i++) {
+ old_pcopy->regs[i]->instr = pcopy;
pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i];
}
struct ir3_register *dst_reg =
- ir3_reg_create(pcopy, ra_physreg_to_num(dst, reg->flags), reg->flags);
+ ir3_reg_create(pcopy, INVALID_REG,
+ reg->flags & ~IR3_REG_SSA);
dst_reg->wrmask = reg->wrmask;
dst_reg->size = reg->size;
+ assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
for (unsigned i = old_pcopy_regs / 2; i < old_pcopy_regs; i++) {
pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i];
}
struct ir3_register *src_reg =
- ir3_reg_create(pcopy, ra_physreg_to_num(src, reg->flags),
- reg->flags & ~IR3_REG_DEST);
+ ir3_reg_create(pcopy, INVALID_REG, reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA));
src_reg->wrmask = reg->wrmask;
src_reg->size = reg->size;
+ assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
if (old_pcopy)
list_del(&old_pcopy->node);