Diffstat (limited to 'src/freedreno/ir3/ir3_ra.c')
-rw-r--r--  src/freedreno/ir3/ir3_ra.c | 775
1 file changed, 607 insertions(+), 168 deletions(-)
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index f03ceef4b24..2debdeab167 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -193,6 +193,8 @@ void
ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval)
{
+ assert(interval->inserted);
+
if (interval->parent) {
rb_tree_remove(&interval->parent->children, &interval->node);
} else {
@@ -538,8 +540,6 @@ ra_file_init(struct ra_file *file)
BITSET_SET(file->available_to_evict, i);
}
- file->start = 0;
-
rb_tree_init(&file->reg_ctx.intervals);
rb_tree_init(&file->physreg_intervals);
@@ -686,6 +686,8 @@ ra_pop_interval(struct ra_ctx *ctx, struct ra_file *file,
struct ra_interval *interval)
{
assert(!interval->interval.parent);
+ /* shared live splitting is not allowed! */
+ assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
/* Check if we've already moved this reg before */
unsigned pcopy_index;
@@ -720,6 +722,10 @@ ra_push_interval(struct ra_ctx *ctx, struct ra_file *file,
interval->physreg_start = dst;
interval->physreg_end = dst + removed->size;
+ assert(interval->physreg_end <= file->size);
+ if (interval->interval.reg->flags & IR3_REG_HALF)
+ assert(interval->physreg_end <= RA_HALF_SIZE);
+
ir3_reg_interval_reinsert(&file->reg_ctx, &interval->interval);
}
@@ -732,16 +738,76 @@ ra_move_interval(struct ra_ctx *ctx, struct ra_file *file,
ra_push_interval(ctx, file, &temp, dst);
}
+static struct ra_file *
+ra_get_file(struct ra_ctx *ctx, struct ir3_register *reg)
+{
+ if (reg->flags & IR3_REG_SHARED)
+ return &ctx->shared;
+ else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
+ return &ctx->full;
+ else
+ return &ctx->half;
+}
+
+
+/* Returns true if the proposed spot for "dst" or a killed source overlaps a
+ * destination that's been allocated.
+ */
static bool
-get_reg_specified(struct ra_file *file, struct ir3_register *reg,
- physreg_t physreg, bool is_source)
+check_dst_overlap(struct ra_ctx *ctx, struct ra_file *file,
+ struct ir3_register *dst, physreg_t start,
+ physreg_t end)
+{
+ struct ir3_instruction *instr = dst->instr;
+
+ ra_foreach_dst (other_dst, instr) {
+ /* We assume only destinations before the current one have been allocated.
+ */
+ if (other_dst == dst)
+ break;
+
+ if (ra_get_file(ctx, other_dst) != file)
+ continue;
+
+ struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
+ assert(!other_interval->interval.parent);
+ physreg_t other_start = other_interval->physreg_start;
+ physreg_t other_end = other_interval->physreg_end;
+
+ if (other_end > start && end > other_start)
+ return true;
+ }
+
+ return false;
+}
+
+/* True if the destination is "early-clobber," meaning that it cannot be
+ * allocated over killed sources. Some destinations always require it, but it
+ * also is implicitly true for tied destinations whose source is live-through.
+ * If the source is killed, then we skip allocating a register for the
+ * destination altogether so we don't need to worry about that case here.
+ */
+static bool
+is_early_clobber(struct ir3_register *reg)
+{
+ return (reg->flags & IR3_REG_EARLY_CLOBBER) || reg->tied;
+}
+
+static bool
+get_reg_specified(struct ra_ctx *ctx, struct ra_file *file,
+ struct ir3_register *reg, physreg_t physreg, bool is_source)
{
for (unsigned i = 0; i < reg_size(reg); i++) {
- if (!BITSET_TEST(is_source ? file->available_to_evict : file->available,
+ if (!BITSET_TEST(is_early_clobber(reg) || is_source ?
+ file->available_to_evict : file->available,
physreg + i))
return false;
}
+ if (!is_source &&
+ check_dst_overlap(ctx, file, reg, physreg, physreg + reg_size(reg)))
+ return false;
+
return true;
}
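
The overlap test in check_dst_overlap() above treats physreg ranges as half-open intervals [start, end): two ranges collide exactly when each one begins before the other ends. A minimal standalone sketch of that predicate (hypothetical helper, not taken from ir3):

    #include <assert.h>
    #include <stdbool.h>

    /* [a_start, a_end) and [b_start, b_end) overlap iff each range
     * starts before the other one ends.
     */
    static bool
    ranges_overlap(unsigned a_start, unsigned a_end,
                   unsigned b_start, unsigned b_end)
    {
       return a_end > b_start && b_end > a_start;
    }

    int
    main(void)
    {
       assert(ranges_overlap(0, 4, 2, 6));  /* partial overlap */
       assert(!ranges_overlap(0, 4, 4, 8)); /* touching ranges do not overlap */
       return 0;
    }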
@@ -759,8 +825,13 @@ try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
memcpy(available_to_evict, file->available_to_evict,
sizeof(available_to_evict));
- for (unsigned i = 0; i < reg_size(reg); i++)
+ BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
+ memcpy(available, file->available, sizeof(available));
+
+ for (unsigned i = 0; i < reg_size(reg); i++) {
BITSET_CLEAR(available_to_evict, physreg + i);
+ BITSET_CLEAR(available, physreg + i);
+ }
unsigned eviction_count = 0;
/* Iterate over each range conflicting with physreg */
@@ -769,7 +840,7 @@ try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
conflicting != NULL &&
conflicting->physreg_start < physreg + reg_size(reg);
conflicting = next, next = ra_interval_next_or_null(next)) {
- if (!is_source && conflicting->is_killed)
+ if (!is_early_clobber(reg) && !is_source && conflicting->is_killed)
continue;
if (conflicting->frozen) {
@@ -777,10 +848,12 @@ try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
return false;
}
+ unsigned conflicting_file_size =
+ reg_file_size(file, conflicting->interval.reg);
unsigned avail_start, avail_end;
bool evicted = false;
BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict,
- reg_file_size(file, conflicting->interval.reg)) {
+ conflicting_file_size) {
unsigned size = avail_end - avail_start;
/* non-half registers must be aligned */
@@ -790,7 +863,11 @@ try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
size--;
}
- if (size >= conflicting->physreg_end - conflicting->physreg_start) {
+ unsigned conflicting_size =
+ conflicting->physreg_end - conflicting->physreg_start;
+ if (size >= conflicting_size &&
+ !check_dst_overlap(ctx, file, reg, avail_start, avail_start +
+ conflicting_size)) {
for (unsigned i = 0;
i < conflicting->physreg_end - conflicting->physreg_start; i++)
BITSET_CLEAR(available_to_evict, avail_start + i);
@@ -803,6 +880,76 @@ try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
}
}
+ if (evicted)
+ continue;
+
+ /* If we couldn't evict this range, but the register we're allocating is
+ * allowed to overlap with a killed range, then we may be able to swap it
+ * with a killed range to achieve the same effect.
+ */
+ if (is_early_clobber(reg) || is_source)
+ return false;
+
+ foreach_interval (killed, file) {
+ if (!killed->is_killed)
+ continue;
+
+ if (killed->physreg_end - killed->physreg_start !=
+ conflicting->physreg_end - conflicting->physreg_start)
+ continue;
+
+ if (killed->physreg_end > conflicting_file_size ||
+ conflicting->physreg_end > reg_file_size(file, killed->interval.reg))
+ continue;
+
+ /* We can't swap the killed range if it partially/fully overlaps the
+ * space we're trying to allocate or (in speculative mode) if it's
+ * already been swapped and will overlap when we actually evict.
+ */
+ bool killed_available = true;
+ for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
+ if (!BITSET_TEST(available, i)) {
+ killed_available = false;
+ break;
+ }
+ }
+
+ if (!killed_available)
+ continue;
+
+ if (check_dst_overlap(ctx, file, reg, killed->physreg_start,
+ killed->physreg_end))
+ continue;
+
+ /* Check for alignment if one is a full reg */
+ if ((!(killed->interval.reg->flags & IR3_REG_HALF) ||
+ !(conflicting->interval.reg->flags & IR3_REG_HALF)) &&
+ (killed->physreg_start % 2 != 0 ||
+ conflicting->physreg_start % 2 != 0))
+ continue;
+
+ for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
+ BITSET_CLEAR(available, i);
+ }
+ /* Because this will generate swaps instead of moves, multiply the
+ * cost by 2.
+ */
+ eviction_count += (killed->physreg_end - killed->physreg_start) * 2;
+ if (!speculative) {
+ physreg_t killed_start = killed->physreg_start,
+ conflicting_start = conflicting->physreg_start;
+ struct ra_removed_interval killed_removed =
+ ra_pop_interval(ctx, file, killed);
+ struct ra_removed_interval conflicting_removed =
+ ra_pop_interval(ctx, file, conflicting);
+ ra_push_interval(ctx, file, &killed_removed, conflicting_start);
+ ra_push_interval(ctx, file, &conflicting_removed, killed_start);
+ }
+
+ evicted = true;
+ break;
+ }
+
if (!evicted)
return false;
}
@@ -819,15 +966,16 @@ removed_interval_cmp(const void *_i1, const void *_i2)
/* We sort the registers as follows:
*
- * |--------------------------------------------------------------------|
- * | | | | |
- * | Half live-through | Half killed | Full killed | Full live-through |
- * | | | | |
- * |--------------------------------------------------------------------|
- * | |
- * | Destination |
- * | |
- * |-----------------|
+ * |------------------------------------------------------------------------------------------|
+ * | | | | | | |
+ * | Half | Half early-clobber | Half | Full | Full early-clobber | Full |
+ * | live-through | destination | killed | killed | destination | live-through |
+ * | | | | | | |
+ * |------------------------------------------------------------------------------------------|
+ * | |
+ * | Destination |
+ * | |
+ * |-----------------|
*
* Half-registers have to be first so that they stay in the low half of
* the register file. Then half and full killed must stay together so that
@@ -859,6 +1007,37 @@ removed_interval_cmp(const void *_i1, const void *_i2)
return 0;
}
+static int
+dsts_cmp(const void *_i1, const void *_i2)
+{
+ struct ir3_register *i1 = *(struct ir3_register *const *) _i1;
+ struct ir3_register *i2 = *(struct ir3_register *const *) _i2;
+
+ /* Treat tied destinations as-if they are live-through sources, and normal
+ * destinations as killed sources.
+ */
+ unsigned i1_align = reg_elem_size(i1);
+ unsigned i2_align = reg_elem_size(i2);
+ if (i1_align > i2_align)
+ return 1;
+ if (i1_align < i2_align)
+ return -1;
+
+ if (i1_align == 1) {
+ if (!is_early_clobber(i2))
+ return -1;
+ if (!is_early_clobber(i1))
+ return 1;
+ } else {
+ if (!is_early_clobber(i2))
+ return 1;
+ if (!is_early_clobber(i1))
+ return -1;
+ }
+
+ return 0;
+}
+
/* "Compress" all the live intervals so that there is enough space for the
* destination register. As there can be gaps when a more-aligned interval
* follows a less-aligned interval, this also sorts them to remove such
@@ -869,29 +1048,100 @@ removed_interval_cmp(const void *_i1, const void *_i2)
* Return the physreg to use.
*/
static physreg_t
-compress_regs_left(struct ra_ctx *ctx, struct ra_file *file, unsigned size,
- unsigned align, bool is_source)
+compress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
+ struct ir3_register *reg)
{
+ unsigned reg_align = reg_elem_size(reg);
DECLARE_ARRAY(struct ra_removed_interval, intervals);
intervals_count = intervals_sz = 0;
intervals = NULL;
+ DECLARE_ARRAY(struct ir3_register *, dsts);
+ dsts_count = dsts_sz = 0;
+ dsts = NULL;
+ array_insert(ctx, dsts, reg);
+ bool dst_inserted[reg->instr->dsts_count];
+
+ unsigned dst_size = reg->tied ? 0 : reg_size(reg);
+ unsigned ec_dst_size = is_early_clobber(reg) ? reg_size(reg) : 0;
+ unsigned half_dst_size = 0, ec_half_dst_size = 0;
+ if (reg_align == 1) {
+ half_dst_size = dst_size;
+ ec_half_dst_size = ec_dst_size;
+ }
+
unsigned removed_size = 0, removed_half_size = 0;
+ unsigned removed_killed_size = 0, removed_killed_half_size = 0;
unsigned file_size =
- align == 1 ? MIN2(file->size, RA_HALF_SIZE) : file->size;
+ reg_align == 1 ? MIN2(file->size, RA_HALF_SIZE) : file->size;
physreg_t start_reg = 0;
foreach_interval_rev_safe (interval, file) {
+ /* We'll check if we can compact the intervals starting here. */
+ physreg_t candidate_start = interval->physreg_end;
+
+ /* Check if there are any other destinations we need to compact. */
+ ra_foreach_dst_n (other_dst, n, reg->instr) {
+ if (other_dst == reg)
+ break;
+ if (ra_get_file(ctx, other_dst) != file)
+ continue;
+ if (dst_inserted[n])
+ continue;
+
+ struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
+ /* if the destination partially overlaps this interval, we need to
+ * extend candidate_start to the end.
+ */
+ if (other_interval->physreg_start < candidate_start) {
+ candidate_start = MAX2(candidate_start,
+ other_interval->physreg_end);
+ continue;
+ }
+
+ dst_inserted[n] = true;
+
+ /* dst intervals with a tied killed source are considered attached to
+ * that source. Don't actually insert them. This means we have to
+ * update them below if their tied source moves.
+ */
+ if (other_dst->tied) {
+ struct ra_interval *tied_interval =
+ &ctx->intervals[other_dst->tied->def->name];
+ if (tied_interval->is_killed)
+ continue;
+ }
+
+ d("popping destination %u physreg %u\n",
+ other_interval->interval.reg->name,
+ other_interval->physreg_start);
+
+ array_insert(ctx, dsts, other_dst);
+ unsigned interval_size = reg_size(other_dst);
+ if (is_early_clobber(other_dst)) {
+ ec_dst_size += interval_size;
+ if (other_interval->interval.reg->flags & IR3_REG_HALF)
+ ec_half_dst_size += interval_size;
+ } else {
+ dst_size += interval_size;
+ if (other_interval->interval.reg->flags & IR3_REG_HALF)
+ half_dst_size += interval_size;
+ }
+ }
+
/* Check if we can sort the intervals *after* this one and have enough
- * space leftover to accomodate "size" units. Also check that we have
- * enough space leftover for half-registers, if we're inserting a
- * half-register (otherwise we only shift any half-registers down so they
- * should be safe).
+ * space leftover to accommodate all intervals, keeping in mind that killed
+ * sources overlap non-tied destinations. Also check that we have enough
+ * space leftover for half-registers, if we're inserting a half-register
+ * (otherwise we only shift any half-registers down so they should be
+ * safe).
*/
- if (interval->physreg_end + size + removed_size <= file->size &&
- (align != 1 ||
- interval->physreg_end + size + removed_half_size <= file_size)) {
- start_reg = interval->physreg_end;
+ if (candidate_start + removed_size + ec_dst_size +
+ MAX2(removed_killed_size, dst_size) <= file->size &&
+ (reg_align != 1 ||
+ candidate_start + removed_half_size + ec_half_dst_size +
+ MAX2(removed_killed_half_size, half_dst_size) <= file_size)) {
+ start_reg = candidate_start;
break;
}
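
The space check above reserves room for live-through intervals and early-clobber destinations separately, but lets killed sources and ordinary destinations share the tail of the file, so only the larger of those two totals is counted. A worked example with made-up sizes (physreg units, illustrative only):

    /* For a 16-unit file:
     *   candidate_start     = 4   end of the last interval left in place
     *   removed_size        = 4   live-through intervals popped so far
     *   ec_dst_size         = 2   early-clobber destinations
     *   removed_killed_size = 3   killed sources, packed at the end
     *   dst_size            = 2   ordinary destinations, may overlap killed
     *
     *   4 + 4 + 2 + MAX2(3, 2) = 13 <= 16, so compaction can stop here.
     */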
@@ -900,20 +1150,23 @@ compress_regs_left(struct ra_ctx *ctx, struct ra_file *file, unsigned size,
*/
assert(!interval->frozen);
- /* Killed sources don't count because they go at the end and can
+ /* Killed sources are different because they go at the end and can
* overlap the register we're trying to add.
*/
- if (!interval->is_killed && !is_source) {
- removed_size += interval->physreg_end - interval->physreg_start;
- if (interval->interval.reg->flags & IR3_REG_HALF) {
- removed_half_size += interval->physreg_end -
- interval->physreg_start;
- }
+ unsigned interval_size = interval->physreg_end - interval->physreg_start;
+ if (interval->is_killed) {
+ removed_killed_size += interval_size;
+ if (interval->interval.reg->flags & IR3_REG_HALF)
+ removed_killed_half_size += interval_size;
+ } else {
+ removed_size += interval_size;
+ if (interval->interval.reg->flags & IR3_REG_HALF)
+ removed_half_size += interval_size;
}
/* Now that we've done the accounting, pop this off */
- d("popping interval %u physreg %u\n", interval->interval.reg->name,
- interval->physreg_start);
+ d("popping interval %u physreg %u%s\n", interval->interval.reg->name,
+ interval->physreg_start, interval->is_killed ? ", killed" : "");
array_insert(ctx, intervals, ra_pop_interval(ctx, file, interval));
}
@@ -922,55 +1175,138 @@ compress_regs_left(struct ra_ctx *ctx, struct ra_file *file, unsigned size,
*/
qsort(intervals, intervals_count, sizeof(*intervals), removed_interval_cmp);
+ qsort(dsts, dsts_count, sizeof(*dsts), dsts_cmp);
- physreg_t physreg = start_reg;
+ physreg_t live_reg = start_reg;
+ physreg_t dst_reg = (physreg_t)~0;
physreg_t ret_reg = (physreg_t)~0;
- for (unsigned i = 0; i < intervals_count; i++) {
- if (ret_reg == (physreg_t)~0 &&
- ((intervals[i].interval->is_killed && !is_source) ||
- !(intervals[i].interval->interval.reg->flags & IR3_REG_HALF))) {
- ret_reg = ALIGN(physreg, align);
+ unsigned dst_index = 0;
+ unsigned live_index = 0;
+
+ /* We have two lists of intervals to process, live intervals and destination
+ * intervals. Process them in the order of the diagram in insert_cmp().
+ */
+ while (live_index < intervals_count || dst_index < dsts_count) {
+ bool process_dst;
+ if (live_index == intervals_count) {
+ process_dst = true;
+ } else if (dst_index == dsts_count) {
+ process_dst = false;
+ } else {
+ struct ir3_register *dst = dsts[dst_index];
+ struct ra_interval *live_interval = intervals[live_index].interval;
+
+ bool live_half = live_interval->interval.reg->flags & IR3_REG_HALF;
+ bool live_killed = live_interval->is_killed;
+ bool dst_half = dst->flags & IR3_REG_HALF;
+ bool dst_early_clobber = is_early_clobber(dst);
+
+ if (live_half && !live_killed) {
+ /* far-left of diagram. */
+ process_dst = false;
+ } else if (dst_half && dst_early_clobber) {
+ /* mid-left of diagram. */
+ process_dst = true;
+ } else if (!dst_early_clobber) {
+ /* bottom of diagram. */
+ process_dst = true;
+ } else if (live_killed) {
+ /* middle of diagram. */
+ process_dst = false;
+ } else if (!dst_half && dst_early_clobber) {
+ /* mid-right of diagram. */
+ process_dst = true;
+ } else {
+ /* far right of diagram. */
+ assert(!live_killed && !live_half);
+ process_dst = false;
+ }
}
- if (ret_reg != (physreg_t)~0 &&
- (is_source || !intervals[i].interval->is_killed)) {
- physreg = MAX2(physreg, ret_reg + size);
+ struct ir3_register *cur_reg =
+ process_dst ? dsts[dst_index] :
+ intervals[live_index].interval->interval.reg;
+
+ physreg_t physreg;
+ if (process_dst && !is_early_clobber(cur_reg)) {
+ if (dst_reg == (physreg_t)~0)
+ dst_reg = live_reg;
+ physreg = dst_reg;
+ } else {
+ physreg = live_reg;
+ struct ra_interval *live_interval = intervals[live_index].interval;
+ bool live_killed = live_interval->is_killed;
+ /* If this is live-through and we've processed the destinations, we
+ * need to make sure we take into account any overlapping destinations.
+ */
+ if (!live_killed && dst_reg != (physreg_t)~0)
+ physreg = MAX2(physreg, dst_reg);
}
- if (!(intervals[i].interval->interval.reg->flags & IR3_REG_HALF)) {
+ if (!(cur_reg->flags & IR3_REG_HALF))
physreg = ALIGN(physreg, 2);
- }
- if (physreg + intervals[i].size >
- reg_file_size(file, intervals[i].interval->interval.reg)) {
+ d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
+
+ unsigned interval_size = reg_size(cur_reg);
+ if (physreg + interval_size >
+ reg_file_size(file, cur_reg)) {
d("ran out of room for interval %u!\n",
- intervals[i].interval->interval.reg->name);
+ cur_reg->name);
unreachable("reg pressure calculation was wrong!");
return 0;
}
- d("pushing interval %u physreg %u\n",
- intervals[i].interval->interval.reg->name, physreg);
- ra_push_interval(ctx, file, &intervals[i], physreg);
+ if (process_dst) {
+ if (cur_reg == reg) {
+ ret_reg = physreg;
+ } else {
+ struct ra_interval *interval = &ctx->intervals[cur_reg->name];
+ interval->physreg_start = physreg;
+ interval->physreg_end = physreg + interval_size;
+ }
+ dst_index++;
+ } else {
+ ra_push_interval(ctx, file, &intervals[live_index], physreg);
+ live_index++;
+ }
+
+ physreg += interval_size;
- physreg += intervals[i].size;
+ if (process_dst && !is_early_clobber(cur_reg)) {
+ dst_reg = physreg;
+ } else {
+ live_reg = physreg;
+ }
}
- if (ret_reg == (physreg_t)~0)
- ret_reg = physreg;
+ /* If we shuffled around a tied source that is killed, we may have to update
+ * its corresponding destination since we didn't insert it above.
+ */
+ ra_foreach_dst (dst, reg->instr) {
+ if (dst == reg)
+ break;
- ret_reg = ALIGN(ret_reg, align);
- if (ret_reg + size > file_size) {
- d("ran out of room for the new interval!\n");
- unreachable("reg pressure calculation was wrong!");
- return 0;
+ struct ir3_register *tied = dst->tied;
+ if (!tied)
+ continue;
+
+ struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
+ if (!tied_interval->is_killed)
+ continue;
+
+ struct ra_interval *dst_interval = &ctx->intervals[dst->name];
+ unsigned dst_size = reg_size(dst);
+ dst_interval->physreg_start = ra_interval_get_physreg(tied_interval);
+ dst_interval->physreg_end = dst_interval->physreg_start + dst_size;
}
return ret_reg;
}
static void
-update_affinity(struct ir3_register *reg, physreg_t physreg)
+update_affinity(struct ra_file *file, struct ir3_register *reg,
+ physreg_t physreg)
{
if (!reg->merge_set || reg->merge_set->preferred_reg != (physreg_t)~0)
return;
@@ -978,6 +1314,9 @@ update_affinity(struct ir3_register *reg, physreg_t physreg)
if (physreg < reg->merge_set_offset)
return;
+ if ((physreg - reg->merge_set_offset + reg->merge_set->size) > file->size)
+ return;
+
reg->merge_set->preferred_reg = physreg - reg->merge_set_offset;
}
@@ -985,8 +1324,9 @@ update_affinity(struct ir3_register *reg, physreg_t physreg)
* a round-robin algorithm to reduce false dependencies.
*/
static physreg_t
-find_best_gap(struct ra_file *file, unsigned file_size, unsigned size,
- unsigned align, bool is_source)
+find_best_gap(struct ra_ctx *ctx, struct ra_file *file,
+ struct ir3_register *dst, unsigned file_size, unsigned size,
+ unsigned alignment)
{
/* This can happen if we create a very large merge set. Just bail out in that
* case.
@@ -995,9 +1335,9 @@ find_best_gap(struct ra_file *file, unsigned file_size, unsigned size,
return (physreg_t) ~0;
BITSET_WORD *available =
- is_source ? file->available_to_evict : file->available;
+ is_early_clobber(dst) ? file->available_to_evict : file->available;
- unsigned start = ALIGN(file->start, align) % (file_size - size + align);
+ unsigned start = ALIGN(file->start, alignment) % (file_size - size + alignment);
unsigned candidate = start;
do {
bool is_available = true;
@@ -1009,11 +1349,16 @@ find_best_gap(struct ra_file *file, unsigned file_size, unsigned size,
}
if (is_available) {
+ is_available =
+ !check_dst_overlap(ctx, file, dst, candidate, candidate + size);
+ }
+
+ if (is_available) {
file->start = (candidate + size) % file_size;
return candidate;
}
- candidate += align;
+ candidate += alignment;
if (candidate + size > file_size)
candidate = 0;
} while (candidate != start);
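
The modulus in the round-robin start above keeps the first candidate in range even when file->start points near the end of the file, and the wraparound resets the scan to physreg 0 once a candidate would run past the end. A small worked case with hypothetical numbers:

    /* file_size = 8, size = 2, alignment = 2:
     *   file->start = 7: ALIGN(7, 2) = 8, 8 % (8 - 2 + 2) = 0 -> scan restarts at 0
     *   file->start = 5: ALIGN(5, 2) = 6, 6 % 8 = 6           -> candidate 6, 6 + 2 <= 8
     * (In these examples file_size and size are multiples of the alignment,
     * so the first candidate never runs past the end.)
     */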
@@ -1021,17 +1366,6 @@ find_best_gap(struct ra_file *file, unsigned file_size, unsigned size,
return (physreg_t)~0;
}
-static struct ra_file *
-ra_get_file(struct ra_ctx *ctx, struct ir3_register *reg)
-{
- if (reg->flags & IR3_REG_SHARED)
- return &ctx->shared;
- else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
- return &ctx->full;
- else
- return &ctx->half;
-}
-
/* This is the main entrypoint for picking a register. Pick a free register
* for "reg", shuffling around sources if necessary. In the normal case where
* "is_source" is false, this register can overlap with killed sources
@@ -1042,16 +1376,15 @@ ra_get_file(struct ra_ctx *ctx, struct ir3_register *reg)
*/
static physreg_t
-get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg,
- bool is_source)
+get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg)
{
unsigned file_size = reg_file_size(file, reg);
if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) {
physreg_t preferred_reg =
reg->merge_set->preferred_reg + reg->merge_set_offset;
- if (preferred_reg < file_size &&
+ if (preferred_reg + reg_size(reg) <= file_size &&
preferred_reg % reg_elem_size(reg) == 0 &&
- get_reg_specified(file, reg, preferred_reg, is_source))
+ get_reg_specified(ctx, file, reg, preferred_reg, false))
return preferred_reg;
}
@@ -1062,8 +1395,9 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg,
unsigned size = reg_size(reg);
if (reg->merge_set && reg->merge_set->preferred_reg == (physreg_t)~0 &&
size < reg->merge_set->size) {
- physreg_t best_reg = find_best_gap(file, file_size, reg->merge_set->size,
- reg->merge_set->alignment, is_source);
+ physreg_t best_reg = find_best_gap(ctx, file, reg, file_size,
+ reg->merge_set->size,
+ reg->merge_set->alignment);
if (best_reg != (physreg_t)~0u) {
best_reg += reg->merge_set_offset;
return best_reg;
@@ -1085,14 +1419,14 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg,
physreg_t src_physreg = ra_interval_get_physreg(src_interval);
if (src_physreg % reg_elem_size(reg) == 0 &&
src_physreg + size <= file_size &&
- get_reg_specified(file, reg, src_physreg, is_source))
+ get_reg_specified(ctx, file, reg, src_physreg, false))
return src_physreg;
}
}
}
physreg_t best_reg =
- find_best_gap(file, file_size, size, reg_elem_size(reg), is_source);
+ find_best_gap(ctx, file, reg, file_size, size, reg_elem_size(reg));
if (best_reg != (physreg_t)~0u) {
return best_reg;
}
@@ -1104,7 +1438,7 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg,
unsigned best_eviction_count = ~0;
for (physreg_t i = 0; i + size <= file_size; i += reg_elem_size(reg)) {
unsigned eviction_count;
- if (try_evict_regs(ctx, file, reg, i, &eviction_count, is_source, true)) {
+ if (try_evict_regs(ctx, file, reg, i, &eviction_count, false, true)) {
if (eviction_count < best_eviction_count) {
best_eviction_count = eviction_count;
best_reg = i;
@@ -1114,14 +1448,13 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg,
if (best_eviction_count != ~0) {
ASSERTED bool result = try_evict_regs(
- ctx, file, reg, best_reg, &best_eviction_count, is_source, false);
+ ctx, file, reg, best_reg, &best_eviction_count, false, false);
assert(result);
return best_reg;
}
/* Use the dumb fallback only if try_evict_regs() fails. */
- return compress_regs_left(ctx, file, reg_size(reg), reg_elem_size(reg),
- is_source);
+ return compress_regs_left(ctx, file, reg);
}
static void
@@ -1170,14 +1503,47 @@ static void
allocate_dst_fixed(struct ra_ctx *ctx, struct ir3_register *dst,
physreg_t physreg)
{
+ struct ra_file *file = ra_get_file(ctx, dst);
struct ra_interval *interval = &ctx->intervals[dst->name];
- update_affinity(dst, physreg);
+ update_affinity(file, dst, physreg);
ra_interval_init(interval, dst);
interval->physreg_start = physreg;
interval->physreg_end = physreg + reg_size(dst);
}
+/* If a tied destination interferes with its source register, we have to insert
+ * a copy beforehand to copy the source to the destination. Because we are using
+ * the parallel_copies array and not creating a separate copy, this copy will
+ * happen in parallel with any shuffling around of the tied source, so we have
+ * to copy the source *as it exists before it is shuffled around*. We do this by
+ * inserting the copy early, before any other copies are inserted. We don't
+ * actually know the destination of the copy, but that's ok because the
+ * dst_interval will be filled out later.
+ */
+static void
+insert_tied_dst_copy(struct ra_ctx *ctx, struct ir3_register *dst)
+{
+ struct ir3_register *tied = dst->tied;
+
+ if (!tied)
+ return;
+
+ struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
+ struct ra_interval *dst_interval = &ctx->intervals[dst->name];
+
+ if (tied_interval->is_killed)
+ return;
+
+ physreg_t tied_physreg = ra_interval_get_physreg(tied_interval);
+
+ array_insert(ctx, ctx->parallel_copies,
+ (struct ra_parallel_copy){
+ .interval = dst_interval,
+ .src = tied_physreg,
+ });
+}
+
static void
allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst)
{
@@ -1186,33 +1552,17 @@ allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst)
struct ir3_register *tied = dst->tied;
if (tied) {
struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
- struct ra_interval *dst_interval = &ctx->intervals[dst->name];
- physreg_t tied_physreg = ra_interval_get_physreg(tied_interval);
if (tied_interval->is_killed) {
/* The easy case: the source is killed, so we can just reuse it
* for the destination.
*/
allocate_dst_fixed(ctx, dst, ra_interval_get_physreg(tied_interval));
- } else {
- /* The source is live-through, so we need to get a free register
- * (which is free for both the source and destination!), copy the
- * original source to it, then use that for the source and
- * destination.
- */
- physreg_t physreg = get_reg(ctx, file, dst, true);
- allocate_dst_fixed(ctx, dst, physreg);
- array_insert(ctx, ctx->parallel_copies,
- (struct ra_parallel_copy){
- .interval = dst_interval,
- .src = tied_physreg,
- });
+ return;
}
-
- return;
}
/* All the hard work is done by get_reg here. */
- physreg_t physreg = get_reg(ctx, file, dst, false);
+ physreg_t physreg = get_reg(ctx, file, dst);
allocate_dst_fixed(ctx, dst, physreg);
}
@@ -1256,7 +1606,8 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
struct ir3_register *reg =
ir3_dst_create(pcopy, INVALID_REG,
- entry->interval->interval.reg->flags & ~IR3_REG_SSA);
+ entry->interval->interval.reg->flags &
+ (IR3_REG_HALF | IR3_REG_ARRAY | IR3_REG_SHARED));
reg->size = entry->interval->interval.reg->size;
reg->wrmask = entry->interval->interval.reg->wrmask;
assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
@@ -1266,7 +1617,8 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
struct ir3_register *reg =
ir3_src_create(pcopy, INVALID_REG,
- entry->interval->interval.reg->flags & ~IR3_REG_SSA);
+ entry->interval->interval.reg->flags &
+ (IR3_REG_HALF | IR3_REG_ARRAY | IR3_REG_SHARED));
reg->size = entry->interval->interval.reg->size;
reg->wrmask = entry->interval->interval.reg->wrmask;
assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
@@ -1285,6 +1637,11 @@ handle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
mark_src_killed(ctx, src);
}
+ /* Pre-insert tied dst copies. */
+ ra_foreach_dst (dst, instr) {
+ insert_tied_dst_copy(ctx, dst);
+ }
+
/* Allocate the destination. */
ra_foreach_dst (dst, instr) {
allocate_dst(ctx, dst);
@@ -1312,6 +1669,9 @@ handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
struct ir3_register *dst = instr->dsts[0];
struct ir3_register *src = instr->srcs[0];
+ if (!(dst->flags & IR3_REG_SSA))
+ return;
+
if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) {
handle_normal_instr(ctx, instr);
return;
@@ -1330,6 +1690,9 @@ handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
static void
handle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr)
{
+ if (!(instr->dsts[0]->flags & IR3_REG_SSA))
+ return;
+
struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set;
unsigned dst_offset = instr->dsts[0]->merge_set_offset;
@@ -1362,8 +1725,14 @@ handle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr)
struct ra_interval *interval = &ctx->intervals[src->def->name];
- if (src->def->merge_set != dst_set || interval->is_killed)
+ /* We only need special handling if the source's interval overlaps with
+ * the destination's interval.
+ */
+ if (src->def->interval_start >= instr->dsts[0]->interval_end ||
+ instr->dsts[0]->interval_start >= src->def->interval_end ||
+ interval->is_killed)
continue;
+
while (interval->interval.parent != NULL) {
interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
}
@@ -1445,13 +1814,19 @@ handle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr)
static void
handle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
{
- if (instr->dsts[0]->num == INVALID_REG)
+ if (instr->dsts[0]->num == INVALID_REG ||
+ !(instr->dsts[0]->flags & IR3_REG_SSA))
return;
+ struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
physreg_t physreg = ra_reg_get_physreg(instr->dsts[0]);
allocate_dst_fixed(ctx, instr->dsts[0], physreg);
- insert_dst(ctx, instr->dsts[0]);
+
+ d("insert precolored dst %u physreg %u", instr->dsts[0]->name,
+ ra_interval_get_physreg(interval));
+
+ ra_file_insert(file, interval);
interval->frozen = true;
}
@@ -1471,6 +1846,9 @@ handle_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
static void
assign_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
{
+ if (!(instr->dsts[0]->flags & IR3_REG_SSA))
+ return;
+
struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
@@ -1517,7 +1895,7 @@ handle_precolored_source(struct ra_ctx *ctx, struct ir3_register *src)
* anything unless it overlaps with our precolored physreg, so we don't
* have to worry about evicting other precolored sources.
*/
- if (!get_reg_specified(file, src, physreg, true)) {
+ if (!get_reg_specified(ctx, file, src, physreg, true)) {
unsigned eviction_count;
if (!try_evict_regs(ctx, file, src, physreg, &eviction_count, true,
false)) {
@@ -1615,6 +1993,9 @@ handle_live_out(struct ra_ctx *ctx, struct ir3_register *def)
static void
handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
{
+ if (!(def->flags & IR3_REG_SSA))
+ return;
+
struct ra_file *file = ra_get_file(ctx, def);
struct ra_interval *interval = &ctx->intervals[def->name];
@@ -1630,7 +2011,7 @@ handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
physreg = ra_interval_get_physreg(parent) +
(def->interval_start - parent_ir3->reg->interval_start);
} else {
- physreg = get_reg(ctx, file, def, false);
+ physreg = get_reg(ctx, file, def);
}
allocate_dst_fixed(ctx, def, physreg);
@@ -1641,6 +2022,9 @@ handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
static void
assign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi)
{
+ if (!(phi->dsts[0]->flags & IR3_REG_SSA))
+ return;
+
struct ra_file *file = ra_get_file(ctx, phi->dsts[0]);
struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name];
assert(!interval->interval.parent);
@@ -1686,12 +2070,10 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
struct ir3_register *reg)
{
struct ir3_instruction *old_pcopy = NULL;
- if (!list_is_empty(&block->instr_list)) {
- struct ir3_instruction *last =
- LIST_ENTRY(struct ir3_instruction, block->instr_list.prev, node);
- if (last->opc == OPC_META_PARALLEL_COPY)
- old_pcopy = last;
- }
+ struct ir3_instruction *last = ir3_block_get_last_non_terminator(block);
+
+ if (last && last->opc == OPC_META_PARALLEL_COPY)
+ old_pcopy = last;
unsigned old_pcopy_srcs = old_pcopy ? old_pcopy->srcs_count : 0;
struct ir3_instruction *pcopy = ir3_instr_create(
@@ -1702,8 +2084,9 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i];
}
- struct ir3_register *dst_reg =
- ir3_dst_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA);
+ unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
+
+ struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags);
dst_reg->wrmask = reg->wrmask;
dst_reg->size = reg->size;
assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
@@ -1712,8 +2095,7 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i];
}
- struct ir3_register *src_reg =
- ir3_src_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA);
+ struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags);
src_reg->wrmask = reg->wrmask;
src_reg->size = reg->size;
assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
@@ -1727,15 +2109,8 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
{
physreg_t physreg = ra_interval_get_physreg(interval);
- bool shared = interval->interval.reg->flags & IR3_REG_SHARED;
- struct ir3_block **predecessors =
- shared ? ctx->block->physical_predecessors : ctx->block->predecessors;
- unsigned predecessors_count = shared
- ? ctx->block->physical_predecessors_count
- : ctx->block->predecessors_count;
-
- for (unsigned i = 0; i < predecessors_count; i++) {
- struct ir3_block *pred = predecessors[i];
+ for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
+ struct ir3_block *pred = ctx->block->predecessors[i];
struct ra_block_state *pred_state = &ctx->blocks[pred->index];
if (!pred_state->visited)
@@ -1743,28 +2118,8 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
if (pred_reg != physreg) {
+ assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
-
- /* This is a bit tricky, but when visiting the destination of a
- * physical-only edge, we have two predecessors (the if and the
- * header block) and both have multiple successors. We pick the
- * register for all live-ins from the normal edge, which should
- * guarantee that there's no need for shuffling things around in
- * the normal predecessor as long as there are no phi nodes, but
- * we still may need to insert fixup code in the physical
- * predecessor (i.e. the last block of the if) and that has
- * another successor (the block after the if) so we need to update
- * the renames state for when we process the other successor. This
- * crucially depends on the other successor getting processed
- * after this.
- *
- * For normal (non-physical) edges we disallow critical edges so
- * that hacks like this aren't necessary.
- */
- if (!pred_state->renames)
- pred_state->renames = _mesa_pointer_hash_table_create(ctx);
- _mesa_hash_table_insert(pred_state->renames, interval->interval.reg,
- (void *)(uintptr_t)physreg);
}
}
}
@@ -1977,13 +2332,13 @@ calc_target_full_pressure(struct ir3_shader_variant *v, unsigned pressure)
unsigned reg_independent_max_waves =
ir3_get_reg_independent_max_waves(v, double_threadsize);
unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
- v->shader->compiler, reg_count, double_threadsize);
+ v->compiler, reg_count, double_threadsize);
unsigned target_waves =
MIN2(reg_independent_max_waves, reg_dependent_max_waves);
while (target <= RA_FULL_SIZE / (2 * 4) &&
ir3_should_double_threadsize(v, target) == double_threadsize &&
- ir3_get_reg_dependent_max_waves(v->shader->compiler, target,
+ ir3_get_reg_dependent_max_waves(v->compiler, target,
double_threadsize) >= target_waves)
target++;
@@ -2136,17 +2491,68 @@ calc_min_limit_pressure(struct ir3_shader_variant *v,
ralloc_free(ctx);
}
+/*
+ * If barriers are used, it must be possible for all waves in the workgroup
+ * to execute concurrently. Thus we may have to reduce the register limit.
+ */
+static void
+calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v,
+ struct ir3_pressure *limit_pressure)
+{
+ const struct ir3_compiler *compiler = v->compiler;
+
+ unsigned threads_per_wg;
+ if (v->local_size_variable) {
+ /* We have to expect the worst case. */
+ threads_per_wg = compiler->max_variable_workgroup_size;
+ } else {
+ threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2];
+ }
+
+ /* The register file is grouped into reg_size_vec4 number of parts.
+ * Each part has enough registers to add a single vec4 register to
+ * each thread of a single-sized wave-pair. With double threadsize
+ * each wave-pair would consume two parts of the register file to get
+ * a single vec4 for a thread. The more active wave-pairs there are, the
+ * fewer parts each one can get.
+ */
+
+ bool double_threadsize = ir3_should_double_threadsize(v, 0);
+ unsigned waves_per_wg = DIV_ROUND_UP(
+ threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) *
+ compiler->wave_granularity);
+
+ uint32_t vec4_regs_per_thread =
+ compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1));
+ assert(vec4_regs_per_thread > 0);
+
+ uint32_t half_regs_per_thread = vec4_regs_per_thread * 4 * 2;
+
+ if (limit_pressure->full > half_regs_per_thread) {
+ if (v->mergedregs) {
+ limit_pressure->full = half_regs_per_thread;
+ } else {
+ /* TODO: Handle !mergedregs case, probably we would have to do this
+ * after the first register pressure pass.
+ */
+ }
+ }
+}
+
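
As a worked example of the calculation above, with purely illustrative parameters rather than any particular GPU's limits:

    /* threads_per_wg = 1024, threadsize_base = 64, wave_granularity = 2,
     * double threadsize, reg_size_vec4 = 96:
     *
     *   waves_per_wg         = DIV_ROUND_UP(1024, 64 * 2 * 2) = 4
     *   vec4_regs_per_thread = 96 / (4 * 2)                   = 12
     *   half_regs_per_thread = 12 * 4 * 2                     = 96
     *
     * So a mergedregs shader would have limit_pressure->full clamped to 96
     * whenever the existing limit is larger.
     */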
int
ir3_ra(struct ir3_shader_variant *v)
{
ir3_calc_dominance(v->ir);
+ /* Predicate RA needs dominance. */
+ ir3_ra_predicates(v);
+
ir3_create_parallel_copies(v->ir);
struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx);
ctx->merged_regs = v->mergedregs;
- ctx->compiler = v->shader->compiler;
+ ctx->compiler = v->compiler;
ctx->stage = v->type;
struct ir3_liveness *live = ir3_calc_liveness(ctx, v->ir);
@@ -2155,6 +2561,18 @@ ir3_ra(struct ir3_shader_variant *v)
ir3_merge_regs(live, v->ir);
+ bool has_shared_vectors = false;
+ foreach_block (block, &v->ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ ra_foreach_dst (dst, instr) {
+ if ((dst->flags & IR3_REG_SHARED) && reg_elems(dst) > 1) {
+ has_shared_vectors = true;
+ break;
+ }
+ }
+ }
+ }
+
struct ir3_pressure max_pressure;
ir3_calc_pressure(v, live, &max_pressure);
d("max pressure:");
@@ -2162,26 +2580,45 @@ ir3_ra(struct ir3_shader_variant *v)
d("\thalf: %u", max_pressure.half);
d("\tshared: %u", max_pressure.shared);
- /* TODO: calculate half/full limit correctly for CS with barrier */
struct ir3_pressure limit_pressure;
limit_pressure.full = RA_FULL_SIZE;
limit_pressure.half = RA_HALF_SIZE;
limit_pressure.shared = RA_SHARED_SIZE;
+ if (gl_shader_stage_is_compute(v->type) && v->has_barrier) {
+ calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
+ }
+
+ /* If the user forces a doubled threadsize, we may have to lower the limit
+ * because on some gens the register file is not big enough to hold a
+ * double-size wave with all 48 registers in use.
+ */
+ if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) {
+ limit_pressure.full =
+ MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
+ }
+
/* If requested, lower the limit so that spilling happens more often. */
if (ir3_shader_debug & IR3_DBG_SPILLALL)
calc_min_limit_pressure(v, live, &limit_pressure);
- if (max_pressure.shared > limit_pressure.shared) {
- /* TODO shared reg -> normal reg spilling */
- d("shared max pressure exceeded!");
- goto fail;
+ if (max_pressure.shared > limit_pressure.shared || has_shared_vectors) {
+ ir3_ra_shared(v, live);
+
+ /* Recalculate liveness and register pressure now that additional values
+ * have been added.
+ */
+ ralloc_free(live);
+ live = ir3_calc_liveness(ctx, v->ir);
+ ir3_calc_pressure(v, live, &max_pressure);
+
+ ir3_debug_print(v->ir, "AFTER: shared register allocation");
}
bool spilled = false;
if (max_pressure.full > limit_pressure.full ||
max_pressure.half > limit_pressure.half) {
- if (!v->shader->compiler->has_pvtmem) {
+ if (!v->compiler->has_pvtmem) {
d("max pressure exceeded!");
goto fail;
}
@@ -2206,10 +2643,12 @@ ir3_ra(struct ir3_shader_variant *v)
ctx->shared.size = RA_SHARED_SIZE;
+ ctx->full.start = ctx->half.start = ctx->shared.start = 0;
+
foreach_block (block, &v->ir->block_list)
handle_block(ctx, block);
- ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count);
+ ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count, false);
/* Strip array-ness and SSA-ness at the end, because various helpers still
* need to work even on definitions that have already been assigned. For