summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c')
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c 742
1 files changed, 522 insertions, 220 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index d53181e1f75..52c0216b64b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
@@ -29,125 +30,125 @@
#include <stdio.h>
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
-
+#include "radeon_list.h"
+#include "radeon_variable.h"
#define VERBOSE 0
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
-struct live_intervals {
- int Start;
- int End;
- struct live_intervals * Next;
-};
struct register_info {
- struct live_intervals Live;
+ struct live_intervals Live[4];
unsigned int Used:1;
unsigned int Allocated:1;
unsigned int File:3;
unsigned int Index:RC_REGISTER_INDEX_BITS;
-};
-
-struct hardware_register {
- struct live_intervals * Used;
+ unsigned int Writemask;
};
struct regalloc_state {
struct radeon_compiler * C;
- struct register_info Input[RC_REGISTER_MAX_INDEX];
- struct register_info Temporary[RC_REGISTER_MAX_INDEX];
-
- struct hardware_register * HwTemporary;
- unsigned int NumHwTemporaries;
- /**
- * If an instruction is inside of a loop, EndLoop will be the
- * IP of the ENDLOOP instruction, and BeginLoop will be the IP
- * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
- * will be -1.
- */
- int EndLoop;
- int BeginLoop;
+ struct register_info * Input;
+ unsigned int NumInputs;
+
+ struct register_info * Temporary;
+ unsigned int NumTemporaries;
+
+ unsigned int Simple;
+ unsigned int HasLoop;
+};
+
+enum rc_reg_class {
+ RC_REG_CLASS_SINGLE,
+ RC_REG_CLASS_DOUBLE,
+ RC_REG_CLASS_TRIPLE,
+ RC_REG_CLASS_ALPHA,
+ RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+ RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+ RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+ RC_REG_CLASS_X,
+ RC_REG_CLASS_Y,
+ RC_REG_CLASS_Z,
+ RC_REG_CLASS_XY,
+ RC_REG_CLASS_YZ,
+ RC_REG_CLASS_XZ,
+ RC_REG_CLASS_XW,
+ RC_REG_CLASS_YW,
+ RC_REG_CLASS_ZW,
+ RC_REG_CLASS_XYW,
+ RC_REG_CLASS_YZW,
+ RC_REG_CLASS_XZW,
+ RC_REG_CLASS_COUNT
+};
+
+struct rc_class {
+ enum rc_reg_class Class;
+
+ unsigned int WritemaskCount;
+
+ /** This is 1 if this class is being used by the register allocator
+ * and 0 otherwise */
+ unsigned int Used;
+
+ /** This is the ID number assigned to this class by ra. */
+ unsigned int Id;
+
+ /** List of writemasks that belong to this class */
+ unsigned int Writemasks[3];
+
+
};
static void print_live_intervals(struct live_intervals * src)
{
- if (!src) {
+ if (!src || !src->Used) {
DBG("(null)");
return;
}
- while(src) {
- DBG("(%i,%i)", src->Start, src->End);
- src = src->Next;
- }
+ DBG("(%i,%i)", src->Start, src->End);
}
-static void add_live_intervals(struct regalloc_state * s,
- struct live_intervals ** dst, struct live_intervals * src)
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
{
- struct live_intervals ** dst_backup = dst;
-
if (VERBOSE) {
- DBG("add_live_intervals: ");
- print_live_intervals(*dst);
+ DBG("overlap_live_intervals: ");
+ print_live_intervals(a);
DBG(" to ");
- print_live_intervals(src);
+ print_live_intervals(b);
DBG("\n");
}
- while(src) {
- if (*dst && (*dst)->End < src->Start) {
- dst = &(*dst)->Next;
- } else if (!*dst || (*dst)->Start > src->End) {
- struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
- li->Start = src->Start;
- li->End = src->End;
- li->Next = *dst;
- *dst = li;
- src = src->Next;
- } else {
- if (src->End > (*dst)->End)
- (*dst)->End = src->End;
- if (src->Start < (*dst)->Start)
- (*dst)->Start = src->Start;
- src = src->Next;
- }
- }
-
- if (VERBOSE) {
- DBG(" result: ");
- print_live_intervals(*dst_backup);
- DBG("\n");
- }
-}
-
-static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
-{
- if (VERBOSE) {
- DBG("overlap_live_intervals: ");
- print_live_intervals(dst);
- DBG(" to ");
- print_live_intervals(src);
- DBG("\n");
+ if (!a->Used || !b->Used) {
+ DBG(" unused interval\n");
+ return 0;
}
- while(src && dst) {
- if (dst->End <= src->Start) {
- dst = dst->Next;
- } else if (dst->End <= src->End) {
+ if (a->Start > b->Start) {
+ if (a->Start < b->End) {
DBG(" overlap\n");
return 1;
- } else if (dst->Start < src->End) {
+ }
+ } else if (b->Start > a->Start) {
+ if (b->Start < a->End) {
+ DBG(" overlap\n");
+ return 1;
+ }
+ } else { /* a->Start == b->Start */
+ if (a->Start != a->End && b->Start != b->End) {
DBG(" overlap\n");
return 1;
- } else {
- src = src->Next;
}
}
@@ -156,92 +157,26 @@ static int overlap_live_intervals(struct live_intervals * dst, struct live_inter
return 0;
}
-static int try_add_live_intervals(struct regalloc_state * s,
- struct live_intervals ** dst, struct live_intervals * src)
-{
- if (overlap_live_intervals(*dst, src))
- return 0;
-
- add_live_intervals(s, dst, src);
- return 1;
-}
-
-static void scan_callback(void * data, struct rc_instruction * inst,
+static void scan_read_callback(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
+ unsigned int i;
- if (file == RC_FILE_TEMPORARY)
- reg = &s->Temporary[index];
- else if (file == RC_FILE_INPUT)
- reg = &s->Input[index];
- else
+ if (file != RC_FILE_INPUT)
return;
- if (!reg->Used) {
- reg->Used = 1;
- if (file == RC_FILE_INPUT)
- reg->Live.Start = -1;
- else if (s->BeginLoop >= 0)
- reg->Live.Start = s->BeginLoop;
- else
- reg->Live.Start = inst->IP;
- reg->Live.End = inst->IP;
- } else if (s->EndLoop >= 0)
- reg->Live.End = s->EndLoop;
- else if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
-}
-
-static void compute_live_intervals(struct radeon_compiler *c,
- struct regalloc_state *s)
-{
- memset(s, 0, sizeof(*s));
- s->C = c;
- s->NumHwTemporaries = c->max_temp_regs;
- s->BeginLoop = -1;
- s->EndLoop = -1;
- s->HwTemporary =
- memory_pool_malloc(&c->Pool,
- s->NumHwTemporaries * sizeof(struct hardware_register));
- memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));
-
- rc_recompute_ips(s->C);
-
- for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
- inst != &s->C->Program.Instructions;
- inst = inst->Next) {
-
- /* For all instructions inside of a loop, the ENDLOOP
- * instruction is used as the end of the live interval and
- * the BGNLOOP instruction is used as the beginning. */
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
- int loops = 1;
- struct rc_instruction * tmp;
- s->BeginLoop = inst->IP;
- for(tmp = inst->Next;
- tmp != &s->C->Program.Instructions;
- tmp = tmp->Next) {
- if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
- loops++;
- } else if (tmp->U.I.Opcode
- == RC_OPCODE_ENDLOOP) {
- if(!--loops) {
- s->EndLoop = tmp->IP;
- break;
- }
- }
- }
- }
+ s->Input[index].Used = 1;
+ reg = &s->Input[index];
- if (inst->IP == s->EndLoop) {
- s->EndLoop = -1;
- s->BeginLoop = -1;
+ for (i = 0; i < 4; i++) {
+ if (!((mask >> i) & 0x1)) {
+ continue;
}
-
- rc_for_all_reads_mask(inst, scan_callback, s);
- rc_for_all_writes_mask(inst, scan_callback, s);
+ reg->Live[i].Used = 1;
+ reg->Live[i].Start = 0;
+ reg->Live[i].End = inst->IP;
}
}
@@ -251,7 +186,7 @@ static void remap_register(void * data, struct rc_instruction * inst,
struct regalloc_state * s = data;
const struct register_info * reg;
- if (*file == RC_FILE_TEMPORARY)
+ if (*file == RC_FILE_TEMPORARY && s->Simple)
reg = &s->Temporary[*index];
else if (*file == RC_FILE_INPUT)
reg = &s->Input[*index];
@@ -259,106 +194,473 @@ static void remap_register(void * data, struct rc_instruction * inst,
return;
if (reg->Allocated) {
- *file = reg->File;
*index = reg->Index;
}
}
-static void do_regalloc(struct regalloc_state * s)
+static void alloc_input_simple(void * data, unsigned int input,
+ unsigned int hwreg)
{
- /* Simple and stupid greedy register allocation */
- for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
- struct register_info * reg = &s->Temporary[index];
+ struct regalloc_state * s = data;
- if (!reg->Used)
- continue;
+ if (input >= s->NumInputs)
+ return;
+
+ s->Input[input].Allocated = 1;
+ s->Input[input].File = RC_FILE_TEMPORARY;
+ s->Input[input].Index = hwreg;
+}
- for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
- if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
- reg->Allocated = 1;
- reg->File = RC_FILE_TEMPORARY;
- reg->Index = hwreg;
- goto success;
+/* This function offsets the temporary register indices by the number
+ * of input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+ for (unsigned i = 0; i < s->NumTemporaries; i++) {
+ s->Temporary[i].Allocated = 1;
+ s->Temporary[i].File = RC_FILE_TEMPORARY;
+ s->Temporary[i].Index = i + s->NumInputs;
+ }
+}
+
+static unsigned int is_derivative(rc_opcode op)
+{
+ return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
+}
+
+static enum rc_reg_class variable_get_class(
+ struct rc_variable * variable,
+ struct rc_class * classes)
+{
+ unsigned int i;
+ unsigned int can_change_writemask= 1;
+ unsigned int writemask = rc_variable_writemask_sum(variable);
+ struct rc_list * readers = rc_variable_readers_union(variable);
+
+ if (!variable->C->is_r500) {
+ unsigned int mask_count = 0;
+ /* The assumption here is that if an instruction has type
+ * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+ * r300 and r400 can't swizzle the result of a TEX lookup. */
+ if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ writemask = RC_MASK_XYZW;
+ }
+ for (i = 0; i < 4; i++) {
+ if (GET_BIT(writemask, i)) {
+ mask_count++;
}
}
+ /* XXX We should do swizzle packing for r300 and r400 here.
+ * We need to figure out how not to create non-native
+ * swizzles. */
+ if (mask_count > 1) {
+ can_change_writemask = 0;
+ }
+ }
- rc_error(s->C, "Ran out of hardware temporaries\n");
- return;
-
- success:;
+ if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+ /* DDX/DDY seem to always fail when their writemasks are
+ * changed.*/
+ if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+ || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+ can_change_writemask = 0;
+ }
}
+ for ( ; readers; readers = readers->Next) {
+ struct rc_reader * r = readers->Item;
+ if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+ if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+ can_change_writemask = 0;
+ break;
+ }
+ /* DDX/DDY also fail when their swizzles are changed. */
+ if (is_derivative(r->Inst->U.P.RGB.Opcode)
+ || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+ can_change_writemask = 0;
+ break;
+ }
+ }
+ }
+ for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+ unsigned int j;
+ if (!can_change_writemask && classes[i].WritemaskCount > 1) {
+ continue;
+ }
+ for (j = 0; j < 3; j++) {
+ if (classes[i].Writemasks[j] == writemask) {
+ return classes[i].Class;
+ }
+ }
+ }
+ rc_error(variable->C, "Could not find class for index=%u mask=%u\n",
+ variable->Dst.Index, writemask);
+ return 0;
+}
- /* Rewrite all instructions based on the translation table we built */
- for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
- inst != &s->C->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, &remap_register, s);
+static unsigned int overlap_live_intervals_array(
+ struct live_intervals * a,
+ struct live_intervals * b)
+{
+ unsigned int a_chan, b_chan;
+ for (a_chan = 0; a_chan < 4; a_chan++) {
+ for (b_chan = 0; b_chan < 4; b_chan++) {
+ if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
+ return 1;
+ }
+ }
}
+ return 0;
}
-static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+static unsigned int reg_get_index(int reg)
{
- struct regalloc_state * s = data;
+ return reg / RC_MASK_XYZW;
+}
- if (!s->Input[input].Used)
- return;
+static unsigned int reg_get_writemask(int reg)
+{
+ return (reg % RC_MASK_XYZW) + 1;
+}
- add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+ assert(writemask);
+ if (writemask == 0) {
+ return 0;
+ }
+ return (index * RC_MASK_XYZW) + (writemask - 1);
+}
- s->Input[input].Allocated = 1;
- s->Input[input].File = RC_FILE_TEMPORARY;
- s->Input[input].Index = hwreg;
+#if VERBOSE
+static void print_reg(int reg)
+{
+ unsigned int index = reg_get_index(reg);
+ unsigned int mask = reg_get_writemask(reg);
+ fprintf(stderr, "Temp[%u].%c%c%c%c", index,
+ mask & RC_MASK_X ? 'x' : '_',
+ mask & RC_MASK_Y ? 'y' : '_',
+ mask & RC_MASK_Z ? 'z' : '_',
+ mask & RC_MASK_W ? 'w' : '_');
+}
+#endif
+static void add_register_conflicts(
+ struct ra_regs * regs,
+ unsigned int max_temp_regs)
+{
+ unsigned int index, a_mask, b_mask;
+ for (index = 0; index < max_temp_regs; index++) {
+ for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
+ for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
+ b_mask++) {
+ if (a_mask & b_mask) {
+ ra_add_reg_conflict(regs,
+ get_reg_id(index, a_mask),
+ get_reg_id(index, b_mask));
+ }
+ }
+ }
+ }
}
-void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+static void do_advanced_regalloc(struct regalloc_state * s)
{
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
- struct regalloc_state s;
+ struct rc_class rc_class_list [] = {
+ {RC_REG_CLASS_SINGLE, 3, 0, 0,
+ {RC_MASK_X,
+ RC_MASK_Y,
+ RC_MASK_Z}},
+ {RC_REG_CLASS_DOUBLE, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_Y,
+ RC_MASK_X | RC_MASK_Z,
+ RC_MASK_Y | RC_MASK_Z}},
+ {RC_REG_CLASS_TRIPLE, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_ALPHA, 1, 0, 0,
+ {RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_W,
+ RC_MASK_Y | RC_MASK_W,
+ RC_MASK_Z | RC_MASK_W}},
+ {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+ RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+ {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_X, 1, 0, 0,
+ {RC_MASK_X,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_Y, 1, 0, 0,
+ {RC_MASK_Y,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_Z, 1, 0, 0,
+ {RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XY, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YZ, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XZ, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YW, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_ZW, 1, 0, 0,
+ {RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XYW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YZW, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XZW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}}
+ };
+
+ unsigned int i, j, index, input_node, node_count, node_index;
+ unsigned int * node_classes;
+ unsigned int * input_classes;
+ struct rc_instruction * inst;
+ struct rc_list * var_ptr;
+ struct rc_list * variables;
+ struct ra_regs * regs;
+ struct ra_graph * graph;
+
+ /* Allocate the main ra data structure */
+ regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+ /* Get list of program variables */
+ variables = rc_get_variables(s->C);
+ node_count = rc_list_count(variables);
+ node_classes = memory_pool_malloc(&s->C->Pool,
+ node_count * sizeof(unsigned int));
+ input_classes = memory_pool_malloc(&s->C->Pool,
+ s->NumInputs * sizeof(unsigned int));
+
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next, node_index++) {
+ unsigned int class_index;
+ /* Compute the live intervals */
+ rc_variable_compute_live_intervals(var_ptr->Item);
+
+ class_index = variable_get_class(var_ptr->Item, rc_class_list);
+
+ /* If we haven't used this register class yet, mark it
+ * as used and allocate space for it. */
+ if (!rc_class_list[class_index].Used) {
+ rc_class_list[class_index].Used = 1;
+ rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+ }
- compute_live_intervals(cc, &s);
+ node_classes[node_index] = rc_class_list[class_index].Id;
+ }
- c->AllocateHwInputs(c, &alloc_input, &s);
- do_regalloc(&s);
-}
+ /* Assign registers to the classes */
+ for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+ struct rc_class class = rc_class_list[i];
+ if (!class.Used) {
+ continue;
+ }
-/* This functions offsets the temporary register indices by the number
- * of input registers, because input registers are actually temporaries and
- * should not occupy the same space.
- *
- * This pass is supposed to be used to maintain correct allocation of inputs
- * if the standard register allocation is disabled. */
-void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
-{
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
- struct regalloc_state s;
- int temp_reg_offset;
+ for (index = 0; index < s->C->max_temp_regs; index++) {
+ for (j = 0; j < class.WritemaskCount; j++) {
+ int reg_id = get_reg_id(index,
+ class.Writemasks[j]);
+ ra_class_add_reg(regs, class.Id, reg_id);
+ }
+ }
+ }
+
+ /* Add register conflicts */
+ add_register_conflicts(regs, s->C->max_temp_regs);
+
+ /* Calculate live intervals for input registers */
+ for (inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, scan_read_callback, s);
+ }
+
+ /* Create classes for input registers */
+ for (i = 0; i < s->NumInputs; i++) {
+ unsigned int chan, class_id, writemask = 0;
+ for (chan = 0; chan < 4; chan++) {
+ if (s->Input[i].Live[chan].Used) {
+ writemask |= (1 << chan);
+ }
+ }
+ s->Input[i].Writemask = writemask;
+ if (!writemask) {
+ continue;
+ }
+
+ class_id = ra_alloc_reg_class(regs);
+ input_classes[i] = class_id;
+ ra_class_add_reg(regs, class_id,
+ get_reg_id(s->Input[i].Index, writemask));
+ }
+
+ ra_set_finalize(regs);
+
+ graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
- compute_live_intervals(cc, &s);
+ /* Build the interference graph */
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next,node_index++) {
+ struct rc_list * a, * b;
+ unsigned int b_index;
- c->AllocateHwInputs(c, &alloc_input, &s);
+ ra_set_node_class(graph, node_index, node_classes[node_index]);
- temp_reg_offset = 0;
- for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
- temp_reg_offset = s.Input[i].Index + 1;
+ for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+ b; b = b->Next, b_index++) {
+ struct rc_variable * var_a = a->Item;
+ while (var_a) {
+ struct rc_variable * var_b = b->Item;
+ while (var_b) {
+ if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+ ra_add_node_interference(graph,
+ node_index, b_index);
+ }
+ var_b = var_b->Friend;
+ }
+ var_a = var_a->Friend;
+ }
+ }
}
- if (temp_reg_offset) {
- for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (s.Temporary[i].Used) {
- s.Temporary[i].Allocated = 1;
- s.Temporary[i].File = RC_FILE_TEMPORARY;
- s.Temporary[i].Index = i + temp_reg_offset;
+ /* Add input registers to the interference graph */
+ for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+ if (!s->Input[i].Writemask) {
+ continue;
+ }
+ ra_set_node_class(graph, node_count + input_node,
+ input_classes[i]);
+ for (var_ptr = variables, node_index = 0;
+ var_ptr; var_ptr = var_ptr->Next, node_index++) {
+ struct rc_variable * var = var_ptr->Item;
+ if (overlap_live_intervals_array(s->Input[i].Live,
+ var->Live)) {
+ ra_add_node_interference(graph, node_index,
+ node_count + input_node);
}
}
+ /* Manually allocate a register for this input */
+ ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+ s->Input[i].Index, s->Input[i].Writemask));
+ input_node++;
+ }
+
+ if (!ra_allocate_no_spills(graph)) {
+ rc_error(s->C, "Ran out of hardware temporaries\n");
+ return;
+ }
+
+ /* Rewrite the registers */
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next, node_index++) {
+ int reg = ra_get_node_reg(graph, node_index);
+ unsigned int writemask = reg_get_writemask(reg);
+ unsigned int index = reg_get_index(reg);
+ struct rc_variable * var = var_ptr->Item;
+
+ if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ writemask = rc_variable_writemask_sum(var);
+ }
+
+ if (var->Dst.File == RC_FILE_INPUT) {
+ continue;
+ }
+ rc_variable_change_dst(var, index, writemask);
+ }
+
+ ralloc_free(graph);
+ ralloc_free(regs);
+}
+
+/**
+ * @param user This parameter should be a pointer to an integer value. If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only). If
+ * user is non-zero, then the regular register allocator will be used
+ * (\sa do_advanced_regalloc).
+ */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+ struct r300_fragment_program_compiler *c =
+ (struct r300_fragment_program_compiler*)cc;
+ struct regalloc_state s;
+ int do_full_regalloc = (int)user;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = cc;
+ s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+ s.Input = memory_pool_malloc(&cc->Pool,
+ s.NumInputs * sizeof(struct register_info));
+ memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
+
+ s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+ s.Temporary = memory_pool_malloc(&cc->Pool,
+ s.NumTemporaries * sizeof(struct register_info));
+ memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
+
+ for(inst = cc->Program.Instructions.Next;
+ inst != &cc->Program.Instructions;
+ inst = inst->Next) {
- /* Rewrite all registers. */
- for (struct rc_instruction *inst = cc->Program.Instructions.Next;
- inst != &cc->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, &remap_register, &s);
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ s.HasLoop = 1;
+ break;
}
}
+
+ rc_recompute_ips(s.C);
+
+ c->AllocateHwInputs(c, &alloc_input_simple, &s);
+ if (!s.HasLoop && do_full_regalloc) {
+ do_advanced_regalloc(&s);
+ } else {
+ s.Simple = 1;
+ do_regalloc_inputs_only(&s);
+ }
+
+ /* Rewrite inputs and if we are doing the simple allocation, rewrite
+ * temporaries too. */
+ for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
+ inst != &s.C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_register, &s);
+ }
}