diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c | 742 |
1 files changed, 522 insertions, 220 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index d53181e1f75..52c0216b64b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard <tstellar@gmail.com> * * All Rights Reserved. * @@ -29,125 +30,125 @@ #include <stdio.h> +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" - +#include "radeon_list.h" +#include "radeon_variable.h" #define VERBOSE 0 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) -struct live_intervals { - int Start; - int End; - struct live_intervals * Next; -}; struct register_info { - struct live_intervals Live; + struct live_intervals Live[4]; unsigned int Used:1; unsigned int Allocated:1; unsigned int File:3; unsigned int Index:RC_REGISTER_INDEX_BITS; -}; - -struct hardware_register { - struct live_intervals * Used; + unsigned int Writemask; }; struct regalloc_state { struct radeon_compiler * C; - struct register_info Input[RC_REGISTER_MAX_INDEX]; - struct register_info Temporary[RC_REGISTER_MAX_INDEX]; - - struct hardware_register * HwTemporary; - unsigned int NumHwTemporaries; - /** - * If an instruction is inside of a loop, EndLoop will be the - * IP of the ENDLOOP instruction, and BeginLoop will be the IP - * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop - * will be -1. - */ - int EndLoop; - int BeginLoop; + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + unsigned int HasLoop; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. */ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + }; static void print_live_intervals(struct live_intervals * src) { - if (!src) { + if (!src || !src->Used) { DBG("(null)"); return; } - while(src) { - DBG("(%i,%i)", src->Start, src->End); - src = src->Next; - } + DBG("(%i,%i)", src->Start, src->End); } -static void add_live_intervals(struct regalloc_state * s, - struct live_intervals ** dst, struct live_intervals * src) +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) { - struct live_intervals ** dst_backup = dst; - if (VERBOSE) { - DBG("add_live_intervals: "); - print_live_intervals(*dst); + DBG("overlap_live_intervals: "); + print_live_intervals(a); DBG(" to "); - print_live_intervals(src); + print_live_intervals(b); DBG("\n"); } - while(src) { - if (*dst && (*dst)->End < src->Start) { - dst = &(*dst)->Next; - } else if (!*dst || (*dst)->Start > src->End) { - struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); - li->Start = src->Start; - li->End = src->End; - li->Next = *dst; - *dst = li; - src = src->Next; - } else { - if (src->End > (*dst)->End) - (*dst)->End = src->End; - if (src->Start < (*dst)->Start) - (*dst)->Start = src->Start; - src = src->Next; - } - } - - if (VERBOSE) { - DBG(" result: "); - print_live_intervals(*dst_backup); - DBG("\n"); - } -} - -static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) -{ - if (VERBOSE) { - DBG("overlap_live_intervals: "); - print_live_intervals(dst); - DBG(" to "); - print_live_intervals(src); - DBG("\n"); + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; } - while(src && dst) { - if (dst->End <= src->Start) { - dst = dst->Next; - } else if (dst->End <= src->End) { + if (a->Start > b->Start) { + if (a->Start < b->End) { DBG(" overlap\n"); return 1; - } else if (dst->Start < src->End) { + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { DBG(" overlap\n"); return 1; - } else { - src = src->Next; } } @@ -156,92 +157,26 @@ static int overlap_live_intervals(struct live_intervals * dst, struct live_inter return 0; } -static int try_add_live_intervals(struct regalloc_state * s, - struct live_intervals ** dst, struct live_intervals * src) -{ - if (overlap_live_intervals(*dst, src)) - return 0; - - add_live_intervals(s, dst, src); - return 1; -} - -static void scan_callback(void * data, struct rc_instruction * inst, +static void scan_read_callback(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; + unsigned int i; - if (file == RC_FILE_TEMPORARY) - reg = &s->Temporary[index]; - else if (file == RC_FILE_INPUT) - reg = &s->Input[index]; - else + if (file != RC_FILE_INPUT) return; - if (!reg->Used) { - reg->Used = 1; - if (file == RC_FILE_INPUT) - reg->Live.Start = -1; - else if (s->BeginLoop >= 0) - reg->Live.Start = s->BeginLoop; - else - reg->Live.Start = inst->IP; - reg->Live.End = inst->IP; - } else if (s->EndLoop >= 0) - reg->Live.End = s->EndLoop; - else if (inst->IP > reg->Live.End) - reg->Live.End = inst->IP; -} - -static void compute_live_intervals(struct radeon_compiler *c, - struct regalloc_state *s) -{ - memset(s, 0, sizeof(*s)); - s->C = c; - s->NumHwTemporaries = c->max_temp_regs; - s->BeginLoop = -1; - s->EndLoop = -1; - s->HwTemporary = - memory_pool_malloc(&c->Pool, - s->NumHwTemporaries * sizeof(struct hardware_register)); - memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register)); - - rc_recompute_ips(s->C); - - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - - /* For all instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval and - * the BGNLOOP instruction is used as the beginning. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { - int loops = 1; - struct rc_instruction * tmp; - s->BeginLoop = inst->IP; - for(tmp = inst->Next; - tmp != &s->C->Program.Instructions; - tmp = tmp->Next) { - if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) { - loops++; - } else if (tmp->U.I.Opcode - == RC_OPCODE_ENDLOOP) { - if(!--loops) { - s->EndLoop = tmp->IP; - break; - } - } - } - } + s->Input[index].Used = 1; + reg = &s->Input[index]; - if (inst->IP == s->EndLoop) { - s->EndLoop = -1; - s->BeginLoop = -1; + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; } - - rc_for_all_reads_mask(inst, scan_callback, s); - rc_for_all_writes_mask(inst, scan_callback, s); + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = inst->IP; } } @@ -251,7 +186,7 @@ static void remap_register(void * data, struct rc_instruction * inst, struct regalloc_state * s = data; const struct register_info * reg; - if (*file == RC_FILE_TEMPORARY) + if (*file == RC_FILE_TEMPORARY && s->Simple) reg = &s->Temporary[*index]; else if (*file == RC_FILE_INPUT) reg = &s->Input[*index]; @@ -259,106 +194,473 @@ static void remap_register(void * data, struct rc_instruction * inst, return; if (reg->Allocated) { - *file = reg->File; *index = reg->Index; } } -static void do_regalloc(struct regalloc_state * s) +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) { - /* Simple and stupid greedy register allocation */ - for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { - struct register_info * reg = &s->Temporary[index]; + struct regalloc_state * s = data; - if (!reg->Used) - continue; + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} - for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { - if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, ®->Live)) { - reg->Allocated = 1; - reg->File = RC_FILE_TEMPORARY; - reg->Index = hwreg; - goto success; +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. + * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. */ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) +{ + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + + if (!variable->C->is_r500) { + unsigned int mask_count = 0; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + for (i = 0; i < 4; i++) { + if (GET_BIT(writemask, i)) { + mask_count++; } } + /* XXX We should do swizzle packing for r300 and r400 here. + * We need to figure out how not to create non-native + * swizzles. */ + if (mask_count > 1) { + can_change_writemask = 0; + } + } - rc_error(s->C, "Ran out of hardware temporaries\n"); - return; - - success:; + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (!can_change_writemask && classes[i].WritemaskCount > 1) { + continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return classes[i].Class; + } + } + } + rc_error(variable->C, "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; +} - /* Rewrite all instructions based on the translation table we built */ - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, s); +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } } + return 0; } -static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +static unsigned int reg_get_index(int reg) { - struct regalloc_state * s = data; + return reg / RC_MASK_XYZW; +} - if (!s->Input[input].Used) - return; +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} - add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} - s->Input[input].Allocated = 1; - s->Input[input].File = RC_FILE_TEMPORARY; - s->Input[input].Index = hwreg; +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); +} +#endif +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } } -void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +static void do_advanced_regalloc(struct regalloc_state * s) { - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. */ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } - compute_live_intervals(cc, &s); + node_classes[node_index] = rc_class_list[class_index].Id; + } - c->AllocateHwInputs(c, &alloc_input, &s); - do_regalloc(&s); -} + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } -/* This functions offsets the temporary register indices by the number - * of input registers, because input registers are actually temporaries and - * should not occupy the same space. - * - * This pass is supposed to be used to maintain correct allocation of inputs - * if the standard register allocation is disabled. */ -void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user) -{ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; - int temp_reg_offset; + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, scan_read_callback, s); + } + + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } + + ra_set_finalize(regs); + + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); - compute_live_intervals(cc, &s); + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; - c->AllocateHwInputs(c, &alloc_input, &s); + ra_set_node_class(graph, node_index, node_classes[node_index]); - temp_reg_offset = 0; - for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index) - temp_reg_offset = s.Input[i].Index + 1; + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } } - if (temp_reg_offset) { - for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (s.Temporary[i].Used) { - s.Temporary[i].Allocated = 1; - s.Temporary[i].File = RC_FILE_TEMPORARY; - s.Temporary[i].Index = i + temp_reg_offset; + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); } } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } + + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_regalloc). + */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int do_full_regalloc = (int)user; + struct rc_instruction * inst; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + for(inst = cc->Program.Instructions.Next; + inst != &cc->Program.Instructions; + inst = inst->Next) { - /* Rewrite all registers. */ - for (struct rc_instruction *inst = cc->Program.Instructions.Next; - inst != &cc->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, &s); + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + s.HasLoop = 1; + break; } } + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (!s.HasLoop && do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } } |