diff options
author | Eric Anholt <eric@anholt.net> | 2011-08-16 15:28:53 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2011-08-19 17:06:29 -0700 |
commit | f4db75547f38f08665efac3daf1599fdc5594bb7 (patch) | |
tree | c471ade95caca6592811fe24e7ea3cf2c20eaf59 /src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | |
parent | 8174945d3346dc049ae56dcb4bf1eab39f5c88aa (diff) |
i965/vs: Implement proper register allocation instead of 1:1 mapping.
Fixes vs-atan-* and several others. This is not the real solution we
eventually want, which will pack floats, vec2s, and vec3s into vec4
registers, but this code should provide the framework for that.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 178 |
1 files changed, 177 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 70395533119..3f052ff64cf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -21,6 +21,11 @@
  * IN THE SOFTWARE.
  */
 
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
 #include "brw_vec4.h"
 #include "../glsl/ir_print_visitor.h"
 
@@ -88,10 +93,181 @@ vec4_visitor::reg_allocate_trivial()
    }
 }
 
+/**
+ * Builds the register set, register classes and conflict information used
+ * by vec4_visitor::reg_allocate().
+ *
+ * For every entry of \p class_sizes a class is created holding one allocator
+ * register per position at which that many contiguous base registers fit
+ * within \p base_reg_count.  Each such register is made to conflict with the
+ * base registers it spans.  The results are stored in brw->vs.regs,
+ * brw->vs.classes and brw->vs.ra_reg_to_grf; anything previously stored
+ * there is freed first.
+ *
+ * Conflicts are added against allocator registers j..j+size-1, which only
+ * name the base registers if the first class has size 1, as it does in
+ * reg_allocate().
+ */
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+                              int *class_sizes,
+                              int class_count,
+                              int base_reg_count)
+{
+   /* Compute the total number of registers across all classes. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->vs.ra_reg_to_grf);
+   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->vs.regs);
+   brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->vs.classes);
+   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+      for (int j = 0; j < class_reg_count; j++) {
+         ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+
+         brw->vs.ra_reg_to_grf[reg] = j;
+
+         for (int base_reg = j;
+              base_reg < j + class_sizes[i];
+              base_reg++) {
+            ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+         }
+
+         reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   ra_set_finalize(brw->vs.regs);
+}
+
+/**
+ * Maps every virtual GRF to a hardware GRF using the interference-graph
+ * allocator from program/register_allocate.h.
+ *
+ * One register class is set up per distinct virtual GRF size, pairs that
+ * virtual_grf_interferes() reports are marked as interfering, and the
+ * resulting assignment is applied to the dst and src[0..2] operands of
+ * every instruction.  prog_data->total_grf ends up one past the last
+ * hardware register covered by any virtual GRF.
+ *
+ * If an object is too large, or no allocation without spilling exists,
+ * fail() is called and the function returns without rewriting the
+ * instructions.
+ */
 void
 vec4_visitor::reg_allocate()
 {
-   reg_allocate_trivial();
+   int hw_reg_mapping[virtual_grf_count];
+   int first_assigned_grf = this->first_non_payload_grf;
+   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   /* Using the trivial allocator can be useful in debugging undefined
+    * register access as a result of broken optimization passes.
+    */
+   if (0) {
+      reg_allocate_trivial();
+      return;
+   }
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a vec4.  However, we'll need larger
+    * storage for arrays, structures, and matrices, which will be sets
+    * of contiguous registers.
+    */
+   class_sizes[class_count++] = 1;
+
+   for (int r = 0; r < virtual_grf_count; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+         if (class_sizes[i] == this->virtual_grf_sizes[r])
+            break;
+      }
+      if (i == class_count) {
+         if (this->virtual_grf_sizes[r] >= base_reg_count) {
+            fail("Object too large to register allocate.\n");
+            /* Stop here: the size was rejected, so no register class
+             * should be built for it.
+             */
+            return;
+         }
+
+         class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+                                                    virtual_grf_count);
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      for (int c = 0; c < class_count; c++) {
+         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+            ra_set_node_class(g, i, brw->vs.classes[c]);
+            break;
+         }
+      }
+
+      for (int j = 0; j < i; j++) {
+         if (virtual_grf_interferes(i, j)) {
+            ra_add_node_interference(g, i, j);
+         }
+      }
+   }
+
+   if (!ra_allocate_no_spills(g)) {
+      ralloc_free(g);
+      fail("No register spilling support yet\n");
+      /* g is gone; falling through would read it and free it again. */
+      return;
+   }
+
+   /* Get the chosen virtual registers for each node, and map virtual
+    * regs in the register classes back down to real hardware reg
+    * numbers.
+    */
+   prog_data->total_grf = first_assigned_grf;
+   for (int i = 0; i < virtual_grf_count; i++) {
+      int reg = ra_get_node_reg(g, i);
+
+      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+      /* A virtual GRF occupies virtual_grf_sizes[i] consecutive registers
+       * starting at hw_reg_mapping[i], so count up to the end of that run.
+       */
+      prog_data->total_grf = MAX2(prog_data->total_grf,
+                                  hw_reg_mapping[i] + this->virtual_grf_sizes[i]);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   ralloc_free(g);
 }
 
 } /* namespace brw */