summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2011-08-16 15:28:53 -0700
committer Eric Anholt <eric@anholt.net> 2011-08-19 17:06:29 -0700
commit f4db75547f38f08665efac3daf1599fdc5594bb7 (patch)
tree c471ade95caca6592811fe24e7ea3cf2c20eaf59 /src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
parent 8174945d3346dc049ae56dcb4bf1eab39f5c88aa (diff)
i965/vs: Implement proper register allocation instead of 1:1 mapping.
Fixes vs-atan-* and several others. This is not the real solution we eventually want, which will pack floats, vec2s, and vec3s into vec4 registers, but this code should provide the framework for that.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 139
1 files changed, 138 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 70395533119..3f052ff64cf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -21,6 +21,11 @@
* IN THE SOFTWARE.
*/
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"
@@ -88,10 +93,142 @@ vec4_visitor::reg_allocate_trivial()
}
}
+/**
+ * Rebuild the VS register set with one register class per entry of
+ * class_sizes.
+ *
+ * A class of size N receives one allocator register for every run of N
+ * contiguous base registers that fits inside base_reg_count.  The results
+ * are stored in brw->vs.regs, brw->vs.classes and brw->vs.ra_reg_to_grf;
+ * whatever those fields pointed at before is released first.
+ *
+ * The conflict setup treats allocator register k (k < base_reg_count) as
+ * base register k, which only holds when class_sizes[0] is 1.
+ *
+ * \param brw             context that owns the resulting allocations
+ * \param class_sizes     size, in base registers, of each class
+ * \param class_count     number of entries in class_sizes
+ * \param base_reg_count  number of base registers available
+ */
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+                              int *class_sizes,
+                              int class_count,
+                              int base_reg_count)
+{
+   /* Each class contributes one register per legal starting position. */
+   int total_regs = 0;
+   for (int c = 0; c < class_count; c++)
+      total_regs += base_reg_count - (class_sizes[c] - 1);
+
+   /* Drop the previous tables and allocate fresh ones. */
+   ralloc_free(brw->vs.ra_reg_to_grf);
+   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, total_regs);
+   ralloc_free(brw->vs.regs);
+   brw->vs.regs = ra_alloc_reg_set(total_regs);
+   ralloc_free(brw->vs.classes);
+   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+   /* Hand out allocator register numbers class by class.  Every register
+    * is recorded with the base register it starts at and is made to
+    * conflict with each base register it covers (and, transitively, with
+    * whatever else overlaps those).
+    */
+   int next_reg = 0;
+   for (int c = 0; c < class_count; c++) {
+      const int width = class_sizes[c];
+      const int start_count = base_reg_count - (width - 1);
+
+      brw->vs.classes[c] = ra_alloc_reg_class(brw->vs.regs);
+
+      for (int first = 0; first < start_count; first++, next_reg++) {
+         ra_class_add_reg(brw->vs.regs, brw->vs.classes[c], next_reg);
+
+         brw->vs.ra_reg_to_grf[next_reg] = first;
+
+         for (int k = 0; k < width; k++) {
+            ra_add_transitive_reg_conflict(brw->vs.regs, first + k, next_reg);
+         }
+      }
+   }
+   assert(next_reg == total_regs);
+
+   ra_set_finalize(brw->vs.regs);
+}
+
+
void
vec4_visitor::reg_allocate()
{
- reg_allocate_trivial();
+ int hw_reg_mapping[virtual_grf_count];
+ int first_assigned_grf = this->first_non_payload_grf;
+ int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+ int class_sizes[base_reg_count];
+ int class_count = 0;
+
+ /* Using the trivial allocator can be useful in debugging undefined
+ * register access as a result of broken optimization passes.
+ */
+ if (0) {
+ reg_allocate_trivial();
+ return;
+ }
+
+ calculate_live_intervals();
+
+ /* Set up the register classes.
+ *
+ * The base registers store a vec4. However, we'll need larger
+ * storage for arrays, structures, and matrices, which will be sets
+ * of contiguous registers.
+ */
+ class_sizes[class_count++] = 1;
+
+ for (int r = 0; r < virtual_grf_count; r++) {
+ int i;
+
+ for (i = 0; i < class_count; i++) {
+ if (class_sizes[i] == this->virtual_grf_sizes[r])
+ break;
+ }
+ if (i == class_count) {
+ if (this->virtual_grf_sizes[r] >= base_reg_count) {
+ fail("Object too large to register allocate.\n");
+ }
+
+ class_sizes[class_count++] = this->virtual_grf_sizes[r];
+ }
+ }
+
+ brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+ struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+ virtual_grf_count);
+
+ for (int i = 0; i < virtual_grf_count; i++) {
+ for (int c = 0; c < class_count; c++) {
+ if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+ ra_set_node_class(g, i, brw->vs.classes[c]);
+ break;
+ }
+ }
+
+ for (int j = 0; j < i; j++) {
+ if (virtual_grf_interferes(i, j)) {
+ ra_add_node_interference(g, i, j);
+ }
+ }
+ }
+
+ if (!ra_allocate_no_spills(g)) {
+ ralloc_free(g);
+ fail("No register spilling support yet\n");
+ }
+
+ /* Get the chosen virtual registers for each node, and map virtual
+ * regs in the register classes back down to real hardware reg
+ * numbers.
+ */
+ prog_data->total_grf = first_assigned_grf;
+ for (int i = 0; i < virtual_grf_count; i++) {
+ int reg = ra_get_node_reg(g, i);
+
+ hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+ prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1);
+ }
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ assign(hw_reg_mapping, &inst->dst);
+ assign(hw_reg_mapping, &inst->src[0]);
+ assign(hw_reg_mapping, &inst->src[1]);
+ assign(hw_reg_mapping, &inst->src[2]);
+ }
+
+ ralloc_free(g);
}
} /* namespace brw */