summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2015-08-21 10:57:24 -0700
committerEric Anholt <eric@anholt.net>2015-08-21 13:29:26 -0700
commiteb2776504ae32feaf41a5bad9f09f154045e96a3 (patch)
tree42aaf9f1fbb58807ccba1913f42bff009129765f
parent89b1b33f44bc6ce71109ac8668529c30b6d6d910 (diff)
vc4: Actually allow math results to allocate into r4.
I switched us to tracking whether the results *could* go to r4, but then didn't make a separate register class for the class bits that included r4. Switch the "any" class to actually be "any", and name the "any but r4" class more appropriately. total instructions in shared programs: 96798 -> 94680 (-2.19%) instructions in affected programs: 62736 -> 60618 (-3.38%)
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h1
-rw-r--r--src/gallium/drivers/vc4/vc4_register_allocate.c7
2 files changed, 7 insertions, 1 deletions
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 654c46f3c0d..3a63af8f2b0 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -270,6 +270,7 @@ struct vc4_context {
struct ra_regs *regs;
unsigned int reg_class_any;
+ unsigned int reg_class_a_or_b_or_acc;
unsigned int reg_class_r4_or_a;
unsigned int reg_class_a;
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 2ea88500227..3ced50f3a44 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -116,6 +116,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs);
vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
@@ -130,10 +131,12 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
*/
if (vc4_regs[i].mux == QPU_MUX_R4) {
ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
continue;
}
ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i);
}
for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
@@ -304,9 +307,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
switch (class_bits[i]) {
case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
- case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
ra_set_node_class(g, node, vc4->reg_class_any);
break;
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+ ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
+ break;
case CLASS_BIT_A | CLASS_BIT_R4:
ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
break;