summary refs log tree commit diff
diff options
context:
space:
mode:
author Rob Clark <robdclark@chromium.org> 2019-04-29 13:12:31 -0700
committer Rob Clark <robdclark@chromium.org> 2019-04-30 10:39:24 -0700
commit ec6c2297634eba77248a929048cf4201887a5f0a (patch)
tree e433954d948445d306c58a90b4af967fc69c4ab9
parent ce57f4f7c4d672a88527d0d346e27b902cfc3c6a (diff)
freedreno/ir3: fixes for half reg in/out
Needs to update max_half_reg, or be remapped to full reg and update max_reg accordingly, depending on generation.

Signed-off-by: Rob Clark <robdclark@chromium.org>
-rw-r--r-- src/freedreno/ir3/ir3.c 14
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 8
-rw-r--r-- src/freedreno/ir3/ir3_shader.c 28
-rw-r--r-- src/freedreno/ir3/ir3_shader.h 2
4 files changed, 39 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 55e03d86af1..97f4ae96cd9 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -104,28 +104,28 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
if (reg->flags & IR3_REG_RELATIV) {
components = reg->size;
val.idummy10 = reg->array.offset;
- max = (reg->array.offset + repeat + components - 1) >> 2;
+ max = (reg->array.offset + repeat + components - 1);
} else {
components = util_last_bit(reg->wrmask);
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
- max = (reg->num + repeat + components - 1) >> 2;
+ max = (reg->num + repeat + components - 1);
}
if (reg->flags & IR3_REG_CONST) {
- info->max_const = MAX2(info->max_const, max);
+ info->max_const = MAX2(info->max_const, max >> 2);
} else if (val.num == 63) {
/* ignore writes to dummy register r63.x */
- } else if (max < 48) {
+ } else if (max < regid(48, 0)) {
if (reg->flags & IR3_REG_HALF) {
if (info->gpu_id >= 600) {
/* starting w/ a6xx, half regs conflict with full regs: */
- info->max_reg = MAX2(info->max_reg, (max+1)/2);
+ info->max_reg = MAX2(info->max_reg, max >> 3);
} else {
- info->max_half_reg = MAX2(info->max_half_reg, max);
+ info->max_half_reg = MAX2(info->max_half_reg, max >> 2);
}
} else {
- info->max_reg = MAX2(info->max_reg, max);
+ info->max_reg = MAX2(info->max_reg, max >> 2);
}
}
}
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 4e139dc136e..3c813c73ae0 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2954,6 +2954,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_instruction *instr = ir->outputs[(i*4) + j];
if (instr) {
so->outputs[i].regid = instr->regs[0]->num;
+ so->outputs[i].half = !!(instr->regs[0]->flags & IR3_REG_HALF);
break;
}
}
@@ -2962,14 +2963,21 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
/* Note that some or all channels of an input may be unused: */
for (i = 0; i < so->inputs_count; i++) {
unsigned j, reg = regid(63,0);
+ bool half = false;
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
reg = in->regs[0]->num - j;
+ if (half) {
+ compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+ } else {
+ half = !!(in->regs[0]->flags & IR3_REG_HALF);
+ }
}
}
so->inputs[i].regid = reg;
+ so->inputs[i].half = half;
}
if (ctx->astc_srgb)
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 3f8e8abdc08..46eba2a0c5e 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -63,7 +63,7 @@ delete_variant(struct ir3_shader_variant *v)
* the reg off.
*/
static void
-fixup_regfootprint(struct ir3_shader_variant *v)
+fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
{
unsigned i;
@@ -83,14 +83,30 @@ fixup_regfootprint(struct ir3_shader_variant *v)
if (v->inputs[i].compmask) {
unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
- int32_t regid = (v->inputs[i].regid + n) >> 2;
- v->info.max_reg = MAX2(v->info.max_reg, regid);
+ int32_t regid = v->inputs[i].regid + n;
+ if (v->inputs[i].half) {
+ if (gpu_id < 500) {
+ v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+ } else {
+ v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+ }
+ } else {
+ v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+ }
}
}
for (i = 0; i < v->outputs_count; i++) {
- int32_t regid = (v->outputs[i].regid + 3) >> 2;
- v->info.max_reg = MAX2(v->info.max_reg, regid);
+ int32_t regid = v->outputs[i].regid + 3;
+ if (v->outputs[i].half) {
+ if (gpu_id < 500) {
+ v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+ } else {
+ v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+ }
+ } else {
+ v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+ }
}
}
@@ -117,7 +133,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
*/
v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
- fixup_regfootprint(v);
+ fixup_regfootprint(v, gpu_id);
return bin;
}
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 7f09ee5312f..4e8ab085d7e 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -390,6 +390,7 @@ struct ir3_shader_variant {
struct {
uint8_t slot;
uint8_t regid;
+ bool half : 1;
} outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_smask, writes_psize;
@@ -413,6 +414,7 @@ struct ir3_shader_variant {
/* fragment shader specific: */
bool bary : 1; /* fetched varying (vs one loaded into reg) */
bool rasterflat : 1; /* special handling for emit->rasterflat */
+ bool half : 1;
enum glsl_interp_mode interpolate;
} inputs[16 + 2]; /* +POSITION +FACE */