summary refs log tree commit diff
diff options
context:
space:
mode:
author Rob Clark <robclark@freedesktop.org> 2014-10-15 13:08:00 -0400
committer Emil Velikov <emil.l.velikov@gmail.com> 2014-10-29 17:44:39 +0000
commit 64373f072ca40bfa2e5ddd2461b9223c18bf49fd (patch)
tree 1175107d519922b4755efd7ae069af6c2934a42f
parent 9ea0efd1e2b9945cfb9b4e8076a3c39d18d7aa68 (diff)
freedreno/ir3: large const support
Signed-off-by: Rob Clark <robclark@freedesktop.org>
(cherry picked from commit 652b8fbbbb0132c634c90e4d1fdbca9497b7cd94)
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 13
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c 6
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler.c 23
5 files changed, 33 insertions, 13 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 5c1ad21fe47..500b9ba9bf2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -187,6 +187,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+ int constmode;
int i, j, k;
vp = fd3_emit_get_vp(emit);
@@ -242,6 +243,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
fsoff = 256 - fpbuffersz;
}
+ /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
+ constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
+
pos_regid = find_output_regid(vp,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
posz_regid = find_output_regid(fp,
@@ -257,6 +261,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
* flush some caches? I think we only need to set those
* bits if we have updated const or shader..
@@ -276,7 +281,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
- OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
+ OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
@@ -382,11 +387,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
- /* NOTE: I believe VS.CONSTLEN should be <= FS.CONSTOBJOFFSET*/
- debug_assert(vp->constlen <= 128);
-
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+ OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
+ MAX2(128, vp->constlen)) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 9569fdf85ef..7abc3fcb32d 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -350,7 +350,11 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_TEMPS:
return 64; /* Max native temporaries. */
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
- return ((screen->gpu_id >= 300) ? 1024 : 64) * sizeof(float[4]);
+ /* NOTE: seems to be limit for a3xx is actually 512 but
+ * split between VS and FS. Use lower limit of 256 to
+ * avoid getting into impossible situations:
+ */
+ return ((screen->gpu_id >= 300) ? 256 : 64) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return 1;
case PIPE_SHADER_CAP_MAX_PREDS:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index ea2a9251b28..9f1c2436450 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -104,7 +104,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
val.iim_val = reg->iim_val;
} else {
int8_t components = util_last_bit(reg->wrmask);
- int8_t max = (reg->num + repeat + components - 1) >> 2;
+ int16_t max = (reg->num + repeat + components - 1) >> 2;
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index b92a57a43f8..d2d3dcaadb9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -47,7 +47,7 @@ struct ir3_info {
*/
int8_t max_reg; /* highest GPR # used by shader */
int8_t max_half_reg;
- int8_t max_const;
+ int16_t max_const;
};
struct ir3_register {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 80676830dd7..a714f665f8e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -600,11 +600,6 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
struct ir3_register *reg;
struct ir3_instruction *orig = NULL;
- /* TODO we need to use a mov to temp for const >= 64.. or maybe
- * we could use relative addressing..
- */
- compile_assert(ctx, src->Index < 64);
-
switch (src->File) {
case TGSI_FILE_IMMEDIATE:
/* TODO if possible, use actual immediate instead of const.. but
@@ -632,6 +627,24 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
break;
}
+ /* We seem to have 8 bits (6.2) for dst register always, so I think
+ * it is safe to assume GPR cannot be >=64
+ *
+ * cat3 instructions only have 8 bits for src2, but cannot take a
+ * const for src2
+ *
+ * cat5 and cat6 in some cases only has 8 bits, but cannot take a
+ * const for any src.
+ *
+ * Other than that we seem to have 12 bits to encode const src,
+ * except for cat1 which may only have 11 bits (but that seems like
+ * a bug)
+ */
+ if (flags & IR3_REG_CONST)
+ compile_assert(ctx, src->Index < (1 << 9));
+ else
+ compile_assert(ctx, src->Index < (1 << 6));
+
if (src->Absolute)
flags |= IR3_REG_ABS;
if (src->Negate)