From 9402d5a6b5755035cd28f2a1754ef50fadca3007 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Fri, 26 Mar 2021 16:39:12 +0200 Subject: ir3: make possible to specify branchstack up to 64 On a6xx/a5xx there is a dependency between the branchstack bitfield and the number of nested ifs, as can be seen with the blob (IFs -> BRANCHSTACK): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3, 5 -> 3, 6 -> 4, ..., 59 -> 30, 60 -> 31, 61 -> 31, 62 -> 32, 63 -> 32, 64 -> 32. Remove the open-coded branchstack for a5xx compute along the way. Fixes tests: dEQP-VK.spirv_assembly.instruction.compute.float16.opvectorshuffle.344 dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_vert dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.444_geom dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.244_tessc dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_frag Signed-off-by: Danylo Piliaiev Part-of: --- src/gallium/drivers/freedreno/a5xx/fd5_program.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/freedreno/a5xx/fd5_program.c') diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 06b41bae097..072d432db6b 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -374,12 +374,13 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1); - OUT_RING(ring, - A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | - A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | - 0x6 | /* XXX seems to be always set? 
*/ - A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | - COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + OUT_RING( + ring, + A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | + A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | + 0x6 | /* XXX seems to be always set? */ + A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) | + COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); /* If we have streamout, link against the real FS in the binning program, * rather than the dummy FS used for binning pass state, to ensure the @@ -529,7 +530,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | - A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | + A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[FS].v)) | COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); -- cgit v1.2.3