summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Danylo Piliaiev <dpiliaiev@igalia.com> 2021-03-26 16:39:12 +0200
committer: Marge Bot <eric+marge@anholt.net> 2021-04-21 11:57:07 +0000
commit: 9402d5a6b5755035cd28f2a1754ef50fadca3007 (patch)
tree: 2522f3132aa3a7b9a929fb9ad3f9b0904a0bcc01 /src/gallium/drivers
parent: e7eed458697cf247b211172547fcbb08294b0bff (diff)
ir3: make it possible to specify branchstack up to 64
On a6xx/a5xx there is a dependency between the branchstack bitfield and the number of nested ifs, which can be seen with the blob:

IFs  BRANCHSTACK
0    0
1    1
2    2
3    2
4    3
5    3
6    4
...
59   30
60   31
61   31
62   32
63   32
64   32

Remove the open-coded branchstack for a5xx compute along the way.

Fixes tests:
dEQP-VK.spirv_assembly.instruction.compute.float16.opvectorshuffle.344
dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_vert
dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.444_geom
dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.244_tessc
dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_frag

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9859>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_compute.c 3
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_program.c 15
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_compute.c 2
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_program.c 19
4 files changed, 20 insertions, 19 deletions
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
index 668da2e320c..c08c3b67723 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
@@ -59,8 +59,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
- A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(
- 0x3) | // XXX need to figure this out somehow..
+ A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) |
0x6 /* XXX */);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 06b41bae097..072d432db6b 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -374,12 +374,13 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */
OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
- OUT_RING(ring,
- A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
- A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
- 0x6 | /* XXX seems to be always set? */
- A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
- COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+ OUT_RING(
+ ring,
+ A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+ A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+ 0x6 | /* XXX seems to be always set? */
+ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) |
+ COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
/* If we have streamout, link against the real FS in the binning program,
* rather than the dummy FS used for binning pass state, to ensure the
@@ -529,7 +530,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
- A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
+ A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[FS].v)) |
COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index 688902e13f0..2cfe891def8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -68,7 +68,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
+ A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index d1b4e313169..97a000be9b0 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -451,11 +451,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
- OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
- A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(
- vs->info.max_half_reg + 1) |
- COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack));
+ OUT_RING(
+ ring,
+ A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
+ A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) |
+ COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
+ A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(vs)));
fd6_emit_shader(ctx, ring, vs);
fd6_emit_immediates(ctx->screen, vs, ring);
@@ -578,7 +579,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
ring,
A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
- A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack));
+ A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(hs)));
fd6_emit_shader(ctx, ring, hs);
fd6_emit_immediates(ctx->screen, hs, ring);
@@ -590,7 +591,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack));
+ A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(ds)));
fd6_emit_shader(ctx, ring, ds);
fd6_emit_immediates(ctx->screen, ds, ring);
@@ -732,7 +733,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
+ A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(fs)) |
COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1);
@@ -821,7 +822,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
ring,
A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
- A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack));
+ A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(gs)));
fd6_emit_shader(ctx, ring, gs);
fd6_emit_immediates(ctx->screen, gs, ring);