summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/freedreno/ci/deqp-freedreno-a630-fails.txt 5
-rw-r--r-- src/freedreno/computerator/a6xx.c 2
-rw-r--r-- src/freedreno/ir3/ir3_shader.h 18
-rw-r--r-- src/freedreno/vulkan/tu_clear_blit.c 4
-rw-r--r-- src/freedreno/vulkan/tu_pipeline.c 12
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_compute.c 3
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_program.c 15
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_compute.c 2
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_program.c 19
9 files changed, 48 insertions, 32 deletions
diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
index 4a1e2fe698a..4ac5c109d29 100644
--- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt
+++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
@@ -141,7 +141,6 @@ dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.7,Fail
dEQP-VK.renderpass.suballocation.subpass_dependencies.implicit_dependencies.render_passes_5,Fail
dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail
dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.7,Fail
-dEQP-VK.spirv_assembly.instruction.compute.float16.opvectorshuffle.344,Crash
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.denorm_clamp_denorm_preserve,Fail
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_dot,Fail
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_mmulm,Fail
@@ -151,10 +150,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.roundi
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.sign_denorm_preserve,Fail
dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail
dEQP-VK.spirv_assembly.instruction.compute.opquantize.infinities,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.244_tessc,Crash
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_frag,Crash
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_vert,Crash
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.444_geom,Crash
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.abs_denorm_preserve_frag,Fail
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.abs_denorm_preserve_vert,Fail
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denorm_nclamp_denorm_preserve_frag,Fail
diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c
index ffd7f74917b..e31745ac427 100644
--- a/src/freedreno/computerator/a6xx.c
+++ b/src/freedreno/computerator/a6xx.c
@@ -148,7 +148,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
+ A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
OUT_RING(ring, 0x41);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index a92917f4a36..0708b41ad7e 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -1004,4 +1004,22 @@ ir3_shader_nibo(const struct ir3_shader_variant *v)
return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images;
}
+static inline uint32_t
+ir3_shader_branchstack_hw(const struct ir3_shader_variant *v)
+{ /* Returns the value callers place in the BRANCHSTACK field of the SP_*_CTRL_REG0 registers for variant v. */
+ /* Dummy shader: a variant with no parent shader (v->shader == NULL) reports 0. */
+ if (!v->shader)
+ return 0;
+ /* For gpu_id below 500, v->branchstack is returned unchanged. */
+ if (v->shader->compiler->gpu_id < 500)
+ return v->branchstack;
+ /* Otherwise a non-zero v->branchstack becomes v->branchstack / 2 + 1, capped at compiler->branchstack_size / 2; zero stays zero. NOTE(review): the rationale for this encoding is not visible here -- confirm against hardware docs. */
+ if (v->branchstack > 0) {
+ uint32_t branchstack = v->branchstack / 2 + 1;
+ return MIN2(branchstack, v->shader->compiler->branchstack_size / 2);
+ } else {
+ return 0;
+ }
+}
+
#endif /* IR3_SHADER_H_ */
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index fb97f36d65a..d1593cbd236 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -380,7 +380,9 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
bool layered_clear, bool z_scale)
{
struct ir3_const_state dummy_const_state = {};
- struct ir3_shader dummy_shader = {};
+ struct ir3_shader dummy_shader = {
+ .compiler = cmd->device->compiler,
+ };
struct ir3_shader_variant vs = {
.type = MESA_SHADER_VERTEX,
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index e866f8d7e00..17f54ca3fa8 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -399,7 +399,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
.fullregfootprint = xs->info.max_reg + 1,
.halfregfootprint = xs->info.max_half_reg + 1,
- .branchstack = xs->branchstack,
+ .branchstack = ir3_shader_branchstack_hw(xs),
.mergedregs = xs->mergedregs,
));
break;
@@ -407,14 +407,14 @@ tu6_emit_xs_config(struct tu_cs *cs,
tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0(
.fullregfootprint = xs->info.max_reg + 1,
.halfregfootprint = xs->info.max_half_reg + 1,
- .branchstack = xs->branchstack,
+ .branchstack = ir3_shader_branchstack_hw(xs),
));
break;
case MESA_SHADER_TESS_EVAL:
tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0(
.fullregfootprint = xs->info.max_reg + 1,
.halfregfootprint = xs->info.max_half_reg + 1,
- .branchstack = xs->branchstack,
+ .branchstack = ir3_shader_branchstack_hw(xs),
.mergedregs = xs->mergedregs,
));
break;
@@ -422,14 +422,14 @@ tu6_emit_xs_config(struct tu_cs *cs,
tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0(
.fullregfootprint = xs->info.max_reg + 1,
.halfregfootprint = xs->info.max_half_reg + 1,
- .branchstack = xs->branchstack,
+ .branchstack = ir3_shader_branchstack_hw(xs),
));
break;
case MESA_SHADER_FRAGMENT:
tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
.fullregfootprint = xs->info.max_reg + 1,
.halfregfootprint = xs->info.max_half_reg + 1,
- .branchstack = xs->branchstack,
+ .branchstack = ir3_shader_branchstack_hw(xs),
.mergedregs = xs->mergedregs,
.threadsize = thrsz,
.pixlodenable = xs->need_pixlod,
@@ -443,7 +443,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0(
.fullregfootprint = xs->info.max_reg + 1,
.halfregfootprint = xs->info.max_half_reg + 1,
- .branchstack = xs->branchstack,
+ .branchstack = ir3_shader_branchstack_hw(xs),
.mergedregs = xs->mergedregs,
.threadsize = thrsz,
));
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
index 668da2e320c..c08c3b67723 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
@@ -59,8 +59,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
- A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(
- 0x3) | // XXX need to figure this out somehow..
+ A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) |
0x6 /* XXX */);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 06b41bae097..072d432db6b 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -374,12 +374,13 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */
OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
- OUT_RING(ring,
- A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
- A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
- 0x6 | /* XXX seems to be always set? */
- A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
- COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+ OUT_RING(
+ ring,
+ A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+ A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+ 0x6 | /* XXX seems to be always set? */
+ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) |
+ COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
/* If we have streamout, link against the real FS in the binning program,
* rather than the dummy FS used for binning pass state, to ensure the
@@ -529,7 +530,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
- A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
+ A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[FS].v)) |
COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index 688902e13f0..2cfe891def8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -68,7 +68,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
+ A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index d1b4e313169..97a000be9b0 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -451,11 +451,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
- OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
- A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(
- vs->info.max_half_reg + 1) |
- COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack));
+ OUT_RING(
+ ring,
+ A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
+ A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) |
+ COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
+ A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(vs)));
fd6_emit_shader(ctx, ring, vs);
fd6_emit_immediates(ctx->screen, vs, ring);
@@ -578,7 +579,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
ring,
A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
- A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack));
+ A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(hs)));
fd6_emit_shader(ctx, ring, hs);
fd6_emit_immediates(ctx->screen, hs, ring);
@@ -590,7 +591,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack));
+ A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(ds)));
fd6_emit_shader(ctx, ring, ds);
fd6_emit_immediates(ctx->screen, ds, ring);
@@ -732,7 +733,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
+ A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(fs)) |
COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1);
@@ -821,7 +822,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
ring,
A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
- A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack));
+ A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(gs)));
fd6_emit_shader(ctx, ring, gs);
fd6_emit_immediates(ctx->screen, gs, ring);