diff options
-rw-r--r-- | src/freedreno/ci/deqp-freedreno-a630-fails.txt | 5 | ||||
-rw-r--r-- | src/freedreno/computerator/a6xx.c | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_shader.h | 18 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_clear_blit.c | 4 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_pipeline.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_compute.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_program.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_program.c | 19 |
9 files changed, 48 insertions, 32 deletions
diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 4a1e2fe698a..4ac5c109d29 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -141,7 +141,6 @@ dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.7,Fail dEQP-VK.renderpass.suballocation.subpass_dependencies.implicit_dependencies.render_passes_5,Fail dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.7,Fail -dEQP-VK.spirv_assembly.instruction.compute.float16.opvectorshuffle.344,Crash dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.denorm_clamp_denorm_preserve,Fail dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_dot,Fail dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_mmulm,Fail @@ -151,10 +150,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.roundi dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.sign_denorm_preserve,Fail dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail dEQP-VK.spirv_assembly.instruction.compute.opquantize.infinities,Fail -dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.244_tessc,Crash -dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_frag,Crash -dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_vert,Crash -dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.444_geom,Crash dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.abs_denorm_preserve_frag,Fail dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.abs_denorm_preserve_vert,Fail dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denorm_nclamp_denorm_preserve_frag,Fail diff --git 
a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index ffd7f74917b..e31745ac427 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -148,7 +148,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack)); + A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, 0x41); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index a92917f4a36..0708b41ad7e 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1004,4 +1004,22 @@ ir3_shader_nibo(const struct ir3_shader_variant *v) return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images; } +/* Returns the value to program into the per-stage *_CTRL_REG0 BRANCHSTACK field for variant v, derived from v->branchstack. NOTE(review): the halving for gpu_id >= 500 presumably reflects a different unit for the register field on those GPUs -- confirm against hardware documentation. */ static inline uint32_t +ir3_shader_branchstack_hw(const struct ir3_shader_variant *v) +{ + /* Dummy shader: the variant has no ir3_shader attached, so report a branch stack of 0 */ + if (!v->shader) + return 0; + + if (v->shader->compiler->gpu_id < 500) /* gpu_id below 500: v->branchstack is passed through unchanged */ + return v->branchstack; + + if (v->branchstack > 0) { /* gpu_id >= 500 with a non-zero count: halve, add one, then cap */ + uint32_t branchstack = v->branchstack / 2 + 1; + return MIN2(branchstack, v->shader->compiler->branchstack_size / 2); /* never exceeds half of the compiler's branchstack_size */ + } else { /* a zero count stays zero; the +1 above is not applied */ + return 0; + } +} + #endif /* IR3_SHADER_H_ */ diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index fb97f36d65a..d1593cbd236 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -380,7 +380,9 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ bool layered_clear, bool z_scale) { struct ir3_const_state dummy_const_state = {}; - struct ir3_shader dummy_shader = {}; + struct ir3_shader dummy_shader = { + .compiler = cmd->device->compiler, + }; struct ir3_shader_variant vs = { .type = MESA_SHADER_VERTEX, diff --git 
a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index e866f8d7e00..17f54ca3fa8 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -399,7 +399,7 @@ tu6_emit_xs_config(struct tu_cs *cs, tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0( .fullregfootprint = xs->info.max_reg + 1, .halfregfootprint = xs->info.max_half_reg + 1, - .branchstack = xs->branchstack, + .branchstack = ir3_shader_branchstack_hw(xs), .mergedregs = xs->mergedregs, )); break; @@ -407,14 +407,14 @@ tu6_emit_xs_config(struct tu_cs *cs, tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0( .fullregfootprint = xs->info.max_reg + 1, .halfregfootprint = xs->info.max_half_reg + 1, - .branchstack = xs->branchstack, + .branchstack = ir3_shader_branchstack_hw(xs), )); break; case MESA_SHADER_TESS_EVAL: tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0( .fullregfootprint = xs->info.max_reg + 1, .halfregfootprint = xs->info.max_half_reg + 1, - .branchstack = xs->branchstack, + .branchstack = ir3_shader_branchstack_hw(xs), .mergedregs = xs->mergedregs, )); break; @@ -422,14 +422,14 @@ tu6_emit_xs_config(struct tu_cs *cs, tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0( .fullregfootprint = xs->info.max_reg + 1, .halfregfootprint = xs->info.max_half_reg + 1, - .branchstack = xs->branchstack, + .branchstack = ir3_shader_branchstack_hw(xs), )); break; case MESA_SHADER_FRAGMENT: tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0( .fullregfootprint = xs->info.max_reg + 1, .halfregfootprint = xs->info.max_half_reg + 1, - .branchstack = xs->branchstack, + .branchstack = ir3_shader_branchstack_hw(xs), .mergedregs = xs->mergedregs, .threadsize = thrsz, .pixlodenable = xs->need_pixlod, @@ -443,7 +443,7 @@ tu6_emit_xs_config(struct tu_cs *cs, tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0( .fullregfootprint = xs->info.max_reg + 1, .halfregfootprint = xs->info.max_half_reg + 1, - .branchstack = xs->branchstack, + .branchstack = ir3_shader_branchstack_hw(xs), .mergedregs = xs->mergedregs, .threadsize = thrsz, 
)); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c index 668da2e320c..c08c3b67723 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c @@ -59,8 +59,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | - A5XX_SP_CS_CTRL_REG0_BRANCHSTACK( - 0x3) | // XXX need to figure this out somehow.. + A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) | 0x6 /* XXX */); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 06b41bae097..072d432db6b 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -374,12 +374,13 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1); - OUT_RING(ring, - A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | - A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | - 0x6 | /* XXX seems to be always set? */ - A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | - COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + OUT_RING( + ring, + A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | + A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | + 0x6 | /* XXX seems to be always set? 
*/ + A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) | + COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); /* If we have streamout, link against the real FS in the binning program, * rather than the dummy FS used for binning pass state, to ensure the @@ -529,7 +530,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | - A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | + A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[FS].v)) | COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 688902e13f0..2cfe891def8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -68,7 +68,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack)); + A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index d1b4e313169..97a000be9b0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -451,11 +451,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE)); OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1); - OUT_RING(ring, 
A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | - A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT( - vs->info.max_half_reg + 1) | - COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack)); + OUT_RING( + ring, + A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | + A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) | + COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) | + A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(vs))); fd6_emit_shader(ctx, ring, vs); fd6_emit_immediates(ctx->screen, vs, ring); @@ -578,7 +579,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, ring, A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) | A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) | - A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack)); + A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(hs))); fd6_emit_shader(ctx, ring, hs); fd6_emit_immediates(ctx->screen, hs, ring); @@ -590,7 +591,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) | A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) | COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack)); + A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(ds))); fd6_emit_shader(ctx, ring, ds); fd6_emit_immediates(ctx->screen, ds, ring); @@ -732,7 +733,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) | A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) | COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) | + A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(fs)) | COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1); @@ 
-821,7 +822,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, ring, A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) | A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) | - A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack)); + A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(gs))); fd6_emit_shader(ctx, ring, gs); fd6_emit_immediates(ctx->screen, gs, ring); |