diff options
author | Boris Brezillon <boris.brezillon@collabora.com> | 2020-12-08 11:38:50 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2020-12-09 16:29:25 +0000 |
commit | e27052281aa97f17df5b221337b1c5f3e7464b61 (patch) | |
tree | dd15bd942d8c1b859cc4bfd3bf46fc5f5c142ecf /src/panfrost/midgard | |
parent | 29f938a0ece889cd3236fca7e008bf0031de4be2 (diff) |
pan/mdg: Add support for multi sample iteration writeout
Some MSAA+fmt combinations require the writeout to be split. Right now, this
only impacts blend shaders since we only support MSAA 4x, and the only
formats that could exceed the 128-bit/pixel limit in MSAA 4x are
not supported by the fixed-function blend unit. We thus rely on the
blend shader to split things properly. Things will change once we add
MSAA 8x/16x to the mix, since even the blendable formats will exceed
the 128-bit/pixel limit in that case.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7984>
Diffstat (limited to 'src/panfrost/midgard')
-rw-r--r-- | src/panfrost/midgard/compiler.h | 7 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_compile.c | 86 |
2 files changed, 63 insertions, 30 deletions
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index e486227303f..9b985c2474f 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -233,6 +233,8 @@ enum midgard_rt_id { MIDGARD_NUM_RTS, }; +#define MIDGARD_MAX_SAMPLE_ITER 16 + typedef struct compiler_context { nir_shader *nir; gl_shader_stage stage; @@ -243,6 +245,9 @@ typedef struct compiler_context { /* Render target number for a keyed blend shader. Depends on is_blend */ unsigned blend_rt; + /* Number of samples for a keyed blend shader. Depends on is_blend */ + unsigned blend_sample_iterations; + /* Index to precolour to r0 for an input blend colour */ unsigned blend_input; @@ -313,7 +318,7 @@ typedef struct compiler_context { uint32_t quirks; /* Writeout instructions for each render target */ - midgard_instruction *writeout_branch[MIDGARD_NUM_RTS]; + midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER]; struct panfrost_sysvals sysvals; } compiler_context; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 537b03fd1c5..e2389084863 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -1316,11 +1316,13 @@ compute_builtin_arg(nir_op op) } static void -emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s, enum midgard_rt_id rt) +emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s, + enum midgard_rt_id rt, unsigned sample_iter) { assert(rt < ARRAY_SIZE(ctx->writeout_branch)); + assert(sample_iter < ARRAY_SIZE(ctx->writeout_branch[0])); - midgard_instruction *br = ctx->writeout_branch[rt]; + midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; assert(!br); @@ -1336,7 +1338,12 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigne /* Add dependencies */ ins.src[0] = src; ins.src_types[0] = nir_type_uint32; - 
ins.constants.u32[0] = depth_only ? 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100; + + if (depth_only) + ins.constants.u32[0] = 0xFF; + else + ins.constants.u32[0] = ((rt - MIDGARD_COLOR_RT0) << 8) | sample_iter; + for (int i = 0; i < 4; ++i) ins.swizzle[0][i] = i; @@ -1356,7 +1363,7 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigne /* Emit the branch */ br = emit_mir_instruction(ctx, ins); schedule_barrier(ctx); - ctx->writeout_branch[rt] = br; + ctx->writeout_branch[rt][sample_iter] = br; /* Push our current location = current block count - 1 = where we'll * jump to. Maybe a bit too clever for my own good */ @@ -1693,7 +1700,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) reg_s = nir_src_index(ctx, &instr->src[3]); } - emit_fragment_store(ctx, reg, reg_z, reg_s, rt); + emit_fragment_store(ctx, reg, reg_z, reg_s, rt, 0); } else if (ctx->stage == MESA_SHADER_VERTEX) { assert(instr->intrinsic == nir_intrinsic_store_output); @@ -1757,7 +1764,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) case nir_intrinsic_store_raw_output_pan: assert (ctx->stage == MESA_SHADER_FRAGMENT); reg = nir_src_index(ctx, &instr->src[0]); - emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt); + for (unsigned s = 0; s < ctx->blend_sample_iterations; s++) + emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt, s); break; case nir_intrinsic_store_global: @@ -2451,10 +2459,10 @@ midgard_legalize_invert(compiler_context *ctx, midgard_block *block) } static unsigned -emit_fragment_epilogue(compiler_context *ctx, unsigned rt) +emit_fragment_epilogue(compiler_context *ctx, unsigned rt, unsigned sample_iter) { /* Loop to ourselves */ - midgard_instruction *br = ctx->writeout_branch[rt]; + midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; struct midgard_instruction ins = v_branch(false, false); ins.writeout = br->writeout; ins.branch.target_block = ctx->block_count - 1; @@ -2683,27 +2691,38 @@ static void 
mir_add_writeout_loops(compiler_context *ctx) { for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) { - midgard_instruction *br = ctx->writeout_branch[rt]; - if (!br) continue; - - unsigned popped = br->branch.target_block; - pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base), &ctx->current_block->base); - br->branch.target_block = emit_fragment_epilogue(ctx, rt); - br->branch.target_type = TARGET_GOTO; - - /* If we have more RTs, we'll need to restore back after our - * loop terminates */ - - if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) { - midgard_instruction uncond = v_branch(false, false); - uncond.branch.target_block = popped; - uncond.branch.target_type = TARGET_GOTO; - emit_mir_instruction(ctx, uncond); - pan_block_add_successor(&ctx->current_block->base, &(mir_get_block(ctx, popped)->base)); - schedule_barrier(ctx); - } else { - /* We're last, so we can terminate here */ - br->last_writeout = true; + for (unsigned s = 0; s < MIDGARD_MAX_SAMPLE_ITER; ++s) { + midgard_instruction *br = ctx->writeout_branch[rt][s]; + if (!br) continue; + + unsigned popped = br->branch.target_block; + pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base), + &ctx->current_block->base); + br->branch.target_block = emit_fragment_epilogue(ctx, rt, s); + br->branch.target_type = TARGET_GOTO; + + /* If we have more RTs, we'll need to restore back after our + * loop terminates */ + midgard_instruction *next_br = NULL; + + if ((s + 1) < MIDGARD_MAX_SAMPLE_ITER) + next_br = ctx->writeout_branch[rt][s + 1]; + + if (!next_br && (rt + 1) < ARRAY_SIZE(ctx->writeout_branch)) + next_br = ctx->writeout_branch[rt + 1][0]; + + if (next_br) { + midgard_instruction uncond = v_branch(false, false); + uncond.branch.target_block = popped; + uncond.branch.target_type = TARGET_GOTO; + emit_mir_instruction(ctx, uncond); + pan_block_add_successor(&ctx->current_block->base, + &(mir_get_block(ctx, popped)->base)); + 
schedule_barrier(ctx); + } else { + /* We're last, so we can terminate here */ + br->last_writeout = true; + } } } } @@ -2725,6 +2744,15 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, ctx->stage = nir->info.stage; ctx->is_blend = inputs->is_blend; ctx->blend_rt = MIDGARD_COLOR_RT0 + inputs->blend.rt; + if (inputs->is_blend) { + unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1); + const struct util_format_description *desc = + util_format_description(inputs->rt_formats[inputs->blend.rt]); + + /* We have to split writeout in 128 bit chunks */ + ctx->blend_sample_iterations = + DIV_ROUND_UP(desc->block.bits * nr_samples, 128); + } memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants)); ctx->blend_input = ~0; ctx->blend_src1 = ~0; |