diff options
author | Boris Brezillon <boris.brezillon@collabora.com> | 2021-02-13 08:24:03 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-15 11:23:46 +0000 |
commit | d5b1a33460edffa22ddd138bd63ef9e7063303c9 (patch) | |
tree | 3a6b4a64895ef06923a6a77bfe252729b271a6e1 /src/panfrost/midgard | |
parent | d18fc89066a6d1ee5a1354636b2ef94162b13507 (diff) |
panfrost: Move the shader compilation logic out of the gallium driver
While at it, rework the code to avoid copies between intermediate
structures: the pan_shader_info is passed to the compiler context so
the compiler can fill shader information directly.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8963>
Diffstat (limited to 'src/panfrost/midgard')
-rw-r--r-- | src/panfrost/midgard/compiler.h | 10 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_compile.c | 45 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_compile.h | 8 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_ra.c | 25 | ||||
-rw-r--r-- | src/panfrost/midgard/mir_promote_uniforms.c | 7 |
5 files changed, 38 insertions, 57 deletions
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index fdb8160d203..f804ee0f53c 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -238,6 +238,7 @@ enum midgard_rt_id { typedef struct compiler_context { const struct panfrost_compile_inputs *inputs; nir_shader *nir; + struct pan_shader_info *info; gl_shader_stage stage; /* Number of samples for a keyed blend shader. Depends on is_blend */ @@ -249,9 +250,6 @@ typedef struct compiler_context { /* Index to precolour to r2 for a dual-source blend colour */ unsigned blend_src1; - /* Number of bytes used for Thread Local Storage */ - unsigned tls_size; - /* Count of spills and fills for shaderdb */ unsigned spills; unsigned fills; @@ -291,10 +289,6 @@ typedef struct compiler_context { /* Set of NIR indices that were already emitted as outmods */ BITSET_WORD *already_emitted; - /* Just the count of the max register used. Higher count => higher - * register pressure */ - int work_registers; - /* The number of uniforms allowable for the fast path */ int uniform_cutoff; @@ -312,9 +306,7 @@ typedef struct compiler_context { /* Writeout instructions for each render target */ midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER]; - struct panfrost_sysvals sysvals; struct hash_table_u64 *sysval_to_id; - struct panfrost_ubo_push *push; } compiler_context; /* Per-block live_in/live_out */ diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 16617e0b8fc..7f6c18a26fc 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr, int sysval = panfrost_sysval_for_instr(instr, &nir_dest); unsigned dest = nir_dest_index(&nir_dest); unsigned uniform = - pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval); + pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval); /* Emit the read itself -- this is never indirect */ midgard_instruction *ins = @@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx) } } -panfrost_program * -midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, - const struct panfrost_compile_inputs *inputs) +void +midgard_compile_shader_nir(nir_shader *nir, + const struct panfrost_compile_inputs *inputs, + struct util_dynarray *binary, + struct pan_shader_info *info) { - panfrost_program *program = rzalloc(mem_ctx, panfrost_program); - - struct util_dynarray *compiled = &program->compiled; - midgard_debug = debug_get_option_midgard_debug(); /* TODO: Bound against what? */ compiler_context *ctx = rzalloc(NULL, compiler_context); - ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx); + ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx); ctx->inputs = inputs; ctx->nir = nir; + ctx->info = info; ctx->stage = nir->info.stage; - ctx->push = &program->push; if (inputs->is_blend) { unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1); @@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, /* Start off with a safe cutoff, allowing usage of all 16 work * registers. Later, we'll promote uniform reads to uniform registers * if we determine it is beneficial to do so */ - ctx->uniform_cutoff = 8; + info->midgard.uniform_cutoff = 8; /* Initialize at a global (not block) level hash tables */ @@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, nir_print_shader(nir, stdout); } - ctx->tls_size = nir->scratch_size; + info->tls_size = nir->scratch_size; nir_foreach_function(func, nir) { if (!func->impl) @@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, break; /* TODO: Multi-function shaders */ } - util_dynarray_init(compiled, program); - /* Per-block lowering before opts */ mir_foreach_block(ctx, _block) { @@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, if (!bundle->last_writeout && (current_bundle + 1 < bundle_count)) lookahead = source_order_bundles[current_bundle + 1]->tag; - emit_binary_bundle(ctx, block, bundle, compiled, lookahead); + emit_binary_bundle(ctx, block, bundle, binary, lookahead); ++current_bundle; } @@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, free(source_order_bundles); /* Report the very first tag executed */ - program->first_tag = midgard_get_first_tag_from_block(ctx, 0); - - /* Deal with off-by-one related to the fencepost problem */ - program->work_register_count = ctx->work_registers + 1; - program->uniform_cutoff = ctx->uniform_cutoff; - - program->tls_size = ctx->tls_size; - - program->sysval_count = ctx->sysvals.sysval_count; - memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count); + info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0); if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) { - disassemble_midgard(stdout, program->compiled.data, - program->compiled.size, inputs->gpu_id); + disassemble_midgard(stdout, binary->data, + binary->size, inputs->gpu_id); } if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) && @@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, /* Calculate thread count. There are certain cutoffs by * register count for thread count */ - unsigned nr_registers = program->work_register_count; + unsigned nr_registers = info->work_reg_count; unsigned nr_threads = (nr_registers <= 4) ? 4 : @@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, } ralloc_free(ctx); - - return program; } diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index 00d43a64e90..f049fbabb6b 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -29,9 +29,11 @@ #include "util/u_dynarray.h" #include "panfrost/util/pan_ir.h" -panfrost_program * -midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir, - const struct panfrost_compile_inputs *inputs); +void +midgard_compile_shader_nir(nir_shader *nir, + const struct panfrost_compile_inputs *inputs, + struct util_dynarray *binary, + struct pan_shader_info *info); /* NIR options are shared between the standalone compiler and the online * compiler. Defining it here is the simplest, though maybe not the Right diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 37cecb1c339..44b3c7dc1c6 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned /* Report that we actually use this register, and return it */ if (r.reg < 16) - ctx->work_registers = MAX2(ctx->work_registers, r.reg); + ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1); return r; } @@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled) * uniforms start and the shader stage. By ABI we limit blend shaders * to 8 registers, should be lower XXX */ int work_count = ctx->inputs->is_blend ? 8 : - 16 - MAX2((ctx->uniform_cutoff - 8), 0); + 16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0); /* No register allocation to do with no SSA */ @@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled) if (ctx->blend_src1 != ~0) { assert(ctx->blend_src1 < ctx->temp_count); l->solutions[ctx->blend_src1] = (16 * 2); - ctx->work_registers = MAX2(ctx->work_registers, 2); + ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3); } mir_compute_interference(ctx, l); @@ -959,13 +959,14 @@ mir_spill_register( static void mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) { - unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0); + unsigned old_work_count = + 16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0); unsigned work_count = 16 - MAX2((new_cutoff - 8), 0); unsigned min_demote = SSA_FIXED_REGISTER(old_work_count); unsigned max_demote = SSA_FIXED_REGISTER(work_count); - ctx->uniform_cutoff = new_cutoff; + ctx->info->midgard.uniform_cutoff = new_cutoff; mir_foreach_block(ctx, _block) { midgard_block *block = (midgard_block *) _block; @@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) unsigned temp = make_compiler_temp(ctx); unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4; - assert(idx < ctx->push->count); + assert(idx < ctx->info->push.count); midgard_instruction ld = { .type = TAG_LOAD_STORE_4, @@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) .swizzle = SWIZZLE_IDENTITY_4, .op = midgard_op_ld_ubo_int4, .load_store = { - .arg_1 = ctx->push->words[idx].ubo, + .arg_1 = ctx->info->push.words[idx].ubo, .arg_2 = 0x1E, }, - .constants.u32[0] = ctx->push->words[idx].offset + .constants.u32[0] = ctx->info->push.words[idx].offset }; mir_insert_instruction_before_scheduled(ctx, block, before, ld); @@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx) int iter_count = 1000; /* max iterations */ /* Number of 128-bit slots in memory we've spilled into */ - unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16); + unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16); mir_create_pipeline_registers(ctx); @@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx) /* It's a lot cheaper to demote uniforms to get more * work registers than to spill to TLS. */ if (l->spill_class == REG_CLASS_WORK && - ctx->uniform_cutoff > 8) { + ctx->info->midgard.uniform_cutoff > 8) { - mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8)); + mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8)); } else if (spill_node == -1) { fprintf(stderr, "ERROR: Failed to choose spill node\n"); lcra_free(l); @@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx) /* Report spilling information. spill_count is in 128-bit slots (vec4 x * fp32), but tls_size is in bytes, so multiply by 16 */ - ctx->tls_size = spill_count * 16; + ctx->info->tls_size = spill_count * 16; install_registers(ctx, l); diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index b5e063e0600..744d88e540e 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx) unsigned work_count = mir_work_heuristic(ctx, &analysis); unsigned promoted_count = 24 - work_count; - mir_pick_ubo(ctx->push, &analysis, promoted_count); + mir_pick_ubo(&ctx->info->push, &analysis, promoted_count); /* First, figure out special indices a priori so we don't recompute a lot */ BITSET_WORD *special = mir_special_indices(ctx); @@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx) if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue; /* Find where we pushed to, TODO: unaligned pushes to pack */ - unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16); + unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16); assert((base & 0x3) == 0); unsigned address = base / 4; @@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx) /* Should've taken into account when pushing */ assert(address < promoted_count); - ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1); + ctx->info->midgard.uniform_cutoff = + MAX2(ctx->info->midgard.uniform_cutoff, address + 1); unsigned promoted = SSA_FIXED_REGISTER(uniform_reg); /* We do need the move for safety for a non-SSA dest, or if |