panfrost: Move the shader compilation logic out of the gallium driver

While at it, rework the code to avoid copies between intermediate structures: the pan_shader_info is passed to the compiler context so the compiler can fill shader information directly.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8963>
author: Boris Brezillon <boris.brezillon@collabora.com> 2021-02-13 08:24:03 +0100
committer: Marge Bot <eric+marge@anholt.net> 2021-02-15 11:23:46 +0000
commit: d5b1a33460edffa22ddd138bd63ef9e7063303c9 (patch)
tree: 3a6b4a64895ef06923a6a77bfe252729b271a6e1 /src/panfrost/midgard
parent: d18fc89066a6d1ee5a1354636b2ef94162b13507 (diff)
5 files changed, 38 insertions, 57 deletions
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index fdb8160d203..f804ee0f53c 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -238,6 +238,7 @@ enum midgard_rt_id {
 typedef struct compiler_context {
         const struct panfrost_compile_inputs *inputs;
         nir_shader *nir;
+        struct pan_shader_info *info;
         gl_shader_stage stage;
 
         /* Number of samples for a keyed blend shader. Depends on is_blend */
@@ -249,9 +250,6 @@ typedef struct compiler_context {
         /* Index to precolour to r2 for a dual-source blend colour */
         unsigned blend_src1;
 
-        /* Number of bytes used for Thread Local Storage */
-        unsigned tls_size;
-
         /* Count of spills and fills for shaderdb */
         unsigned spills;
         unsigned fills;
@@ -291,10 +289,6 @@ typedef struct compiler_context {
         /* Set of NIR indices that were already emitted as outmods */
         BITSET_WORD *already_emitted;
 
-        /* Just the count of the max register used. Higher count => higher
-         * register pressure */
-        int work_registers;
-
         /* The number of uniforms allowable for the fast path */
         int uniform_cutoff;
 
@@ -312,9 +306,7 @@ typedef struct compiler_context {
         /* Writeout instructions for each render target */
         midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
 
-        struct panfrost_sysvals sysvals;
         struct hash_table_u64 *sysval_to_id;
-        struct panfrost_ubo_push *push;
 } compiler_context;
 
 /* Per-block live_in/live_out */
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 16617e0b8fc..7f6c18a26fc 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
         int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
         unsigned dest = nir_dest_index(&nir_dest);
         unsigned uniform =
-                pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval);
+                pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);
 
         /* Emit the read itself -- this is never indirect */
         midgard_instruction *ins =
@@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx)
         }
 }
 
-panfrost_program *
-midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
-                           const struct panfrost_compile_inputs *inputs)
+void
+midgard_compile_shader_nir(nir_shader *nir,
+                           const struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info)
 {
-        panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
-
-        struct util_dynarray *compiled = &program->compiled;
-
         midgard_debug = debug_get_option_midgard_debug();
 
         /* TODO: Bound against what? */
         compiler_context *ctx = rzalloc(NULL, compiler_context);
-        ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
+        ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
 
         ctx->inputs = inputs;
         ctx->nir = nir;
+        ctx->info = info;
         ctx->stage = nir->info.stage;
-        ctx->push = &program->push;
 
         if (inputs->is_blend) {
                 unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
@@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         /* Start off with a safe cutoff, allowing usage of all 16 work
          * registers. Later, we'll promote uniform reads to uniform registers
          * if we determine it is beneficial to do so */
-        ctx->uniform_cutoff = 8;
+        info->midgard.uniform_cutoff = 8;
 
         /* Initialize at a global (not block) level hash tables */
 
@@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 nir_print_shader(nir, stdout);
         }
 
-        ctx->tls_size = nir->scratch_size;
+        info->tls_size = nir->scratch_size;
 
         nir_foreach_function(func, nir) {
                 if (!func->impl)
@@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 break; /* TODO: Multi-function shaders */
         }
 
-        util_dynarray_init(compiled, program);
-
         /* Per-block lowering before opts */
 
         mir_foreach_block(ctx, _block) {
@@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                         if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
                                 lookahead = source_order_bundles[current_bundle + 1]->tag;
 
-                        emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
+                        emit_binary_bundle(ctx, block, bundle, binary, lookahead);
                         ++current_bundle;
                 }
 
@@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         free(source_order_bundles);
 
         /* Report the very first tag executed */
-        program->first_tag = midgard_get_first_tag_from_block(ctx, 0);
-
-        /* Deal with off-by-one related to the fencepost problem */
-        program->work_register_count = ctx->work_registers + 1;
-        program->uniform_cutoff = ctx->uniform_cutoff;
-
-        program->tls_size = ctx->tls_size;
-
-        program->sysval_count = ctx->sysvals.sysval_count;
-        memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
+        info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
 
         if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
-                disassemble_midgard(stdout, program->compiled.data,
-                                    program->compiled.size, inputs->gpu_id);
+                disassemble_midgard(stdout, binary->data,
+                                    binary->size, inputs->gpu_id);
         }
 
         if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) &&
@@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 /* Calculate thread count. There are certain cutoffs by
                  * register count for thread count */
 
-                unsigned nr_registers = program->work_register_count;
+                unsigned nr_registers = info->work_reg_count;
 
                 unsigned nr_threads =
                         (nr_registers <= 4) ? 4 :
@@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         }
 
         ralloc_free(ctx);
-
-        return program;
 }
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 00d43a64e90..f049fbabb6b 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -29,9 +29,11 @@
 #include "util/u_dynarray.h"
 #include "panfrost/util/pan_ir.h"
 
-panfrost_program *
-midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
-                           const struct panfrost_compile_inputs *inputs);
+void
+midgard_compile_shader_nir(nir_shader *nir,
+                           const struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info);
 
 /* NIR options are shared between the standalone compiler and the online
  * compiler. Defining it here is the simplest, though maybe not the Right
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 37cecb1c339..44b3c7dc1c6 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned
         /* Report that we actually use this register, and return it */
 
         if (r.reg < 16)
-                ctx->work_registers = MAX2(ctx->work_registers, r.reg);
+                ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
 
         return r;
 }
@@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
          * uniforms start and the shader stage. By ABI we limit blend shaders
          * to 8 registers, should be lower XXX */
         int work_count = ctx->inputs->is_blend ? 8 :
-                16 - MAX2((ctx->uniform_cutoff - 8), 0);
+                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
 
        /* No register allocation to do with no SSA */
 
@@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
         if (ctx->blend_src1 != ~0) {
                 assert(ctx->blend_src1 < ctx->temp_count);
                 l->solutions[ctx->blend_src1] = (16 * 2);
-                ctx->work_registers = MAX2(ctx->work_registers, 2);
+                ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
         }
 
         mir_compute_interference(ctx, l);
@@ -959,13 +959,14 @@ mir_spill_register(
 static void
 mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
 {
-        unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+        unsigned old_work_count =
+                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
         unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
 
         unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
         unsigned max_demote = SSA_FIXED_REGISTER(work_count);
 
-        ctx->uniform_cutoff = new_cutoff;
+        ctx->info->midgard.uniform_cutoff = new_cutoff;
 
         mir_foreach_block(ctx, _block) {
                 midgard_block *block = (midgard_block *) _block;
@@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
 
                                 unsigned temp = make_compiler_temp(ctx);
                                 unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
-                                assert(idx < ctx->push->count);
+                                assert(idx < ctx->info->push.count);
 
                                 midgard_instruction ld = {
                                         .type = TAG_LOAD_STORE_4,
@@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
                                         .swizzle = SWIZZLE_IDENTITY_4,
                                         .op = midgard_op_ld_ubo_int4,
                                         .load_store = {
-                                                .arg_1 = ctx->push->words[idx].ubo,
+                                                .arg_1 = ctx->info->push.words[idx].ubo,
                                                 .arg_2 = 0x1E,
                                         },
-                                        .constants.u32[0] = ctx->push->words[idx].offset
+                                        .constants.u32[0] = ctx->info->push.words[idx].offset
                                 };
 
                                 mir_insert_instruction_before_scheduled(ctx, block, before, ld);
@@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx)
         int iter_count = 1000; /* max iterations */
 
         /* Number of 128-bit slots in memory we've spilled into */
-        unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16);
+        unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
 
 
         mir_create_pipeline_registers(ctx);
@@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx)
                         /* It's a lot cheaper to demote uniforms to get more
                          * work registers than to spill to TLS. */
                         if (l->spill_class == REG_CLASS_WORK &&
-                            ctx->uniform_cutoff > 8) {
+                            ctx->info->midgard.uniform_cutoff > 8) {
 
-                                mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8));
+                                mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
                         } else if (spill_node == -1) {
                                 fprintf(stderr, "ERROR: Failed to choose spill node\n");
                                 lcra_free(l);
@@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx)
         /* Report spilling information. spill_count is in 128-bit slots (vec4 x
          * fp32), but tls_size is in bytes, so multiply by 16 */
 
-        ctx->tls_size = spill_count * 16;
+        ctx->info->tls_size = spill_count * 16;
 
         install_registers(ctx, l);
 
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
index b5e063e0600..744d88e540e 100644
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx)
         unsigned work_count = mir_work_heuristic(ctx, &analysis);
         unsigned promoted_count = 24 - work_count;
 
-        mir_pick_ubo(ctx->push, &analysis, promoted_count);
+        mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
 
         /* First, figure out special indices a priori so we don't recompute a lot */
         BITSET_WORD *special = mir_special_indices(ctx);
@@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx)
                 if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue;
 
                 /* Find where we pushed to, TODO: unaligned pushes to pack */
-                unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16);
+                unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
                 assert((base & 0x3) == 0);
 
                 unsigned address = base / 4;
@@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx)
                 /* Should've taken into account when pushing */
                 assert(address < promoted_count);
 
-                ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
+                ctx->info->midgard.uniform_cutoff =
+                        MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
                 unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
 
                 /* We do need the move for safety for a non-SSA dest, or if
author	Boris Brezillon <boris.brezillon@collabora.com>	2021-02-13 08:24:03 +0100
committer	Marge Bot <eric+marge@anholt.net>	2021-02-15 11:23:46 +0000
commit	d5b1a33460edffa22ddd138bd63ef9e7063303c9 (patch)
tree	3a6b4a64895ef06923a6a77bfe252729b271a6e1 /src/panfrost/midgard
parent	d18fc89066a6d1ee5a1354636b2ef94162b13507 (diff)