summaryrefslogtreecommitdiff
path: root/src/panfrost/midgard
diff options
context:
space:
mode:
author Boris Brezillon <boris.brezillon@collabora.com> 2021-02-13 08:24:03 +0100
committer Marge Bot <eric+marge@anholt.net> 2021-02-15 11:23:46 +0000
commit d5b1a33460edffa22ddd138bd63ef9e7063303c9 (patch)
tree 3a6b4a64895ef06923a6a77bfe252729b271a6e1 /src/panfrost/midgard
parent d18fc89066a6d1ee5a1354636b2ef94162b13507 (diff)
panfrost: Move the shader compilation logic out of the gallium driver
While at it, rework the code to avoid copies between intermediate structures: the pan_shader_info is passed to the compiler context so the compiler can fill shader information directly.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8963>
Diffstat (limited to 'src/panfrost/midgard')
-rw-r--r-- src/panfrost/midgard/compiler.h 10
-rw-r--r-- src/panfrost/midgard/midgard_compile.c 45
-rw-r--r-- src/panfrost/midgard/midgard_compile.h 8
-rw-r--r-- src/panfrost/midgard/midgard_ra.c 25
-rw-r--r-- src/panfrost/midgard/mir_promote_uniforms.c 7
5 files changed, 38 insertions, 57 deletions
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index fdb8160d203..f804ee0f53c 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -238,6 +238,7 @@ enum midgard_rt_id {
typedef struct compiler_context {
const struct panfrost_compile_inputs *inputs;
nir_shader *nir;
+ struct pan_shader_info *info;
gl_shader_stage stage;
/* Number of samples for a keyed blend shader. Depends on is_blend */
@@ -249,9 +250,6 @@ typedef struct compiler_context {
/* Index to precolour to r2 for a dual-source blend colour */
unsigned blend_src1;
- /* Number of bytes used for Thread Local Storage */
- unsigned tls_size;
-
/* Count of spills and fills for shaderdb */
unsigned spills;
unsigned fills;
@@ -291,10 +289,6 @@ typedef struct compiler_context {
/* Set of NIR indices that were already emitted as outmods */
BITSET_WORD *already_emitted;
- /* Just the count of the max register used. Higher count => higher
- * register pressure */
- int work_registers;
-
/* The number of uniforms allowable for the fast path */
int uniform_cutoff;
@@ -312,9 +306,7 @@ typedef struct compiler_context {
/* Writeout instructions for each render target */
midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];
- struct panfrost_sysvals sysvals;
struct hash_table_u64 *sysval_to_id;
- struct panfrost_ubo_push *push;
} compiler_context;
/* Per-block live_in/live_out */
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 16617e0b8fc..7f6c18a26fc 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1448,7 +1448,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
unsigned dest = nir_dest_index(&nir_dest);
unsigned uniform =
- pan_lookup_sysval(ctx->sysval_to_id, &ctx->sysvals, sysval);
+ pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval);
/* Emit the read itself -- this is never indirect */
midgard_instruction *ins =
@@ -2978,24 +2978,22 @@ mir_add_writeout_loops(compiler_context *ctx)
}
}
-panfrost_program *
-midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
- const struct panfrost_compile_inputs *inputs)
+void
+midgard_compile_shader_nir(nir_shader *nir,
+ const struct panfrost_compile_inputs *inputs,
+ struct util_dynarray *binary,
+ struct pan_shader_info *info)
{
- panfrost_program *program = rzalloc(mem_ctx, panfrost_program);
-
- struct util_dynarray *compiled = &program->compiled;
-
midgard_debug = debug_get_option_midgard_debug();
/* TODO: Bound against what? */
compiler_context *ctx = rzalloc(NULL, compiler_context);
- ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
+ ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, ctx);
ctx->inputs = inputs;
ctx->nir = nir;
+ ctx->info = info;
ctx->stage = nir->info.stage;
- ctx->push = &program->push;
if (inputs->is_blend) {
unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1);
@@ -3013,7 +3011,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
/* Start off with a safe cutoff, allowing usage of all 16 work
* registers. Later, we'll promote uniform reads to uniform registers
* if we determine it is beneficial to do so */
- ctx->uniform_cutoff = 8;
+ info->midgard.uniform_cutoff = 8;
/* Initialize at a global (not block) level hash tables */
@@ -3059,7 +3057,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
nir_print_shader(nir, stdout);
}
- ctx->tls_size = nir->scratch_size;
+ info->tls_size = nir->scratch_size;
nir_foreach_function(func, nir) {
if (!func->impl)
@@ -3086,8 +3084,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
break; /* TODO: Multi-function shaders */
}
- util_dynarray_init(compiled, program);
-
/* Per-block lowering before opts */
mir_foreach_block(ctx, _block) {
@@ -3164,7 +3160,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
lookahead = source_order_bundles[current_bundle + 1]->tag;
- emit_binary_bundle(ctx, block, bundle, compiled, lookahead);
+ emit_binary_bundle(ctx, block, bundle, binary, lookahead);
++current_bundle;
}
@@ -3175,20 +3171,11 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
free(source_order_bundles);
/* Report the very first tag executed */
- program->first_tag = midgard_get_first_tag_from_block(ctx, 0);
-
- /* Deal with off-by-one related to the fencepost problem */
- program->work_register_count = ctx->work_registers + 1;
- program->uniform_cutoff = ctx->uniform_cutoff;
-
- program->tls_size = ctx->tls_size;
-
- program->sysval_count = ctx->sysvals.sysval_count;
- memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
+ info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0);
if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
- disassemble_midgard(stdout, program->compiled.data,
- program->compiled.size, inputs->gpu_id);
+ disassemble_midgard(stdout, binary->data,
+ binary->size, inputs->gpu_id);
}
if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->shaderdb) &&
@@ -3209,7 +3196,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
/* Calculate thread count. There are certain cutoffs by
* register count for thread count */
- unsigned nr_registers = program->work_register_count;
+ unsigned nr_registers = info->work_reg_count;
unsigned nr_threads =
(nr_registers <= 4) ? 4 :
@@ -3232,6 +3219,4 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
}
ralloc_free(ctx);
-
- return program;
}
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 00d43a64e90..f049fbabb6b 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -29,9 +29,11 @@
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
-panfrost_program *
-midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
- const struct panfrost_compile_inputs *inputs);
+void
+midgard_compile_shader_nir(nir_shader *nir,
+ const struct panfrost_compile_inputs *inputs,
+ struct util_dynarray *binary,
+ struct pan_shader_info *info);
/* NIR options are shared between the standalone compiler and the online
* compiler. Defining it here is the simplest, though maybe not the Right
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 37cecb1c339..44b3c7dc1c6 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -99,7 +99,7 @@ index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned
/* Report that we actually use this register, and return it */
if (r.reg < 16)
- ctx->work_registers = MAX2(ctx->work_registers, r.reg);
+ ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
return r;
}
@@ -395,7 +395,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
* uniforms start and the shader stage. By ABI we limit blend shaders
* to 8 registers, should be lower XXX */
int work_count = ctx->inputs->is_blend ? 8 :
- 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+ 16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
/* No register allocation to do with no SSA */
@@ -646,7 +646,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
if (ctx->blend_src1 != ~0) {
assert(ctx->blend_src1 < ctx->temp_count);
l->solutions[ctx->blend_src1] = (16 * 2);
- ctx->work_registers = MAX2(ctx->work_registers, 2);
+ ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
}
mir_compute_interference(ctx, l);
@@ -959,13 +959,14 @@ mir_spill_register(
static void
mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
{
- unsigned old_work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+ unsigned old_work_count =
+ 16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
unsigned max_demote = SSA_FIXED_REGISTER(work_count);
- ctx->uniform_cutoff = new_cutoff;
+ ctx->info->midgard.uniform_cutoff = new_cutoff;
mir_foreach_block(ctx, _block) {
midgard_block *block = (midgard_block *) _block;
@@ -978,7 +979,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
unsigned temp = make_compiler_temp(ctx);
unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
- assert(idx < ctx->push->count);
+ assert(idx < ctx->info->push.count);
midgard_instruction ld = {
.type = TAG_LOAD_STORE_4,
@@ -989,10 +990,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
.swizzle = SWIZZLE_IDENTITY_4,
.op = midgard_op_ld_ubo_int4,
.load_store = {
- .arg_1 = ctx->push->words[idx].ubo,
+ .arg_1 = ctx->info->push.words[idx].ubo,
.arg_2 = 0x1E,
},
- .constants.u32[0] = ctx->push->words[idx].offset
+ .constants.u32[0] = ctx->info->push.words[idx].offset
};
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
@@ -1013,7 +1014,7 @@ mir_ra(compiler_context *ctx)
int iter_count = 1000; /* max iterations */
/* Number of 128-bit slots in memory we've spilled into */
- unsigned spill_count = DIV_ROUND_UP(ctx->tls_size, 16);
+ unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
mir_create_pipeline_registers(ctx);
@@ -1025,9 +1026,9 @@ mir_ra(compiler_context *ctx)
/* It's a lot cheaper to demote uniforms to get more
* work registers than to spill to TLS. */
if (l->spill_class == REG_CLASS_WORK &&
- ctx->uniform_cutoff > 8) {
+ ctx->info->midgard.uniform_cutoff > 8) {
- mir_demote_uniforms(ctx, MAX2(ctx->uniform_cutoff - 4, 8));
+ mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
} else if (spill_node == -1) {
fprintf(stderr, "ERROR: Failed to choose spill node\n");
lcra_free(l);
@@ -1056,7 +1057,7 @@ mir_ra(compiler_context *ctx)
/* Report spilling information. spill_count is in 128-bit slots (vec4 x
* fp32), but tls_size is in bytes, so multiply by 16 */
- ctx->tls_size = spill_count * 16;
+ ctx->info->tls_size = spill_count * 16;
install_registers(ctx, l);
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
index b5e063e0600..744d88e540e 100644
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -263,7 +263,7 @@ midgard_promote_uniforms(compiler_context *ctx)
unsigned work_count = mir_work_heuristic(ctx, &analysis);
unsigned promoted_count = 24 - work_count;
- mir_pick_ubo(ctx->push, &analysis, promoted_count);
+ mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
/* First, figure out special indices a priori so we don't recompute a lot */
BITSET_WORD *special = mir_special_indices(ctx);
@@ -279,7 +279,7 @@ midgard_promote_uniforms(compiler_context *ctx)
if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) continue;
/* Find where we pushed to, TODO: unaligned pushes to pack */
- unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, qword * 16);
+ unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
assert((base & 0x3) == 0);
unsigned address = base / 4;
@@ -288,7 +288,8 @@ midgard_promote_uniforms(compiler_context *ctx)
/* Should've taken into account when pushing */
assert(address < promoted_count);
- ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
+ ctx->info->midgard.uniform_cutoff =
+ MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
/* We do need the move for safety for a non-SSA dest, or if