diff options
Diffstat (limited to 'src/gallium/drivers/panfrost/midgard/midgard_compile.c')
-rw-r--r-- | src/gallium/drivers/panfrost/midgard/midgard_compile.c | 272 |
1 files changed, 140 insertions, 132 deletions
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index ea8c0153c96..c68067d50a5 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -32,6 +32,7 @@ #include "main/mtypes.h" #include "compiler/glsl/glsl_to_nir.h" +#include "mesa/state_tracker/st_glsl_types.h" #include "compiler/nir_types.h" #include "main/imports.h" #include "compiler/nir/nir_builder.h" @@ -176,6 +177,7 @@ typedef struct midgard_block { * driver seems to do it that way */ #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__)); +#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W) #define M_LOAD_STORE(name, rname, uname) \ static midgard_instruction m_##name(unsigned ssa, unsigned address) { \ @@ -189,7 +191,7 @@ typedef struct midgard_block { .load_store = { \ .op = midgard_op_##name, \ .mask = 0xF, \ - .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), \ + .swizzle = SWIZZLE_XYZW, \ .address = address \ } \ }; \ @@ -432,10 +434,6 @@ typedef struct compiler_context { int temp_count; int max_hash; - /* Uniform IDs for mdg */ - struct hash_table_u64 *uniform_nir_to_mdg; - int uniform_count; - /* Just the count of the max register used. Higher count => higher * register pressure */ int work_registers; @@ -447,9 +445,6 @@ typedef struct compiler_context { /* Mapping of texture register -> SSA index for unaliasing */ int texture_index[2]; - /* Count of special uniforms (viewport, etc) in vec4 units */ - int special_uniforms; - /* If any path hits a discard instruction */ bool can_discard; @@ -464,6 +459,11 @@ typedef struct compiler_context { /* The index corresponding to the fragment output */ unsigned fragment_output; + + /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ + unsigned sysvals[MAX_SYSVAL_COUNT]; + unsigned sysval_count; + struct hash_table_u64 *sysval_to_id; } compiler_context; /* Append instruction to end of current block */ @@ -645,6 +645,12 @@ glsl_type_size(const struct glsl_type *type) return glsl_count_attribute_slots(type, false); } +static int +uniform_type_size(const struct glsl_type *type) +{ + return st_glsl_storage_type_size(type, false); +} + /* Lower fdot2 to a vector multiplication followed by channel addition */ static void midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu) @@ -667,6 +673,60 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu) nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum)); } +static int +midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_viewport_scale: + return PAN_SYSVAL_VIEWPORT_SCALE; + case nir_intrinsic_load_viewport_offset: + return PAN_SYSVAL_VIEWPORT_OFFSET; + default: + return -1; + } +} + +static void +midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr) +{ + int sysval = -1; + + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + sysval = midgard_nir_sysval_for_intrinsic(intr); + } + + if (sysval < 0) + return; + + /* We have a sysval load; check if it's already been assigned */ + + if (_mesa_hash_table_u64_search(ctx->sysval_to_id, sysval)) + return; + + /* It hasn't -- so assign it now! */ + + unsigned id = ctx->sysval_count++; + _mesa_hash_table_u64_insert(ctx->sysval_to_id, sysval, (void *) ((uintptr_t) id + 1)); + ctx->sysvals[id] = sysval; +} + +static void +midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader) +{ + ctx->sysval_count = 0; + + nir_foreach_function(function, shader) { + if (!function->impl) continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + midgard_nir_assign_sysval_body(ctx, instr); + } + } + } +} + static bool midgard_nir_lower_fdot2(nir_shader *shader) { @@ -715,7 +775,6 @@ optimise_nir(nir_shader *nir) progress = false; NIR_PASS(progress, nir, midgard_nir_lower_algebraic); - NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0); NIR_PASS(progress, nir, nir_lower_var_copies); NIR_PASS(progress, nir, nir_lower_vars_to_ssa); @@ -1207,6 +1266,52 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) #undef ALU_CASE static void +emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset) +{ + /* TODO: half-floats */ + + if (offset < ctx->uniform_cutoff) { + /* Fast path: For the first 16 uniform, + * accesses are 0-cycle, since they're + * just a register fetch in the usual + * case. So, we alias the registers + * while we're still in SSA-space */ + + int reg_slot = 23 - offset; + alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot)); + } else { + /* Otherwise, read from the 'special' + * UBO to access higher-indexed + * uniforms, at a performance cost */ + + midgard_instruction ins = m_load_uniform_32(dest, offset); + + /* TODO: Don't split */ + ins.load_store.varying_parameters = (offset & 7) << 7; + ins.load_store.address = offset >> 3; + + ins.load_store.unknown = 0x1E00; /* xxx: what is this? */ + emit_mir_instruction(ctx, ins); + } +} + +static void +emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr) +{ + /* First, pull out the destination */ + unsigned dest = nir_dest_index(ctx, &instr->dest); + + /* Now, figure out which uniform this is */ + int sysval = midgard_nir_sysval_for_intrinsic(instr); + void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval); + + /* Sysvals are prefix uniforms */ + unsigned uniform = ((uintptr_t) val) - 1; + + emit_uniform_read(ctx, dest, uniform); +} + +static void emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) { nir_const_value *const_offset; @@ -1238,52 +1343,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) reg = nir_dest_index(ctx, &instr->dest); if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) { - /* TODO: half-floats */ - - int uniform_offset = 0; - - if (offset >= SPECIAL_UNIFORM_BASE) { - /* XXX: Resolve which uniform */ - uniform_offset = 0; - } else { - /* Offset away from the special - * uniform block */ - - void *entry = _mesa_hash_table_u64_search(ctx->uniform_nir_to_mdg, offset + 1); - - /* XXX */ - if (!entry) { - DBG("WARNING: Unknown uniform %d\n", offset); - break; - } - - uniform_offset = (uintptr_t) (entry) - 1; - uniform_offset += ctx->special_uniforms; - } - - if (uniform_offset < ctx->uniform_cutoff) { - /* Fast path: For the first 16 uniform, - * accesses are 0-cycle, since they're - * just a register fetch in the usual - * case. So, we alias the registers - * while we're still in SSA-space */ - - int reg_slot = 23 - uniform_offset; - alias_ssa(ctx, reg, SSA_FIXED_REGISTER(reg_slot)); - } else { - /* Otherwise, read from the 'special' - * UBO to access higher-indexed - * uniforms, at a performance cost */ - - midgard_instruction ins = m_load_uniform_32(reg, uniform_offset); - - /* TODO: Don't split */ - ins.load_store.varying_parameters = (uniform_offset & 7) << 7; - ins.load_store.address = uniform_offset >> 3; - - ins.load_store.unknown = 0x1E00; /* xxx: what is this? */ - emit_mir_instruction(ctx, ins); - } + emit_uniform_read(ctx, reg, ctx->sysval_count + offset); } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) { /* XXX: Half-floats? */ /* TODO: swizzle, mask */ @@ -1490,6 +1550,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) _mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v); break; + case nir_intrinsic_load_viewport_scale: + case nir_intrinsic_load_viewport_offset: + emit_sysval_read(ctx, instr); + break; default: printf ("Unhandled intrinsic\n"); @@ -3005,41 +3069,17 @@ actualise_ssa_to_alias(compiler_context *ctx) * */ static void -write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_no) +write_transformed_position(nir_builder *b, nir_src input_point_src) { nir_ssa_def *input_point = nir_ssa_for_src(b, input_point_src, 4); + nir_ssa_def *scale = nir_load_viewport_scale(b); + nir_ssa_def *offset = nir_load_viewport_offset(b); - /* Get viewport from the uniforms */ - nir_intrinsic_instr *load; - load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); - load->num_components = 4; - load->src[0] = nir_src_for_ssa(nir_imm_int(b, uniform_no)); - nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); - nir_builder_instr_insert(b, &load->instr); - - /* Formatted as <width, height, centerx, centery> */ - nir_ssa_def *viewport_vec4 = &load->dest.ssa; - nir_ssa_def *viewport_width_2 = nir_channel(b, viewport_vec4, 0); - nir_ssa_def *viewport_height_2 = nir_channel(b, viewport_vec4, 1); - nir_ssa_def *viewport_offset = nir_channels(b, viewport_vec4, 0x8 | 0x4); - - /* XXX: From uniforms? */ - nir_ssa_def *depth_near = nir_imm_float(b, 0.0); - nir_ssa_def *depth_far = nir_imm_float(b, 1.0); - - /* World space to normalised device coordinates */ + /* World space to normalised device coordinates to screen space */ nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, input_point, 3)); nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, input_point, 0x7), w_recip); - - /* Normalised device coordinates to screen space */ - - nir_ssa_def *viewport_multiplier = nir_vec2(b, viewport_width_2, viewport_height_2); - nir_ssa_def *viewport_xy = nir_fadd(b, nir_fmul(b, nir_channels(b, ndc_point, 0x3), viewport_multiplier), viewport_offset); - - nir_ssa_def *depth_multiplier = nir_fmul(b, nir_fsub(b, depth_far, depth_near), nir_imm_float(b, 0.5f)); - nir_ssa_def *depth_offset = nir_fmul(b, nir_fadd(b, depth_far, depth_near), nir_imm_float(b, 0.5f)); - nir_ssa_def *screen_depth = nir_fadd(b, nir_fmul(b, nir_channel(b, ndc_point, 2), depth_multiplier), depth_offset); + nir_ssa_def *screen = nir_fadd(b, nir_fmul(b, ndc_point, scale), offset); /* gl_Position will be written out in screenspace xyz, with w set to * the reciprocal we computed earlier. The transformed w component is @@ -3048,9 +3088,9 @@ write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_ * used in depth clipping computations */ nir_ssa_def *screen_space = nir_vec4(b, - nir_channel(b, viewport_xy, 0), - nir_channel(b, viewport_xy, 1), - screen_depth, + nir_channel(b, screen, 0), + nir_channel(b, screen, 1), + nir_channel(b, screen, 2), w_recip); /* Finally, write out the transformed values to the varying */ @@ -3107,7 +3147,7 @@ transform_position_writes(nir_shader *shader) nir_builder_init(&b, func->impl); b.cursor = nir_before_instr(instr); - write_transformed_position(&b, intr->src[0], UNIFORM_VIEWPORT); + write_transformed_position(&b, intr->src[0]); nir_instr_remove(instr); } } @@ -3457,28 +3497,11 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl /* TODO: Decide this at runtime */ ctx->uniform_cutoff = 8; - switch (ctx->stage) { - case MESA_SHADER_VERTEX: - ctx->special_uniforms = 1; - break; - - default: - ctx->special_uniforms = 0; - break; - } - - /* Append epilogue uniforms if necessary. The cmdstream depends on - * these being at the -end-; see assign_var_locations. */ - - if (ctx->stage == MESA_SHADER_VERTEX) { - nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "viewport"); - } - /* Assign var locations early, so the epilogue can use them if necessary */ nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size); nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size); - nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size); + nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, uniform_type_size); /* Initialize at a global (not block) level hash tables */ @@ -3487,31 +3510,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL); ctx->ssa_to_register = _mesa_hash_table_u64_create(NULL); ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL); + ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL); ctx->leftover_ssa_to_alias = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - /* Assign actual uniform location, skipping over samplers */ - - ctx->uniform_nir_to_mdg = _mesa_hash_table_u64_create(NULL); - - nir_foreach_variable(var, &nir->uniforms) { - if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue; - - unsigned length = glsl_get_aoa_size(var->type); - - if (!length) { - length = glsl_get_length(var->type); - } - - if (!length) { - length = glsl_get_matrix_columns(var->type); - } - - for (int col = 0; col < length; ++col) { - int id = ctx->uniform_count++; - _mesa_hash_table_u64_insert(ctx->uniform_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1))); - } - } - /* Record the varying mapping for the command stream's bookkeeping */ struct exec_list *varyings = @@ -3531,7 +3532,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl NIR_PASS_V(nir, nir_lower_global_vars_to_local); NIR_PASS_V(nir, nir_lower_var_copies); NIR_PASS_V(nir, nir_lower_vars_to_ssa); - NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0); + + NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, uniform_type_size, 0); + NIR_PASS_V(nir, nir_lower_io, nir_var_all & ~nir_var_uniform, glsl_type_size, 0); /* Append vertex epilogue before optimisation, so the epilogue itself * is optimised */ @@ -3547,13 +3550,18 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl nir_print_shader(nir, stdout); } - /* Assign counts, now that we're sure (post-optimisation) */ + /* Assign sysvals and counts, now that we're sure + * (post-optimisation) */ + + midgard_nir_assign_sysvals(ctx, nir); + program->uniform_count = nir->num_uniforms; + program->sysval_count = ctx->sysval_count; + memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count); program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0; program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0); - nir_foreach_function(func, nir) { if (!func->impl) continue; |