summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/panfrost/midgard/midgard_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/panfrost/midgard/midgard_compile.c')
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_compile.c | 272
1 files changed, 140 insertions, 132 deletions
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index ea8c0153c96..c68067d50a5 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -32,6 +32,7 @@
#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"
@@ -176,6 +177,7 @@ typedef struct midgard_block {
* driver seems to do it that way */
#define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
+#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W)
#define M_LOAD_STORE(name, rname, uname) \
static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
@@ -189,7 +191,7 @@ typedef struct midgard_block {
.load_store = { \
.op = midgard_op_##name, \
.mask = 0xF, \
- .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), \
+ .swizzle = SWIZZLE_XYZW, \
.address = address \
} \
}; \
@@ -432,10 +434,6 @@ typedef struct compiler_context {
int temp_count;
int max_hash;
- /* Uniform IDs for mdg */
- struct hash_table_u64 *uniform_nir_to_mdg;
- int uniform_count;
-
/* Just the count of the max register used. Higher count => higher
* register pressure */
int work_registers;
@@ -447,9 +445,6 @@ typedef struct compiler_context {
/* Mapping of texture register -> SSA index for unaliasing */
int texture_index[2];
- /* Count of special uniforms (viewport, etc) in vec4 units */
- int special_uniforms;
-
/* If any path hits a discard instruction */
bool can_discard;
@@ -464,6 +459,11 @@ typedef struct compiler_context {
/* The index corresponding to the fragment output */
unsigned fragment_output;
+
+ /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
+ unsigned sysvals[MAX_SYSVAL_COUNT];
+ unsigned sysval_count;
+ struct hash_table_u64 *sysval_to_id;
} compiler_context;
/* Append instruction to end of current block */
@@ -645,6 +645,12 @@ glsl_type_size(const struct glsl_type *type)
return glsl_count_attribute_slots(type, false);
}
+static int
+uniform_type_size(const struct glsl_type *type)
+{
+ return st_glsl_storage_type_size(type, false);
+}
+
/* Lower fdot2 to a vector multiplication followed by channel addition */
static void
midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
@@ -667,6 +673,60 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
}
+static int
+midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_viewport_scale:
+ return PAN_SYSVAL_VIEWPORT_SCALE;
+ case nir_intrinsic_load_viewport_offset:
+ return PAN_SYSVAL_VIEWPORT_OFFSET;
+ default:
+ return -1;
+ }
+}
+
+static void
+midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr)
+{
+ int sysval = -1;
+
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ sysval = midgard_nir_sysval_for_intrinsic(intr);
+ }
+
+ if (sysval < 0)
+ return;
+
+ /* We have a sysval load; check if it's already been assigned */
+
+ if (_mesa_hash_table_u64_search(ctx->sysval_to_id, sysval))
+ return;
+
+ /* It hasn't -- so assign it now! */
+
+ unsigned id = ctx->sysval_count++;
+ _mesa_hash_table_u64_insert(ctx->sysval_to_id, sysval, (void *) ((uintptr_t) id + 1));
+ ctx->sysvals[id] = sysval;
+}
+
+static void
+midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader)
+{
+ ctx->sysval_count = 0;
+
+ nir_foreach_function(function, shader) {
+ if (!function->impl) continue;
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ midgard_nir_assign_sysval_body(ctx, instr);
+ }
+ }
+ }
+}
+
static bool
midgard_nir_lower_fdot2(nir_shader *shader)
{
@@ -715,7 +775,6 @@ optimise_nir(nir_shader *nir)
progress = false;
NIR_PASS(progress, nir, midgard_nir_lower_algebraic);
- NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
@@ -1207,6 +1266,52 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
#undef ALU_CASE
static void
+emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset)
+{
+ /* TODO: half-floats */
+
+ if (offset < ctx->uniform_cutoff) {
+ /* Fast path: For the first 16 uniform,
+ * accesses are 0-cycle, since they're
+ * just a register fetch in the usual
+ * case. So, we alias the registers
+ * while we're still in SSA-space */
+
+ int reg_slot = 23 - offset;
+ alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
+ } else {
+ /* Otherwise, read from the 'special'
+ * UBO to access higher-indexed
+ * uniforms, at a performance cost */
+
+ midgard_instruction ins = m_load_uniform_32(dest, offset);
+
+ /* TODO: Don't split */
+ ins.load_store.varying_parameters = (offset & 7) << 7;
+ ins.load_store.address = offset >> 3;
+
+ ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
+ emit_mir_instruction(ctx, ins);
+ }
+}
+
+static void
+emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
+{
+ /* First, pull out the destination */
+ unsigned dest = nir_dest_index(ctx, &instr->dest);
+
+ /* Now, figure out which uniform this is */
+ int sysval = midgard_nir_sysval_for_intrinsic(instr);
+ void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval);
+
+ /* Sysvals are prefix uniforms */
+ unsigned uniform = ((uintptr_t) val) - 1;
+
+ emit_uniform_read(ctx, dest, uniform);
+}
+
+static void
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
{
nir_const_value *const_offset;
@@ -1238,52 +1343,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
reg = nir_dest_index(ctx, &instr->dest);
if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
- /* TODO: half-floats */
-
- int uniform_offset = 0;
-
- if (offset >= SPECIAL_UNIFORM_BASE) {
- /* XXX: Resolve which uniform */
- uniform_offset = 0;
- } else {
- /* Offset away from the special
- * uniform block */
-
- void *entry = _mesa_hash_table_u64_search(ctx->uniform_nir_to_mdg, offset + 1);
-
- /* XXX */
- if (!entry) {
- DBG("WARNING: Unknown uniform %d\n", offset);
- break;
- }
-
- uniform_offset = (uintptr_t) (entry) - 1;
- uniform_offset += ctx->special_uniforms;
- }
-
- if (uniform_offset < ctx->uniform_cutoff) {
- /* Fast path: For the first 16 uniform,
- * accesses are 0-cycle, since they're
- * just a register fetch in the usual
- * case. So, we alias the registers
- * while we're still in SSA-space */
-
- int reg_slot = 23 - uniform_offset;
- alias_ssa(ctx, reg, SSA_FIXED_REGISTER(reg_slot));
- } else {
- /* Otherwise, read from the 'special'
- * UBO to access higher-indexed
- * uniforms, at a performance cost */
-
- midgard_instruction ins = m_load_uniform_32(reg, uniform_offset);
-
- /* TODO: Don't split */
- ins.load_store.varying_parameters = (uniform_offset & 7) << 7;
- ins.load_store.address = uniform_offset >> 3;
-
- ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
- emit_mir_instruction(ctx, ins);
- }
+ emit_uniform_read(ctx, reg, ctx->sysval_count + offset);
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
/* XXX: Half-floats? */
/* TODO: swizzle, mask */
@@ -1490,6 +1550,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
_mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v);
break;
+ case nir_intrinsic_load_viewport_scale:
+ case nir_intrinsic_load_viewport_offset:
+ emit_sysval_read(ctx, instr);
+ break;
default:
printf ("Unhandled intrinsic\n");
@@ -3005,41 +3069,17 @@ actualise_ssa_to_alias(compiler_context *ctx)
* */
static void
-write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_no)
+write_transformed_position(nir_builder *b, nir_src input_point_src)
{
nir_ssa_def *input_point = nir_ssa_for_src(b, input_point_src, 4);
+ nir_ssa_def *scale = nir_load_viewport_scale(b);
+ nir_ssa_def *offset = nir_load_viewport_offset(b);
- /* Get viewport from the uniforms */
- nir_intrinsic_instr *load;
- load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
- load->num_components = 4;
- load->src[0] = nir_src_for_ssa(nir_imm_int(b, uniform_no));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
- nir_builder_instr_insert(b, &load->instr);
-
- /* Formatted as <width, height, centerx, centery> */
- nir_ssa_def *viewport_vec4 = &load->dest.ssa;
- nir_ssa_def *viewport_width_2 = nir_channel(b, viewport_vec4, 0);
- nir_ssa_def *viewport_height_2 = nir_channel(b, viewport_vec4, 1);
- nir_ssa_def *viewport_offset = nir_channels(b, viewport_vec4, 0x8 | 0x4);
-
- /* XXX: From uniforms? */
- nir_ssa_def *depth_near = nir_imm_float(b, 0.0);
- nir_ssa_def *depth_far = nir_imm_float(b, 1.0);
-
- /* World space to normalised device coordinates */
+ /* World space to normalised device coordinates to screen space */
nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, input_point, 3));
nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, input_point, 0x7), w_recip);
-
- /* Normalised device coordinates to screen space */
-
- nir_ssa_def *viewport_multiplier = nir_vec2(b, viewport_width_2, viewport_height_2);
- nir_ssa_def *viewport_xy = nir_fadd(b, nir_fmul(b, nir_channels(b, ndc_point, 0x3), viewport_multiplier), viewport_offset);
-
- nir_ssa_def *depth_multiplier = nir_fmul(b, nir_fsub(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
- nir_ssa_def *depth_offset = nir_fmul(b, nir_fadd(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
- nir_ssa_def *screen_depth = nir_fadd(b, nir_fmul(b, nir_channel(b, ndc_point, 2), depth_multiplier), depth_offset);
+ nir_ssa_def *screen = nir_fadd(b, nir_fmul(b, ndc_point, scale), offset);
/* gl_Position will be written out in screenspace xyz, with w set to
* the reciprocal we computed earlier. The transformed w component is
@@ -3048,9 +3088,9 @@ write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_
* used in depth clipping computations */
nir_ssa_def *screen_space = nir_vec4(b,
- nir_channel(b, viewport_xy, 0),
- nir_channel(b, viewport_xy, 1),
- screen_depth,
+ nir_channel(b, screen, 0),
+ nir_channel(b, screen, 1),
+ nir_channel(b, screen, 2),
w_recip);
/* Finally, write out the transformed values to the varying */
@@ -3107,7 +3147,7 @@ transform_position_writes(nir_shader *shader)
nir_builder_init(&b, func->impl);
b.cursor = nir_before_instr(instr);
- write_transformed_position(&b, intr->src[0], UNIFORM_VIEWPORT);
+ write_transformed_position(&b, intr->src[0]);
nir_instr_remove(instr);
}
}
@@ -3457,28 +3497,11 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
/* TODO: Decide this at runtime */
ctx->uniform_cutoff = 8;
- switch (ctx->stage) {
- case MESA_SHADER_VERTEX:
- ctx->special_uniforms = 1;
- break;
-
- default:
- ctx->special_uniforms = 0;
- break;
- }
-
- /* Append epilogue uniforms if necessary. The cmdstream depends on
- * these being at the -end-; see assign_var_locations. */
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "viewport");
- }
-
/* Assign var locations early, so the epilogue can use them if necessary */
nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
- nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
+ nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, uniform_type_size);
/* Initialize at a global (not block) level hash tables */
@@ -3487,31 +3510,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
ctx->ssa_to_register = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
+ ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
ctx->leftover_ssa_to_alias = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- /* Assign actual uniform location, skipping over samplers */
-
- ctx->uniform_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
-
- nir_foreach_variable(var, &nir->uniforms) {
- if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
-
- unsigned length = glsl_get_aoa_size(var->type);
-
- if (!length) {
- length = glsl_get_length(var->type);
- }
-
- if (!length) {
- length = glsl_get_matrix_columns(var->type);
- }
-
- for (int col = 0; col < length; ++col) {
- int id = ctx->uniform_count++;
- _mesa_hash_table_u64_insert(ctx->uniform_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
- }
- }
-
/* Record the varying mapping for the command stream's bookkeeping */
struct exec_list *varyings =
@@ -3531,7 +3532,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
- NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+
+ NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, uniform_type_size, 0);
+ NIR_PASS_V(nir, nir_lower_io, nir_var_all & ~nir_var_uniform, glsl_type_size, 0);
/* Append vertex epilogue before optimisation, so the epilogue itself
* is optimised */
@@ -3547,13 +3550,18 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
nir_print_shader(nir, stdout);
}
- /* Assign counts, now that we're sure (post-optimisation) */
+ /* Assign sysvals and counts, now that we're sure
+ * (post-optimisation) */
+
+ midgard_nir_assign_sysvals(ctx, nir);
+
program->uniform_count = nir->num_uniforms;
+ program->sysval_count = ctx->sysval_count;
+ memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
-
nir_foreach_function(func, nir) {
if (!func->impl)
continue;