7 files changed, 229 insertions, 188 deletions
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index 371a11a934b..b7e7d0c5808 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -109,6 +109,7 @@ midgard_compiler = executable(
   link_with : [
     libgallium,
     libglsl_standalone,
+    libmesa_gallium, # for st_glsl_storage_type_size
     libmesa_util
   ],
   build_by_default : true
diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h
index 530a086e928..54960c7e599 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -95,10 +95,6 @@
 #define REGISTER_TEXTURE_BASE 28
 #define REGISTER_SELECT 31
 
-/* Special uniforms used for e.g. vertex epilogues */
-#define SPECIAL_UNIFORM_BASE (1 << 24)
-#define UNIFORM_VIEWPORT (SPECIAL_UNIFORM_BASE + 0)
-
 /* SSA helper aliases to mimic the registers. UNUSED_0 encoded as an inline
  * constant. UNUSED_1 encoded as REGISTER_UNUSED */
 
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index ea8c0153c96..c68067d50a5 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -32,6 +32,7 @@
 
 #include "main/mtypes.h"
 #include "compiler/glsl/glsl_to_nir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
 #include "compiler/nir_types.h"
 #include "main/imports.h"
 #include "compiler/nir/nir_builder.h"
@@ -176,6 +177,7 @@ typedef struct midgard_block {
  * driver seems to do it that way */
 
 #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
+#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W) /* components in X, Y, Z, W order */
 
 #define M_LOAD_STORE(name, rname, uname) \
 	static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
@@ -189,7 +191,7 @@ typedef struct midgard_block {
 			.load_store = { \
 				.op = midgard_op_##name, \
 				.mask = 0xF, \
-				.swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), \
+				.swizzle = SWIZZLE_XYZW, \
 				.address = address \
 			} \
 		}; \
@@ -432,10 +434,6 @@ typedef struct compiler_context {
         int temp_count;
         int max_hash;
 
-        /* Uniform IDs for mdg */
-        struct hash_table_u64 *uniform_nir_to_mdg;
-        int uniform_count;
-
         /* Just the count of the max register used. Higher count => higher
          * register pressure */
         int work_registers;
@@ -447,9 +445,6 @@ typedef struct compiler_context {
         /* Mapping of texture register -> SSA index for unaliasing */
         int texture_index[2];
 
-        /* Count of special uniforms (viewport, etc) in vec4 units */
-        int special_uniforms;
-
         /* If any path hits a discard instruction */
         bool can_discard;
 
@@ -464,6 +459,11 @@ typedef struct compiler_context {
 
         /* The index corresponding to the fragment output */
         unsigned fragment_output;
+
+        /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
+        unsigned sysvals[MAX_SYSVAL_COUNT];
+        unsigned sysval_count;
+        struct hash_table_u64 *sysval_to_id;
 } compiler_context;
 
 /* Append instruction to end of current block */
@@ -645,6 +645,12 @@ glsl_type_size(const struct glsl_type *type)
         return glsl_count_attribute_slots(type, false);
 }
 
+static int
+uniform_type_size(const struct glsl_type *type)
+{
+        return st_glsl_storage_type_size(type, false); /* sizing is left to the state tracker helper; used as the type-size callback for uniforms */
+}
+
 /* Lower fdot2 to a vector multiplication followed by channel addition  */
 static void
 midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
@@ -667,6 +673,60 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
 }
 
+static int
+midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
+{
+        /* Map a sysval-loading intrinsic to its PAN_SYSVAL_* ID. Every
+         * other intrinsic is reported as -1 */
+        if (instr->intrinsic == nir_intrinsic_load_viewport_scale)
+                return PAN_SYSVAL_VIEWPORT_SCALE;
+        else if (instr->intrinsic == nir_intrinsic_load_viewport_offset)
+                return PAN_SYSVAL_VIEWPORT_OFFSET;
+
+        return -1;
+}
+
+static void
+midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr)
+{
+        /* Only intrinsics can be sysval loads; skip everything else */
+        if (instr->type != nir_instr_type_intrinsic)
+                return;
+
+        int which = midgard_nir_sysval_for_intrinsic(nir_instr_as_intrinsic(instr));
+
+        /* Negative means the intrinsic does not load a sysval */
+        if (which < 0)
+                return;
+
+        /* Each sysval gets a single slot, so bail if one was handed out */
+        if (_mesa_hash_table_u64_search(ctx->sysval_to_id, which))
+                return;
+
+        /* Take the next free slot. The table stores slot + 1, so the entry
+         * for slot 0 is still non-NULL and passes the lookup test above */
+        unsigned slot = ctx->sysval_count;
+        ctx->sysvals[slot] = which;
+        ctx->sysval_count = slot + 1;
+        _mesa_hash_table_u64_insert(ctx->sysval_to_id, which, (void *) ((uintptr_t) slot + 1));
+}
+
+static void
+midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader)
+{
+        /* Reset the count, then scan each instruction of each function body */
+        ctx->sysval_count = 0;
+        nir_foreach_function(func, shader) {
+                if (func->impl) {
+                        nir_foreach_block(blk, func->impl) {
+                                nir_foreach_instr_safe(ins, blk) {
+                                        midgard_nir_assign_sysval_body(ctx, ins);
+                                }
+                        }
+                }
+        }
+}
+
 static bool
 midgard_nir_lower_fdot2(nir_shader *shader)
 {
@@ -715,7 +775,6 @@ optimise_nir(nir_shader *nir)
                 progress = false;
 
                 NIR_PASS(progress, nir, midgard_nir_lower_algebraic);
-                NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
                 NIR_PASS(progress, nir, nir_lower_var_copies);
                 NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
 
@@ -1207,6 +1266,52 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 #undef ALU_CASE
 
 static void
+emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset)
+{
+        /* TODO: half-floats */
+
+        if (offset < ctx->uniform_cutoff) {
+                /* Fast path: uniforms below the cutoff are
+                 * 0-cycle, since they're just a register
+                 * fetch in the usual case. Alias the fixed
+                 * register, counting down from 23, while
+                 * we're still in SSA-space */
+
+                int fixed_reg = 23 - offset;
+                alias_ssa(ctx, dest, SSA_FIXED_REGISTER(fixed_reg));
+                return;
+        }
+
+        /* Slow path: read from the 'special' UBO to access
+         * higher-indexed uniforms, at a performance cost */
+
+        midgard_instruction ld = m_load_uniform_32(dest, offset);
+        /* TODO: Don't split. The low three bits of the offset go in
+         * varying_parameters and the remaining bits in the address */
+        ld.load_store.address = offset >> 3;
+        ld.load_store.varying_parameters = (offset & 7) << 7;
+
+        ld.load_store.unknown = 0x1E00; /* xxx: what is this? */
+        emit_mir_instruction(ctx, ld);
+}
+
+static void
+emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
+{
+        /* Where the loaded value should land */
+        unsigned target = nir_dest_index(ctx, &instr->dest);
+
+        /* Look up the slot recorded for this sysval; the table holds
+         * slot + 1, so undo the bias to recover the slot itself */
+        int which = midgard_nir_sysval_for_intrinsic(instr);
+        uintptr_t biased = (uintptr_t) _mesa_hash_table_u64_search(ctx->sysval_to_id, which);
+
+        /* Sysvals are prefix uniforms, so the slot is the uniform index */
+        unsigned slot = biased - 1;
+        emit_uniform_read(ctx, target, slot);
+}
+
+static void
 emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
 {
         nir_const_value *const_offset;
@@ -1238,52 +1343,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 reg = nir_dest_index(ctx, &instr->dest);
 
                 if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
-                        /* TODO: half-floats */
-
-                        int uniform_offset = 0;
-
-                        if (offset >= SPECIAL_UNIFORM_BASE) {
-                                /* XXX: Resolve which uniform */
-                                uniform_offset = 0;
-                        } else {
-                                /* Offset away from the special
-                                 * uniform block */
-
-                                void *entry = _mesa_hash_table_u64_search(ctx->uniform_nir_to_mdg, offset + 1);
-
-                                /* XXX */
-                                if (!entry) {
-                                        DBG("WARNING: Unknown uniform %d\n", offset);
-                                        break;
-                                }
-
-                                uniform_offset = (uintptr_t) (entry) - 1;
-                                uniform_offset += ctx->special_uniforms;
-                        }
-
-                        if (uniform_offset < ctx->uniform_cutoff) {
-                                /* Fast path: For the first 16 uniform,
-                                 * accesses are 0-cycle, since they're
-                                 * just a register fetch in the usual
-                                 * case.  So, we alias the registers
-                                 * while we're still in SSA-space */
-
-                                int reg_slot = 23 - uniform_offset;
-                                alias_ssa(ctx, reg, SSA_FIXED_REGISTER(reg_slot));
-                        } else {
-                                /* Otherwise, read from the 'special'
-                                 * UBO to access higher-indexed
-                                 * uniforms, at a performance cost */
-
-                                midgard_instruction ins = m_load_uniform_32(reg, uniform_offset);
-
-                                /* TODO: Don't split */
-                                ins.load_store.varying_parameters = (uniform_offset & 7) << 7;
-                                ins.load_store.address = uniform_offset >> 3;
-
-                                ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
-                                emit_mir_instruction(ctx, ins);
-                        }
+                        emit_uniform_read(ctx, reg, ctx->sysval_count + offset);
                 } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
                         /* XXX: Half-floats? */
                         /* TODO: swizzle, mask */
@@ -1490,6 +1550,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 _mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v);
                 break;
 
+        case nir_intrinsic_load_viewport_scale:
+        case nir_intrinsic_load_viewport_offset:
+                emit_sysval_read(ctx, instr);
+                break;
 
         default:
                 printf ("Unhandled intrinsic\n");
@@ -3005,41 +3069,17 @@ actualise_ssa_to_alias(compiler_context *ctx)
  * */
 
 static void
-write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_no)
+write_transformed_position(nir_builder *b, nir_src input_point_src)
 {
         nir_ssa_def *input_point = nir_ssa_for_src(b, input_point_src, 4);
+        nir_ssa_def *scale = nir_load_viewport_scale(b);
+        nir_ssa_def *offset = nir_load_viewport_offset(b);
 
-        /* Get viewport from the uniforms */
-        nir_intrinsic_instr *load;
-        load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
-        load->num_components = 4;
-        load->src[0] = nir_src_for_ssa(nir_imm_int(b, uniform_no));
-        nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
-        nir_builder_instr_insert(b, &load->instr);
-
-        /* Formatted as <width, height, centerx, centery> */
-        nir_ssa_def *viewport_vec4 = &load->dest.ssa;
-        nir_ssa_def *viewport_width_2 = nir_channel(b, viewport_vec4, 0);
-        nir_ssa_def *viewport_height_2 = nir_channel(b, viewport_vec4, 1);
-        nir_ssa_def *viewport_offset = nir_channels(b, viewport_vec4, 0x8 | 0x4);
-
-        /* XXX: From uniforms? */
-        nir_ssa_def *depth_near = nir_imm_float(b, 0.0);
-        nir_ssa_def *depth_far = nir_imm_float(b, 1.0);
-
-        /* World space to normalised device coordinates */
+        /* World space to normalised device coordinates to screen space */
 
         nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, input_point, 3));
         nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, input_point, 0x7), w_recip);
-
-        /* Normalised device coordinates to screen space */
-
-        nir_ssa_def *viewport_multiplier = nir_vec2(b, viewport_width_2, viewport_height_2);
-        nir_ssa_def *viewport_xy = nir_fadd(b, nir_fmul(b, nir_channels(b, ndc_point, 0x3), viewport_multiplier), viewport_offset);
-
-        nir_ssa_def *depth_multiplier = nir_fmul(b, nir_fsub(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
-        nir_ssa_def *depth_offset     = nir_fmul(b, nir_fadd(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
-        nir_ssa_def *screen_depth     = nir_fadd(b, nir_fmul(b, nir_channel(b, ndc_point, 2), depth_multiplier), depth_offset);
+        nir_ssa_def *screen = nir_fadd(b, nir_fmul(b, ndc_point, scale), offset);
 
         /* gl_Position will be written out in screenspace xyz, with w set to
          * the reciprocal we computed earlier. The transformed w component is
@@ -3048,9 +3088,9 @@ write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_
          * used in depth clipping computations */
 
         nir_ssa_def *screen_space = nir_vec4(b,
-                                             nir_channel(b, viewport_xy, 0),
-                                             nir_channel(b, viewport_xy, 1),
-                                             screen_depth,
+                                             nir_channel(b, screen, 0),
+                                             nir_channel(b, screen, 1),
+                                             nir_channel(b, screen, 2),
                                              w_recip);
 
         /* Finally, write out the transformed values to the varying */
@@ -3107,7 +3147,7 @@ transform_position_writes(nir_shader *shader)
                                 nir_builder_init(&b, func->impl);
                                 b.cursor = nir_before_instr(instr);
 
-                                write_transformed_position(&b, intr->src[0], UNIFORM_VIEWPORT);
+                                write_transformed_position(&b, intr->src[0]);
                                 nir_instr_remove(instr);
                         }
                 }
@@ -3457,28 +3497,11 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         /* TODO: Decide this at runtime */
         ctx->uniform_cutoff = 8;
 
-        switch (ctx->stage) {
-        case MESA_SHADER_VERTEX:
-                ctx->special_uniforms = 1;
-                break;
-
-        default:
-                ctx->special_uniforms = 0;
-                break;
-        }
-
-        /* Append epilogue uniforms if necessary. The cmdstream depends on
-         * these being at the -end-; see assign_var_locations. */
-
-        if (ctx->stage == MESA_SHADER_VERTEX) {
-                nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "viewport");
-        }
-
         /* Assign var locations early, so the epilogue can use them if necessary */
 
         nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
         nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
-        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
+        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, uniform_type_size);
 
         /* Initialize at a global (not block) level hash tables */
 
@@ -3487,31 +3510,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
         ctx->ssa_to_register = _mesa_hash_table_u64_create(NULL);
         ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
+        ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
         ctx->leftover_ssa_to_alias = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
 
-        /* Assign actual uniform location, skipping over samplers */
-
-        ctx->uniform_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
-
-        nir_foreach_variable(var, &nir->uniforms) {
-                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
-
-                unsigned length = glsl_get_aoa_size(var->type);
-
-                if (!length) {
-                        length = glsl_get_length(var->type);
-                }
-
-                if (!length) {
-                        length = glsl_get_matrix_columns(var->type);
-                }
-
-                for (int col = 0; col < length; ++col) {
-                        int id = ctx->uniform_count++;
-                        _mesa_hash_table_u64_insert(ctx->uniform_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
-                }
-        }
-
         /* Record the varying mapping for the command stream's bookkeeping */
 
         struct exec_list *varyings =
@@ -3531,7 +3532,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         NIR_PASS_V(nir, nir_lower_global_vars_to_local);
         NIR_PASS_V(nir, nir_lower_var_copies);
         NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+
+        NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, uniform_type_size, 0);
+        NIR_PASS_V(nir, nir_lower_io, nir_var_all & ~nir_var_uniform, glsl_type_size, 0);
 
         /* Append vertex epilogue before optimisation, so the epilogue itself
          * is optimised */
@@ -3547,13 +3550,18 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
 	        nir_print_shader(nir, stdout);
 	}
 
-        /* Assign counts, now that we're sure (post-optimisation) */
+        /* Assign sysvals and counts, now that we're sure
+         * (post-optimisation) */
+
+        midgard_nir_assign_sysvals(ctx, nir);
+
         program->uniform_count = nir->num_uniforms;
+        program->sysval_count = ctx->sysval_count;
+        memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
 
         program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
         program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
 
-
         nir_foreach_function(func, nir) {
                 if (!func->impl)
                         continue;
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.h b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
index a4dfb25648d..ffc1b1f70e6 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -21,12 +21,29 @@
  * SOFTWARE.
  */
 
+#ifndef __MIDGARD_H_
+#define __MIDGARD_H_
 
 #include "compiler/nir/nir.h"
 #include "util/u_dynarray.h"
 
 /* Define the general compiler entry point */
 
+#define MAX_SYSVAL_COUNT 32
+
+/* Pack a sysval class constant (low bits) and a parameter (shifted up by 16)
+ * into one ID. A parameter of zero leaves the ID equal to its class constant */
+
+#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)
+
+/* Sysval classes. Values start at one for easy indexing of hash tables
+ * internal to the compiler. pan_sysval is the enum tag, not a variable */
+
+enum pan_sysval {
+        PAN_SYSVAL_VIEWPORT_SCALE = 1,
+        PAN_SYSVAL_VIEWPORT_OFFSET = 2,
+};
+
 typedef struct {
         int work_register_count;
         int uniform_count;
@@ -35,6 +52,12 @@ typedef struct {
         int attribute_count;
         int varying_count;
 
+        /* Sysvals are prepended before the uniforms; each entry holds the
+         * PAN_SYSVAL_* ID of the sysval in that slot */
+
+        unsigned sysval_count;
+        unsigned sysvals[MAX_SYSVAL_COUNT];
+
         unsigned varyings[32];
 
         /* Boolean properties of the program */
@@ -81,3 +104,5 @@ static const nir_shader_compiler_options midgard_nir_options = {
 
         .native_integers = true
 };
+
+#endif
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index c9abe9e6687..52226311832 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -81,6 +81,11 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
 
         util_dynarray_fini(&program.compiled);
 
+        /* Sysvals are prepended */
+        program.uniform_count += program.sysval_count;
+        state->sysval_count = program.sysval_count;
+        memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
+
         meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
         meta->attribute_count = program.attribute_count;
         meta->varying_count = program.varying_count;
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index bafe67e138f..9f401b1a7a1 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1048,7 +1048,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 }
         }
 
-        /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */
         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
 
         /* For flipped-Y buffers (signaled by negative scale), the translate is
@@ -1060,71 +1059,73 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         if (invert_y)
                 translate_y = ctx->pipe_framebuffer.height - translate_y;
 
-        float viewport_vec4[] = {
-                vp->scale[0],
-                fabsf(vp->scale[1]),
-
-                vp->translate[0],
-                translate_y
-        };
-
-        for (int i = 0; i < PIPE_SHADER_TYPES; ++i) {
+        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
                 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];
 
-                if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) {
-                        /* It doesn't matter if we don't use all the memory;
-                         * we'd need a dummy UBO anyway. Compute the max */
-
-                        size_t size = sizeof(viewport_vec4) + buf->size;
-                        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
+                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
+                struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
+                struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;
 
-                        /* Keep track how much we've uploaded */
-                        off_t offset = 0;
+                /* Allocate room for the sysval and the uniforms */
+                size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
+                size_t size = sys_size + buf->size;
+                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
 
-                        if (i == PIPE_SHADER_VERTEX) {
-                                /* Upload viewport */
-                                memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4));
-                                offset += sizeof(viewport_vec4);
+                /* Upload sysvals requested by the shader */
+                float *uniforms = (float *) transfer.cpu;
+                for (unsigned i = 0; i < ss->sysval_count; ++i) {
+                        int sysval = ss->sysval[i];
+
+                        if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
+                                uniforms[4*i + 0] = vp->scale[0];
+                                uniforms[4*i + 1] = fabsf(vp->scale[1]);
+                                uniforms[4*i + 2] = vp->scale[2];
+                        } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
+                                uniforms[4*i + 0] = vp->translate[0];
+                                uniforms[4*i + 1] = translate_y;
+                                uniforms[4*i + 2] = vp->translate[2];
+                        } else {
+                                assert(0);
                         }
+                }
 
-                        /* Upload uniforms */
-                        memcpy(transfer.cpu + offset, buf->buffer, buf->size);
+                /* Upload uniforms */
+                memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);
 
-                        int uniform_count = 0;
+                int uniform_count = 0;
 
-                        struct mali_vertex_tiler_postfix *postfix;
+                struct mali_vertex_tiler_postfix *postfix;
 
-                        switch (i) {
-                        case PIPE_SHADER_VERTEX:
-                                uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
-                                postfix = &ctx->payload_vertex.postfix;
-                                break;
+                switch (i) {
+                case PIPE_SHADER_VERTEX:
+                        uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
+                        postfix = &ctx->payload_vertex.postfix;
+                        break;
 
-                        case PIPE_SHADER_FRAGMENT:
-                                uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
-                                postfix = &ctx->payload_tiler.postfix;
-                                break;
+                case PIPE_SHADER_FRAGMENT:
+                        uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
+                        postfix = &ctx->payload_tiler.postfix;
+                        break;
 
-                        default:
-                                DBG("Unknown shader stage %d in uniform upload\n", i);
-                                assert(0);
-                        }
+                default:
+                        DBG("Unknown shader stage %d in uniform upload\n", i);
+                        assert(0);
+                }
 
-                        /* Also attach the same buffer as a UBO for extended access */
+                /* Also attach the same buffer as a UBO for extended access */
 
-                        struct mali_uniform_buffer_meta uniform_buffers[] = {
-                                {
-                                        .size = MALI_POSITIVE((2 + uniform_count)),
-                                        .ptr = transfer.gpu >> 2,
-                                },
-                        };
+                struct mali_uniform_buffer_meta uniform_buffers[] = {
+                        {
+                                .size = MALI_POSITIVE((2 + uniform_count)),
+                                .ptr = transfer.gpu >> 2,
+                        },
+                };
 
-                        mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
-                        postfix->uniforms = transfer.gpu;
-                        postfix->uniform_buffers = ubufs;
+                mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
+                postfix->uniforms = transfer.gpu;
+                postfix->uniform_buffers = ubufs;
 
-                        buf->dirty = 0;
-                }
+                buf->dirty = 0;
         }
 
         /* TODO: Upload the viewport somewhere more appropriate */
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 7947169f83b..d071da1c62f 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -42,6 +42,8 @@
 #include "util/u_blitter.h"
 #include "util/hash_table.h"
 
+#include "midgard/midgard_compile.h"
+
 /* Forward declare to avoid extra header dep */
 struct prim_convert_context;
 
@@ -264,6 +266,9 @@ struct panfrost_shader_state {
         unsigned general_varying_stride;
         struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
 
+        unsigned sysval_count;
+        unsigned sysval[MAX_SYSVAL_COUNT];
+
         /* Information on this particular shader variant */
         struct pipe_alpha_state alpha_state;
 };