Diffstat (limited to 'src/compiler/nir/nir_lower_uniforms_to_ubo.c')
-rw-r--r-- | src/compiler/nir/nir_lower_uniforms_to_ubo.c | 103
1 file changed, 54 insertions(+), 49 deletions(-)
diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
index 65107c01046..0b37b705ef8 100644
--- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c
+++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
@@ -22,27 +22,24 @@
  */
 
 /*
- * Remap load_uniform intrinsics to UBO accesses of UBO binding point 0.
- * Simultaneously, remap existing UBO accesses by increasing their binding
- * point by 1.
+ * Remap load_uniform intrinsics to nir_load_ubo or nir_load_ubo_vec4 accesses
+ * of UBO binding point 0. Simultaneously, remap existing UBO accesses by
+ * increasing their binding point by 1.
  *
- * Note that nir_intrinsic_load_uniform base/ranges can be set in different
- * units, and the multiplier argument caters to supporting these different
- * units.
+ * For PIPE_CAP_PACKED_UNIFORMS, dword_packed should be set to indicate that
+ * nir_intrinsic_load_uniform is in increments of dwords instead of vec4s.
  *
- * For example:
- * - st_glsl_to_nir for PIPE_CAP_PACKED_UNIFORMS uses dwords (4 bytes) so the
- *   multiplier should be 4
- * - st_glsl_to_nir for !PIPE_CAP_PACKED_UNIFORMS uses vec4s so the
- *   multiplier should be 16
- * - tgsi_to_nir uses vec4s, so the multiplier should be 16
+ * If load_vec4 is set, then nir_intrinsic_load_ubo_vec4 will be generated
+ * instead of nir_intrinsic_load_ubo, saving addressing math for hardware
+ * needing aligned vec4 loads in increments of vec4s (such as TGSI CONST file
+ * loads).
  */
 
 #include "nir.h"
 #include "nir_builder.h"
 
 static bool
-lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b, bool dword_packed, bool load_vec4)
 {
    b->cursor = nir_before_instr(&instr->instr);
 
@@ -58,43 +55,51 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
 
    if (instr->intrinsic == nir_intrinsic_load_uniform) {
       nir_ssa_def *ubo_idx = nir_imm_int(b, 0);
-      nir_ssa_def *ubo_offset =
-         nir_iadd(b, nir_imm_int(b, multiplier * nir_intrinsic_base(instr)),
-                  nir_imul(b, nir_imm_int(b, multiplier),
-                           nir_ssa_for_src(b, instr->src[0], 1)));
-
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
-      load->num_components = instr->num_components;
-      load->src[0] = nir_src_for_ssa(ubo_idx);
-      load->src[1] = nir_src_for_ssa(ubo_offset);
-      assert(instr->dest.ssa.bit_size >= 8);
+      nir_ssa_def *uniform_offset = nir_ssa_for_src(b, instr->src[0], 1);
 
-      /* If it's const, set the alignment to our known constant offset.  If
-       * not, set it to a pessimistic value based on the multiplier (or the
-       * scalar size, for qword loads).
-       *
-       * We could potentially set up stricter alignments for indirects by
-       * knowing what features are enabled in the APIs (see comment in
-       * nir_lower_ubo_vec4.c)
-       */
-      if (nir_src_is_const(instr->src[0])) {
-         nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
-                                 (nir_src_as_uint(instr->src[0]) +
-                                  nir_intrinsic_base(instr) * multiplier) %
-                                 NIR_ALIGN_MUL_MAX);
+      assert(instr->dest.ssa.bit_size >= 8);
+      nir_ssa_def *load_result;
+      if (load_vec4) {
+         /* Don't ask us to generate load_vec4 when you've packed your
+          * uniforms as dwords instead of vec4s.
+          */
+         assert(!dword_packed);
+         load_result = nir_load_ubo_vec4(b, instr->num_components, instr->dest.ssa.bit_size,
+                                         ubo_idx,
+                                         nir_iadd_imm(b, uniform_offset, nir_intrinsic_base(instr)));
       } else {
-         nir_intrinsic_set_align(load, MAX2(multiplier,
-                                            instr->dest.ssa.bit_size / 8), 0);
-      }
-      nir_ssa_dest_init(&load->instr, &load->dest,
-                        load->num_components, instr->dest.ssa.bit_size,
-                        instr->dest.ssa.name);
-      nir_builder_instr_insert(b, &load->instr);
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+         /* For PIPE_CAP_PACKED_UNIFORMS, the uniforms are packed with the
+          * base/offset in dword units instead of vec4 units.
+          */
+         int multiplier = dword_packed ? 4 : 16;
+         load_result = nir_load_ubo(b, instr->num_components, instr->dest.ssa.bit_size,
+                                    ubo_idx,
+                                    nir_iadd_imm(b, nir_imul_imm(b, uniform_offset, multiplier),
+                                                 nir_intrinsic_base(instr) * multiplier));
+         nir_intrinsic_instr *load = nir_instr_as_intrinsic(load_result->parent_instr);
+
+         /* If it's const, set the alignment to our known constant offset.  If
+          * not, set it to a pessimistic value based on the multiplier (or the
+          * scalar size, for qword loads).
+          *
+          * We could potentially set up stricter alignments for indirects by
+          * knowing what features are enabled in the APIs (see comment in
+          * nir_lower_ubo_vec4.c)
+          */
+         if (nir_src_is_const(instr->src[0])) {
+            nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
+                                    (nir_src_as_uint(instr->src[0]) +
+                                     nir_intrinsic_base(instr) * multiplier) %
+                                    NIR_ALIGN_MUL_MAX);
+         } else {
+            nir_intrinsic_set_align(load, MAX2(multiplier,
+                                               instr->dest.ssa.bit_size / 8), 0);
+         }
 
-      nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
-      nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+         nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
+         nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+      }
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa, load_result);
 
       nir_instr_remove(&instr->instr);
 
       return true;
@@ -104,7 +109,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
 }
 
 bool
-nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
+nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4)
 {
    bool progress = false;
 
@@ -117,7 +122,7 @@ nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
          if (instr->type == nir_instr_type_intrinsic)
             progress |= lower_instr(nir_instr_as_intrinsic(instr),
                                     &builder,
-                                    multiplier);
+                                    dword_packed, load_vec4);
       }
    }
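
For readers tracing the unit conversion, here is a small standalone sketch of the offset arithmetic the lowered intrinsics end up with. It is illustrative C, not NIR builder code; the helper names (ubo_byte_offset, ubo_vec4_offset) and the example values are invented for this sketch, and only the multiplier logic is taken from the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helpers mirroring lower_instr()'s math.  "base" is
 * nir_intrinsic_base(instr) and "indirect" is the value of the
 * load_uniform src[0], both in the pass's input units.
 */

/* nir_load_ubo path: convert dword or vec4 units to a byte offset. */
static uint32_t
ubo_byte_offset(bool dword_packed, uint32_t base, uint32_t indirect)
{
   uint32_t multiplier = dword_packed ? 4 : 16;
   return indirect * multiplier + base * multiplier;
}

/* nir_load_ubo_vec4 path: the offset stays in vec4 units, so no
 * multiply is needed -- this is the saved addressing math.
 */
static uint32_t
ubo_vec4_offset(uint32_t base, uint32_t indirect)
{
   return indirect + base;
}

int
main(void)
{
   /* Uniform at vec4 slot 3, indirect index 2 (or the equivalent
    * dword-packed base 12, indirect 8): both land at byte 80.
    */
   printf("load_ubo, vec4 units:   byte offset %u\n",
          ubo_byte_offset(false, 3, 2));   /* (2 + 3) * 16 = 80 */
   printf("load_ubo, dword-packed: byte offset %u\n",
          ubo_byte_offset(true, 12, 8));   /* (8 + 12) * 4 = 80 */
   printf("load_ubo_vec4:          vec4 offset %u\n",
          ubo_vec4_offset(3, 2));          /* 2 + 3 = 5 */
   return 0;
}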
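
Finally, a minimal sketch of a call site under the new signature. nir_lower_uniforms_to_ubo and the NIR_PASS_V macro are real; the wrapper function and its packed_uniforms/wants_vec4 parameters are assumptions standing in for driver-specific state.

#include "nir.h"

/* Hypothetical driver helper: packed_uniforms would reflect
 * PIPE_CAP_PACKED_UNIFORMS; wants_vec4 would be set for hardware that
 * needs aligned vec4 loads (TGSI CONST-file style).
 */
static void
example_lower_uniforms(nir_shader *nir, bool packed_uniforms, bool wants_vec4)
{
   /* load_vec4 requires vec4-unit (non-dword-packed) uniforms, per the
    * assert in lower_instr(), so never request both.
    */
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo,
              packed_uniforms /* dword_packed */,
              !packed_uniforms && wants_vec4 /* load_vec4 */);
}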