Diffstat (limited to 'src/compiler/nir/nir_lower_uniforms_to_ubo.c')
-rw-r--r-- | src/compiler/nir/nir_lower_uniforms_to_ubo.c | 103
1 file changed, 54 insertions(+), 49 deletions(-)
diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
index 65107c01046..0b37b705ef8 100644
--- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c
+++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
@@ -22,27 +22,24 @@
  */
 
 /*
- * Remap load_uniform intrinsics to UBO accesses of UBO binding point 0.
- * Simultaneously, remap existing UBO accesses by increasing their binding
- * point by 1.
+ * Remap load_uniform intrinsics to nir_load_ubo or nir_load_ubo_vec4 accesses
+ * of UBO binding point 0. Simultaneously, remap existing UBO accesses by
+ * increasing their binding point by 1.
  *
- * Note that nir_intrinsic_load_uniform base/ranges can be set in different
- * units, and the multiplier argument caters to supporting these different
- * units.
+ * For PIPE_CAP_PACKED_UNIFORMS, dword_packed should be set to indicate that
+ * nir_intrinsic_load_uniform is in increments of dwords instead of vec4s.
  *
- * For example:
- * - st_glsl_to_nir for PIPE_CAP_PACKED_UNIFORMS uses dwords (4 bytes) so the
- *   multiplier should be 4
- * - st_glsl_to_nir for !PIPE_CAP_PACKED_UNIFORMS uses vec4s so the
- *   multiplier should be 16
- * - tgsi_to_nir uses vec4s, so the multiplier should be 16
+ * If load_vec4 is set, then nir_intrinsic_load_ubo_vec4 will be generated
+ * instead of nir_intrinsic_load_ubo, saving addressing math for hardware
+ * needing aligned vec4 loads in increments of vec4s (such as TGSI CONST file
+ * loads).
  */
 
 #include "nir.h"
 #include "nir_builder.h"
 
 static bool
-lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b, bool dword_packed, bool load_vec4)
 {
    b->cursor = nir_before_instr(&instr->instr);
 
@@ -58,43 +55,51 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
 
    if (instr->intrinsic == nir_intrinsic_load_uniform) {
       nir_ssa_def *ubo_idx = nir_imm_int(b, 0);
-      nir_ssa_def *ubo_offset =
-         nir_iadd(b, nir_imm_int(b, multiplier * nir_intrinsic_base(instr)),
-                  nir_imul(b, nir_imm_int(b, multiplier),
-                           nir_ssa_for_src(b, instr->src[0], 1)));
-
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
-      load->num_components = instr->num_components;
-      load->src[0] = nir_src_for_ssa(ubo_idx);
-      load->src[1] = nir_src_for_ssa(ubo_offset);
-      assert(instr->dest.ssa.bit_size >= 8);
+      nir_ssa_def *uniform_offset = nir_ssa_for_src(b, instr->src[0], 1);
 
-      /* If it's const, set the alignment to our known constant offset.  If
-       * not, set it to a pessimistic value based on the multiplier (or the
-       * scalar size, for qword loads).
-       *
-       * We could potentially set up stricter alignments for indirects by
-       * knowing what features are enabled in the APIs (see comment in
-       * nir_lower_ubo_vec4.c)
-       */
-      if (nir_src_is_const(instr->src[0])) {
-         nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
-                                 (nir_src_as_uint(instr->src[0]) +
-                                  nir_intrinsic_base(instr) * multiplier) %
-                                 NIR_ALIGN_MUL_MAX);
+      assert(instr->dest.ssa.bit_size >= 8);
+      nir_ssa_def *load_result;
+      if (load_vec4) {
+         /* Don't ask us to generate load_vec4 when you've packed your
+          * uniforms as dwords instead of vec4s.
+          */
+         assert(!dword_packed);
+         load_result = nir_load_ubo_vec4(b, instr->num_components, instr->dest.ssa.bit_size,
+                                         ubo_idx,
+                                         nir_iadd_imm(b, uniform_offset, nir_intrinsic_base(instr)));
       } else {
-         nir_intrinsic_set_align(load, MAX2(multiplier,
-                                            instr->dest.ssa.bit_size / 8), 0);
-      }
-      nir_ssa_dest_init(&load->instr, &load->dest,
-                        load->num_components, instr->dest.ssa.bit_size,
-                        instr->dest.ssa.name);
-      nir_builder_instr_insert(b, &load->instr);
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+         /* For PIPE_CAP_PACKED_UNIFORMS, the uniforms are packed with the
+          * base/offset in dword units instead of vec4 units.
+          */
+         int multiplier = dword_packed ? 4 : 16;
+         load_result = nir_load_ubo(b, instr->num_components, instr->dest.ssa.bit_size,
+                                    ubo_idx,
+                                    nir_iadd_imm(b, nir_imul_imm(b, uniform_offset, multiplier),
+                                                 nir_intrinsic_base(instr) * multiplier));
+         nir_intrinsic_instr *load = nir_instr_as_intrinsic(load_result->parent_instr);
+
+         /* If it's const, set the alignment to our known constant offset.  If
+          * not, set it to a pessimistic value based on the multiplier (or the
+          * scalar size, for qword loads).
+          *
+          * We could potentially set up stricter alignments for indirects by
+          * knowing what features are enabled in the APIs (see comment in
+          * nir_lower_ubo_vec4.c)
+          */
+         if (nir_src_is_const(instr->src[0])) {
+            nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
+                                    (nir_src_as_uint(instr->src[0]) +
+                                     nir_intrinsic_base(instr) * multiplier) %
+                                    NIR_ALIGN_MUL_MAX);
+         } else {
+            nir_intrinsic_set_align(load, MAX2(multiplier,
+                                               instr->dest.ssa.bit_size / 8), 0);
+         }
 
-      nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
-      nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+         nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
+         nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+      }
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa, load_result);
 
       nir_instr_remove(&instr->instr);
 
       return true;
@@ -104,7 +109,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
 }
 
 bool
-nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
+nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4)
 {
    bool progress = false;
 
@@ -117,7 +122,7 @@ nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
          if (instr->type == nir_instr_type_intrinsic)
             progress |= lower_instr(nir_instr_as_intrinsic(instr),
                                     &builder,
-                                    multiplier);
+                                    dword_packed, load_vec4);
       }
    }
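
For readers tracing the unit conversion, here is a small standalone sketch of the offset arithmetic the lowered intrinsics end up with. It is illustrative C, not NIR builder code; the helper names (ubo_byte_offset, ubo_vec4_offset) and the example values are invented for this sketch, and only the multiplier logic is taken from the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helpers mirroring lower_instr()'s math.  "base" is
 * nir_intrinsic_base(instr) and "indirect" is the value of the
 * load_uniform src[0], both in the pass's input units.
 */

/* nir_load_ubo path: convert dword or vec4 units to a byte offset. */
static uint32_t
ubo_byte_offset(bool dword_packed, uint32_t base, uint32_t indirect)
{
   uint32_t multiplier = dword_packed ? 4 : 16;
   return indirect * multiplier + base * multiplier;
}

/* nir_load_ubo_vec4 path: the offset stays in vec4 units, so no
 * multiply is needed -- this is the saved addressing math.
 */
static uint32_t
ubo_vec4_offset(uint32_t base, uint32_t indirect)
{
   return indirect + base;
}

int
main(void)
{
   /* Uniform at vec4 slot 3, indirect index 2 (or the equivalent
    * dword-packed base 12, indirect 8): both land at byte 80.
    */
   printf("load_ubo, vec4 units:   byte offset %u\n",
          ubo_byte_offset(false, 3, 2));   /* (2 + 3) * 16 = 80 */
   printf("load_ubo, dword-packed: byte offset %u\n",
          ubo_byte_offset(true, 12, 8));   /* (8 + 12) * 4 = 80 */
   printf("load_ubo_vec4:          vec4 offset %u\n",
          ubo_vec4_offset(3, 2));          /* 2 + 3 = 5 */
   return 0;
}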
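
Finally, a minimal sketch of a call site under the new signature. nir_lower_uniforms_to_ubo and the NIR_PASS_V macro are real; the wrapper function and its packed_uniforms/wants_vec4 parameters are assumptions standing in for driver-specific state.

#include "nir.h"

/* Hypothetical driver helper: packed_uniforms would reflect
 * PIPE_CAP_PACKED_UNIFORMS; wants_vec4 would be set for hardware that
 * needs aligned vec4 loads (TGSI CONST-file style).
 */
static void
example_lower_uniforms(nir_shader *nir, bool packed_uniforms, bool wants_vec4)
{
   /* load_vec4 requires vec4-unit (non-dword-packed) uniforms, per the
    * assert in lower_instr(), so never request both.
    */
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo,
              packed_uniforms /* dword_packed */,
              !packed_uniforms && wants_vec4 /* load_vec4 */);
}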