Diffstat (limited to 'src/broadcom/compiler/v3d_nir_lower_image_load_store.c')
-rw-r--r-- | src/broadcom/compiler/v3d_nir_lower_image_load_store.c | 352 |
1 file changed, 291 insertions, 61 deletions
diff --git a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
index 2706432d5ef..9a651bfc6a7 100644
--- a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
+++ b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
@@ -40,9 +40,20 @@
  * calculations and load/store using the TMU general memory access path.
  */
 
+static const unsigned bits_8[4] = {8, 8, 8, 8};
+static const unsigned bits_16[4] = {16, 16, 16, 16};
+static const unsigned bits_1010102[4] = {10, 10, 10, 2};
+
 bool
 v3d_gl_format_is_return_32(enum pipe_format format)
 {
+        /* We can get a NONE format in Vulkan because we support the
+         * shaderStorageImageReadWithoutFormat feature. We consider these to
+         * always use 32-bit precision.
+         */
+        if (format == PIPE_FORMAT_NONE)
+                return true;
+
         const struct util_format_description *desc =
                 util_format_description(format);
         const struct util_format_channel_description *chan = &desc->channel[0];
@@ -52,15 +63,17 @@ v3d_gl_format_is_return_32(enum pipe_format format)
 
 /* Packs a 32-bit vector of colors in the range [0, (1 << bits[i]) - 1] to a
  * 32-bit SSA value, with as many channels as necessary to store all the bits
+ *
+ * This is the generic helper, using all common nir operations.
  */
-static nir_ssa_def *
-pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
+static nir_def *
+pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
           int num_components, bool mask)
 {
-        nir_ssa_def *results[4];
+        nir_def *results[4];
         int offset = 0;
         for (int i = 0; i < num_components; i++) {
-                nir_ssa_def *chan = nir_channel(b, color, i);
+                nir_def *chan = nir_channel(b, color, i);
 
                 /* Channels being stored shouldn't cross a 32-bit boundary. */
                 assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));
@@ -84,10 +97,187 @@ pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
         return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
 }
 
-static void
-v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
+/* Utility wrapper: half_2x16_split is mapped to vfpack, and sometimes it is
+ * just easier to read vfpack in the code, especially while using the PRM as
+ * a reference.
+ */
+static inline nir_def *
+nir_vfpack(nir_builder *b, nir_def *p1, nir_def *p2)
+{
+        return nir_pack_half_2x16_split(b, p1, p2);
+}
+
+static inline nir_def *
+pack_11f11f10f(nir_builder *b, nir_def *color)
+{
+        nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
+                                 nir_channel(b, color, 1));
+        nir_def *undef = nir_undef(b, 1, color->bit_size);
+        nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2), undef);
+
+        return nir_pack_32_to_r11g11b10_v3d(b, p1, p2);
+}
+
+static inline nir_def *
+pack_r10g10b10a2_uint(nir_builder *b, nir_def *color)
+{
+        nir_def *p1 = nir_pack_2x32_to_2x16_v3d(b, nir_channel(b, color, 0),
+                                                nir_channel(b, color, 1));
+        nir_def *p2 = nir_pack_2x32_to_2x16_v3d(b, nir_channel(b, color, 2),
+                                                nir_channel(b, color, 3));
+
+        return nir_pack_uint_32_to_r10g10b10a2_v3d(b, p1, p2);
+}
+
+static inline nir_def *
+pack_r10g10b10a2_unorm(nir_builder *b, nir_def *color)
+{
+        nir_def *p1 = nir_vfpack(b, nir_channel(b, color, 0),
+                                 nir_channel(b, color, 1));
+        p1 = nir_pack_2x16_to_unorm_2x10_v3d(b, p1);
+
+        nir_def *p2 = nir_vfpack(b, nir_channel(b, color, 2),
+                                 nir_channel(b, color, 3));
+        p2 = nir_pack_2x16_to_unorm_10_2_v3d(b, p2);
+
+        return nir_pack_uint_32_to_r10g10b10a2_v3d(b, p1, p2);
+}
+
+enum hw_conversion {
+        NONE,
+        TO_SNORM,
+        TO_UNORM
+};
+
+static inline nir_def *
+pack_8bit(nir_builder *b, nir_def *color,
+          unsigned num_components,
+          enum hw_conversion conversion)
+{
+        /* Note that usually you should not use this method (which relies on
+         * custom packing) for 1 component if we are not doing any
+         * conversion. But we also support that case, and let the caller
+         * decide which method to use.
+         */
+        nir_def *p1;
+        nir_def *p2;
+
+        if (conversion == NONE) {
+                p1 = nir_pack_2x32_to_2x16_v3d(b, nir_channel(b, color, 0),
+                                               nir_channel(b, color, num_components == 1 ? 0 : 1));
+        } else {
+                p1 = nir_vfpack(b, nir_channel(b, color, 0),
+                                nir_channel(b, color, num_components == 1 ? 0 : 1));
+                p1 = (conversion == TO_UNORM) ?
+                        nir_pack_2x16_to_unorm_2x8_v3d(b, p1) :
+                        nir_pack_2x16_to_snorm_2x8_v3d(b, p1);
+        }
+        if (num_components == 4) {
+                if (conversion == NONE) {
+                        p2 = nir_pack_2x32_to_2x16_v3d(b, nir_channel(b, color, 2),
+                                                       nir_channel(b, color, 3));
+                } else {
+                        p2 = nir_vfpack(b, nir_channel(b, color, 2),
+                                        nir_channel(b, color, 3));
+                        p2 = (conversion == TO_UNORM) ?
+                                nir_pack_2x16_to_unorm_2x8_v3d(b, p2) :
+                                nir_pack_2x16_to_snorm_2x8_v3d(b, p2);
+                }
+        } else {
+                /* Using an undef here would be more correct. But for this
+                 * case we are getting worse shader-db values with some CTS
+                 * tests, so we just reuse the first packing.
+                 */
+                p2 = p1;
+        }
+
+        return nir_pack_4x16_to_4x8_v3d(b, p1, p2);
+}
+
+static inline nir_def *
+pack_16bit(nir_builder *b, nir_def *color,
+           unsigned num_components,
+           enum hw_conversion conversion)
+{
+        nir_def *results[2] = {0};
+        nir_def *channels[4] = {0};
+
+        for (unsigned i = 0; i < num_components; i++) {
+                channels[i] = nir_channel(b, color, i);
+                switch (conversion) {
+                case TO_SNORM:
+                        channels[i] = nir_f2snorm_16_v3d(b, channels[i]);
+                        break;
+                case TO_UNORM:
+                        channels[i] = nir_f2unorm_16_v3d(b, channels[i]);
+                        break;
+                default:
+                        /* Note that usually you should not use this method
+                         * (which relies on custom packing) if we are not
+                         * doing any conversion. But we also support that
+                         * case, and let the caller decide which method to
+                         * use.
+                         */
+                        break;
+                }
+        }
+
+        switch (num_components) {
+        case 1:
+                results[0] = channels[0];
+                break;
+        case 4:
+                results[1] = nir_pack_2x32_to_2x16_v3d(b, channels[2], channels[3]);
+                FALLTHROUGH;
+        case 2:
+                results[0] = nir_pack_2x32_to_2x16_v3d(b, channels[0], channels[1]);
+                break;
+        default:
+                unreachable("Invalid number of components");
+        }
+
+        return nir_vec(b, results, DIV_ROUND_UP(num_components, 2));
+}
+
+static inline nir_def *
+pack_xbit(nir_builder *b, nir_def *color,
+          unsigned num_components,
+          const struct util_format_channel_description *r_chan)
+{
+        bool pack_mask = (r_chan->type == UTIL_FORMAT_TYPE_SIGNED);
+        enum hw_conversion conversion = NONE;
+        if (r_chan->normalized) {
+                conversion =
+                        (r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) ? TO_UNORM : TO_SNORM;
+        }
+
+        switch (r_chan->size) {
+        case 8:
+                if (conversion == NONE && num_components < 2)
+                        return pack_bits(b, color, bits_8, num_components, pack_mask);
+                else
+                        return pack_8bit(b, color, num_components, conversion);
+                break;
+        case 16:
+                /* pack_mask implies that the generic packing method would
+                 * need to include extra operations to handle negative values,
+                 * so in that case, even without a conversion, it is better to
+                 * use the packing based on custom hw operations.
+                 */
+                if (conversion == NONE && !pack_mask)
+                        return pack_bits(b, color, bits_16, num_components, pack_mask);
+                else
+                        return pack_16bit(b, color, num_components, conversion);
+                break;
+        default:
+                unreachable("unrecognized bits");
+        }
+}
+
+static bool
+v3d_nir_lower_image_store_v42(nir_builder *b, nir_intrinsic_instr *instr)
 {
         enum pipe_format format = nir_intrinsic_format(instr);
+        assert(format != PIPE_FORMAT_NONE);
         const struct util_format_description *desc =
                 util_format_description(format);
         const struct util_format_channel_description *r_chan = &desc->channel[0];
@@ -95,10 +285,10 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
 
         b->cursor = nir_before_instr(&instr->instr);
 
-        nir_ssa_def *color = nir_channels(b,
-                                          nir_ssa_for_src(b, instr->src[3], 4),
-                                          (1 << num_components) - 1);
-        nir_ssa_def *formatted = NULL;
+        nir_def *color = nir_trim_vector(b,
+                                         instr->src[3].ssa,
+                                         num_components);
+        nir_def *formatted = NULL;
 
         if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
                 formatted = nir_format_pack_11f11f10f(b, color);
@@ -110,9 +300,6 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
                  */
                 formatted = color;
         } else {
-                static const unsigned bits_8[4] = {8, 8, 8, 8};
-                static const unsigned bits_16[4] = {16, 16, 16, 16};
-                static const unsigned bits_1010102[4] = {10, 10, 10, 2};
                 const unsigned *bits;
 
                 switch (r_chan->size) {
@@ -132,11 +319,13 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
                 bool pack_mask = false;
                 if (r_chan->pure_integer &&
                     r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
-                        formatted = nir_format_clamp_sint(b, color, bits);
+                        /* We don't need to do any conversion or clamping in this case */
+                        formatted = color;
                         pack_mask = true;
                 } else if (r_chan->pure_integer &&
                            r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
-                        formatted = nir_format_clamp_uint(b, color, bits);
+                        /* We don't need to do any conversion or clamping in this case */
+                        formatted = color;
                 } else if (r_chan->normalized &&
                            r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                         formatted = nir_format_float_to_snorm(b, color, bits);
@@ -154,75 +343,116 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
                                       pack_mask);
         }
 
-        nir_instr_rewrite_src(&instr->instr, &instr->src[3],
-                              nir_src_for_ssa(formatted));
+        nir_src_rewrite(&instr->src[3], formatted);
         instr->num_components = formatted->num_components;
+
+        return true;
 }
 
-static void
+
+static bool
+v3d_nir_lower_image_store_v71(nir_builder *b, nir_intrinsic_instr *instr)
+{
+        enum pipe_format format = nir_intrinsic_format(instr);
+        assert(format != PIPE_FORMAT_NONE);
+        const struct util_format_description *desc =
+                util_format_description(format);
+        const struct util_format_channel_description *r_chan = &desc->channel[0];
+        unsigned num_components = util_format_get_nr_components(format);
+        b->cursor = nir_before_instr(&instr->instr);
+
+        nir_def *color =
+                nir_trim_vector(b, instr->src[3].ssa, num_components);
+        nir_def *formatted = NULL;
+        if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+                formatted = nir_format_pack_r9g9b9e5(b, color);
+        } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+                formatted = pack_11f11f10f(b, color);
+        } else if (format == PIPE_FORMAT_R10G10B10A2_UINT) {
+                formatted = pack_r10g10b10a2_uint(b, color);
+        } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM) {
+                formatted = pack_r10g10b10a2_unorm(b, color);
+        } else if (r_chan->size == 32) {
+                /* For 32-bit formats, we just have to move the vector
+                 * across (possibly reducing the number of channels).
+                 */
+                formatted = color;
+        } else if (r_chan->type == UTIL_FORMAT_TYPE_FLOAT) {
+                assert(r_chan->size == 16);
+                formatted = nir_format_float_to_half(b, color);
+                formatted = pack_bits(b, formatted, bits_16, num_components,
+                                      false);
+        } else {
+                assert(r_chan->size == 8 || r_chan->size == 16);
+                formatted = pack_xbit(b, color, num_components, r_chan);
+        }
+
+        nir_src_rewrite(&instr->src[3], formatted);
+        instr->num_components = formatted->num_components;
+
+        return true;
+}
+
+static bool
 v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
 {
         static const unsigned bits16[] = {16, 16, 16, 16};
         enum pipe_format format = nir_intrinsic_format(instr);
 
         if (v3d_gl_format_is_return_32(format))
-                return;
+                return false;
 
         b->cursor = nir_after_instr(&instr->instr);
 
-        assert(instr->dest.is_ssa);
-        nir_ssa_def *result = &instr->dest.ssa;
+        nir_def *result = &instr->def;
         if (util_format_is_pure_uint(format)) {
                 result = nir_format_unpack_uint(b, result, bits16, 4);
         } else if (util_format_is_pure_sint(format)) {
                 result = nir_format_unpack_sint(b, result, bits16, 4);
         } else {
-                nir_ssa_def *rg = nir_channel(b, result, 0);
-                nir_ssa_def *ba = nir_channel(b, result, 1);
-                result = nir_vec4(b,
-                                  nir_unpack_half_2x16_split_x(b, rg),
-                                  nir_unpack_half_2x16_split_y(b, rg),
-                                  nir_unpack_half_2x16_split_x(b, ba),
-                                  nir_unpack_half_2x16_split_y(b, ba));
+                nir_def *rg = nir_channel(b, result, 0);
+                nir_def *ba = nir_channel(b, result, 1);
+                result = nir_vec4(b,
+                                  nir_unpack_half_2x16_split_x(b, rg),
+                                  nir_unpack_half_2x16_split_y(b, rg),
+                                  nir_unpack_half_2x16_split_x(b, ba),
+                                  nir_unpack_half_2x16_split_y(b, ba));
         }
 
-        nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, result,
+        nir_def_rewrite_uses_after(&instr->def, result,
                                        result->parent_instr);
+
+        return true;
 }
 
-void
-v3d_nir_lower_image_load_store(nir_shader *s)
+static bool
+v3d_nir_lower_image_load_store_cb(nir_builder *b,
+                                  nir_intrinsic_instr *intr,
+                                  void *_state)
 {
-        nir_foreach_function(function, s) {
-                if (!function->impl)
-                        continue;
-
-                nir_builder b;
-                nir_builder_init(&b, function->impl);
-
-                nir_foreach_block(block, function->impl) {
-                        nir_foreach_instr_safe(instr, block) {
-                                if (instr->type != nir_instr_type_intrinsic)
-                                        continue;
-
-                                nir_intrinsic_instr *intr =
-                                        nir_instr_as_intrinsic(instr);
-
-                                switch (intr->intrinsic) {
-                                case nir_intrinsic_image_load:
-                                        v3d_nir_lower_image_load(&b, intr);
-                                        break;
-                                case nir_intrinsic_image_store:
-                                        v3d_nir_lower_image_store(&b, intr);
-                                        break;
-                                default:
-                                        break;
-                                }
-                        }
-                }
+        struct v3d_compile *c = (struct v3d_compile *) _state;
 
-                nir_metadata_preserve(function->impl,
-                                      nir_metadata_block_index |
-                                      nir_metadata_dominance);
+        switch (intr->intrinsic) {
+        case nir_intrinsic_image_load:
+                return v3d_nir_lower_image_load(b, intr);
+        case nir_intrinsic_image_store:
+                if (c->devinfo->ver >= 71)
+                        return v3d_nir_lower_image_store_v71(b, intr);
+                else
+                        return v3d_nir_lower_image_store_v42(b, intr);
+                break;
+        default:
+                return false;
         }
+
+        return false;
+}
+
+bool
+v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c)
+{
+        return nir_shader_intrinsics_pass(s,
+                                          v3d_nir_lower_image_load_store_cb,
+                                          nir_metadata_block_index |
+                                          nir_metadata_dominance, c);
}
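To make the generic path concrete: pack_bits() emits NIR shift/or chains that place each channel at an increasing bit offset, with the assert guaranteeing no channel straddles a 32-bit word. The standalone C sketch below is illustrative only and not part of the patch; the helper name pack_bits_cpu is invented here, it assumes channel widths below 32 bits and pre-clamped inputs, and it omits the sign-masking that the mask parameter handles for signed formats.

#include <stdint.h>
#include <stdio.h>

/* CPU-side equivalent of the shifts/ors that pack_bits() emits as NIR:
 * pack num_components channels, bits[i] wide each, into consecutive
 * 32-bit words. Returns the number of words written, mirroring
 * DIV_ROUND_UP(offset, 32) in the pass.
 */
static unsigned
pack_bits_cpu(uint32_t *out, const uint32_t *chan,
              const unsigned *bits, int num_components)
{
        int offset = 0;
        for (int i = 0; i < num_components; i++) {
                uint32_t masked = chan[i] & ((1u << bits[i]) - 1);
                if ((offset % 32) == 0)
                        out[offset / 32] = masked;
                else
                        out[offset / 32] |= masked << (offset % 32);
                offset += bits[i];
        }
        return (offset + 31) / 32;
}

int main(void)
{
        /* RGBA8: four 8-bit channels land in a single 32-bit word. */
        const unsigned bits_8[4] = {8, 8, 8, 8};
        uint32_t rgba[4] = {0x12, 0x34, 0x56, 0x78};
        uint32_t packed[4];
        unsigned n = pack_bits_cpu(packed, rgba, bits_8, 4);
        printf("%u word(s): 0x%08x\n", n, packed[0]); /* 1 word(s): 0x78563412 */

        /* RGB10A2: 10+10+10+2 bits also fit exactly in one word. */
        const unsigned bits_1010102[4] = {10, 10, 10, 2};
        uint32_t rgb10a2[4] = {0x201, 2, 3, 1};
        n = pack_bits_cpu(packed, rgb10a2, bits_1010102, 4);
        printf("%u word(s): 0x%08x\n", n, packed[0]); /* 1 word(s): 0x40300a01 */
        return 0;
}

With bits_16 the same loop produces two words for a vec4, which is why pack_bits() returns a nir_vec of DIV_ROUND_UP(offset, 32) channels and the store then updates instr->num_components to match.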