diff options
author | Boris Brezillon <boris.brezillon@collabora.com> | 2021-08-06 14:24:31 +0200 |
---|---|---|
committer | Boris Brezillon <boris.brezillon@collabora.com> | 2021-09-20 15:18:56 +0200 |
commit | 0d57a76458a8a3bf690b30471165f47ab7a84ae9 (patch) | |
tree | 01d99afbc0b81de40c9813cf75fe5a37b9e26d47 | |
parent | a865a4a789ac21ebe7fbb6cc98a7e2a519e7e8c7 (diff) |
panfrost: Prepare texture helpers to per-gen XML
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12935>
-rw-r--r-- | src/gallium/drivers/panfrost/pan_cmdstream.c | 6 | ||||
-rw-r--r-- | src/panfrost/lib/meson.build | 1 | ||||
-rw-r--r-- | src/panfrost/lib/pan_blitter.c | 8 | ||||
-rw-r--r-- | src/panfrost/lib/pan_texture.c | 808 | ||||
-rw-r--r-- | src/panfrost/lib/pan_texture.h | 15 | ||||
-rw-r--r-- | src/panfrost/vulkan/panvk_vX_image.c | 4 |
6 files changed, 416 insertions, 426 deletions
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index f04f35e684e..ff5f6b190e9 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1315,8 +1315,8 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, }; unsigned size = - (PAN_ARCH <= 5 ? pan_size(MIDGARD_TEXTURE) : 0) + - panfrost_estimate_texture_payload_size(device, &iview); + (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) + + GENX(panfrost_estimate_texture_payload_size)(&iview); struct panfrost_ptr payload = pan_pool_alloc_aligned(&ctx->descs.base, size, 64); so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu); @@ -1328,7 +1328,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, payload.gpu += pan_size(MIDGARD_TEXTURE); } - panfrost_new_texture(device, &iview, tex, &payload); + GENX(panfrost_new_texture)(device, &iview, tex, &payload); } static void diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build index 522149466b3..8658637f607 100644 --- a/src/panfrost/lib/meson.build +++ b/src/panfrost/lib/meson.build @@ -60,6 +60,7 @@ foreach ver : ['4', '5', '6', '7'] 'pan_cs.c', 'pan_indirect_dispatch.c', 'pan_indirect_draw.c', + 'pan_texture.c', ], include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw], c_args : ['-DPAN_ARCH=' + ver], diff --git a/src/panfrost/lib/pan_blitter.c b/src/panfrost/lib/pan_blitter.c index abb375b6528..69cc967dd49 100644 --- a/src/panfrost/lib/pan_blitter.c +++ b/src/panfrost/lib/pan_blitter.c @@ -883,12 +883,12 @@ pan_blitter_emit_textures(struct pan_pool *pool, for (unsigned i = 0; i < tex_count; i++) { void *texture = textures.cpu + (pan_size(TEXTURE) * i); size_t payload_size = - panfrost_estimate_texture_payload_size(pool->dev, views[i]); + GENX(panfrost_estimate_texture_payload_size)(views[i]); struct panfrost_ptr surfaces = pan_pool_alloc_aligned(pool, payload_size, pan_alignment(SURFACE_WITH_STRIDE)); - panfrost_new_texture(pool->dev, views[i], texture, &surfaces); + GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces); } return textures.gpu; @@ -897,7 +897,7 @@ pan_blitter_emit_textures(struct pan_pool *pool, for (unsigned i = 0; i < tex_count; i++) { size_t sz = pan_size(TEXTURE) + - panfrost_estimate_texture_payload_size(pool->dev, views[i]); + GENX(panfrost_estimate_texture_payload_size)(views[i]); struct panfrost_ptr texture = pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE)); struct panfrost_ptr surfaces = { @@ -905,7 +905,7 @@ pan_blitter_emit_textures(struct pan_pool *pool, .gpu = texture.gpu + pan_size(TEXTURE), }; - panfrost_new_texture(pool->dev, views[i], texture.cpu, &surfaces); + GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces); textures[i] = texture.gpu; } diff --git a/src/panfrost/lib/pan_texture.c b/src/panfrost/lib/pan_texture.c index 49da5340d0a..226f98a4be6 100644 --- a/src/panfrost/lib/pan_texture.c +++ b/src/panfrost/lib/pan_texture.c @@ -30,6 +30,8 @@ #include "pan_texture.h" #include "panfrost-quirks.h" +#ifndef PAN_ARCH + /* Generates a texture descriptor. Ideally, descriptors are immutable after the * texture is created, so we can keep these hanging around in GPU memory in a * dedicated BO and not have to worry. In practice there are some minor gotchas @@ -57,38 +59,295 @@ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_LINEAR }; -/* Check if we need to set a custom stride by computing the "expected" - * stride and comparing it to what the user actually wants. Only applies - * to linear textures, since tiled/compressed textures have strict - * alignment requirements for their strides as it is */ +/* If not explicitly, line stride is calculated for block-based formats as + * (ceil(width / block_width) * block_size). As a special case, this is left + * zero if there is only a single block vertically. So, we have a helper to + * extract the dimensions of a block-based format and use that to calculate the + * line stride as such. + */ -static bool -panfrost_needs_explicit_stride(const struct panfrost_device *dev, - const struct pan_image_view *iview) +unsigned +panfrost_block_dim(uint64_t modifier, bool width, unsigned plane) { - /* Stride is explicit on Bifrost */ - if (pan_is_bifrost(dev)) - return true; + if (!drm_is_afbc(modifier)) { + assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); + return 16; + } - if (iview->image->layout.modifier != DRM_FORMAT_MOD_LINEAR) + switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) { + case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16: + return 16; + case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8: + return width ? 32 : 8; + case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4: + return width ? 64 : 4; + case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4: + return plane ? (width ? 64 : 4) : (width ? 32 : 8); + default: + unreachable("Invalid AFBC block size"); + } +} + +/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile. + * Checksumming is believed to be a CRC variant (CRC64 based on the size?). + * This feature is also known as "transaction elimination". */ + +#define CHECKSUM_TILE_WIDTH 16 +#define CHECKSUM_TILE_HEIGHT 16 +#define CHECKSUM_BYTES_PER_TILE 8 + +unsigned +panfrost_compute_checksum_size( + struct pan_image_slice_layout *slice, + unsigned width, + unsigned height) +{ + unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH); + unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT); + + slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE; + + return slice->crc.stride * tile_count_y; +} + +unsigned +panfrost_get_layer_stride(const struct pan_image_layout *layout, + unsigned level) +{ + if (layout->dim != MALI_TEXTURE_DIMENSION_3D) + return layout->array_stride; + else if (drm_is_afbc(layout->modifier)) + return layout->slices[level].afbc.surface_stride; + else + return layout->slices[level].surface_stride; +} + +/* Computes the offset into a texture at a particular level/face. Add to + * the base address of a texture to get the address to that level/face */ + +unsigned +panfrost_texture_offset(const struct pan_image_layout *layout, + unsigned level, unsigned array_idx, + unsigned surface_idx) +{ + return layout->slices[level].offset + + (array_idx * layout->array_stride) + + (surface_idx * layout->slices[level].surface_stride); +} + +bool +pan_image_layout_init(const struct panfrost_device *dev, + struct pan_image_layout *layout, + uint64_t modifier, + enum pipe_format format, + enum mali_texture_dimension dim, + unsigned width, unsigned height, unsigned depth, + unsigned array_size, unsigned nr_samples, + unsigned nr_slices, enum pan_image_crc_mode crc_mode, + const struct pan_image_explicit_layout *explicit_layout) +{ + /* Explicit stride only work with non-mipmap, non-array; single-sample + * 2D image, and in-band CRC can't be used. + */ + if (explicit_layout && + (depth > 1 || nr_samples > 1 || array_size > 1 || + dim != MALI_TEXTURE_DIMENSION_2D || nr_slices > 1 || + crc_mode == PAN_IMAGE_CRC_INBAND)) return false; - unsigned bytes_per_block = util_format_get_blocksize(iview->format); - unsigned block_w = util_format_get_blockwidth(iview->format); + /* Mandate 64 byte alignement */ + if (explicit_layout && (explicit_layout->offset & 63)) + return false; - for (unsigned l = iview->first_level; l <= iview->last_level; ++l) { - unsigned actual = iview->image->layout.slices[l].line_stride; - unsigned expected = - DIV_ROUND_UP(u_minify(iview->image->layout.width, l), block_w) * - bytes_per_block; + layout->crc_mode = crc_mode; + layout->modifier = modifier; + layout->format = format; + layout->dim = dim; + layout->width = width; + layout->height = height; + layout->depth = depth; + layout->array_size = array_size; + layout->nr_samples = nr_samples; + layout->nr_slices = nr_slices; - if (actual != expected) - return true; + unsigned bytes_per_pixel = util_format_get_blocksize(format); + + /* MSAA is implemented as a 3D texture with z corresponding to the + * sample #, horrifyingly enough */ + + assert(depth == 1 || nr_samples == 1); + + bool afbc = drm_is_afbc(layout->modifier); + bool tiled = layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR; + bool should_align = tiled || afbc; + bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D; + + unsigned oob_crc_offset = 0; + unsigned offset = explicit_layout ? explicit_layout->offset : 0; + unsigned tile_h = 1, tile_w = 1, tile_shift = 0; + + if (tiled || afbc) { + tile_w = panfrost_block_dim(layout->modifier, true, 0); + tile_h = panfrost_block_dim(layout->modifier, false, 0); + if (util_format_is_compressed(format)) + tile_shift = 2; } - return false; + for (unsigned l = 0; l < nr_slices; ++l) { + struct pan_image_slice_layout *slice = &layout->slices[l]; + + unsigned effective_width = width; + unsigned effective_height = height; + unsigned effective_depth = depth; + + if (should_align) { + effective_width = ALIGN_POT(effective_width, tile_w) >> tile_shift; + effective_height = ALIGN_POT(effective_height, tile_h); + + /* We don't need to align depth */ + } + + /* Align levels to cache-line as a performance improvement for + * linear/tiled and as a requirement for AFBC */ + + offset = ALIGN_POT(offset, 64); + + slice->offset = offset; + + /* Compute the would-be stride */ + unsigned stride = bytes_per_pixel * effective_width; + + if (explicit_layout) { + /* Make sure the explicit stride is valid */ + if (explicit_layout->line_stride < stride) + return false; + + stride = explicit_layout->line_stride; + } else if (linear) { + /* Keep lines alignment on 64 byte for performance */ + stride = ALIGN_POT(stride, 64); + } + + slice->line_stride = stride; + slice->row_stride = stride * (tile_h >> tile_shift); + + unsigned slice_one_size = slice->line_stride * effective_height; + + /* Compute AFBC sizes if necessary */ + if (afbc) { + slice->afbc.header_size = + panfrost_afbc_header_size(width, height); + + /* Stride between two rows of AFBC headers */ + slice->afbc.row_stride = + (effective_width / tile_w) * + AFBC_HEADER_BYTES_PER_TILE; + + /* AFBC body size */ + slice->afbc.body_size = slice_one_size; + + /* 3D AFBC resources have all headers placed at the + * beginning instead of having them split per depth + * level + */ + if (is_3d) { + slice->afbc.surface_stride = + slice->afbc.header_size; + slice->afbc.header_size *= effective_depth; + slice->afbc.body_size *= effective_depth; + offset += slice->afbc.header_size; + } else { + slice_one_size += slice->afbc.header_size; + slice->afbc.surface_stride = slice_one_size; + } + } + + unsigned slice_full_size = + slice_one_size * effective_depth * nr_samples; + + slice->surface_stride = slice_one_size; + + /* Compute AFBC sizes if necessary */ + + offset += slice_full_size; + slice->size = slice_full_size; + + /* Add a checksum region if necessary */ + if (crc_mode != PAN_IMAGE_CRC_NONE) { + slice->crc.size = + panfrost_compute_checksum_size(slice, width, height); + + if (crc_mode == PAN_IMAGE_CRC_INBAND) { + slice->crc.offset = offset; + offset += slice->crc.size; + slice->size += slice->crc.size; + } else { + slice->crc.offset = oob_crc_offset; + oob_crc_offset += slice->crc.size; + } + } + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + /* Arrays and cubemaps have the entire miptree duplicated */ + layout->array_stride = ALIGN_POT(offset, 64); + if (explicit_layout) + layout->data_size = offset; + else + layout->data_size = ALIGN_POT(layout->array_stride * array_size, 4096); + layout->crc_size = oob_crc_offset; + + return true; +} + +void +pan_iview_get_surface(const struct pan_image_view *iview, + unsigned level, unsigned layer, unsigned sample, + struct pan_surface *surf) +{ + level += iview->first_level; + assert(level < iview->image->layout.nr_slices); + + layer += iview->first_layer; + + bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D; + const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level]; + mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset; + + if (drm_is_afbc(iview->image->layout.modifier)) { + assert(!sample); + + if (is_3d) { + ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level); + assert(layer < depth); + surf->afbc.header = base + slice->offset + + (layer * slice->afbc.surface_stride); + surf->afbc.body = base + slice->offset + + slice->afbc.header_size + + (slice->surface_stride * layer); + } else { + assert(layer < iview->image->layout.array_size); + surf->afbc.header = base + + panfrost_texture_offset(&iview->image->layout, + level, layer, 0); + surf->afbc.body = surf->afbc.header + slice->afbc.header_size; + } + } else { + unsigned array_idx = is_3d ? 0 : layer; + unsigned surface_idx = is_3d ? layer : sample; + + surf->data = base + + panfrost_texture_offset(&iview->image->layout, level, + array_idx, surface_idx); + } } +#else /* ifndef PAN_ARCH */ + /* A Scalable Texture Compression (ASTC) corresponds to just a few texture type * in the hardware, but in fact can be parametrized to have various widths and * heights for the so-called "stretch factor". It turns out these parameters @@ -111,8 +370,7 @@ panfrost_astc_stretch(unsigned dim) * For ASTC, this is a "stretch factor" encoding the block size. */ static unsigned -panfrost_compression_tag(const struct panfrost_device *dev, - const struct util_format_description *desc, +panfrost_compression_tag(const struct util_format_description *desc, enum mali_texture_dimension dim, uint64_t modifier) { @@ -120,9 +378,7 @@ panfrost_compression_tag(const struct panfrost_device *dev, unsigned flags = (modifier & AFBC_FORMAT_MOD_YTR) ? MALI_AFBC_SURFACE_FLAG_YTR : 0; - if (!pan_is_bifrost(dev)) - return flags; - +#if PAN_ARCH >= 6 /* Prefetch enable */ flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH; @@ -135,8 +391,12 @@ panfrost_compression_tag(const struct panfrost_device *dev, * which doesn't work for 3D textures because the surface * stride does not cover the body. Only supported on v7+. */ - if (dev->arch >= 7 && dim != MALI_TEXTURE_DIMENSION_3D) +#endif + +#if PAN_ARCH >= 7 + if (dim != MALI_TEXTURE_DIMENSION_3D) flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE; +#endif return flags; } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { @@ -147,7 +407,6 @@ panfrost_compression_tag(const struct panfrost_device *dev, } } - /* Cubemaps have 6 faces as "layers" in between each actual layer. We * need to fix this up. TODO: logic wrong in the asserted out cases ... * can they happen, perhaps from cubemap arrays? */ @@ -199,11 +458,10 @@ panfrost_texture_num_elements( * as an allocation amount */ unsigned -panfrost_estimate_texture_payload_size(const struct panfrost_device *dev, - const struct pan_image_view *iview) +GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview) { /* Assume worst case */ - unsigned manual_stride = pan_is_bifrost(dev) || + unsigned manual_stride = PAN_ARCH >= 6 || (iview->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); unsigned elements = @@ -216,74 +474,6 @@ panfrost_estimate_texture_payload_size(const struct panfrost_device *dev, return sizeof(mali_ptr) * elements; } -/* If not explicitly, line stride is calculated for block-based formats as - * (ceil(width / block_width) * block_size). As a special case, this is left - * zero if there is only a single block vertically. So, we have a helper to - * extract the dimensions of a block-based format and use that to calculate the - * line stride as such. - */ - -unsigned -panfrost_block_dim(uint64_t modifier, bool width, unsigned plane) -{ - if (!drm_is_afbc(modifier)) { - assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); - return 16; - } - - switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) { - case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16: - return 16; - case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8: - return width ? 32 : 8; - case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4: - return width ? 64 : 4; - case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4: - return plane ? (width ? 64 : 4) : (width ? 32 : 8); - default: - unreachable("Invalid AFBC block size"); - } -} - -static void -panfrost_get_surface_strides(const struct panfrost_device *dev, - const struct pan_image_layout *layout, - unsigned l, - int32_t *row_stride, int32_t *surf_stride) -{ - const struct pan_image_slice_layout *slice = &layout->slices[l]; - - if (drm_is_afbc(layout->modifier)) { - /* Pre v7 don't have a row stride field. This field is - * repurposed as a Y offset which we don't use */ - *row_stride = dev->arch < 7 ? 0 : slice->afbc.row_stride; - *surf_stride = slice->afbc.surface_stride; - } else { - *row_stride = slice->row_stride; - *surf_stride = slice->surface_stride; - } -} - -static mali_ptr -panfrost_get_surface_pointer(const struct pan_image_layout *layout, - enum mali_texture_dimension dim, - mali_ptr base, - unsigned l, unsigned w, unsigned f, unsigned s) -{ - unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1; - unsigned offset; - - if (layout->dim == MALI_TEXTURE_DIMENSION_3D) { - assert(!f && !s); - offset = layout->slices[l].offset + - (w * panfrost_get_layer_stride(layout, l)); - } else { - offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s); - } - - return base + offset; -} - struct panfrost_surface_iter { unsigned layer, last_layer; unsigned level, first_level, last_level; @@ -315,8 +505,7 @@ panfrost_surface_iter_end(const struct panfrost_surface_iter *iter) } static void -panfrost_surface_iter_next(const struct panfrost_device *dev, - struct panfrost_surface_iter *iter) +panfrost_surface_iter_next(struct panfrost_surface_iter *iter) { #define INC_TEST(field) \ do { \ @@ -326,13 +515,13 @@ panfrost_surface_iter_next(const struct panfrost_device *dev, } while (0) /* Ordering is different on v7: inner loop is iterating on levels */ - if (dev->arch >= 7) + if (PAN_ARCH >= 7) INC_TEST(level); INC_TEST(sample); INC_TEST(face); - if (dev->arch < 7) + if (PAN_ARCH < 7) INC_TEST(level); iter->layer++; @@ -341,8 +530,45 @@ panfrost_surface_iter_next(const struct panfrost_device *dev, } static void -panfrost_emit_texture_payload(const struct panfrost_device *dev, - const struct pan_image_view *iview, +panfrost_get_surface_strides(const struct pan_image_layout *layout, + unsigned l, + int32_t *row_stride, int32_t *surf_stride) +{ + const struct pan_image_slice_layout *slice = &layout->slices[l]; + + if (drm_is_afbc(layout->modifier)) { + /* Pre v7 don't have a row stride field. This field is + * repurposed as a Y offset which we don't use */ + *row_stride = PAN_ARCH < 7 ? 0 : slice->afbc.row_stride; + *surf_stride = slice->afbc.surface_stride; + } else { + *row_stride = slice->row_stride; + *surf_stride = slice->surface_stride; + } +} + +static mali_ptr +panfrost_get_surface_pointer(const struct pan_image_layout *layout, + enum mali_texture_dimension dim, + mali_ptr base, + unsigned l, unsigned w, unsigned f, unsigned s) +{ + unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1; + unsigned offset; + + if (layout->dim == MALI_TEXTURE_DIMENSION_3D) { + assert(!f && !s); + offset = layout->slices[l].offset + + (w * panfrost_get_layer_stride(layout, l)); + } else { + offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s); + } + + return base + offset; +} + +static void +panfrost_emit_texture_payload(const struct pan_image_view *iview, enum pipe_format format, bool manual_stride, void *payload) @@ -361,7 +587,7 @@ panfrost_emit_texture_payload(const struct panfrost_device *dev, /* panfrost_compression_tag() wants the dimension of the resource, not the * one of the image view (those might differ). */ - base |= panfrost_compression_tag(dev, desc, layout->dim, layout->modifier); + base |= panfrost_compression_tag(desc, layout->dim, layout->modifier); /* Inject the addresses in, interleaving array indices, mip levels, * cube faces, and strides in that order */ @@ -381,7 +607,7 @@ panfrost_emit_texture_payload(const struct panfrost_device *dev, iview->first_level, iview->last_level, first_face, last_face, nr_samples); !panfrost_surface_iter_end(&iter); - panfrost_surface_iter_next(dev, &iter)) { + panfrost_surface_iter_next(&iter)) { mali_ptr pointer = panfrost_get_surface_pointer(layout, iview->dim, base, iter.level, iter.layer, @@ -395,7 +621,7 @@ panfrost_emit_texture_payload(const struct panfrost_device *dev, } else { pan_pack(payload, SURFACE_WITH_STRIDE, cfg) { cfg.pointer = pointer; - panfrost_get_surface_strides(dev, layout, iter.level, + panfrost_get_surface_strides(layout, iter.level, &cfg.row_stride, &cfg.surface_stride); } @@ -404,6 +630,37 @@ panfrost_emit_texture_payload(const struct panfrost_device *dev, } } +/* Check if we need to set a custom stride by computing the "expected" + * stride and comparing it to what the user actually wants. Only applies + * to linear textures, since tiled/compressed textures have strict + * alignment requirements for their strides as it is */ + +static bool +panfrost_needs_explicit_stride(const struct pan_image_view *iview) +{ + /* Stride is explicit on Bifrost */ + if (PAN_ARCH >= 6) + return true; + + if (iview->image->layout.modifier != DRM_FORMAT_MOD_LINEAR) + return false; + + unsigned bytes_per_block = util_format_get_blocksize(iview->format); + unsigned block_w = util_format_get_blockwidth(iview->format); + + for (unsigned l = iview->first_level; l <= iview->last_level; ++l) { + unsigned actual = iview->image->layout.slices[l].line_stride; + unsigned expected = + DIV_ROUND_UP(u_minify(iview->image->layout.width, l), block_w) * + bytes_per_block; + + if (actual != expected) + return true; + } + + return false; +} + /* Map modifiers to mali_texture_layout for packing in a texture descriptor */ static enum mali_texture_layout @@ -420,15 +677,15 @@ panfrost_modifier_to_layout(uint64_t modifier) } void -panfrost_new_texture(const struct panfrost_device *dev, - const struct pan_image_view *iview, - void *out, const struct panfrost_ptr *payload) +GENX(panfrost_new_texture)(const struct panfrost_device *dev, + const struct pan_image_view *iview, + void *out, const struct panfrost_ptr *payload) { const struct pan_image_layout *layout = &iview->image->layout; enum pipe_format format = iview->format; unsigned swizzle; - if (dev->arch == 7 && util_format_is_depth_or_stencil(format)) { + if (PAN_ARCH == 7 && util_format_is_depth_or_stencil(format)) { /* v7 doesn't have an _RRRR component order, combine the * user swizzle with a .XXXX swizzle to emulate that. */ @@ -447,9 +704,9 @@ panfrost_new_texture(const struct panfrost_device *dev, } bool manual_stride = - panfrost_needs_explicit_stride(dev, iview); + panfrost_needs_explicit_stride(iview); - panfrost_emit_texture_payload(dev, iview, format, + panfrost_emit_texture_payload(iview, format, manual_stride, payload->cpu); @@ -475,302 +732,31 @@ panfrost_new_texture(const struct panfrost_device *dev, width = u_minify(layout->width, iview->first_level); } - if (pan_is_bifrost(dev)) { - pan_pack(out, BIFROST_TEXTURE, cfg) { - cfg.dimension = iview->dim; - cfg.format = dev->formats[format].hw; - cfg.width = width; - cfg.height = u_minify(layout->height, iview->first_level); - if (iview->dim == MALI_TEXTURE_DIMENSION_3D) - cfg.depth = u_minify(layout->depth, iview->first_level); - else - cfg.sample_count = layout->nr_samples; - cfg.swizzle = swizzle; - cfg.texel_ordering = - panfrost_modifier_to_layout(layout->modifier); - cfg.levels = iview->last_level - iview->first_level + 1; - cfg.array_size = array_size; - cfg.surfaces = payload->gpu; - - /* We specify API-level LOD clamps in the sampler descriptor - * and use these clamps simply for bounds checking */ - cfg.minimum_lod = FIXED_16(0, false); - cfg.maximum_lod = FIXED_16(cfg.levels - 1, false); - } - } else { - pan_pack(out, MIDGARD_TEXTURE, cfg) { - cfg.width = width; - cfg.height = u_minify(layout->height, iview->first_level); - if (iview->dim == MALI_TEXTURE_DIMENSION_3D) - cfg.depth = u_minify(layout->depth, iview->first_level); - else - cfg.sample_count = layout->nr_samples; - cfg.array_size = array_size; - cfg.format = panfrost_pipe_format_v6[format].hw; - cfg.dimension = iview->dim; - cfg.texel_ordering = - panfrost_modifier_to_layout(layout->modifier); - cfg.manual_stride = manual_stride; - cfg.levels = iview->last_level - iview->first_level + 1; - cfg.swizzle = swizzle; - }; - } -} - -/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile. - * Checksumming is believed to be a CRC variant (CRC64 based on the size?). - * This feature is also known as "transaction elimination". */ - -#define CHECKSUM_TILE_WIDTH 16 -#define CHECKSUM_TILE_HEIGHT 16 -#define CHECKSUM_BYTES_PER_TILE 8 - -unsigned -panfrost_compute_checksum_size( - struct pan_image_slice_layout *slice, - unsigned width, - unsigned height) -{ - unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH); - unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT); - - slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE; - - return slice->crc.stride * tile_count_y; -} - -unsigned -panfrost_get_layer_stride(const struct pan_image_layout *layout, - unsigned level) -{ - if (layout->dim != MALI_TEXTURE_DIMENSION_3D) - return layout->array_stride; - else if (drm_is_afbc(layout->modifier)) - return layout->slices[level].afbc.surface_stride; - else - return layout->slices[level].surface_stride; -} - -/* Computes the offset into a texture at a particular level/face. Add to - * the base address of a texture to get the address to that level/face */ - -unsigned -panfrost_texture_offset(const struct pan_image_layout *layout, - unsigned level, unsigned array_idx, - unsigned surface_idx) -{ - return layout->slices[level].offset + - (array_idx * layout->array_stride) + - (surface_idx * layout->slices[level].surface_stride); -} - -bool -pan_image_layout_init(const struct panfrost_device *dev, - struct pan_image_layout *layout, - uint64_t modifier, - enum pipe_format format, - enum mali_texture_dimension dim, - unsigned width, unsigned height, unsigned depth, - unsigned array_size, unsigned nr_samples, - unsigned nr_slices, enum pan_image_crc_mode crc_mode, - const struct pan_image_explicit_layout *explicit_layout) -{ - /* Explicit stride only work with non-mipmap, non-array; single-sample - * 2D image, and in-band CRC can't be used. - */ - if (explicit_layout && - (depth > 1 || nr_samples > 1 || array_size > 1 || - dim != MALI_TEXTURE_DIMENSION_2D || nr_slices > 1 || - crc_mode == PAN_IMAGE_CRC_INBAND)) - return false; - - /* Mandate 64 byte alignement */ - if (explicit_layout && (explicit_layout->offset & 63)) - return false; - - layout->crc_mode = crc_mode; - layout->modifier = modifier; - layout->format = format; - layout->dim = dim; - layout->width = width; - layout->height = height; - layout->depth = depth; - layout->array_size = array_size; - layout->nr_samples = nr_samples; - layout->nr_slices = nr_slices; - - unsigned bytes_per_pixel = util_format_get_blocksize(format); - - /* MSAA is implemented as a 3D texture with z corresponding to the - * sample #, horrifyingly enough */ - - assert(depth == 1 || nr_samples == 1); - - bool afbc = drm_is_afbc(layout->modifier); - bool tiled = layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; - bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR; - bool should_align = tiled || afbc; - bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D; - - unsigned oob_crc_offset = 0; - unsigned offset = explicit_layout ? explicit_layout->offset : 0; - unsigned tile_h = 1, tile_w = 1, tile_shift = 0; - - if (tiled || afbc) { - tile_w = panfrost_block_dim(layout->modifier, true, 0); - tile_h = panfrost_block_dim(layout->modifier, false, 0); - if (util_format_is_compressed(format)) - tile_shift = 2; - } - - for (unsigned l = 0; l < nr_slices; ++l) { - struct pan_image_slice_layout *slice = &layout->slices[l]; - - unsigned effective_width = width; - unsigned effective_height = height; - unsigned effective_depth = depth; - - if (should_align) { - effective_width = ALIGN_POT(effective_width, tile_w) >> tile_shift; - effective_height = ALIGN_POT(effective_height, tile_h); - - /* We don't need to align depth */ - } - - /* Align levels to cache-line as a performance improvement for - * linear/tiled and as a requirement for AFBC */ - - offset = ALIGN_POT(offset, 64); - - slice->offset = offset; - - /* Compute the would-be stride */ - unsigned stride = bytes_per_pixel * effective_width; - - if (explicit_layout) { - /* Make sure the explicit stride is valid */ - if (explicit_layout->line_stride < stride) - return false; - - stride = explicit_layout->line_stride; - } else if (linear) { - /* Keep lines alignment on 64 byte for performance */ - stride = ALIGN_POT(stride, 64); - } - - slice->line_stride = stride; - slice->row_stride = stride * (tile_h >> tile_shift); - - unsigned slice_one_size = slice->line_stride * effective_height; - - /* Compute AFBC sizes if necessary */ - if (afbc) { - slice->afbc.header_size = - panfrost_afbc_header_size(width, height); - - /* Stride between two rows of AFBC headers */ - slice->afbc.row_stride = - (effective_width / tile_w) * - AFBC_HEADER_BYTES_PER_TILE; - - /* AFBC body size */ - slice->afbc.body_size = slice_one_size; - - /* 3D AFBC resources have all headers placed at the - * beginning instead of having them split per depth - * level - */ - if (is_3d) { - slice->afbc.surface_stride = - slice->afbc.header_size; - slice->afbc.header_size *= effective_depth; - slice->afbc.body_size *= effective_depth; - offset += slice->afbc.header_size; - } else { - slice_one_size += slice->afbc.header_size; - slice->afbc.surface_stride = slice_one_size; - } - } - - unsigned slice_full_size = - slice_one_size * effective_depth * nr_samples; - - slice->surface_stride = slice_one_size; - - /* Compute AFBC sizes if necessary */ - - offset += slice_full_size; - slice->size = slice_full_size; - - /* Add a checksum region if necessary */ - if (crc_mode != PAN_IMAGE_CRC_NONE) { - slice->crc.size = - panfrost_compute_checksum_size(slice, width, height); - - if (crc_mode == PAN_IMAGE_CRC_INBAND) { - slice->crc.offset = offset; - offset += slice->crc.size; - slice->size += slice->crc.size; - } else { - slice->crc.offset = oob_crc_offset; - oob_crc_offset += slice->crc.size; - } - } - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - /* Arrays and cubemaps have the entire miptree duplicated */ - layout->array_stride = ALIGN_POT(offset, 64); - if (explicit_layout) - layout->data_size = offset; - else - layout->data_size = ALIGN_POT(layout->array_stride * array_size, 4096); - layout->crc_size = oob_crc_offset; - - return true; -} - -void -pan_iview_get_surface(const struct pan_image_view *iview, - unsigned level, unsigned layer, unsigned sample, - struct pan_surface *surf) -{ - level += iview->first_level; - assert(level < iview->image->layout.nr_slices); - - layer += iview->first_layer; - - bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D; - const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level]; - mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset; - - if (drm_is_afbc(iview->image->layout.modifier)) { - assert(!sample); - - if (is_3d) { - ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level); - assert(layer < depth); - surf->afbc.header = base + slice->offset + - (layer * slice->afbc.surface_stride); - surf->afbc.body = base + slice->offset + - slice->afbc.header_size + - (slice->surface_stride * layer); - } else { - assert(layer < iview->image->layout.array_size); - surf->afbc.header = base + - panfrost_texture_offset(&iview->image->layout, - level, layer, 0); - surf->afbc.body = surf->afbc.header + slice->afbc.header_size; - } - } else { - unsigned array_idx = is_3d ? 0 : layer; - unsigned surface_idx = is_3d ? layer : sample; - - surf->data = base + - panfrost_texture_offset(&iview->image->layout, level, - array_idx, surface_idx); + pan_pack(out, TEXTURE, cfg) { + cfg.dimension = iview->dim; + cfg.format = dev->formats[format].hw; + cfg.width = width; + cfg.height = u_minify(layout->height, iview->first_level); + if (iview->dim == MALI_TEXTURE_DIMENSION_3D) + cfg.depth = u_minify(layout->depth, iview->first_level); + else + cfg.sample_count = layout->nr_samples; + cfg.swizzle = swizzle; + cfg.texel_ordering = + panfrost_modifier_to_layout(layout->modifier); + cfg.levels = iview->last_level - iview->first_level + 1; + cfg.array_size = array_size; + +#if PAN_ARCH >= 6 + cfg.surfaces = payload->gpu; + + /* We specify API-level LOD clamps in the sampler descriptor + * and use these clamps simply for bounds checking */ + cfg.minimum_lod = FIXED_16(0, false); + cfg.maximum_lod = FIXED_16(cfg.levels - 1, false); +#else + cfg.manual_stride = manual_stride; +#endif } } +#endif /* ifdef PAN_ARCH */ diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index 0b797b9fc11..992731791ff 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -28,6 +28,8 @@ #ifndef __PAN_TEXTURE_H #define __PAN_TEXTURE_H +#include "gen_macros.h" + #include <stdbool.h> #include "drm-uapi/drm_fourcc.h" #include "util/format/u_format.h" @@ -157,15 +159,16 @@ panfrost_afbc_can_ytr(enum pipe_format format); unsigned panfrost_block_dim(uint64_t modifier, bool width, unsigned plane); +#ifdef PAN_ARCH unsigned -panfrost_estimate_texture_payload_size(const struct panfrost_device *dev, - const struct pan_image_view *iview); +GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview); void -panfrost_new_texture(const struct panfrost_device *dev, - const struct pan_image_view *iview, - void *out, - const struct panfrost_ptr *payload); +GENX(panfrost_new_texture)(const struct panfrost_device *dev, + const struct pan_image_view *iview, + void *out, + const struct panfrost_ptr *payload); +#endif unsigned panfrost_get_layer_stride(const struct pan_image_layout *layout, diff --git a/src/panfrost/vulkan/panvk_vX_image.c b/src/panfrost/vulkan/panvk_vX_image.c index f75f7b21305..232fe270336 100644 --- a/src/panfrost/vulkan/panvk_vX_image.c +++ b/src/panfrost/vulkan/panvk_vX_image.c @@ -127,7 +127,7 @@ panvk_per_arch(CreateImageView)(VkDevice _device, struct panfrost_device *pdev = &device->physical_device->pdev; unsigned bo_size = - panfrost_estimate_texture_payload_size(pdev, &view->pview) + + GENX(panfrost_estimate_texture_payload_size)(&view->pview) + pan_size(TEXTURE); unsigned surf_descs_offset = PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0; @@ -142,7 +142,7 @@ panvk_per_arch(CreateImageView)(VkDevice _device, &view->desc : view->bo->ptr.cpu; STATIC_ASSERT(sizeof(view->desc) >= pan_size(TEXTURE)); - panfrost_new_texture(pdev, &view->pview, tex_desc, &surf_descs); + GENX(panfrost_new_texture)(pdev, &view->pview, tex_desc, &surf_descs); *pView = panvk_image_view_to_handle(view); return VK_SUCCESS; |