/*
 * Copyright 2006 VMware, Inc.
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_tex_layout.cpp
 *
 * Code to lay out images in a mipmap tree.
 *
 * NOTE(review): the \file tag says ".cpp" although the contents are plain C;
 * confirm the tag against the actual file name.
 *
 * \author Keith Whitwell
 * \author Michel Dänzer
 */

#include "intel_mipmap_tree.h"
#include "brw_context.h"
#include "main/macros.h"
#include "main/glformats.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

/**
 * Pick the horizontal alignment unit ("i") for a miptree.
 *
 * Returns 16 when MIPTREE_LAYOUT_FORCE_HALIGN16 is set in \p layout_flags,
 * 8 for MESA_FORMAT_Z_UNORM16 on gen >= 7, and 4 in every other case.
 */
static unsigned int
intel_horizontal_texture_alignment_unit(struct brw_context *brw,
                                        struct intel_mipmap_tree *mt,
                                        uint32_t layout_flags)
{
   if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
      return 16;

   /**
    * +----------------------------------------------------------------------+
    * |                                        | alignment unit width  ("i") |
    * | Surface Property                       |-----------------------------|
    * |                                        | 915 | 965 | ILK | SNB | IVB |
    * +----------------------------------------------------------------------+
    * | YUV 4:2:2 format                       |  8  |  4  |  4  |  4  |  4  |
    * | BC1-5 compressed format (DXTn/S3TC)    |  4  |  4  |  4  |  4  |  4  |
    * | FXT1  compressed format                |  8  |  8  |  8  |  8  |  8  |
    * | Depth Buffer (16-bit)                  |  4  |  4  |  4  |  4  |  8  |
    * | Depth Buffer (other)                   |  4  |  4  |  4  |  4  |  4  |
    * | Separate Stencil Buffer                | N/A | N/A |  8  |  8  |  8  |
    * | All Others                             |  4  |  4  |  4  |  4  |  4  |
    * +----------------------------------------------------------------------+
    *
    * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
    * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
    */
   if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
      return 8;

   return 4;
}

/**
 * Pick the vertical alignment unit ("j") for a miptree.
 *
 * Returns 4 on gen >= 8, for multisampled miptrees (num_samples > 1), and
 * for depth or depth/stencil base formats on gen >= 6.  On gen 7 it returns
 * 2 for a GL_YCBCR_MESA base format or MESA_FORMAT_RGB_FLOAT32 and 4 for
 * anything else.  All remaining cases return 2.
 */
static unsigned int
intel_vertical_texture_alignment_unit(struct brw_context *brw,
                                      const struct intel_mipmap_tree *mt)
{
   /**
    * +----------------------------------------------------------------------+
    * |                                        | alignment unit height ("j") |
    * | Surface Property                       |-----------------------------|
    * |                                        | 915 | 965 | ILK | SNB | IVB |
    * +----------------------------------------------------------------------+
    * | BC1-5 compressed format (DXTn/S3TC)    |  4  |  4  |  4  |  4  |  4  |
    * | FXT1  compressed format                |  4  |  4  |  4  |  4  |  4  |
    * | Depth Buffer                           |  2  |  2  |  2  |  4  |  4  |
    * | Separate Stencil Buffer                | N/A | N/A | N/A |  4  |  8  |
    * | Multisampled (4x or 8x) render target  | N/A | N/A | N/A |  4  |  4  |
    * | All Others                             |  2  |  2  |  2  |  *  |  *  |
    * +----------------------------------------------------------------------+
    *
    * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
    * the SURFACE_STATE "Surface Vertical Alignment" field.
    */

   /* Broadwell only supports VALIGN of 4, 8, and 16.  The BSpec says 4
    * should always be used, except for stencil buffers, which should be 8.
    *
    * NOTE(review): this function returns 4 unconditionally on gen >= 8; the
    * stencil value of 8 mentioned above is not produced here.
    */
   if (brw->gen >= 8)
      return 4;

   if (mt->num_samples > 1)
      return 4;

   GLenum base_format = _mesa_get_format_base_format(mt->format);

   if (brw->gen >= 6 &&
       (base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL)) {
      return 4;
   }

   if (brw->gen == 7) {
      /* On Gen7, we prefer a vertical alignment of 4 when possible, because
       * that allows Y tiled render targets.
       *
       * From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
       * messages), on p64, under the heading "Surface Vertical Alignment":
       *
       *     Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
       *     (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
       *     (0x190)
       *
       *     VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
       */
      if (base_format == GL_YCBCR_MESA ||
          mt->format == MESA_FORMAT_RGB_FLOAT32)
         return 2;

      return 4;
   }

   return 2;
}

/**
 * Lay out all mip levels side by side on a single row.
 *
 * Every level is recorded at y = 0 with an x offset equal to the sum of the
 * 64-aligned widths of the levels before it.  On return mt->total_width
 * holds the right edge of the last level and mt->total_height holds
 * mt->physical_height0.
 */
static void
gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
{
   unsigned x = 0;
   unsigned width = mt->physical_width0;
   unsigned depth = mt->physical_depth0; /* number of array layers. */

   /* When this layout is used the horizontal alignment is fixed at 64 and the
    * hardware ignores the value given in the surface state
    */
   const unsigned int halign = 64;

   mt->total_height = mt->physical_height0;
   mt->total_width = 0;

   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
      unsigned img_width;

      intel_miptree_set_level_info(mt, level, x, 0, depth);

      img_width = ALIGN(width, halign);

      mt->total_width = MAX2(mt->total_width, x + img_width);

      /* Next level starts immediately to the right of this one. */
      x += img_width;

      width = minify(width, 1);
   }
}

/**
 * Lay out the mip levels of one 2D slice.
 *
 * The cursor moves down after every level except the one at
 * first_level + 1, after which it moves right instead.  Level offsets,
 * mt->total_width and mt->total_height are stored in units of format blocks
 * (the values are divided by the block width/height obtained from
 * _mesa_get_format_block_size()).
 */
static void
brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
{
   unsigned x = 0;
   unsigned y = 0;
   unsigned width = mt->physical_width0;
   unsigned height = mt->physical_height0;
   /* Number of layers of array texture or slices of 3d texture (gen9+). */
   unsigned depth = mt->physical_depth0;
   unsigned int bw, bh;

   _mesa_get_format_block_size(mt->format, &bw, &bh);

   mt->total_width = mt->physical_width0;
   mt->total_width = ALIGN_NPOT(mt->total_width, bw);

   /* May need to adjust width to accommodate the placement of
    * the 2nd mipmap.  This occurs when the alignment
    * constraints of mipmap placement push the right edge of the
    * 2nd mipmap out past the width of its parent.
    */
   if (mt->first_level != mt->last_level) {
      unsigned mip1_width;

      mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
         ALIGN_NPOT(minify(mt->physical_width0, 2), bw);

      if (mip1_width > mt->total_width)
         mt->total_width = mip1_width;
   }

   /* Convert the width from pixels to blocks. */
   mt->total_width /= bw;
   mt->total_height = 0;

   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
      unsigned img_height;

      intel_miptree_set_level_info(mt, level, x, y, depth);

      img_height = ALIGN_NPOT(height, mt->valign);
      img_height /= bh;

      if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
         /* Compact arrays with separated miplevels */
         img_height *= depth;
      }

      /* Because the images are packed better, the final offset
       * might not be the maximal one:
       */
      mt->total_height = MAX2(mt->total_height, y + img_height);

      /* Layout_below: step right after second mipmap.
       *
       * For Sandy Bridge HiZ and stencil, we always step down.
       *
       * NOTE(review): no HiZ/stencil special case is visible in this
       * function; the sentence above may be stale - confirm.
       */
      if (level == mt->first_level + 1) {
         x += ALIGN_NPOT(width, mt->halign) / bw;
      } else {
         y += img_height;
      }

      width = minify(width, 1);
      height = minify(height, 1);

      /* Only 3D textures lose slices as the level increases. */
      if (mt->target == GL_TEXTURE_3D)
         depth = minify(depth, 1);
   }
}

/**
 * Lay out a GEN6_HIZ_STENCIL miptree.
 *
 * The first level is placed at the origin.  The cursor then steps down by
 * that level's height (aligned to the tile height) and every later level is
 * placed to the right of the previous one, stepping by the level width
 * aligned to the tile width.  Each level's height covers all \c depth
 * layers.  For non-stencil formats this function also overwrites mt->cpp
 * with 16.
 */
static void
brw_miptree_layout_gen6_hiz_stencil(struct intel_mipmap_tree *mt)
{
   unsigned x = 0;
   unsigned y = 0;
   unsigned width = mt->physical_width0;
   unsigned height = mt->physical_height0;
   /* Number of layers of array texture. */
   unsigned depth = mt->physical_depth0;
   unsigned tile_width, tile_height, bw, bh;

   if (mt->format == MESA_FORMAT_S_UINT8) {
      bw = bh = 1;
      /* W-tiled */
      tile_width = 64;
      tile_height = 64;
   } else {
      assert(_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT ||
             _mesa_get_format_base_format(mt->format) == GL_DEPTH_STENCIL);
      /* Each 128-bit HiZ block corresponds to a region of 8x4 depth
       * samples.  Each cache line in the Y-Tiled HiZ image contains 2x2 HiZ
       * blocks.  Therefore, each Y-tiled cache line corresponds to a 16x8
       * region in the depth surface.  Since we're representing it as
       * RGBA_FLOAT32, the miptree calculations will think that each cache
       * line is 1x4 pixels.  Therefore, we need a scale-down factor of 16x2
       * and a vertical alignment of 2.
       */
      mt->cpp = 16;
      bw = 16;
      bh = 2;

      /* Y-tiled */
      tile_width = 128 / mt->cpp;
      tile_height = 32;
   }

   mt->total_width = 0;
   mt->total_height = 0;

   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
      intel_miptree_set_level_info(mt, level, x, y, depth);

      const unsigned img_width = ALIGN(DIV_ROUND_UP(width, bw), mt->halign);
      const unsigned img_height =
         ALIGN(DIV_ROUND_UP(height, bh), mt->valign) * depth;

      mt->total_width = MAX2(mt->total_width, x + img_width);
      mt->total_height = MAX2(mt->total_height, y + img_height);

      /* Step down once after the first level, then only to the right. */
      if (level == mt->first_level) {
         y += ALIGN(img_height, tile_height);
      } else {
         x += ALIGN(img_width, tile_width);
      }

      /* We only minify the width.  We want qpitch to match for all miplevels
       * because the hardware doesn't know we aren't on LOD0.
       */
      width = minify(width, 1);
   }
}

/**
 * Horizontal distance between slices of \p level.
 *
 * For 3D textures on gen < 9 and for cube maps on gen 4 this is
 * minify(mt->physical_width0, level) aligned to mt->halign.  Every other
 * combination returns 0.
 */
unsigned
brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
                                       const struct intel_mipmap_tree *mt,
                                       unsigned level)
{
   if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
       (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
      return ALIGN_NPOT(minify(mt->physical_width0, level), mt->halign);
   } else {
      return 0;
   }
}

/**
 * Vertical distance between slices of \p level.
 *
 * - gen >= 9: mt->total_height, aligned to 8 for depth/stencil formats and
 *   to 32 for 3D textures (\p level is not consulted).
 * - 3D textures, gen 4 cube maps and ALL_SLICES_AT_EACH_LOD layouts: the
 *   minified height of \p level aligned to mt->valign.
 * - GEN6_HIZ_STENCIL layouts: the unminified height (halved, rounding up,
 *   for non-stencil formats) aligned to mt->valign.
 * - Otherwise: aligned heights of levels 0 and 1 plus 11 (gen < 7) or
 *   12 (gen >= 7) times mt->valign.
 *
 * Asserts that GEN6_HIZ_STENCIL is only used with gen 6.
 */
unsigned
brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
                                     const struct intel_mipmap_tree *mt,
                                     unsigned level)
{
   assert(mt->array_layout != GEN6_HIZ_STENCIL || brw->gen == 6);

   if (brw->gen >= 9) {
      /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will
       * effectively end up with a packed qpitch anyway whenever
       * mt->first_level == mt->last_level.
       */
      assert(mt->array_layout != ALL_SLICES_AT_EACH_LOD);

      /* On Gen9 we can pick whatever qpitch we like as long as it's aligned
       * to the vertical alignment so we don't need to add any extra rows.
       */
      unsigned qpitch = mt->total_height;

      /* If the surface might be used as a stencil buffer or HiZ buffer then
       * it needs to be a multiple of 8.
       */
      const GLenum base_format = _mesa_get_format_base_format(mt->format);
      if (_mesa_is_depth_or_stencil_format(base_format))
         qpitch = ALIGN(qpitch, 8);

      /* 3D textures need to be aligned to the tile height. At this point we
       * don't know which tiling will be used so let's just align it to 32
       */
      if (mt->target == GL_TEXTURE_3D)
         qpitch = ALIGN(qpitch, 32);

      return qpitch;

   } else if (mt->target == GL_TEXTURE_3D ||
              (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
              mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
      return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign);

   } else if (mt->array_layout == GEN6_HIZ_STENCIL) {
      /* For HiZ and stencil on Sandy Bridge, we don't minify the height. */
      if (mt->format == MESA_FORMAT_S_UINT8) {
         return ALIGN(mt->physical_height0, mt->valign);
      } else {
         /* HiZ has a vertical scale factor of 2. */
         return ALIGN(DIV_ROUND_UP(mt->physical_height0, 2), mt->valign);
      }

   } else {
      const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign);
      const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign);

      return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->valign;
   }
}

/**
 * Add two rows to mt->total_height when the miptree is a cube map; other
 * targets are left untouched.
 */
static void
align_cube(struct intel_mipmap_tree *mt)
{
   /* The 965's sampler lays cachelines out according to how accesses
    * in the texture surfaces run, so they may be "vertical" through
    * memory.  As a result, the docs say in Surface Padding Requirements:
    * Sampling Engine Surfaces that two extra rows of padding are required.
    */
   if (mt->target == GL_TEXTURE_CUBE_MAP)
      mt->total_height += 2;
}

/**
 * Report whether the single-row 1D layout applies to \p mt.
 *
 * True only when gen >= 9, the target is GL_TEXTURE_1D or
 * GL_TEXTURE_1D_ARRAY, and the base format is none of GL_DEPTH_COMPONENT,
 * GL_DEPTH_STENCIL or GL_STENCIL_INDEX.
 */
bool
gen9_use_linear_1d_layout(const struct brw_context *brw,
                          const struct intel_mipmap_tree *mt)
{
   /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
    * horizontal line. This isn't done for depth/stencil buffers however
    * because those will be using a tiled layout
    */
   if (brw->gen >= 9 &&
       (mt->target == GL_TEXTURE_1D ||
        mt->target == GL_TEXTURE_1D_ARRAY)) {
      GLenum base_format = _mesa_get_format_base_format(mt->format);

      if (base_format != GL_DEPTH_COMPONENT &&
          base_format != GL_DEPTH_STENCIL &&
          base_format != GL_STENCIL_INDEX)
         return true;
   }

   return false;
}

/**
 * Lay out an array-style miptree.
 *
 * Runs one of the per-slice layouts (1D row, gen6 HiZ/stencil or 2D), sets
 * mt->qpitch, and then records an offset for every slice of every level:
 * q * img_height for ALL_SLICES_AT_EACH_LOD, q * physical_qpitch otherwise
 * (always with x = 0).  For ALL_LOD_IN_EACH_SLICE the total height becomes
 * physical_qpitch * physical_depth0.  Finishes with align_cube().
 */
static void
brw_miptree_layout_texture_array(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt)
{
   unsigned height = mt->physical_height0;
   bool layout_1d = gen9_use_linear_1d_layout(brw, mt);
   int physical_qpitch;

   if (layout_1d)
      gen9_miptree_layout_1d(mt);
   else if (mt->array_layout == GEN6_HIZ_STENCIL)
      brw_miptree_layout_gen6_hiz_stencil(mt);
   else
      brw_miptree_layout_2d(mt);

   if (layout_1d) {
      physical_qpitch = 1;
      /* When using the horizontal layout the qpitch specifies the distance in
       * pixels between array slices. The total_width is forced to be a
       * multiple of the horizontal alignment in gen9_miptree_layout_1d (in
       * this case it's always 64). The vertical alignment is ignored.
       */
      mt->qpitch = mt->total_width;
   } else {
      mt->qpitch = brw_miptree_get_vertical_slice_pitch(brw, mt, 0);
      /* Unlike previous generations the qpitch is a multiple of the
       * compressed block size on Gen9 so physical_qpitch matches mt->qpitch.
       */
      physical_qpitch = (mt->compressed && brw->gen < 9 ? mt->qpitch / 4 :
                         mt->qpitch);
   }

   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
      unsigned img_height;
      img_height = ALIGN_NPOT(height, mt->valign);
      if (mt->compressed)
         img_height /= mt->valign;

      for (unsigned q = 0; q < mt->level[level].depth; q++) {
         if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
            intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
         } else {
            intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
         }
      }
      height = minify(height, 1);
   }

   if (mt->array_layout == ALL_LOD_IN_EACH_SLICE)
      mt->total_height = physical_qpitch * mt->physical_depth0;

   align_cube(mt);
}

/**
 * Lay out a miptree using the 3D scheme.
 *
 * Each level occupies its own band of rows starting at \c ysum.  Within a
 * level, slices are packed (1 << level) per row.  Offsets and the running
 * totals are divided by the format block size.  For cube maps the slice
 * count of every level is forced to 6.  Finishes with align_cube().
 */
static void
brw_miptree_layout_texture_3d(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   mt->total_width = 0;
   mt->total_height = 0;

   unsigned ysum = 0;
   unsigned bh, bw;

   _mesa_get_format_block_size(mt->format, &bw, &bh);

   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
      /* Level dimensions, clamped to at least 1. */
      unsigned WL = MAX2(mt->physical_width0 >> level, 1);
      unsigned HL = MAX2(mt->physical_height0 >> level, 1);
      unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
      /* Aligned slice footprint. */
      unsigned wL = ALIGN_NPOT(WL, mt->halign);
      unsigned hL = ALIGN_NPOT(HL, mt->valign);

      if (mt->target == GL_TEXTURE_CUBE_MAP)
         DL = 6;

      intel_miptree_set_level_info(mt, level, 0, 0, DL);

      for (unsigned q = 0; q < DL; q++) {
         /* Column and row of slice q inside this level's band. */
         unsigned x = (q % (1 << level)) * wL;
         unsigned y = ysum + (q >> level) * hL;

         intel_miptree_set_image_offset(mt, level, q, x / bw, y / bh);
         mt->total_width = MAX2(mt->total_width, (x + wL) / bw);
         mt->total_height = MAX2(mt->total_height, (y + hL) / bh);
      }

      /* Advance past all rows of slices used by this level. */
      ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
   }

   align_cube(mt);
}

/**
 * \brief Helper function for intel_miptree_create().
 *
 * Chooses the tiling mode for \p mt.  Checks are applied in this order:
 * stencil (S_UINT8) -> NONE; an explicit Y or NONE request in
 * \p layout_flags; multisampled -> Y; depth / depth-stencil -> Y;
 * logical height of 1 -> NONE; pitch below 64 bytes or too large to
 * blit -> NONE; gen < 6 -> X; gen < 7 with cpp >= 16 -> X; gen 7 renderable
 * format with valign 2 -> X.
 *
 * Must not be called with MIPTREE_LAYOUT_FOR_BO set unless the format is
 * MESA_FORMAT_S_UINT8 (asserted).
 *
 * NOTE(review): the fall-through return ORs I915_TILING_Y and I915_TILING_X;
 * presumably callers interpret the combined value as "either is acceptable"
 * - confirm against the caller.
 */
static uint32_t
brw_miptree_choose_tiling(struct brw_context *brw,
                          const struct intel_mipmap_tree *mt,
                          uint32_t layout_flags)
{
   if (mt->format == MESA_FORMAT_S_UINT8) {
      /* The stencil buffer is W tiled. However, we request from the kernel a
       * non-tiled buffer because the GTT is incapable of W fencing.
       */
      return I915_TILING_NONE;
   }

   /* Do not support changing the tiling for miptrees with pre-allocated BOs. */
   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);

   /* Some usages may want only one type of tiling, like depth miptrees (Y
    * tiled), or temporary BOs for uploading data once (linear).
    */
   switch (layout_flags & MIPTREE_LAYOUT_TILING_ANY) {
   case MIPTREE_LAYOUT_TILING_ANY:
      break;
   case MIPTREE_LAYOUT_TILING_Y:
      return I915_TILING_Y;
   case MIPTREE_LAYOUT_TILING_NONE:
      return I915_TILING_NONE;
   }

   if (mt->num_samples > 1) {
      /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
       * Surface"):
       *
       *   [DevSNB+]: For multi-sample render targets, this field must be
       *   1. MSRTs can only be tiled.
       *
       * Our usual reason for preferring X tiling (fast blits using the
       * blitting engine) doesn't apply to MSAA, since we'll generally be
       * downsampling or upsampling when blitting between the MSAA buffer
       * and another buffer, and the blitting engine doesn't support that.
       * So use Y tiling, since it makes better use of the cache.
       */
      return I915_TILING_Y;
   }

   GLenum base_format = _mesa_get_format_base_format(mt->format);
   if (base_format == GL_DEPTH_COMPONENT ||
       base_format == GL_DEPTH_STENCIL_EXT)
      return I915_TILING_Y;

   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
    * in fact it leads to a less efficient use of memory space and bandwidth
    * due to tile alignment.
    */
   if (mt->logical_height0 == 1)
      return I915_TILING_NONE;

   int minimum_pitch = mt->total_width * mt->cpp;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (minimum_pitch < 64)
      return I915_TILING_NONE;

   if (ALIGN(minimum_pitch, 512) >= 32768) {
      perf_debug("%dx%d miptree too large to blit, falling back to untiled",
                 mt->total_width, mt->total_height);
      return I915_TILING_NONE;
   }

   /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
   if (brw->gen < 6)
      return I915_TILING_X;

   /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
    * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
    *  or Linear."
    * 128 bits per pixel translates to 16 bytes per pixel.  This is necessary
    * all the way back to 965, but is permitted on Gen7+.
    */
   if (brw->gen < 7 && mt->cpp >= 16)
      return I915_TILING_X;

   /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
    * messages), on p64, under the heading "Surface Vertical Alignment":
    *
    *     This field must be set to VALIGN_4 for all tiled Y Render Target
    *     surfaces.
    *
    * So if the surface is renderable and uses a vertical alignment of 2,
    * force it to be X tiled.  This is somewhat conservative (it's possible
    * that the client won't ever render to this surface), but it's difficult
    * to know that ahead of time.  And besides, since we use a vertical
    * alignment of 4 as often as we can, this shouldn't happen very often.
    */
   if (brw->gen == 7 && mt->valign == 2 &&
       brw->format_supported_as_render_target[mt->format]) {
      return I915_TILING_X;
   }

   return I915_TILING_Y | I915_TILING_X;
}

/**
 * Dispatch to the layout routine that matches mt->target (and, for the
 * default case, mt->msaa_layout), then log the resulting total width,
 * total height and cpp through DBG().
 */
static void
intel_miptree_set_total_width_height(struct brw_context *brw,
                                     struct intel_mipmap_tree *mt)
{
   switch (mt->target) {
   case GL_TEXTURE_CUBE_MAP:
      if (brw->gen == 4) {
         /* Gen4 stores cube maps as 3D textures. */
         assert(mt->physical_depth0 == 6);
         brw_miptree_layout_texture_3d(brw, mt);
      } else {
         /* All other hardware stores cube maps as 2D arrays. */
         brw_miptree_layout_texture_array(brw, mt);
      }
      break;

   case GL_TEXTURE_3D:
      if (brw->gen >= 9)
         brw_miptree_layout_texture_array(brw, mt);
      else
         brw_miptree_layout_texture_3d(brw, mt);
      break;

   case GL_TEXTURE_1D_ARRAY:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      brw_miptree_layout_texture_array(brw, mt);
      break;

   default:
      switch (mt->msaa_layout) {
      case INTEL_MSAA_LAYOUT_UMS:
      case INTEL_MSAA_LAYOUT_CMS:
         brw_miptree_layout_texture_array(brw, mt);
         break;
      case INTEL_MSAA_LAYOUT_NONE:
      case INTEL_MSAA_LAYOUT_IMS:
         if (gen9_use_linear_1d_layout(brw, mt))
            gen9_miptree_layout_1d(mt);
         else if (mt->array_layout == GEN6_HIZ_STENCIL)
            brw_miptree_layout_gen6_hiz_stencil(mt);
         else
            brw_miptree_layout_2d(mt);
         break;
      }
      break;
   }

   DBG("%s: %dx%dx%d\n", __func__,
       mt->total_width, mt->total_height, mt->cpp);
}

/**
 * Fill in mt->halign and mt->valign.
 *
 * - GEN6_HIZ_STENCIL layout: 4x2 for MESA_FORMAT_S_UINT8, 1x2 otherwise.
 * - Compressed formats: the format block size, multiplied by 4 in both
 *   directions on gen >= 9.
 * - MESA_FORMAT_S_UINT8 (other layouts): halign 8, valign 8 on gen >= 7 and
 *   4 on earlier generations.
 * - Everything else: the values computed by
 *   intel_horizontal_texture_alignment_unit() and
 *   intel_vertical_texture_alignment_unit().
 */
static void
intel_miptree_set_alignment(struct brw_context *brw,
                            struct intel_mipmap_tree *mt,
                            uint32_t layout_flags)
{
   /**
    * From the "Alignment Unit Size" section of various specs, namely:
    * - Gen3 Spec: "Memory Data Formats" Volume,         Section 1.20.1.4
    * - i965 and G45 PRMs:             Volume 1,         Section 6.17.3.4.
    * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
    * - BSpec (for Ivybridge and slight variations in separate stencil)
    */

   if (mt->array_layout == GEN6_HIZ_STENCIL) {
      /* On gen6, we use GEN6_HIZ_STENCIL for stencil/hiz because the
       * hardware doesn't support multiple mip levels on stencil/hiz.
       *
       * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
       * "The hierarchical depth buffer does not support the LOD field"
       *
       * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
       * "The stencil depth buffer does not support the LOD field"
       */
      if (mt->format == MESA_FORMAT_S_UINT8) {
         /* Stencil uses W tiling, so we force W tiling alignment for the
          * ALL_SLICES_AT_EACH_LOD miptree layout.
          *
          * NOTE(review): this branch is only reached when array_layout is
          * GEN6_HIZ_STENCIL, so the layout name above looks stale - confirm.
          */
         mt->halign = 4;
         mt->valign = 2;
         assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
      } else {
         /* See brw_miptree_layout_gen6_hiz_stencil() */
         mt->halign = 1;
         mt->valign = 2;
      }
   } else if (mt->compressed) {
      /* The hardware alignment requirements for compressed textures
       * happen to match the block boundaries.
       */
      _mesa_get_format_block_size(mt->format, &mt->halign, &mt->valign);

      /* On Gen9+ we can pick our own alignment for compressed textures but it
       * has to be a multiple of the block size. The minimum alignment we can
       * pick is 4 so we effectively have to align to 4 times the block
       * size
       */
      if (brw->gen >= 9) {
         mt->halign *= 4;
         mt->valign *= 4;
      }
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      mt->halign = 8;
      mt->valign = brw->gen >= 7 ? 8 : 4;
   } else {
      mt->halign =
         intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
      mt->valign = intel_vertical_texture_alignment_unit(brw, mt);
   }
}

/**
 * Compute the full layout of \p mt: alignment units, per-level offsets,
 * total size and (unless MIPTREE_LAYOUT_FOR_BO is set) the tiling mode.
 *
 * \return false when the computed total width or total height is zero, in
 *         which case neither the gen9 alignment rescale nor the tiling
 *         selection is performed; true otherwise.
 */
bool
brw_miptree_layout(struct brw_context *brw,
                   struct intel_mipmap_tree *mt,
                   uint32_t layout_flags)
{
   intel_miptree_set_alignment(brw, mt, layout_flags);
   intel_miptree_set_total_width_height(brw, mt);

   if (!mt->total_width || !mt->total_height)
      return false;

   /* On Gen9+ the alignment values are expressed in multiples of the block
    * size
    */
   if (brw->gen >= 9) {
      unsigned int i, j;
      _mesa_get_format_block_size(mt->format, &i, &j);
      mt->halign /= i;
      mt->valign /= j;
   }

   if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
      mt->tiling = brw_miptree_choose_tiling(brw, mt, layout_flags);

   return true;
}