summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Emma Anholt <emma@anholt.net> 2023-01-30 16:25:30 -0800
committer: Emma Anholt <emma@anholt.net> 2023-02-02 14:56:13 -0800
commit: aae679e221371c892d8e1984fcab20229d9d0d57 (patch)
tree: d5188b60ced9c5149327c310236d3dc1bc3790c8
parent: e27f84c56e1c0026edfc31aee9fe909763da81d9 (diff)
turnip: Optimize tile sizes to reduce the number of bins.
We were aiming for very square tiles, but it's actually better for us to
reduce the number of different bins so you take fewer trips through the
geometry and keep the caches hotter.

Example changes to aztec ruins on angle:

    3x3 tiles of 352x352 to 4x2 tiles of 256x512
    4x5 tiles of 256x224 to 5x4 tiles of 224x256
    17x11 tiles of 160x128 to 14x11 tiles of 192x128
    12x7 tiles of 224x224 to 7x11 tiles of 384x128
    12x8 tiles of 224x192 to 7x11 tiles of 384x128
    11x6 tiles of 256x256 to 12x5 tiles of 224x288
    11x7 tiles of 256x224 to 7x9 tiles of 384x160
    8x4 tiles of 352x352 to 6x5 tiles of 448x288

and minecraft:

    3x3 tiles of 352x352 to 4x2 tiles of 256x512
    12x6 tiles of 256x256 to 3x23 tiles of 1024x64
    12x7 tiles of 256x224 to 8x9 tiles of 384x160

FPS changes:

    VK aztec ruins normal: 1.12478% +/- 0.213393% (n=67)
    ANGLE manhattan_31: +1.42813% +/- 0.893332% (n=7)
    ANGLE minecraft: no change (n=21)
    ANGLE google_maps: +6.80618% +/- 2.40857% (n=4)
    ANGLE trex_200: no change (n=11)
    ANGLE pubg: no change (n=21)

Fixes: #8160
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21004>
-rw-r--r-- src/freedreno/ci/traces-freedreno.yml 2
-rw-r--r-- src/freedreno/vulkan/tu_util.c 104
2 files changed, 56 insertions, 50 deletions
diff --git a/src/freedreno/ci/traces-freedreno.yml b/src/freedreno/ci/traces-freedreno.yml
index 6c973c0c248..dc94740bc50 100644
--- a/src/freedreno/ci/traces-freedreno.yml
+++ b/src/freedreno/ci/traces-freedreno.yml
@@ -171,7 +171,7 @@ traces:
freedreno-a630:
checksum: 5c0c5da476775dfac94b79e7edb556ce
zink-a630:
- checksum: e9c21d9a253cbae19f09ddfa01d9622a
+ checksum: a8dea3dcb341c0f6c786a264dee51766
filament/filament-default.trace:
freedreno-a306:
diff --git a/src/freedreno/vulkan/tu_util.c b/src/freedreno/vulkan/tu_util.c
index 46880f20fb1..4ba7df7bfbb 100644
--- a/src/freedreno/vulkan/tu_util.c
+++ b/src/freedreno/vulkan/tu_util.c
@@ -119,8 +119,6 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
{
const uint32_t tile_align_w = pass->tile_align_w;
uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
- const uint32_t max_tile_width = dev->physical_device->info->tile_max_w;
- const uint32_t max_tile_height = dev->physical_device->info->tile_max_h;
struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
/* From the Vulkan 1.3.232 spec, under VkFramebufferCreateInfo:
@@ -161,65 +159,73 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
assert(align(min_layer_stride, gmem_align) == min_layer_stride);
}
- /* start from 1 tile */
- tiling->tile_count = (VkExtent2D) {
- .width = 1,
- .height = 1,
- };
- tiling->tile0 = (VkExtent2D) {
- .width = util_align_npot(fb->width, tile_align_w),
- .height = align(fb->height, tile_align_h),
- };
-
/* will force to sysmem, don't bother trying to have a valid tile config
* TODO: just skip all GMEM stuff when sysmem is forced?
*/
if (!pass->gmem_pixels[gmem_layout]) {
tiling->possible = false;
+ /* Some parts of the code do conditional gmem setup even when gmem is not
+ * possible. Give them a dummy tiling layout.
+ */
+ tiling->tile_count = (VkExtent2D) { 1, 1 };
+ tiling->tile0 = (VkExtent2D) { tile_align_w, tile_align_h };
return;
}
- if (TU_DEBUG(FORCEBIN)) {
- /* start with 2x2 tiles */
- tiling->tile_count.width = 2;
- tiling->tile_count.height = 2;
- tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
- tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h);
- }
+ tiling->possible = false;
- /* do not exceed max tile width */
- while (tiling->tile0.width > max_tile_width) {
- tiling->tile_count.width++;
- tiling->tile0.width =
- util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
- }
+ uint32_t best_tile_count = ~0;
+ VkExtent2D tile_count;
+ VkExtent2D tile_size;
+ /* There aren't that many different tile widths possible, so just walk all
+ * of them finding which produces the lowest number of bins.
+ */
+ const uint32_t max_tile_width = MIN2(
+ dev->physical_device->info->tile_max_w, align(fb->width, tile_align_w));
+ const uint32_t max_tile_height =
+ MIN2(dev->physical_device->info->tile_max_h,
+ align(fb->height, tile_align_h));
+ for (tile_size.width = tile_align_w; tile_size.width <= max_tile_width;
+ tile_size.width += tile_align_w) {
+ tile_size.height = pass->gmem_pixels[gmem_layout] / (tile_size.width * layers);
+ tile_size.height = MIN2(tile_size.height, max_tile_height);
+ tile_size.height = ROUND_DOWN_TO(tile_size.height, tile_align_h);
+ if (!tile_size.height)
+ continue;
+
+ tile_count.width = DIV_ROUND_UP(fb->width, tile_size.width);
+ tile_count.height = DIV_ROUND_UP(fb->height, tile_size.height);
+
+ /* Drop the height of the tile down to split tiles more evenly across the
+ * screen for a given tile count.
+ */
+ tile_size.height =
+ align(DIV_ROUND_UP(fb->height, tile_count.height), tile_align_h);
- /* do not exceed max tile height */
- while (tiling->tile0.height > max_tile_height) {
- tiling->tile_count.height++;
- tiling->tile0.height =
- util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
+ /* Pick the layout with the minimum number of bins (lowest CP overhead
+ * and amount of cache flushing), but the most square tiles in the case
+ * of a tie (likely highest cache locality).
+ */
+ if (tile_count.width * tile_count.height < best_tile_count ||
+ (tile_count.width * tile_count.height == best_tile_count &&
+ abs((int)(tile_size.width - tile_size.height)) <
+ abs((int)(tiling->tile0.width - tiling->tile0.height)))) {
+ tiling->possible = true;
+ tiling->tile0 = tile_size;
+ tiling->tile_count = tile_count;
+ best_tile_count = tile_count.width * tile_count.height;
+ }
}
- tiling->possible = true;
-
- /* do not exceed gmem size */
- while (tiling->tile0.width * tiling->tile0.height * layers > pass->gmem_pixels[gmem_layout]) {
- if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) {
- tiling->tile_count.width++;
- tiling->tile0.width =
- util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
- } else {
- tiling->tile_count.height++;
- if (DIV_ROUND_UP(fb->height, tiling->tile_count.height) < tile_align_h) {
- /* Tiling is impossible. This may happen when there is more than
- * one layer.
- */
- tiling->possible = false;
- return;
- }
- tiling->tile0.height =
- align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
+ /* If forcing binning, try to get at least 2 tiles in each direction. */
+ if (TU_DEBUG(FORCEBIN) && tiling->possible) {
+ if (tiling->tile_count.width == 1 && tiling->tile0.width != tile_align_w) {
+ tiling->tile0.width = align(DIV_ROUND_UP(tiling->tile0.width, 2), tile_align_w);
+ tiling->tile_count.width = 2;
+ }
+ if (tiling->tile_count.height == 1 && tiling->tile0.height != tile_align_h) {
+ tiling->tile0.height = align(DIV_ROUND_UP(tiling->tile0.height, 2), tile_align_h);
+ tiling->tile_count.height = 2;
}
}
}