diff options
author | Emma Anholt <emma@anholt.net> | 2023-01-30 16:25:30 -0800 |
---|---|---|
committer | Emma Anholt <emma@anholt.net> | 2023-02-02 14:56:13 -0800 |
commit | aae679e221371c892d8e1984fcab20229d9d0d57 (patch) | |
tree | d5188b60ced9c5149327c310236d3dc1bc3790c8 | |
parent | e27f84c56e1c0026edfc31aee9fe909763da81d9 (diff) |
turnip: Optimize tile sizes to reduce the number of bins.
We were aiming for very square tiles, but it's actually better for us to
reduce the number of different bins so that we take fewer trips through the
geometry and keep the caches hotter. Example changes to aztec ruins on
ANGLE:
3x3 tiles of 352x352 to 4x2 tiles of 256x512
4x5 tiles of 256x224 to 5x4 tiles of 224x256
17x11 tiles of 160x128 to 14x11 tiles of 192x128
12x7 tiles of 224x224 to 7x11 tiles of 384x128
12x8 tiles of 224x192 to 7x11 tiles of 384x128
11x6 tiles of 256x256 to 12x5 tiles of 224x288
11x7 tiles of 256x224 to 7x9 tiles of 384x160
8x4 tiles of 352x352 to 6x5 tiles of 448x288
and minecraft:
3x3 tiles of 352x352 to 4x2 tiles of 256x512
12x6 tiles of 256x256 to 3x23 tiles of 1024x64
12x7 tiles of 256x224 to 8x9 tiles of 384x160
FPS changes:
VK aztec ruins normal: 1.12478% +/- 0.213393% (n=67)
ANGLE manhattan_31: +1.42813% +/- 0.893332% (n=7)
ANGLE minecraft: no change (n=21)
ANGLE google_maps: +6.80618% +/- 2.40857% (n=4)
ANGLE trex_200: no change (n=11)
ANGLE pubg: no change (n=21)
Fixes: #8160
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21004>
-rw-r--r-- | src/freedreno/ci/traces-freedreno.yml | 2 |
-rw-r--r-- | src/freedreno/vulkan/tu_util.c | 104 |
2 files changed, 56 insertions, 50 deletions
diff --git a/src/freedreno/ci/traces-freedreno.yml b/src/freedreno/ci/traces-freedreno.yml index 6c973c0c248..dc94740bc50 100644 --- a/src/freedreno/ci/traces-freedreno.yml +++ b/src/freedreno/ci/traces-freedreno.yml @@ -171,7 +171,7 @@ traces: freedreno-a630: checksum: 5c0c5da476775dfac94b79e7edb556ce zink-a630: - checksum: e9c21d9a253cbae19f09ddfa01d9622a + checksum: a8dea3dcb341c0f6c786a264dee51766 filament/filament-default.trace: freedreno-a306: diff --git a/src/freedreno/vulkan/tu_util.c b/src/freedreno/vulkan/tu_util.c index 46880f20fb1..4ba7df7bfbb 100644 --- a/src/freedreno/vulkan/tu_util.c +++ b/src/freedreno/vulkan/tu_util.c @@ -119,8 +119,6 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb, { const uint32_t tile_align_w = pass->tile_align_w; uint32_t tile_align_h = dev->physical_device->info->tile_align_h; - const uint32_t max_tile_width = dev->physical_device->info->tile_max_w; - const uint32_t max_tile_height = dev->physical_device->info->tile_max_h; struct tu_tiling_config *tiling = &fb->tiling[gmem_layout]; /* From the Vulkan 1.3.232 spec, under VkFramebufferCreateInfo: @@ -161,65 +159,73 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb, assert(align(min_layer_stride, gmem_align) == min_layer_stride); } - /* start from 1 tile */ - tiling->tile_count = (VkExtent2D) { - .width = 1, - .height = 1, - }; - tiling->tile0 = (VkExtent2D) { - .width = util_align_npot(fb->width, tile_align_w), - .height = align(fb->height, tile_align_h), - }; - /* will force to sysmem, don't bother trying to have a valid tile config * TODO: just skip all GMEM stuff when sysmem is forced? */ if (!pass->gmem_pixels[gmem_layout]) { tiling->possible = false; + /* Some parts of the code do conditional gmem setup even when gmem is not + * possible. Give them a dummy tiling layout. 
+ */ + tiling->tile_count = (VkExtent2D) { 1, 1 }; + tiling->tile0 = (VkExtent2D) { tile_align_w, tile_align_h }; return; } - if (TU_DEBUG(FORCEBIN)) { - /* start with 2x2 tiles */ - tiling->tile_count.width = 2; - tiling->tile_count.height = 2; - tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w); - tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h); - } + tiling->possible = false; - /* do not exceed max tile width */ - while (tiling->tile0.width > max_tile_width) { - tiling->tile_count.width++; - tiling->tile0.width = - util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w); - } + uint32_t best_tile_count = ~0; + VkExtent2D tile_count; + VkExtent2D tile_size; + /* There aren't that many different tile widths possible, so just walk all + * of them finding which produces the lowest number of bins. + */ + const uint32_t max_tile_width = MIN2( + dev->physical_device->info->tile_max_w, align(fb->width, tile_align_w)); + const uint32_t max_tile_height = + MIN2(dev->physical_device->info->tile_max_h, + align(fb->height, tile_align_h)); + for (tile_size.width = tile_align_w; tile_size.width <= max_tile_width; + tile_size.width += tile_align_w) { + tile_size.height = pass->gmem_pixels[gmem_layout] / (tile_size.width * layers); + tile_size.height = MIN2(tile_size.height, max_tile_height); + tile_size.height = ROUND_DOWN_TO(tile_size.height, tile_align_h); + if (!tile_size.height) + continue; + + tile_count.width = DIV_ROUND_UP(fb->width, tile_size.width); + tile_count.height = DIV_ROUND_UP(fb->height, tile_size.height); + + /* Drop the height of the tile down to split tiles more evenly across the + * screen for a given tile count. 
+ */ + tile_size.height = + align(DIV_ROUND_UP(fb->height, tile_count.height), tile_align_h); - /* do not exceed max tile height */ - while (tiling->tile0.height > max_tile_height) { - tiling->tile_count.height++; - tiling->tile0.height = - util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h); + /* Pick the layout with the minimum number of bins (lowest CP overhead + * and amount of cache flushing), but the most square tiles in the case + * of a tie (likely highest cache locality). + */ + if (tile_count.width * tile_count.height < best_tile_count || + (tile_count.width * tile_count.height == best_tile_count && + abs((int)(tile_size.width - tile_size.height)) < + abs((int)(tiling->tile0.width - tiling->tile0.height)))) { + tiling->possible = true; + tiling->tile0 = tile_size; + tiling->tile_count = tile_count; + best_tile_count = tile_count.width * tile_count.height; + } } - tiling->possible = true; - - /* do not exceed gmem size */ - while (tiling->tile0.width * tiling->tile0.height * layers > pass->gmem_pixels[gmem_layout]) { - if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) { - tiling->tile_count.width++; - tiling->tile0.width = - util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w); - } else { - tiling->tile_count.height++; - if (DIV_ROUND_UP(fb->height, tiling->tile_count.height) < tile_align_h) { - /* Tiling is impossible. This may happen when there is more than - * one layer. - */ - tiling->possible = false; - return; - } - tiling->tile0.height = - align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h); + /* If forcing binning, try to get at least 2 tiles in each direction. 
*/ + if (TU_DEBUG(FORCEBIN) && tiling->possible) { + if (tiling->tile_count.width == 1 && tiling->tile0.width != tile_align_w) { + tiling->tile0.width = align(DIV_ROUND_UP(tiling->tile0.width, 2), tile_align_w); + tiling->tile_count.width = 2; + } + if (tiling->tile_count.height == 1 && tiling->tile0.height != tile_align_h) { + tiling->tile0.height = align(DIV_ROUND_UP(tiling->tile0.height, 2), tile_align_h); + tiling->tile_count.height = 2; } } } |