diff options
author | Konstantin Seurer <konstantin.seurer@gmail.com> | 2023-01-11 21:28:52 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2023-01-21 20:26:41 +0000 |
commit | b1755c0b21fdc7d11a4e23ba67e535e87fa51bf2 (patch) | |
tree | 10b52fb773148a20bae8e28c7d94b42ddb602baf /src/amd/vulkan | |
parent | 13a8a4071a4b70ca3e4ee0f09479906a95922237 (diff) |
radv/bvh: Add a define for extended SAH
This will be used to choose depth-aware SAH only when we know that it's
more optimal and doesn't increase build overhead too much.
Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20656>
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r-- | src/amd/vulkan/bvh/meson.build | 2 | ||||
-rw-r--r-- | src/amd/vulkan/bvh/ploc_internal.comp | 10 |
2 files changed, 11 insertions, 1 deletions
diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build index 63a3cef9b63..6ef8f91ea82 100644 --- a/src/amd/vulkan/bvh/meson.build +++ b/src/amd/vulkan/bvh/meson.build @@ -48,7 +48,7 @@ bvh_shaders = [ [ 'ploc_internal.comp', 'ploc_internal', - [], + ['EXTENDED_SAH=0'], ], [ 'converter_internal.comp', diff --git a/src/amd/vulkan/bvh/ploc_internal.comp b/src/amd/vulkan/bvh/ploc_internal.comp index 68f5db0c2d6..41ede73953c 100644 --- a/src/amd/vulkan/bvh/ploc_internal.comp +++ b/src/amd/vulkan/bvh/ploc_internal.comp @@ -134,7 +134,9 @@ push_node(uint32_t children[2]) } DEREF(dst_node).base.aabb = total_bounds; +#if EXTENDED_SAH DEREF(dst_node).base.cost = cost * 0.5 + BVH_LEVEL_COST; +#endif DEREF(dst_node).in_final_tree = FINAL_TREE_UNKNOWN; return dst_id; } @@ -160,7 +162,9 @@ decode_neighbour_offset(uint32_t encoded_offset) #define NUM_PLOC_LDS_ITEMS PLOC_WORKGROUP_SIZE + 4 * PLOC_NEIGHBOURHOOD shared radv_aabb shared_bounds[NUM_PLOC_LDS_ITEMS]; +#if EXTENDED_SAH shared float shared_costs[NUM_PLOC_LDS_ITEMS]; +#endif shared uint32_t nearest_neighbour_indices[NUM_PLOC_LDS_ITEMS]; uint32_t @@ -186,7 +190,9 @@ load_bounds(VOID_REF ids, uint32_t iter, uint32_t task_index, uint32_t lds_base, REF(radv_ir_node) node = REF(radv_ir_node)(addr); shared_bounds[i - lds_base] = DEREF(node).aabb; +#if EXTENDED_SAH shared_costs[i - lds_base] = DEREF(node).cost; +#endif } } @@ -198,6 +204,7 @@ combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j) combined_bounds.max = max(shared_bounds[i - lds_base].max, shared_bounds[j - lds_base].max); float area = aabb_surface_area(combined_bounds); +#if EXTENDED_SAH if (area == 0.0) return 0.0; @@ -220,6 +227,9 @@ combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j) (1.0 + shared_costs[i - lds_base] * p_i) * (1.0 + shared_costs[j - lds_base] * p_j); return area / combined_cost; +#else + return area; +#endif } shared uint32_t shared_aggregate_sum; |