Diffstat (limited to 'src/vulkan/runtime')
107 files changed, 39507 insertions, 0 deletions
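One practical note on what this series adds: the new rmv/vk_rmv_common.h (below) declares a small memory-tracing API that drivers call to record Radeon Memory Visualizer (RMV) tokens, and vk_rmv_common.c implements it on top of a util_dynarray plus a handle table. As a rough usage sketch only — this is not part of the commit — a driver would fill in a vk_rmv_device_info, call vk_memory_trace_init() once at device creation, and then log events such as buffer creation through the vk_rmv_* helpers. Everything referenced below is declared in the headers added by this diff; the example_* function names and the specific field values are invented purely for illustration.

/* Hypothetical driver-side usage sketch (not part of this commit).
 * Only functions and fields declared in vk_rmv_common.h are used. */
#include "vk_device.h"
#include "vk_rmv_common.h"

static void
example_enable_memory_trace(struct vk_device *device)
{
   struct vk_rmv_device_info info = {0};

   /* Fill in whatever the driver knows about the GPU; all fields are
    * plain integers/strings in struct vk_rmv_device_info. Values here
    * are placeholders. */
   info.vram_type = VK_RMV_MEMORY_TYPE_GDDR6;
   info.memory_infos[VK_RMV_MEMORY_LOCATION_DEVICE].size = 256ull * 1024 * 1024;

   /* Enables tracing and sets up the token array and handle table. */
   vk_memory_trace_init(device, &info);
}

static void
example_log_buffer(struct vk_device *device, VkBuffer buffer)
{
   if (!device->memory_trace_data.is_enabled)
      return;

   /* Records a RESOURCE_CREATE token; the runtime assigns a resource id
    * keyed on the buffer handle (see vk_rmv_get_resource_id_locked). */
   vk_rmv_log_buffer_create(device, /* is_internal */ false, buffer);
}

At device destruction the driver would call vk_memory_trace_finish(), which frees any per-token allocations and warns about unfreed resources, as implemented in vk_rmv_common.c below.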
diff --git a/src/vulkan/runtime/meson.build b/src/vulkan/runtime/meson.build new file mode 100644 index 00000000000..762c29e97e7 --- /dev/null +++ b/src/vulkan/runtime/meson.build @@ -0,0 +1,326 @@ +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Mesa-local imports in the Python files must be declared here for correct +# dependency tracking. + +vulkan_lite_runtime_files = files( + 'rmv/vk_rmv_common.c', + 'rmv/vk_rmv_exporter.c', + 'vk_acceleration_structure.c', + 'vk_blend.c', + 'vk_buffer.c', + 'vk_buffer_view.c', + 'vk_cmd_copy.c', + 'vk_cmd_enqueue.c', + 'vk_command_buffer.c', + 'vk_command_pool.c', + 'vk_debug_report.c', + 'vk_debug_utils.c', + 'vk_deferred_operation.c', + 'vk_descriptor_set_layout.c', + 'vk_descriptors.c', + 'vk_descriptor_update_template.c', + 'vk_device.c', + 'vk_device_memory.c', + 'vk_fence.c', + 'vk_framebuffer.c', + 'vk_graphics_state.c', + 'vk_image.c', + 'vk_log.c', + 'vk_object.c', + 'vk_physical_device.c', + 'vk_pipeline_layout.c', + 'vk_query_pool.c', + 'vk_queue.c', + 'vk_render_pass.c', + 'vk_sampler.c', + 'vk_semaphore.c', + 'vk_standard_sample_locations.c', + 'vk_sync.c', + 'vk_sync_binary.c', + 'vk_sync_dummy.c', + 'vk_sync_timeline.c', + 'vk_synchronization.c', + 'vk_video.c', + 'vk_ycbcr_conversion.c', +) + +vulkan_lite_runtime_deps = [ + vulkan_wsi_deps, + idep_mesautil, + idep_nir_headers, + idep_vulkan_util, +] + +if dep_libdrm.found() + vulkan_lite_runtime_files += files('vk_drm_syncobj.c') + vulkan_lite_runtime_deps += dep_libdrm +endif + +if with_platform_android + vulkan_lite_runtime_files += files('vk_android.c') + vulkan_lite_runtime_deps += dep_android +endif + +vk_common_entrypoints = custom_target( + 'vk_common_entrypoints', + input : [vk_entrypoints_gen, vk_api_xml], + output : ['vk_common_entrypoints.h', 'vk_common_entrypoints.c'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', + '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'vk_common', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_entrypoints_gen_depend_files, +) + +vk_cmd_queue = custom_target( + 'vk_cmd_queue', + input : [vk_cmd_queue_gen, vk_api_xml], + output : ['vk_cmd_queue.c', 'vk_cmd_queue.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_cmd_queue_gen_depend_files, +) + +vk_cmd_enqueue_entrypoints = custom_target( + 
'vk_cmd_enqueue_entrypoints', + input : [vk_entrypoints_gen, vk_api_xml], + output : ['vk_cmd_enqueue_entrypoints.h', 'vk_cmd_enqueue_entrypoints.c'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', + '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', + '--prefix', 'vk_cmd_enqueue', '--prefix', 'vk_cmd_enqueue_unless_primary', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_entrypoints_gen_depend_files, +) + +vk_dispatch_trampolines = custom_target( + 'vk_dispatch_trampolines', + input : [vk_dispatch_trampolines_gen, vk_api_xml], + output : ['vk_dispatch_trampolines.c', 'vk_dispatch_trampolines.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_dispatch_trampolines_gen_depend_files, +) + +vk_physical_device_features = custom_target( + 'vk_physical_device_features', + input : [vk_physical_device_features_gen, vk_api_xml], + output : ['vk_physical_device_features.c', 'vk_physical_device_features.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_physical_device_features_gen_depend_files, +) + +vk_physical_device_properties = custom_target( + 'vk_physical_device_properties', + input : [vk_physical_device_properties_gen, vk_api_xml], + output : ['vk_physical_device_properties.c', 'vk_physical_device_properties.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_physical_device_properties_gen_depend_files, +) + +vk_synchronization_helpers = custom_target( + 'vk_synchronization_helpers', + input : [vk_synchronization_helpers_gen, vk_api_xml], + output : 'vk_synchronization_helpers.c', + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_synchronization_helpers_gen_depend_files, +) + +vk_format_info = custom_target( + 'vk_format_info', + input : ['vk_format_info_gen.py', vk_api_xml], + output : ['vk_format_info.c', 'vk_format_info.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@' + ], +) + +vulkan_lite_runtime_files += [ + vk_cmd_enqueue_entrypoints, + vk_cmd_queue, + vk_common_entrypoints, + vk_dispatch_trampolines, + vk_format_info, + vk_physical_device_features, + vk_physical_device_properties, + vk_synchronization_helpers, +] + +# as a runtime library dep to ensure header gen order +vulkan_lite_runtime_header_gen_deps = declare_dependency( + sources : [ + vk_cmd_enqueue_entrypoints[0], + vk_cmd_queue[1], + vk_common_entrypoints[0], + vk_dispatch_trampolines[1], + vk_format_info[1], + vk_physical_device_features[1], + vk_physical_device_properties[1], + ], +) + +vulkan_lite_runtime_deps += vulkan_lite_runtime_header_gen_deps + +libvulkan_lite_runtime = static_library( + 'vulkan_lite_runtime', + vulkan_lite_runtime_files, + include_directories : [inc_include, inc_src], + dependencies : vulkan_lite_runtime_deps, + c_args : c_msvc_compat_args, + gnu_symbol_visibility : 'hidden', + build_by_default : false, +) + +libvulkan_lite_instance = static_library( + 'vulkan_lite_instance', + ['vk_instance.c'], + include_directories : [inc_include, inc_src], + dependencies : vulkan_lite_runtime_deps, + 
c_args : ['-DVK_LITE_RUNTIME_INSTANCE=1', c_msvc_compat_args], + gnu_symbol_visibility : 'hidden', + build_by_default : false, +) + +# The sources part is to ensure those generated headers used externally are +# indeed generated before being compiled with, as long as either one of below +# is included as a dependency: +# - idep_vulkan_lite_runtime_headers +# - idep_vulkan_lite_runtime +# - idep_vulkan_runtime_headers +# - idep_vulkan_runtime +idep_vulkan_lite_runtime_headers = declare_dependency( + sources : [ + vk_cmd_enqueue_entrypoints[0], + vk_cmd_queue[1], + vk_common_entrypoints[0], + vk_physical_device_features[1], + vk_physical_device_properties[1], + ], + include_directories : include_directories('.'), +) + +# This is likely a bug in the Meson VS backend, as MSVC with ninja works fine. +# See this discussion here: +# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10506 +if get_option('backend').startswith('vs') + idep_vulkan_lite_runtime = declare_dependency( + link_with : [libvulkan_lite_runtime, libvulkan_lite_instance], + dependencies : idep_vulkan_lite_runtime_headers + ) +else + idep_vulkan_lite_runtime = declare_dependency( + # Instruct users of this library to link with --whole-archive. Otherwise, + # our weak function overloads may not resolve properly. + link_whole : [libvulkan_lite_runtime, libvulkan_lite_instance], + dependencies : idep_vulkan_lite_runtime_headers + ) +endif + +vulkan_runtime_files = files( + 'vk_meta.c', + 'vk_meta_blit_resolve.c', + 'vk_meta_clear.c', + 'vk_meta_draw_rects.c', + 'vk_nir.c', + 'vk_nir_convert_ycbcr.c', + 'vk_pipeline.c', + 'vk_pipeline_cache.c', + 'vk_shader.c', + 'vk_shader_module.c', + 'vk_texcompress_etc2.c', +) + +vulkan_runtime_deps = [ + vulkan_lite_runtime_deps, + idep_nir, + idep_vtn, +] + +if prog_glslang.found() + vulkan_runtime_files += files('vk_texcompress_astc.c') + vulkan_runtime_files += custom_target( + 'astc_spv.h', + input : astc_decoder_glsl_file, + output : 'astc_spv.h', + command : [prog_glslang, '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@'] + glslang_quiet, + ) +endif + +libvulkan_runtime = static_library( + 'vulkan_runtime', + [vulkan_runtime_files], + include_directories : [inc_include, inc_src], + dependencies : vulkan_runtime_deps, + c_args : c_msvc_compat_args, + gnu_symbol_visibility : 'hidden', + build_by_default : false, +) + +libvulkan_instance = static_library( + 'vulkan_instance', + ['vk_instance.c'], + include_directories : [inc_include, inc_src], + dependencies : vulkan_runtime_deps, + c_args : ['-DVK_LITE_RUNTIME_INSTANCE=0', c_msvc_compat_args], + gnu_symbol_visibility : 'hidden', + build_by_default : false, +) + +if get_option('backend').startswith('vs') + idep_vulkan_runtime_body = declare_dependency( + link_with : [libvulkan_lite_runtime, libvulkan_runtime, libvulkan_instance], + ) +else + idep_vulkan_runtime_body = declare_dependency( + link_whole : [libvulkan_lite_runtime, libvulkan_runtime, libvulkan_instance], + ) +endif + +idep_vulkan_runtime_headers = idep_vulkan_lite_runtime_headers + +idep_vulkan_runtime = declare_dependency( + dependencies : [ + idep_vulkan_runtime_headers, + idep_vulkan_runtime_body, + ] +) diff --git a/src/vulkan/runtime/rmv/vk_rmv_common.c b/src/vulkan/runtime/rmv/vk_rmv_common.c new file mode 100644 index 00000000000..48873d463c3 --- /dev/null +++ b/src/vulkan/runtime/rmv/vk_rmv_common.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2022 Friedrich Vock + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_rmv_common.h" +#include "vk_buffer.h" +#include "vk_device.h" + +void +vk_memory_trace_init(struct vk_device *device, const struct vk_rmv_device_info *device_info) +{ + device->memory_trace_data.device_info = *device_info; + device->memory_trace_data.is_enabled = true; + util_dynarray_init(&device->memory_trace_data.tokens, NULL); + simple_mtx_init(&device->memory_trace_data.token_mtx, mtx_plain); + + device->memory_trace_data.next_resource_id = 1; + device->memory_trace_data.handle_table = _mesa_hash_table_u64_create(NULL); +} + +void +vk_memory_trace_finish(struct vk_device *device) +{ + if (!device->memory_trace_data.is_enabled) + return; + util_dynarray_foreach (&device->memory_trace_data.tokens, struct vk_rmv_token, token) { + switch (token->type) { + case VK_RMV_TOKEN_TYPE_RESOURCE_CREATE: { + struct vk_rmv_resource_create_token *create_token = &token->data.resource_create; + if (create_token->type == VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL) { + free(create_token->descriptor_pool.pool_sizes); + } + break; + } + case VK_RMV_TOKEN_TYPE_USERDATA: + free(token->data.userdata.name); + break; + default: + break; + } + } + util_dynarray_fini(&device->memory_trace_data.tokens); + if (_mesa_hash_table_num_entries(device->memory_trace_data.handle_table->table)) + fprintf(stderr, + "mesa: Unfreed resources detected at device destroy, there may be memory leaks!\n"); + _mesa_hash_table_u64_destroy(device->memory_trace_data.handle_table); + device->memory_trace_data.is_enabled = false; +} + +void +vk_rmv_emit_token(struct vk_memory_trace_data *data, enum vk_rmv_token_type type, void *token_data) +{ + struct vk_rmv_token token; + token.type = type; + token.timestamp = (uint64_t)os_time_get_nano(); + memcpy(&token.data, token_data, vk_rmv_token_size_from_type(type)); + util_dynarray_append(&data->tokens, struct vk_rmv_token, token); +} + +uint32_t +vk_rmv_get_resource_id_locked(struct vk_device *device, uint64_t handle) +{ + void *entry = _mesa_hash_table_u64_search(device->memory_trace_data.handle_table, handle); + if (!entry) { + uint32_t id = device->memory_trace_data.next_resource_id++; + _mesa_hash_table_u64_insert(device->memory_trace_data.handle_table, handle, + (void *)(uintptr_t)id); + return id; + } + return (uint32_t)(uintptr_t)entry; +} + +void +vk_rmv_destroy_resource_id_locked(struct vk_device *device, uint64_t handle) +{ + _mesa_hash_table_u64_remove(device->memory_trace_data.handle_table, handle); +} + +void +vk_rmv_log_buffer_create(struct vk_device 
*device, bool is_internal, VkBuffer _buffer) +{ + if (!device->memory_trace_data.is_enabled) + return; + + VK_FROM_HANDLE(vk_buffer, buffer, _buffer); + simple_mtx_lock(&device->memory_trace_data.token_mtx); + struct vk_rmv_resource_create_token token = {0}; + token.is_driver_internal = is_internal; + token.resource_id = vk_rmv_get_resource_id_locked(device, (uint64_t)_buffer); + token.type = VK_RMV_RESOURCE_TYPE_BUFFER; + token.buffer.create_flags = buffer->create_flags; + token.buffer.size = buffer->size; + token.buffer.usage_flags = buffer->usage; + + vk_rmv_emit_token(&device->memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + simple_mtx_unlock(&device->memory_trace_data.token_mtx); +} + +void +vk_rmv_log_cpu_map(struct vk_device *device, uint64_t va, bool is_unmap) +{ + if (!device->memory_trace_data.is_enabled) + return; + + struct vk_rmv_cpu_map_token map_token; + map_token.address = va; + map_token.unmapped = is_unmap; + + simple_mtx_lock(&device->memory_trace_data.token_mtx); + vk_rmv_emit_token(&device->memory_trace_data, VK_RMV_TOKEN_TYPE_CPU_MAP, &map_token); + simple_mtx_unlock(&device->memory_trace_data.token_mtx); +} + +void +vk_rmv_log_misc_token(struct vk_device *device, enum vk_rmv_misc_event_type type) +{ + if (!device->memory_trace_data.is_enabled) + return; + + simple_mtx_lock(&device->memory_trace_data.token_mtx); + struct vk_rmv_misc_token token; + token.type = type; + vk_rmv_emit_token(&device->memory_trace_data, VK_RMV_TOKEN_TYPE_MISC, &token); + simple_mtx_unlock(&device->memory_trace_data.token_mtx); +} diff --git a/src/vulkan/runtime/rmv/vk_rmv_common.h b/src/vulkan/runtime/rmv/vk_rmv_common.h new file mode 100644 index 00000000000..d4f0fb62f54 --- /dev/null +++ b/src/vulkan/runtime/rmv/vk_rmv_common.h @@ -0,0 +1,148 @@ +/* + * Copyright © 2022 Friedrich Vock + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VK_RMV_COMMON_H +#define VK_RMV_COMMON_H + +#include <stdbool.h> +#include "util/hash_table.h" +#include "util/simple_mtx.h" +#include "util/u_debug.h" +#include "util/u_dynarray.h" +#include <vulkan/vulkan_core.h> +#include "vk_rmv_tokens.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_memory_trace_data; + +/* + * The different memory domains RMV supports. 
+ */ +enum vk_rmv_memory_location { + /* DEVICE_LOCAL | HOST_VISIBLE */ + VK_RMV_MEMORY_LOCATION_DEVICE, + /* DEVICE_LOCAL */ + VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE, + /* HOST_VISIBLE | HOST_COHERENT */ + VK_RMV_MEMORY_LOCATION_HOST, + + /* add above here */ + VK_RMV_MEMORY_LOCATION_COUNT +}; + +/* + * Information about a memory domain. + */ +struct vk_rmv_memory_info { + uint64_t size; + uint64_t physical_base_address; +}; + +enum vk_rmv_memory_type { + VK_RMV_MEMORY_TYPE_UNKNOWN, + VK_RMV_MEMORY_TYPE_DDR2, + VK_RMV_MEMORY_TYPE_DDR3, + VK_RMV_MEMORY_TYPE_DDR4, + VK_RMV_MEMORY_TYPE_GDDR5, + VK_RMV_MEMORY_TYPE_GDDR6, + VK_RMV_MEMORY_TYPE_HBM, + VK_RMV_MEMORY_TYPE_HBM2, + VK_RMV_MEMORY_TYPE_HBM3, + VK_RMV_MEMORY_TYPE_LPDDR4, + VK_RMV_MEMORY_TYPE_LPDDR5, + VK_RMV_MEMORY_TYPE_DDR5 +}; + +/* + * Device information for RMV traces. + */ +struct vk_rmv_device_info { + struct vk_rmv_memory_info memory_infos[VK_RMV_MEMORY_LOCATION_COUNT]; + + /* The memory type of dedicated VRAM. */ + enum vk_rmv_memory_type vram_type; + + char device_name[128]; + + uint32_t pcie_family_id; + uint32_t pcie_revision_id; + uint32_t pcie_device_id; + /* The minimum shader clock, in MHz. */ + uint32_t minimum_shader_clock; + /* The maximum shader clock, in MHz. */ + uint32_t maximum_shader_clock; + uint32_t vram_operations_per_clock; + uint32_t vram_bus_width; + /* The VRAM bandwidth, in GB/s (1 GB/s = 1000 MB/s). */ + uint32_t vram_bandwidth; + /* The minimum memory clock, in MHz. */ + uint32_t minimum_memory_clock; + /* The maximum memory clock, in MHz. */ + uint32_t maximum_memory_clock; +}; + +struct vk_device; + +struct vk_memory_trace_data { + struct util_dynarray tokens; + simple_mtx_t token_mtx; + + bool is_enabled; + + struct vk_rmv_device_info device_info; + + struct hash_table_u64 *handle_table; + uint32_t next_resource_id; +}; + +struct vk_device; + +void vk_memory_trace_init(struct vk_device *device, const struct vk_rmv_device_info *device_info); + +void vk_memory_trace_finish(struct vk_device *device); + +int vk_dump_rmv_capture(struct vk_memory_trace_data *data); + +void vk_rmv_emit_token(struct vk_memory_trace_data *data, enum vk_rmv_token_type type, + void *token_data); +void vk_rmv_log_buffer_create(struct vk_device *device, bool is_internal, VkBuffer _buffer); +void vk_rmv_log_cpu_map(struct vk_device *device, uint64_t va, bool is_unmap); +void vk_rmv_log_misc_token(struct vk_device *device, enum vk_rmv_misc_event_type type); + +/* Retrieves the unique resource id for the resource specified by handle. + * Allocates a new id if none exists already. + * The memory trace mutex should be locked when entering this function. */ +uint32_t vk_rmv_get_resource_id_locked(struct vk_device *device, uint64_t handle); +/* Destroys a resource id. If the same handle is allocated again, a new resource + * id is given to it. + * The memory trace mutex should be locked when entering this function. */ +void vk_rmv_destroy_resource_id_locked(struct vk_device *device, uint64_t handle); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/vulkan/runtime/rmv/vk_rmv_exporter.c b/src/vulkan/runtime/rmv/vk_rmv_exporter.c new file mode 100644 index 00000000000..ebf13941011 --- /dev/null +++ b/src/vulkan/runtime/rmv/vk_rmv_exporter.c @@ -0,0 +1,1727 @@ +/* + * Copyright © 2022 Friedrich Vock + * + * Exporter based on Radeon Memory Visualizer code which is + * + * Copyright (c) 2017-2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_rmv_common.h" +#include "vk_rmv_tokens.h" + +#include "util/format/u_format.h" +#include "util/u_process.h" +#include "vk_format.h" + +static int +vk_rmv_token_compare(const void *first, const void *second) +{ + const struct vk_rmv_token *first_token = (struct vk_rmv_token *)first; + const struct vk_rmv_token *second_token = (struct vk_rmv_token *)second; + if (first_token->timestamp < second_token->timestamp) + return -1; + else if (first_token->timestamp > second_token->timestamp) + return 1; + return 0; +} + +enum rmt_format { + RMT_FORMAT_UNDEFINED, + RMT_FORMAT_R1_UNORM, + RMT_FORMAT_R1_USCALED, + RMT_FORMAT_R4G4_UNORM, + RMT_FORMAT_R4G4_USCALED, + RMT_FORMAT_L4A4_UNORM, + RMT_FORMAT_R4G4B4A4_UNORM, + RMT_FORMAT_R4G4B4A4_USCALED, + RMT_FORMAT_R5G6B5_UNORM, + RMT_FORMAT_R5G6B5_USCALED, + RMT_FORMAT_R5G5B5A1_UNORM, + RMT_FORMAT_R5G5B5A1_USCALED, + RMT_FORMAT_R1G5B5A5_UNORM, + RMT_FORMAT_R1G5B5A5_USCALED, + RMT_FORMAT_R8_XNORM, + RMT_FORMAT_R8_SNORM, + RMT_FORMAT_R8_USCALED, + RMT_FORMAT_R8_SSCALED, + RMT_FORMAT_R8_UINT, + RMT_FORMAT_R8_SINT, + RMT_FORMAT_R8_SRGB, + RMT_FORMAT_A8_UNORM, + RMT_FORMAT_L8_UNORM, + RMT_FORMAT_P8_UINT, + RMT_FORMAT_R8G8_UNORM, + RMT_FORMAT_R8G8_SNORM, + RMT_FORMAT_R8G8_USCALED, + RMT_FORMAT_R8G8_SSCALED, + RMT_FORMAT_R8G8_UINT, + RMT_FORMAT_R8G8_SINT, + RMT_FORMAT_R8G8_SRGB, + RMT_FORMAT_L8A8_UNORM, + RMT_FORMAT_R8G8B8A8_UNORM, + RMT_FORMAT_R8G8B8A8_SNORM, + RMT_FORMAT_R8G8B8A8_USCALED, + RMT_FORMAT_R8G8B8A8_SSCALED, + RMT_FORMAT_R8G8B8A8_UINT, + RMT_FORMAT_R8G8B8A8_SINT, + RMT_FORMAT_R8G8B8A8_SRGB, + RMT_FORMAT_U8V8_SNORM_L8W8_UNORM, + RMT_FORMAT_R10G11B11_FLOAT, + RMT_FORMAT_R11G11B10_FLOAT, + RMT_FORMAT_R10G10B10A2_UNORM, + RMT_FORMAT_R10G10B10A2_SNORM, + RMT_FORMAT_R10G10B10A2_USCALED, + RMT_FORMAT_R10G10B10A2_SSCALED, + RMT_FORMAT_R10G10B10A2_UINT, + RMT_FORMAT_R10G10B10A2_SINT, + RMT_FORMAT_R10G10B10A2_BIAS_UNORM, + RMT_FORMAT_U10V10W10_SNORMA2_UNORM, + RMT_FORMAT_R16_UNORM, + RMT_FORMAT_R16_SNORM, + RMT_FORMAT_R16_USCALED, + RMT_FORMAT_R16_SSCALED, + RMT_FORMAT_R16_UINT, + RMT_FORMAT_R16_SINT, + RMT_FORMAT_R16_FLOAT, + RMT_FORMAT_L16_UNORM, + RMT_FORMAT_R16G16_UNORM, + RMT_FORMAT_R16G16_SNORM, + RMT_FORMAT_R16G16_USCALED, + RMT_FORMAT_R16G16_SSCALED, + RMT_FORMAT_R16G16_UINT, + RMT_FORMAT_R16G16_SINT, + RMT_FORMAT_R16G16_FLOAT, + RMT_FORMAT_R16G16B16A16_UNORM, + RMT_FORMAT_R16G16B16A16_SNORM, + 
RMT_FORMAT_R16G16B16A16_USCALED, + RMT_FORMAT_R16G16B16A16_SSCALED, + RMT_FORMAT_R16G16B16A16_UINT, + RMT_FORMAT_R16G16B16A16_SINT, + RMT_FORMAT_R16G16B16A16_FLOAT, + RMT_FORMAT_R32_UINT, + RMT_FORMAT_R32_SINT, + RMT_FORMAT_R32_FLOAT, + RMT_FORMAT_R32G32_UINT, + RMT_FORMAT_R32G32_SINT, + RMT_FORMAT_R32G32_FLOAT, + RMT_FORMAT_R32G32B32_UINT, + RMT_FORMAT_R32G32B32_SINT, + RMT_FORMAT_R32G32B32_FLOAT, + RMT_FORMAT_R32G32B32A32_UINT, + RMT_FORMAT_R32G32B32A32_SINT, + RMT_FORMAT_R32G32B32A32_FLOAT, + RMT_FORMAT_D16_UNORM_S8_UINT, + RMT_FORMAT_D32_UNORM_S8_UINT, + RMT_FORMAT_R9G9B9E5_FLOAT, + RMT_FORMAT_BC1_UNORM, + RMT_FORMAT_BC1_SRGB, + RMT_FORMAT_BC2_UNORM, + RMT_FORMAT_BC2_SRGB, + RMT_FORMAT_BC3_UNORM, + RMT_FORMAT_BC3_SRGB, + RMT_FORMAT_BC4_UNORM, + RMT_FORMAT_BC4_SRGB, + RMT_FORMAT_BC5_UNORM, + RMT_FORMAT_BC5_SRGB, + RMT_FORMAT_BC6_UNORM, + RMT_FORMAT_BC6_SRGB, + RMT_FORMAT_BC7_UNORM, + RMT_FORMAT_BC7_SRGB, + RMT_FORMAT_ETC2_R8G8B8_UNORM, + RMT_FORMAT_ETC2_R8G8B8_SRGB, + RMT_FORMAT_ETC2_R8G8B8A1_UNORM, + RMT_FORMAT_ETC2_R8G8B8A1_SRGB, + RMT_FORMAT_ETC2_R8G8B8A8_UNORM, + RMT_FORMAT_ETC2_R8G8B8A8_SRGB, + RMT_FORMAT_ETC2_R11_UNORM, + RMT_FORMAT_ETC2_R11_SNORM, + RMT_FORMAT_ETC2_R11G11_UNORM, + RMT_FORMAT_ETC2_R11G11_SNORM, + RMT_FORMAT_ASTCLD_R4X4_UNORM, + RMT_FORMAT_ASTCLD_R4X4_SRGB, + RMT_FORMAT_ASTCLD_R5X4_UNORM, + RMT_FORMAT_ASTCLD_R5X4_SRGB, + RMT_FORMAT_ASTCLD_R5X5_UNORM, + RMT_FORMAT_ASTCLD_R5X5_SRGB, + RMT_FORMAT_ASTCLD_R6X5_UNORM, + RMT_FORMAT_ASTCLD_R6X5_SRGB, + RMT_FORMAT_ASTCLD_R6X6_UNORM, + RMT_FORMAT_ASTCLD_R6X6_SRGB, + RMT_FORMAT_ASTCLD_R8X5_UNORM, + RMT_FORMAT_ASTCLD_R8X5_SRGB, + RMT_FORMAT_ASTCLD_R8X6_UNORM, + RMT_FORMAT_ASTCLD_R8X6_SRGB, + RMT_FORMAT_ASTCLD_R8X8_UNORM, + RMT_FORMAT_ASTCLD_R8X8_SRGB, + RMT_FORMAT_ASTCLD_R10X5_UNORM, + RMT_FORMAT_ASTCLD_R10X5_SRGB, + RMT_FORMAT_ASTCLD_R10X6_UNORM, + RMT_FORMAT_ASTCLD_R10X6_SRGB, + RMT_FORMAT_ASTCLD_R10X8_UNORM, + RMT_FORMAT_ASTCLD_R10X10_UNORM, + RMT_FORMAT_ASTCLD_R12X10_UNORM, + RMT_FORMAT_ASTCLD_R12X10_SRGB, + RMT_FORMAT_ASTCLD_R12X12_UNORM, + RMT_FORMAT_ASTCLD_R12X12_SRGB, + RMT_FORMAT_ASTCHD_R4x4_FLOAT, + RMT_FORMAT_ASTCHD_R5x4_FLOAT, + RMT_FORMAT_ASTCHD_R5x5_FLOAT, + RMT_FORMAT_ASTCHD_R6x5_FLOAT, + RMT_FORMAT_ASTCHD_R6x6_FLOAT, + RMT_FORMAT_ASTCHD_R8x5_FLOAT, + RMT_FORMAT_ASTCHD_R8x6_FLOAT, + RMT_FORMAT_ASTCHD_R8x8_FLOAT, + RMT_FORMAT_ASTCHD_R10x5_FLOAT, + RMT_FORMAT_ASTCHD_R10x6_FLOAT, + RMT_FORMAT_ASTCHD_R10x8_FLOAT, + RMT_FORMAT_ASTCHD_R10x10_FLOAT, + RMT_FORMAT_ASTCHD_R12x10_FLOAT, + RMT_FORMAT_ASTCHD_R12x12_FLOAT, + RMT_FORMAT_R8G8B8G8_UNORM, + RMT_FORMAT_R8G8B8G8_USCALED, + RMT_FORMAT_G8R8G8B8_UNORM, + RMT_FORMAT_G8R8G8B8_USCALED, + RMT_FORMAT_AYUV, + RMT_FORMAT_UYVY, + RMT_FORMAT_VYUY, + RMT_FORMAT_YUY2, + RMT_FORMAT_YVY2, + RMT_FORMAT_YV12, + RMT_FORMAT_NV11, + RMT_FORMAT_NV12, + RMT_FORMAT_NV21, + RMT_FORMAT_P016, + RMT_FORMAT_P010, +}; + +enum rmt_swizzle { + RMT_SWIZZLE_ZERO, + RMT_SWIZZLE_ONE, + RMT_SWIZZLE_R, + RMT_SWIZZLE_G, + RMT_SWIZZLE_B, + RMT_SWIZZLE_A, +}; + +static inline enum rmt_format +vk_to_rmt_format(VkFormat format) +{ + switch (format) { + case VK_FORMAT_R8_UNORM: + return RMT_FORMAT_A8_UNORM; + case VK_FORMAT_R8_SNORM: + return RMT_FORMAT_R8_SNORM; + case VK_FORMAT_R8_USCALED: + return RMT_FORMAT_R8_USCALED; + case VK_FORMAT_R8_SSCALED: + return RMT_FORMAT_R8_SSCALED; + case VK_FORMAT_R8_UINT: + return RMT_FORMAT_R8_UINT; + case VK_FORMAT_R8_SINT: + return RMT_FORMAT_R8_SINT; + case VK_FORMAT_R8_SRGB: + return RMT_FORMAT_R8_SRGB; + case VK_FORMAT_R8G8_UNORM: + return RMT_FORMAT_R8G8_UNORM; + 
case VK_FORMAT_R8G8_SNORM: + return RMT_FORMAT_R8G8_SNORM; + case VK_FORMAT_R8G8_USCALED: + return RMT_FORMAT_R8G8_USCALED; + case VK_FORMAT_R8G8_SSCALED: + return RMT_FORMAT_R8G8_SSCALED; + case VK_FORMAT_R8G8_UINT: + return RMT_FORMAT_R8G8_UINT; + case VK_FORMAT_R8G8_SINT: + return RMT_FORMAT_R8G8_SINT; + case VK_FORMAT_R8G8_SRGB: + return RMT_FORMAT_R8G8_SRGB; + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + return RMT_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_B8G8R8A8_SNORM: + case VK_FORMAT_A8B8G8R8_SNORM_PACK32: + return RMT_FORMAT_R8G8B8A8_SNORM; + case VK_FORMAT_R8G8B8A8_USCALED: + case VK_FORMAT_B8G8R8A8_USCALED: + case VK_FORMAT_A8B8G8R8_USCALED_PACK32: + return RMT_FORMAT_R8G8B8A8_USCALED; + case VK_FORMAT_R8G8B8A8_SSCALED: + case VK_FORMAT_B8G8R8A8_SSCALED: + case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: + return RMT_FORMAT_R8G8B8A8_SSCALED; + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_B8G8R8A8_UINT: + case VK_FORMAT_A8B8G8R8_UINT_PACK32: + return RMT_FORMAT_R8G8B8A8_UINT; + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_B8G8R8A8_SINT: + case VK_FORMAT_A8B8G8R8_SINT_PACK32: + return RMT_FORMAT_R8G8B8A8_SINT; + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + return RMT_FORMAT_R8G8B8A8_SRGB; + case VK_FORMAT_R16_UNORM: + return RMT_FORMAT_R16_UNORM; + case VK_FORMAT_R16_SNORM: + return RMT_FORMAT_R16_SNORM; + case VK_FORMAT_R16_USCALED: + return RMT_FORMAT_R16_USCALED; + case VK_FORMAT_R16_SSCALED: + return RMT_FORMAT_R16_SSCALED; + case VK_FORMAT_R16_UINT: + return RMT_FORMAT_R16_UINT; + case VK_FORMAT_R16_SINT: + return RMT_FORMAT_R16_SINT; + case VK_FORMAT_R16G16_UNORM: + return RMT_FORMAT_R16G16_UNORM; + case VK_FORMAT_R16G16_SNORM: + return RMT_FORMAT_R16G16_SNORM; + case VK_FORMAT_R16G16_USCALED: + return RMT_FORMAT_R16G16_USCALED; + case VK_FORMAT_R16G16_SSCALED: + return RMT_FORMAT_R16G16_SSCALED; + case VK_FORMAT_R16G16_UINT: + return RMT_FORMAT_R16G16_UINT; + case VK_FORMAT_R16G16_SINT: + return RMT_FORMAT_R16G16_SINT; + case VK_FORMAT_R16G16_SFLOAT: + return RMT_FORMAT_R16G16_FLOAT; + case VK_FORMAT_R16G16B16A16_UNORM: + return RMT_FORMAT_R16G16B16A16_UNORM; + case VK_FORMAT_R16G16B16A16_SNORM: + return RMT_FORMAT_R16G16B16A16_SNORM; + case VK_FORMAT_R16G16B16A16_USCALED: + return RMT_FORMAT_R16G16B16A16_USCALED; + case VK_FORMAT_R16G16B16A16_SSCALED: + return RMT_FORMAT_R16G16B16A16_SSCALED; + case VK_FORMAT_R16G16B16A16_UINT: + return RMT_FORMAT_R16G16B16A16_UINT; + case VK_FORMAT_R16G16B16A16_SINT: + return RMT_FORMAT_R16G16B16A16_SINT; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return RMT_FORMAT_R16G16B16A16_FLOAT; + case VK_FORMAT_R32_UINT: + return RMT_FORMAT_R32_UINT; + case VK_FORMAT_R32_SINT: + return RMT_FORMAT_R32_SINT; + case VK_FORMAT_R32_SFLOAT: + return RMT_FORMAT_R32_FLOAT; + case VK_FORMAT_R32G32_UINT: + return RMT_FORMAT_R32G32_UINT; + case VK_FORMAT_R32G32_SINT: + return RMT_FORMAT_R32G32_SINT; + case VK_FORMAT_R32G32_SFLOAT: + return RMT_FORMAT_R32G32_FLOAT; + case VK_FORMAT_R32G32B32_UINT: + return RMT_FORMAT_R32G32B32_UINT; + case VK_FORMAT_R32G32B32_SINT: + return RMT_FORMAT_R32G32B32_SINT; + case VK_FORMAT_R32G32B32_SFLOAT: + return RMT_FORMAT_R32G32B32_FLOAT; + case VK_FORMAT_R32G32B32A32_UINT: + return RMT_FORMAT_R32G32B32A32_UINT; + case VK_FORMAT_R32G32B32A32_SINT: + return RMT_FORMAT_R32G32B32A32_SINT; + case VK_FORMAT_R32G32B32A32_SFLOAT: + return RMT_FORMAT_R32G32B32A32_FLOAT; + case 
VK_FORMAT_D16_UNORM_S8_UINT: + return RMT_FORMAT_D16_UNORM_S8_UINT; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return RMT_FORMAT_D32_UNORM_S8_UINT; + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + return RMT_FORMAT_BC1_UNORM; + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + return RMT_FORMAT_BC1_SRGB; + case VK_FORMAT_BC2_UNORM_BLOCK: + return RMT_FORMAT_BC2_UNORM; + case VK_FORMAT_BC2_SRGB_BLOCK: + return RMT_FORMAT_BC2_SRGB; + case VK_FORMAT_BC3_UNORM_BLOCK: + return RMT_FORMAT_BC3_UNORM; + case VK_FORMAT_BC3_SRGB_BLOCK: + return RMT_FORMAT_BC3_SRGB; + case VK_FORMAT_BC4_UNORM_BLOCK: + return RMT_FORMAT_BC4_UNORM; + case VK_FORMAT_BC5_UNORM_BLOCK: + return RMT_FORMAT_BC5_UNORM; + case VK_FORMAT_BC7_UNORM_BLOCK: + return RMT_FORMAT_BC7_UNORM; + case VK_FORMAT_BC7_SRGB_BLOCK: + return RMT_FORMAT_BC7_SRGB; + case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + return RMT_FORMAT_ETC2_R8G8B8_UNORM; + case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + return RMT_FORMAT_ETC2_R8G8B8_SRGB; + case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + return RMT_FORMAT_ETC2_R8G8B8A1_UNORM; + case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + return RMT_FORMAT_ETC2_R8G8B8A1_SRGB; + case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + return RMT_FORMAT_ETC2_R8G8B8A8_UNORM; + case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + return RMT_FORMAT_ETC2_R8G8B8A8_SRGB; + case VK_FORMAT_ASTC_4x4_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R4X4_UNORM; + case VK_FORMAT_ASTC_4x4_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R4X4_SRGB; + case VK_FORMAT_ASTC_5x4_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R5X4_UNORM; + case VK_FORMAT_ASTC_5x4_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R5X4_SRGB; + case VK_FORMAT_ASTC_5x5_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R5X5_UNORM; + case VK_FORMAT_ASTC_5x5_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R5X5_SRGB; + case VK_FORMAT_ASTC_6x5_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R6X5_UNORM; + case VK_FORMAT_ASTC_6x5_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R6X5_SRGB; + case VK_FORMAT_ASTC_6x6_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R6X6_UNORM; + case VK_FORMAT_ASTC_6x6_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R6X6_SRGB; + case VK_FORMAT_ASTC_8x5_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R8X5_UNORM; + case VK_FORMAT_ASTC_8x5_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R8X5_SRGB; + case VK_FORMAT_ASTC_8x6_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R8X6_UNORM; + case VK_FORMAT_ASTC_8x6_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R8X6_SRGB; + case VK_FORMAT_ASTC_8x8_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R8X8_UNORM; + case VK_FORMAT_ASTC_8x8_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R8X8_SRGB; + case VK_FORMAT_ASTC_10x5_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R10X5_UNORM; + case VK_FORMAT_ASTC_10x5_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R10X5_SRGB; + case VK_FORMAT_ASTC_10x6_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R10X6_UNORM; + case VK_FORMAT_ASTC_10x6_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R10X6_SRGB; + case VK_FORMAT_ASTC_10x8_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R10X8_UNORM; + case VK_FORMAT_ASTC_10x10_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R10X10_UNORM; + case VK_FORMAT_ASTC_12x10_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R12X10_UNORM; + case VK_FORMAT_ASTC_12x10_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R12X10_SRGB; + case VK_FORMAT_ASTC_12x12_UNORM_BLOCK: + return RMT_FORMAT_ASTCLD_R12X12_UNORM; + case VK_FORMAT_ASTC_12x12_SRGB_BLOCK: + return RMT_FORMAT_ASTCLD_R12X12_SRGB; + case VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R4x4_FLOAT; + case VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R5x4_FLOAT; + case VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK: + return 
RMT_FORMAT_ASTCHD_R5x5_FLOAT; + case VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R6x5_FLOAT; + case VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R6x6_FLOAT; + case VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R8x5_FLOAT; + case VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R8x6_FLOAT; + case VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R8x8_FLOAT; + case VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R10x5_FLOAT; + case VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R10x6_FLOAT; + case VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R10x8_FLOAT; + case VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R10x10_FLOAT; + case VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R12x10_FLOAT; + case VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK: + return RMT_FORMAT_ASTCHD_R12x12_FLOAT; + default: + return RMT_FORMAT_UNDEFINED; + } +} + +static void +rmt_format_to_swizzle(VkFormat format, enum rmt_swizzle *swizzles) +{ + const struct util_format_description *description = + util_format_description(vk_format_to_pipe_format(format)); + for (unsigned i = 0; i < 4; ++i) { + switch (description->swizzle[i]) { + case PIPE_SWIZZLE_X: + swizzles[i] = RMT_SWIZZLE_R; + break; + case PIPE_SWIZZLE_Y: + swizzles[i] = RMT_SWIZZLE_G; + break; + case PIPE_SWIZZLE_Z: + swizzles[i] = RMT_SWIZZLE_B; + break; + case PIPE_SWIZZLE_W: + swizzles[i] = RMT_SWIZZLE_A; + break; + case PIPE_SWIZZLE_0: + case PIPE_SWIZZLE_NONE: + swizzles[i] = RMT_SWIZZLE_ZERO; + break; + case PIPE_SWIZZLE_1: + swizzles[i] = RMT_SWIZZLE_ONE; + break; + } + } +} + +#define RMT_FILE_MAGIC_NUMBER 0x494e494d +#define RMT_FILE_VERSION_MAJOR 1 +#define RMT_FILE_VERSION_MINOR 0 +#define RMT_FILE_ADAPTER_NAME_MAX_SIZE 128 + +enum rmt_heap_type { + RMT_HEAP_TYPE_LOCAL, /* DEVICE_LOCAL | HOST_VISIBLE */ + RMT_HEAP_TYPE_INVISIBLE, /* DEVICE_LOCAL */ + RMT_HEAP_TYPE_SYSTEM, /* HOST_VISIBLE | HOST_COHERENT */ + RMT_HEAP_TYPE_NONE, + RMT_HEAP_TYPE_UNKNOWN = -1, +}; + +enum rmt_file_chunk_type { + RMT_FILE_CHUNK_TYPE_ASIC_INFO, /* Seems to be unused in RMV */ + RMT_FILE_CHUNK_TYPE_API_INFO, + RMT_FILE_CHUNK_TYPE_SYSTEM_INFO, + RMT_FILE_CHUNK_TYPE_RMT_DATA, + RMT_FILE_CHUNK_TYPE_SEGMENT_INFO, + RMT_FILE_CHUNK_TYPE_PROCESS_START, + RMT_FILE_CHUNK_TYPE_SNAPSHOT_INFO, + RMT_FILE_CHUNK_TYPE_ADAPTER_INFO, +}; + +/** + * RMT API info. 
+ */ +enum rmt_api_type { + RMT_API_TYPE_DIRECTX_12, + RMT_API_TYPE_VULKAN, + RMT_API_TYPE_GENERIC, + RMT_API_TYPE_OPENCL, +}; + +struct rmt_file_chunk_id { + enum rmt_file_chunk_type type : 8; + int32_t index : 8; + int32_t reserved : 16; +}; + +struct rmt_file_chunk_header { + struct rmt_file_chunk_id chunk_id; + uint16_t minor_version; + uint16_t major_version; + int32_t size_in_bytes; + int32_t padding; +}; + +struct rmt_file_header_flags { + union { + struct { + int32_t reserved : 32; + }; + + uint32_t value; + }; +}; + +struct rmt_file_header { + uint32_t magic_number; + uint32_t version_major; + uint32_t version_minor; + struct rmt_file_header_flags flags; + int32_t chunk_offset; + int32_t second; + int32_t minute; + int32_t hour; + int32_t day_in_month; + int32_t month; + int32_t year; + int32_t day_in_week; + int32_t day_in_year; + int32_t is_daylight_savings; +}; + +static_assert(sizeof(struct rmt_file_header) == 56, "rmt_file_header doesn't match RMV spec"); + +static void +rmt_fill_header(struct rmt_file_header *header) +{ + struct tm *timep, result; + time_t raw_time; + + header->magic_number = RMT_FILE_MAGIC_NUMBER; + header->version_major = RMT_FILE_VERSION_MAJOR; + header->version_minor = RMT_FILE_VERSION_MINOR; + header->flags.value = 0; + header->chunk_offset = sizeof(*header); + + time(&raw_time); + timep = os_localtime(&raw_time, &result); + + header->second = timep->tm_sec; + header->minute = timep->tm_min; + header->hour = timep->tm_hour; + header->day_in_month = timep->tm_mday; + header->month = timep->tm_mon; + header->year = timep->tm_year; + header->day_in_week = timep->tm_wday; + header->day_in_year = timep->tm_yday; + header->is_daylight_savings = timep->tm_isdst; +} + +/* + * RMT data. + */ +struct rmt_file_chunk_rmt_data { + struct rmt_file_chunk_header header; + uint64_t process_id; + uint64_t thread_id; +}; + +static_assert(sizeof(struct rmt_file_chunk_rmt_data) == 32, + "rmt_file_chunk_rmt_data doesn't match RMV spec"); + +static void +rmt_fill_chunk_rmt_data(size_t token_stream_size, struct rmt_file_chunk_rmt_data *chunk) +{ + chunk->header.chunk_id.type = RMT_FILE_CHUNK_TYPE_RMT_DATA; + chunk->header.chunk_id.index = 0; + chunk->header.major_version = 1; + chunk->header.minor_version = 6; + chunk->header.size_in_bytes = sizeof(*chunk) + token_stream_size; + + chunk->process_id = (uint64_t)getpid(); +} + +/* + * RMT System info. Equivalent to SQTT CPU info. + */ +struct rmt_file_chunk_system_info { + struct rmt_file_chunk_header header; + uint32_t vendor_id[4]; + uint32_t processor_brand[12]; + uint32_t reserved[2]; + uint64_t cpu_timestamp_freq; + uint32_t clock_speed; + uint32_t num_logical_cores; + uint32_t num_physical_cores; + uint32_t system_ram_size; +}; + +static_assert(sizeof(struct rmt_file_chunk_system_info) == 112, + "rmt_file_chunk_system_info doesn't match RMV spec"); + +/* same as vk_sqtt_fill_cpu_info. TODO: Share with ac_rgp.c */ +static void +rmt_fill_chunk_system_info(struct rmt_file_chunk_system_info *chunk) +{ + uint32_t cpu_clock_speed_total = 0; + uint64_t system_ram_size = 0; + char line[1024]; + FILE *f; + + chunk->header.chunk_id.type = RMT_FILE_CHUNK_TYPE_SYSTEM_INFO; + chunk->header.chunk_id.index = 0; + chunk->header.major_version = 0; + chunk->header.minor_version = 0; + chunk->header.size_in_bytes = sizeof(*chunk); + + /* For some reason, RMV allocates scratch data based on the + * maximum timestamp in clock ticks. A tick of 1ns produces extremely + * large timestamps, which causes RMV to run out of memory. 
Therefore, + * all timestamps are translated as if the clock ran at 1 MHz. */ + chunk->cpu_timestamp_freq = 1 * 1000000; + + strncpy((char *)chunk->vendor_id, "Unknown", sizeof(chunk->vendor_id)); + strncpy((char *)chunk->processor_brand, "Unknown", sizeof(chunk->processor_brand)); + chunk->clock_speed = 0; + chunk->num_logical_cores = 0; + chunk->num_physical_cores = 0; + chunk->system_ram_size = 0; + if (os_get_total_physical_memory(&system_ram_size)) + chunk->system_ram_size = system_ram_size / (1024 * 1024); + + /* Parse cpuinfo to get more detailled information. */ + f = fopen("/proc/cpuinfo", "r"); + if (!f) + return; + + while (fgets(line, sizeof(line), f)) { + char *str; + + /* Parse vendor name. */ + str = strstr(line, "vendor_id"); + if (str) { + char *ptr = (char *)chunk->vendor_id; + char *v = strtok(str, ":"); + v = strtok(NULL, ":"); + strncpy(ptr, v + 1, sizeof(chunk->vendor_id) - 1); + ptr[sizeof(chunk->vendor_id) - 1] = '\0'; + } + + /* Parse processor name. */ + str = strstr(line, "model name"); + if (str) { + char *ptr = (char *)chunk->processor_brand; + char *v = strtok(str, ":"); + v = strtok(NULL, ":"); + strncpy(ptr, v + 1, sizeof(chunk->processor_brand) - 1); + ptr[sizeof(chunk->processor_brand) - 1] = '\0'; + } + + /* Parse the current CPU clock speed for each cores. */ + str = strstr(line, "cpu MHz"); + if (str) { + uint32_t v = 0; + if (sscanf(str, "cpu MHz : %d", &v) == 1) + cpu_clock_speed_total += v; + } + + /* Parse the number of logical cores. */ + str = strstr(line, "siblings"); + if (str) { + uint32_t v = 0; + if (sscanf(str, "siblings : %d", &v) == 1) + chunk->num_logical_cores = v; + } + + /* Parse the number of physical cores. */ + str = strstr(line, "cpu cores"); + if (str) { + uint32_t v = 0; + if (sscanf(str, "cpu cores : %d", &v) == 1) + chunk->num_physical_cores = v; + } + } + + if (chunk->num_logical_cores) + chunk->clock_speed = cpu_clock_speed_total / chunk->num_logical_cores; + + fclose(f); +} + +/* + * RMT Segment info. 
+ */ +struct rmt_file_chunk_segment_info { + struct rmt_file_chunk_header header; + uint64_t base_address; + uint64_t size; + enum rmt_heap_type heap_type; + int32_t memory_index; +}; + +static_assert(sizeof(struct rmt_file_chunk_segment_info) == 40, + "rmt_file_chunk_segment_info doesn't match RMV spec"); + +static void +rmt_fill_chunk_segment_info(struct vk_memory_trace_data *data, struct vk_rmv_device_info *info, + struct rmt_file_chunk_segment_info *chunk, int32_t index) +{ + chunk->header.chunk_id.type = RMT_FILE_CHUNK_TYPE_SEGMENT_INFO; + chunk->header.chunk_id.index = index; + chunk->header.major_version = 0; + chunk->header.minor_version = 0; + chunk->header.size_in_bytes = sizeof(*chunk); + + chunk->memory_index = index; + chunk->heap_type = (enum rmt_heap_type)index; + chunk->base_address = info->memory_infos[index].physical_base_address; + chunk->size = info->memory_infos[index].size; +} + +/* + * RMT PCIe adapter info + */ +struct rmt_file_chunk_adapter_info { + struct rmt_file_chunk_header header; + char name[RMT_FILE_ADAPTER_NAME_MAX_SIZE]; + uint32_t pcie_family_id; + uint32_t pcie_revision_id; + uint32_t device_id; + uint32_t minimum_engine_clock; + uint32_t maximum_engine_clock; + uint32_t memory_type; + uint32_t memory_operations_per_clock; + uint32_t memory_bus_width; + uint32_t memory_bandwidth; + uint32_t minimum_memory_clock; + uint32_t maximum_memory_clock; +}; + +static_assert(sizeof(struct rmt_file_chunk_adapter_info) == 188, + "rmt_file_chunk_adapter_info doesn't match RMV spec"); + +static void +rmt_fill_chunk_adapter_info(struct vk_rmv_device_info *info, + struct rmt_file_chunk_adapter_info *chunk) +{ + chunk->header.chunk_id.type = RMT_FILE_CHUNK_TYPE_ADAPTER_INFO; + chunk->header.chunk_id.index = 0; + chunk->header.major_version = 0; + chunk->header.minor_version = 0; + chunk->header.size_in_bytes = sizeof(*chunk); + + memcpy(chunk->name, info->device_name, RMT_FILE_ADAPTER_NAME_MAX_SIZE); + chunk->pcie_family_id = info->pcie_family_id; + chunk->pcie_revision_id = info->pcie_revision_id; + chunk->device_id = info->pcie_device_id; + chunk->minimum_engine_clock = info->minimum_shader_clock; + chunk->maximum_engine_clock = info->maximum_shader_clock; + chunk->memory_type = info->vram_type; + chunk->memory_operations_per_clock = info->vram_operations_per_clock; + + chunk->memory_bus_width = info->vram_bus_width; + chunk->minimum_memory_clock = info->minimum_memory_clock; + chunk->maximum_memory_clock = info->maximum_memory_clock; + /* Convert bandwidth from GB/s to MiB/s */ + chunk->memory_bandwidth = + ((uint64_t)info->vram_bandwidth * 1000ULL * 1000ULL * 1000ULL) / (1024ULL * 1024ULL); +} + +/* + * RMT snapshot info + */ +struct rmt_file_chunk_snapshot_info { + struct rmt_file_chunk_header header; + uint64_t snapshot_time; + int32_t name_length; + int32_t padding; + /* The name follows after this struct */ + /* After the name, a stream of tokens is written. 
*/ +}; + +static_assert(sizeof(struct rmt_file_chunk_snapshot_info) == 32, + "rmt_file_chunk_snapshot_info doesn't match RMV spec"); + +static void +rmt_fill_chunk_snapshot_info(uint64_t timestamp, int32_t name_length, + struct rmt_file_chunk_snapshot_info *chunk) +{ + chunk->header.chunk_id.type = RMT_FILE_CHUNK_TYPE_SNAPSHOT_INFO; + chunk->header.chunk_id.index = 0; + chunk->header.major_version = 1; + chunk->header.minor_version = 6; + chunk->header.size_in_bytes = sizeof(*chunk) + name_length; + + chunk->snapshot_time = timestamp; + chunk->name_length = name_length; +} + +/* + * RMT stream tokens + */ + +enum rmt_token_type { + RMT_TOKEN_TYPE_TIMESTAMP, + RMT_TOKEN_TYPE_RESERVED0, + RMT_TOKEN_TYPE_RESERVED1, + RMT_TOKEN_TYPE_PAGE_TABLE_UPDATE, + RMT_TOKEN_TYPE_USERDATA, + RMT_TOKEN_TYPE_MISC, + RMT_TOKEN_TYPE_RESOURCE_REFERENCE, + RMT_TOKEN_TYPE_RESOURCE_BIND, + RMT_TOKEN_TYPE_PROCESS_EVENT, + RMT_TOKEN_TYPE_PAGE_REFERENCE, + RMT_TOKEN_TYPE_CPU_MAP, + RMT_TOKEN_TYPE_VIRTUAL_FREE, + RMT_TOKEN_TYPE_VIRTUAL_ALLOCATE, + RMT_TOKEN_TYPE_RESOURCE_CREATE, + RMT_TOKEN_TYPE_TIME_DELTA, + RMT_TOKEN_TYPE_RESOURCE_DESTROY, +}; + +static enum rmt_token_type +token_type_to_rmt(enum vk_rmv_token_type type) +{ + switch (type) { + case VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE: + return RMT_TOKEN_TYPE_PAGE_TABLE_UPDATE; + case VK_RMV_TOKEN_TYPE_USERDATA: + return RMT_TOKEN_TYPE_USERDATA; + case VK_RMV_TOKEN_TYPE_MISC: + return RMT_TOKEN_TYPE_MISC; + case VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE: + return RMT_TOKEN_TYPE_RESOURCE_REFERENCE; + case VK_RMV_TOKEN_TYPE_RESOURCE_BIND: + return RMT_TOKEN_TYPE_RESOURCE_BIND; + case VK_RMV_TOKEN_TYPE_CPU_MAP: + return RMT_TOKEN_TYPE_CPU_MAP; + case VK_RMV_TOKEN_TYPE_VIRTUAL_FREE: + return RMT_TOKEN_TYPE_VIRTUAL_FREE; + case VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE: + return RMT_TOKEN_TYPE_VIRTUAL_ALLOCATE; + case VK_RMV_TOKEN_TYPE_RESOURCE_CREATE: + return RMT_TOKEN_TYPE_RESOURCE_CREATE; + case VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY: + return RMT_TOKEN_TYPE_RESOURCE_DESTROY; + default: + unreachable("invalid token type"); + } +} + +enum rmt_descriptor_type { + RMT_DESCRIPTOR_TYPE_CSV_SRV_UAV, + RMT_DESCRIPTOR_TYPE_SAMPLER, + RMT_DESCRIPTOR_TYPE_RTV, + RMT_DESCRIPTOR_TYPE_DSV, + RMT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + RMT_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + RMT_DESCRIPTOR_TYPE_STORAGE_IMAGE, + RMT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + RMT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + RMT_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + RMT_DESCRIPTOR_TYPE_STORAGE_BUFFER, + RMT_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + RMT_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, + RMT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, + RMT_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK, + RMT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE, + RMT_DESCRIPTOR_TYPE_INVALID = 0x7FFF, +}; + +static enum rmt_descriptor_type +vk_to_rmt_descriptor_type(VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return RMT_DESCRIPTOR_TYPE_SAMPLER; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return RMT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + return RMT_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + return RMT_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + return RMT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return RMT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + return RMT_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + 
return RMT_DESCRIPTOR_TYPE_STORAGE_BUFFER; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return RMT_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return RMT_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return RMT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: + return RMT_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK; + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: + return RMT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE; + default: + /* This is reachable, error should be handled by caller */ + return RMT_DESCRIPTOR_TYPE_INVALID; + } +}; + +static uint32_t +rmt_valid_pool_size_count(struct vk_rmv_descriptor_pool_description *description) +{ + uint32_t count = 0; + for (uint32_t i = 0; i < description->pool_size_count; ++i) { + enum rmt_descriptor_type rmt_type = + vk_to_rmt_descriptor_type(description->pool_sizes[i].type); + if (rmt_type == RMT_DESCRIPTOR_TYPE_INVALID) + /* Unknown descriptor type, skip */ + continue; + ++count; + } + return count; +} + +enum rmt_resource_owner_type { + RMT_RESOURCE_OWNER_TYPE_APPLICATION, + RMT_RESOURCE_OWNER_TYPE_PAL, + RMT_RESOURCE_OWNER_TYPE_CLIENT_DRIVER, + RMT_RESOURCE_OWNER_TYPE_KMD, +}; + +static void +rmt_file_write_bits(uint64_t *dst, uint64_t data, unsigned first_bit, unsigned last_bit) +{ + unsigned index = first_bit / 64; + unsigned shift = first_bit % 64; + + /* Data crosses an uint64_t boundary, split */ + if (index != last_bit / 64) { + unsigned first_part_size = 64 - shift; + rmt_file_write_bits(dst, data & ((1ULL << first_part_size) - 1ULL), first_bit, + index * 64 + 63); + rmt_file_write_bits(dst, data >> first_part_size, (index + 1) * 64, last_bit); + } else { + assert(data <= (1ULL << (uint64_t)(last_bit - first_bit + 1ULL)) - 1ULL); + dst[index] |= data << shift; + } +} + +static void +rmt_file_write_token_bits(uint64_t *dst, uint64_t data, unsigned first_bit, unsigned last_bit) +{ + rmt_file_write_bits(dst, data, first_bit - 8, last_bit - 8); +} + +static enum rmt_heap_type +rmt_file_domain_to_heap_type(enum vk_rmv_kernel_memory_domain domain, bool has_cpu_access) +{ + switch (domain) { + case VK_RMV_KERNEL_MEMORY_DOMAIN_CPU: + case VK_RMV_KERNEL_MEMORY_DOMAIN_GTT: + return RMT_HEAP_TYPE_SYSTEM; + case VK_RMV_KERNEL_MEMORY_DOMAIN_VRAM: + return has_cpu_access ? RMT_HEAP_TYPE_LOCAL : RMT_HEAP_TYPE_INVISIBLE; + default: + unreachable("invalid domain"); + } +} + +/* + * Write helpers for stream tokens + */ + +/* The timestamp frequency, in clock units / second. + * Currently set to 1MHz. */ +#define RMT_TIMESTAMP_FREQUENCY (1 * 1000000) +/* Factor needed to convert nanosecond timestamps as returned by os_get_time_nano + * to RMV timestamps */ +#define RMT_TIMESTAMP_DIVISOR (1000000000L / RMT_TIMESTAMP_FREQUENCY) + +static void +rmt_dump_timestamp(struct vk_rmv_timestamp_token *token, FILE *output) +{ + uint64_t data[2] = {0}; + rmt_file_write_bits(data, RMT_TOKEN_TYPE_TIMESTAMP, 0, 3); + /* RMT stores clock ticks divided by 32 */ + rmt_file_write_bits(data, token->value / 32, 4, 63); + rmt_file_write_bits(data, RMT_TIMESTAMP_FREQUENCY, 64, 89); + fwrite(data, 12, 1, output); +} + +static void +rmt_dump_time_delta(uint64_t delta, FILE *output) +{ + uint64_t data = 0; + rmt_file_write_bits(&data, RMT_TOKEN_TYPE_TIME_DELTA, 0, 3); + rmt_file_write_bits(&data, 7, 4, 7); /* no. 
of delta bytes */ + rmt_file_write_bits(&data, delta, 8, 63); + fwrite(&data, 8, 1, output); +} + +static void +rmt_dump_event_resource(struct vk_rmv_event_description *description, FILE *output) +{ + /* 8 bits of flags are the only thing in the payload */ + fwrite(&description->flags, 1, 1, output); +} + +static void +rmt_dump_border_color_palette_resource(struct vk_rmv_border_color_palette_description *description, + FILE *output) +{ + /* no. of entries is the only thing in the payload */ + fwrite(&description->num_entries, 1, 1, output); +} + +enum rmt_page_size { + RMT_PAGE_SIZE_UNMAPPED, + RMT_PAGE_SIZE_4_KB, + RMT_PAGE_SIZE_64_KB, + RMT_PAGE_SIZE_256_KB, + RMT_PAGE_SIZE_1_MB, + RMT_PAGE_SIZE_2_MB, +}; + +static enum rmt_page_size +rmt_size_to_page_size(uint32_t size) +{ + switch (size) { + case 4096: + return RMT_PAGE_SIZE_4_KB; + case 65536: + return RMT_PAGE_SIZE_64_KB; + case 262144: + return RMT_PAGE_SIZE_256_KB; + case 1048576: + return RMT_PAGE_SIZE_1_MB; + case 2097152: + return RMT_PAGE_SIZE_2_MB; + default: + unreachable("invalid page size"); + } +} + +static void +rmt_dump_heap_resource(struct vk_rmv_heap_description *description, FILE *output) +{ + uint64_t data[2] = {0}; + rmt_file_write_bits(data, description->alloc_flags, 0, 3); + rmt_file_write_bits(data, description->size, 4, 68); + rmt_file_write_bits(data, rmt_size_to_page_size(description->alignment), 69, 73); + rmt_file_write_bits(data, description->heap_index, 74, 77); + fwrite(data, 10, 1, output); +} + +enum rmt_buffer_usage_flags { + RMT_BUFFER_USAGE_FLAGS_TRANSFER_SOURCE = 1 << 0, + RMT_BUFFER_USAGE_FLAGS_TRANSFER_DESTINATION = 1 << 1, + RMT_BUFFER_USAGE_FLAGS_UNIFORM_TEXEL_BUFFER = 1 << 2, + RMT_BUFFER_USAGE_FLAGS_STORAGE_TEXEL_BUFFER = 1 << 3, + RMT_BUFFER_USAGE_FLAGS_UNIFORM_BUFFER = 1 << 4, + RMT_BUFFER_USAGE_FLAGS_STORAGE_BUFFER = 1 << 5, + RMT_BUFFER_USAGE_FLAGS_INDEX_BUFFER = 1 << 6, + RMT_BUFFER_USAGE_FLAGS_VERTEX_BUFFER = 1 << 7, + RMT_BUFFER_USAGE_FLAGS_INDIRECT_BUFFER = 1 << 8, + RMT_BUFFER_USAGE_FLAGS_TRANSFORM_FEEDBACK_BUFFER = 1 << 9, + RMT_BUFFER_USAGE_FLAGS_TRANSFORM_FEEDBACK_COUNTER_BUFFER = 1 << 10, + RMT_BUFFER_USAGE_FLAGS_CONDITIONAL_RENDERING = 1 << 11, + RMT_BUFFER_USAGE_FLAGS_RAY_TRACING = 1 << 12, + RMT_BUFFER_USAGE_FLAGS_SHADER_DEVICE_ADDRESS = 1 << 13, +}; + +static void +rmt_dump_buffer_resource(struct vk_rmv_buffer_description *description, FILE *output) +{ + /* flags up to indirect buffer are equivalent */ + uint32_t usage_flags = + description->usage_flags & ((VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT << 1) - 1); + + if (description->usage_flags & VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT) + usage_flags |= RMT_BUFFER_USAGE_FLAGS_TRANSFORM_FEEDBACK_BUFFER; + if (description->usage_flags & VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT) + usage_flags |= RMT_BUFFER_USAGE_FLAGS_TRANSFORM_FEEDBACK_COUNTER_BUFFER; + if (description->usage_flags & VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT) + usage_flags |= RMT_BUFFER_USAGE_FLAGS_CONDITIONAL_RENDERING; + if (description->usage_flags & + (VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | + VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR)) + usage_flags |= RMT_BUFFER_USAGE_FLAGS_RAY_TRACING; + if (description->usage_flags & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) + usage_flags |= RMT_BUFFER_USAGE_FLAGS_SHADER_DEVICE_ADDRESS; + + uint64_t data[2] = {0}; + rmt_file_write_bits(data, description->create_flags, 0, 7); + rmt_file_write_bits(data, 
usage_flags, 8, 23); + rmt_file_write_bits(data, description->size, 24, 87); + fwrite(data, 11, 1, output); +} + +enum rmt_tiling { + RMT_TILING_LINEAR, + RMT_TILING_OPTIMAL, + RMT_TILING_SWIZZLED, +}; + +enum rmt_tiling_optimization_mode { + RMT_TILING_OPTIMIZATION_MODE_BALANCED, + RMT_TILING_OPTIMIZATION_MODE_SPACE, + RMT_TILING_OPTIMIZATION_MODE_SPEED, +}; + +enum rmt_metadata_mode { + RMT_METADATA_MODE_DEFAULT, + RMT_METADATA_MODE_OPTIMIZE_TEX_PREFETCH, + RMT_METADATA_MODE_DISABLE, +}; + +enum rmt_image_create_flags { + RMT_IMAGE_CREATE_INVARIANT = 1 << 0, + RMT_IMAGE_CREATE_CLONEABLE = 1 << 1, + RMT_IMAGE_CREATE_SHAREABLE = 1 << 2, + RMT_IMAGE_CREATE_FLIPPABLE = 1 << 3, + RMT_IMAGE_CREATE_STEREO = 1 << 4, + RMT_IMAGE_CREATE_CUBEMAP = 1 << 5, + RMT_IMAGE_CREATE_PRT = 1 << 6, +}; + +enum rmt_image_usage_flags { + RMT_IMAGE_USAGE_SHADER_READ = 1 << 0, + RMT_IMAGE_USAGE_SHADER_WRITE = 1 << 1, + RMT_IMAGE_USAGE_RESOLVE_SRC = 1 << 2, + RMT_IMAGE_USAGE_RESOLVE_DST = 1 << 3, + RMT_IMAGE_USAGE_COLOR_TARGET = 1 << 4, + RMT_IMAGE_USAGE_DEPTH_STENCIL = 1 << 5, +}; + +static void +rmt_dump_image_resource(struct vk_rmv_image_description *description, FILE *output) +{ + uint64_t data[5] = {0}; + + enum rmt_tiling tiling; + switch (description->tiling) { + case VK_IMAGE_TILING_LINEAR: + tiling = RMT_TILING_LINEAR; + break; + case VK_IMAGE_TILING_OPTIMAL: + case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: + tiling = RMT_TILING_OPTIMAL; + break; + default: + unreachable("invalid image tiling"); + } + + uint32_t create_flags = 0; + if (description->create_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + create_flags |= RMT_IMAGE_CREATE_CUBEMAP; + if (description->create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) + create_flags |= RMT_IMAGE_CREATE_PRT; + + uint32_t usage_flags = 0; + if (description->usage_flags & VK_IMAGE_USAGE_SAMPLED_BIT || + description->usage_flags & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + usage_flags |= RMT_IMAGE_USAGE_SHADER_READ; + if (description->usage_flags & VK_IMAGE_USAGE_STORAGE_BIT) + usage_flags |= RMT_IMAGE_USAGE_SHADER_READ | RMT_IMAGE_USAGE_SHADER_WRITE; + if (description->usage_flags & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + usage_flags |= RMT_IMAGE_USAGE_COLOR_TARGET; + if (description->usage_flags & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + usage_flags |= RMT_IMAGE_USAGE_DEPTH_STENCIL; + if (description->usage_flags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) + usage_flags |= RMT_IMAGE_USAGE_RESOLVE_SRC; + if (description->usage_flags & VK_IMAGE_USAGE_TRANSFER_DST_BIT) + usage_flags |= RMT_IMAGE_USAGE_RESOLVE_DST; + + enum rmt_swizzle swizzles[4] = {RMT_SWIZZLE_ZERO, RMT_SWIZZLE_ZERO, RMT_SWIZZLE_ZERO, + RMT_SWIZZLE_ZERO}; + rmt_format_to_swizzle(description->format, swizzles); + + rmt_file_write_bits(data, create_flags, 0, 19); + rmt_file_write_bits(data, usage_flags, 20, 34); + rmt_file_write_bits(data, description->type, 35, 36); + rmt_file_write_bits(data, description->extent.width - 1, 37, 50); + rmt_file_write_bits(data, description->extent.height - 1, 51, 64); + rmt_file_write_bits(data, description->extent.depth - 1, 65, 78); + rmt_file_write_bits(data, swizzles[0], 79, 81); + rmt_file_write_bits(data, swizzles[1], 82, 84); + rmt_file_write_bits(data, swizzles[2], 85, 87); + rmt_file_write_bits(data, swizzles[3], 88, 90); + rmt_file_write_bits(data, vk_to_rmt_format(description->format), 91, 98); + rmt_file_write_bits(data, description->num_mips, 99, 102); + rmt_file_write_bits(data, description->num_slices - 1, 103, 113); + rmt_file_write_bits(data, 
description->log2_samples, 114, 116); + rmt_file_write_bits(data, description->log2_storage_samples, 117, 118); + rmt_file_write_bits(data, tiling, 119, 120); + rmt_file_write_bits(data, RMT_TILING_OPTIMIZATION_MODE_BALANCED, 121, 122); + rmt_file_write_bits(data, RMT_METADATA_MODE_DEFAULT, 123, 124); + rmt_file_write_bits(data, description->alignment_log2, 125, 129); + rmt_file_write_bits(data, description->presentable, 130, 130); + rmt_file_write_bits(data, description->size, 131, 162); + rmt_file_write_bits(data, description->metadata_offset, 163, 194); + rmt_file_write_bits(data, description->metadata_size, 195, 226); + rmt_file_write_bits(data, description->metadata_header_offset, 227, 258); + rmt_file_write_bits(data, description->metadata_header_size, 259, 290); + rmt_file_write_bits(data, description->image_alignment_log2, 291, 295); + rmt_file_write_bits(data, description->metadata_alignment_log2, 296, 300); + /* metadata header alignment */ + rmt_file_write_bits(data, description->metadata_alignment_log2, 301, 305); + /* is fullscreen presentable */ + rmt_file_write_bits(data, description->presentable, 306, 306); + fwrite(data, 39, 1, output); +} + +enum rmt_query_pool_type { + RMT_QUERY_POOL_TYPE_OCCLUSION, + RMT_QUERY_POOL_TYPE_PIPELINE, + RMT_QUERY_POOL_TYPE_STREAMOUT +}; + +static void +rmt_dump_query_pool_resource(struct vk_rmv_query_pool_description *description, FILE *output) +{ + enum rmt_query_pool_type pool_type; + switch (description->type) { + case VK_QUERY_TYPE_OCCLUSION: + pool_type = RMT_QUERY_POOL_TYPE_OCCLUSION; + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + pool_type = RMT_QUERY_POOL_TYPE_PIPELINE; + break; + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + pool_type = RMT_QUERY_POOL_TYPE_STREAMOUT; + break; + default: + unreachable("invalid query pool type"); + break; + } + + uint64_t data = 0; + rmt_file_write_bits(&data, pool_type, 0, 1); + rmt_file_write_bits(&data, description->has_cpu_access, 2, 2); + fwrite(&data, 1, 1, output); +} + +enum rmt_pipeline_flags { + RMT_PIPELINE_FLAG_INTERNAL = (1 << 0), + RMT_PIPELINE_FLAG_OVERRIDE_GPU_HEAP = (1 << 1), +}; + +enum rmt_pipeline_stage_flags { + RMT_PIPELINE_STAGE_FRAGMENT = 1 << 0, + RMT_PIPELINE_STAGE_TESS_CONTROL = 1 << 1, + RMT_PIPELINE_STAGE_TESS_EVAL = 1 << 2, + RMT_PIPELINE_STAGE_VERTEX = 1 << 3, + RMT_PIPELINE_STAGE_GEOMETRY = 1 << 4, + RMT_PIPELINE_STAGE_COMPUTE = 1 << 5, + RMT_PIPELINE_STAGE_TASK = 1 << 6, + RMT_PIPELINE_STAGE_MESH = 1 << 7 +}; + +static void +rmt_dump_pipeline_resource(struct vk_rmv_pipeline_description *description, FILE *output) +{ + uint64_t data[3] = {0}; + + enum rmt_pipeline_flags flags = 0; + if (description->is_internal) + flags |= RMT_PIPELINE_FLAG_INTERNAL; + + enum rmt_pipeline_stage_flags stage_flags = 0; + if (description->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) + stage_flags |= RMT_PIPELINE_STAGE_FRAGMENT; + if (description->shader_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) + stage_flags |= RMT_PIPELINE_STAGE_TESS_CONTROL; + if (description->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) + stage_flags |= RMT_PIPELINE_STAGE_TESS_EVAL; + if (description->shader_stages & VK_SHADER_STAGE_VERTEX_BIT) + stage_flags |= RMT_PIPELINE_STAGE_VERTEX; + if (description->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) + stage_flags |= RMT_PIPELINE_STAGE_GEOMETRY; + if (description->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT || + description->shader_stages & VK_SHADER_STAGE_RAYGEN_BIT_KHR || + description->shader_stages & 
VK_SHADER_STAGE_INTERSECTION_BIT_KHR || + description->shader_stages & VK_SHADER_STAGE_ANY_HIT_BIT_KHR || + description->shader_stages & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR || + description->shader_stages & VK_SHADER_STAGE_MISS_BIT_KHR || + description->shader_stages & VK_SHADER_STAGE_CALLABLE_BIT_KHR) + stage_flags |= RMT_PIPELINE_STAGE_COMPUTE; + if (description->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT) + stage_flags |= RMT_PIPELINE_STAGE_TASK; + if (description->shader_stages & VK_SHADER_STAGE_MESH_BIT_EXT) + stage_flags |= RMT_PIPELINE_STAGE_MESH; + + rmt_file_write_bits(data, flags, 0, 7); + rmt_file_write_bits(data, description->hash_hi, 8, 71); + rmt_file_write_bits(data, description->hash_lo, 72, 135); + rmt_file_write_bits(data, stage_flags, 136, 143); + rmt_file_write_bits(data, description->is_ngg, 144, 144); + fwrite(data, 19, 1, output); +} + +static void +rmt_dump_descriptor_pool_resource(struct vk_rmv_descriptor_pool_description *description, + FILE *output) +{ + uint64_t data = 0; + /* TODO: figure out a better way of handling descriptor counts > 65535 */ + rmt_file_write_bits(&data, MIN2(description->max_sets, 65535), 0, 15); + rmt_file_write_bits(&data, rmt_valid_pool_size_count(description), 16, 23); + fwrite(&data, 3, 1, output); + + for (uint32_t i = 0; i < description->pool_size_count; ++i) { + data = 0; + enum rmt_descriptor_type rmt_type = + vk_to_rmt_descriptor_type(description->pool_sizes[i].type); + if (rmt_type == RMT_DESCRIPTOR_TYPE_INVALID) + /* Unknown descriptor type, skip */ + continue; + rmt_file_write_bits(&data, rmt_type, 0, 15); + rmt_file_write_bits(&data, MIN2(description->pool_sizes[i].descriptorCount, 65535), 16, 31); + fwrite(&data, 4, 1, output); + } +} + +static void +rmt_dump_command_buffer_resource(struct vk_rmv_command_buffer_description *description, + FILE *output) +{ + uint64_t data[6] = {0}; + rmt_file_write_bits(data, 0, 0, 3); /* flags */ + /* heap for executable commands */ + rmt_file_write_bits(data, rmt_file_domain_to_heap_type(description->preferred_domain, true), 4, + 7); + /* executable command allocation size */ + rmt_file_write_bits(data, description->executable_size, 8, 63); + /* executable command size usable by command buffers */ + rmt_file_write_bits(data, description->app_available_executable_size, 64, 119); + /* heap for embedded data */ + rmt_file_write_bits(data, rmt_file_domain_to_heap_type(description->preferred_domain, true), 120, + 123); + /* embedded data allocation size */ + rmt_file_write_bits(data, description->embedded_data_size, 124, 179); + /* embedded data size usable by command buffers */ + rmt_file_write_bits(data, description->app_available_embedded_data_size, 180, 235); + /* heap for scratch data */ + rmt_file_write_bits(data, rmt_file_domain_to_heap_type(description->preferred_domain, true), 4, + 7); + /* scratch data allocation size */ + rmt_file_write_bits(data, description->scratch_size, 240, 295); + /* scratch data size usable by command buffers */ + rmt_file_write_bits(data, description->app_available_scratch_size, 296, 351); + + fwrite(data, 44, 1, output); +} + +static void +rmt_dump_misc_internal_resource(struct vk_rmv_misc_internal_description *description, + FILE *output) +{ + /* 8 bits of zero-value enum are the only thing in the payload */ + fwrite(&description->type, 1, 1, output); +} + +static void +rmt_dump_resource_create(struct vk_rmv_resource_create_token *token, FILE *output) +{ + uint64_t data = 0; + rmt_file_write_token_bits(&data, token->resource_id, 8, 39); + 
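   /* The RESOURCE_CREATE payload begins with a common header: the 32-bit
    * resource id at token bits 8..39, the owner type at bits 40..41 and the
    * resource type at bits 48..53, followed by the type-specific payload
    * dumped below.  rmt_file_write_token_bits() is assumed here to be a thin
    * wrapper over rmt_file_write_bits() that accounts for the token header
    * byte being written separately; a minimal sketch of such an LSB-first
    * bit packer, under that assumption, would be:
    *
    *    static void
    *    write_bits(uint64_t *dst, uint64_t value, unsigned lo, unsigned hi)
    *    {
    *       for (unsigned i = lo; i <= hi; ++i)
    *          dst[i / 64] |= ((value >> (i - lo)) & 1ull) << (i % 64);
    *    }
    */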
rmt_file_write_token_bits(&data, + token->is_driver_internal ? RMT_RESOURCE_OWNER_TYPE_CLIENT_DRIVER + : RMT_RESOURCE_OWNER_TYPE_APPLICATION, + 40, 41); + rmt_file_write_token_bits(&data, token->type, 48, 53); + fwrite(&data, 6, 1, output); + + switch (token->type) { + case VK_RMV_RESOURCE_TYPE_GPU_EVENT: + rmt_dump_event_resource(&token->event, output); + break; + case VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE: + rmt_dump_border_color_palette_resource(&token->border_color_palette, output); + break; + case VK_RMV_RESOURCE_TYPE_HEAP: + rmt_dump_heap_resource(&token->heap, output); + break; + case VK_RMV_RESOURCE_TYPE_BUFFER: + rmt_dump_buffer_resource(&token->buffer, output); + break; + case VK_RMV_RESOURCE_TYPE_IMAGE: + rmt_dump_image_resource(&token->image, output); + break; + case VK_RMV_RESOURCE_TYPE_QUERY_HEAP: + rmt_dump_query_pool_resource(&token->query_pool, output); + break; + case VK_RMV_RESOURCE_TYPE_PIPELINE: + rmt_dump_pipeline_resource(&token->pipeline, output); + break; + case VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL: + rmt_dump_descriptor_pool_resource(&token->descriptor_pool, output); + break; + case VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR: + rmt_dump_command_buffer_resource(&token->command_buffer, output); + break; + case VK_RMV_RESOURCE_TYPE_MISC_INTERNAL: + rmt_dump_misc_internal_resource(&token->misc_internal, output); + break; + default: + unreachable("invalid resource type"); + } +} + +static void +rmt_dump_resource_bind(struct vk_rmv_resource_bind_token *token, FILE *output) +{ + uint64_t data[3] = {0}; + rmt_file_write_token_bits(data, token->address & 0xFFFFFFFFFFFF, 8, 55); + rmt_file_write_token_bits(data, token->size, 56, 99); + rmt_file_write_token_bits(data, token->is_system_memory, 100, 100); + rmt_file_write_token_bits(data, token->resource_id, 104, 135); + fwrite(data, 16, 1, output); +} + +static void +rmt_dump_resource_reference(struct vk_rmv_resource_reference_token *token, + FILE *output) +{ + uint64_t data = 0; + rmt_file_write_token_bits(&data, token->residency_removed, 8, 8); + rmt_file_write_token_bits(&data, token->virtual_address & 0xFFFFFFFFFFFF, 9, 56); + fwrite(&data, 7, 1, output); +} + +static void +rmt_dump_resource_destroy(struct vk_rmv_resource_destroy_token *token, FILE *output) +{ + uint64_t data = 0; + rmt_file_write_token_bits(&data, token->resource_id, 8, 39); + fwrite(&data, 4, 1, output); +} + +enum rmt_virtual_allocation_owner_type { + RMT_VIRTUAL_ALLOCATION_OWNER_TYPE_APPLICATION, + RMT_VIRTUAL_ALLOCATION_OWNER_TYPE_PAL, + RMT_VIRTUAL_ALLOCATION_OWNER_TYPE_CLIENT_DRIVER, + RMT_VIRTUAL_ALLOCATION_OWNER_TYPE_KERNEL_DRIVER +}; + +static void +rmt_dump_virtual_alloc(struct vk_rmv_virtual_allocate_token *token, FILE *output) +{ + uint64_t data[2] = {0}; + rmt_file_write_token_bits(data, token->page_count - 1, 8, 31); + rmt_file_write_token_bits(data, + token->is_driver_internal + ? RMT_VIRTUAL_ALLOCATION_OWNER_TYPE_CLIENT_DRIVER + : RMT_VIRTUAL_ALLOCATION_OWNER_TYPE_APPLICATION, + 32, 33); + rmt_file_write_token_bits(data, token->address & 0xFFFFFFFFFFFF, 34, 81); + if (token->preferred_domains) { + rmt_file_write_token_bits( + data, rmt_file_domain_to_heap_type(token->preferred_domains, !token->is_in_invisible_vram), + 82, 83); + /* num. 
of heap types */ + rmt_file_write_token_bits(data, 1, 90, 92); + } else + rmt_file_write_token_bits(data, 0, 90, 92); + fwrite(data, 11, 1, output); +} + +static void +rmt_dump_virtual_free(struct vk_rmv_virtual_free_token *token, FILE *output) +{ + uint64_t data = 0; + rmt_file_write_token_bits(&data, token->address & 0xFFFFFFFFFFFF, 8, 56); + fwrite(&data, 6, 1, output); +} + +enum rmt_page_table_controller { + RMT_PAGE_TABLE_CONTROLLER_OS, + RMT_PAGE_TABLE_CONTROLLER_KMD, +}; + +static void +rmt_dump_page_table_update(struct vk_rmv_page_table_update_token *token, + FILE *output) +{ + uint64_t virtual_page_idx = (token->virtual_address / 4096); + uint64_t physical_page_idx = (token->physical_address / 4096); + + enum rmt_page_size page_size = rmt_size_to_page_size(token->page_size); + + uint64_t data[3] = {0}; + rmt_file_write_token_bits(data, virtual_page_idx & 0xFFFFFFFFF, 8, 43); + rmt_file_write_token_bits(data, physical_page_idx & 0xFFFFFFFFF, 44, 79); + rmt_file_write_token_bits(data, token->page_count, 80, 99); + rmt_file_write_token_bits(data, page_size, 100, 102); + rmt_file_write_token_bits(data, token->is_unmap, 103, 103); + rmt_file_write_token_bits(data, token->pid, 104, 135); + rmt_file_write_token_bits(data, token->type, 136, 137); + rmt_file_write_token_bits(data, RMT_PAGE_TABLE_CONTROLLER_KMD, 138, 138); + fwrite(data, 17, 1, output); +} + +enum rmt_userdata_type { + RMT_USERDATA_TYPE_NAME, + RMT_USERDATA_TYPE_SNAPSHOT, + RMT_USERDATA_TYPE_BINARY, + RMT_USERDATA_TYPE_RESERVED, + RMT_USERDATA_TYPE_CORRELATION, + RMT_USERDATA_TYPE_MARK_IMPLICIT_RESOURCE, +}; + +static void +rmt_dump_userdata(struct vk_rmv_userdata_token *token, FILE *output) +{ + uint64_t data = 0; + /* userdata type */ + rmt_file_write_token_bits(&data, RMT_USERDATA_TYPE_NAME, 8, 11); + /* size of userdata payload */ + rmt_file_write_token_bits(&data, strlen(token->name) + sizeof(uint32_t) + 1, 12, 23); + + fwrite(&data, 3, 1, output); + fwrite(token->name, 1, strlen(token->name) + 1, output); + fwrite(&token->resource_id, sizeof(uint32_t), 1, output); +} + +static void +rmt_dump_misc(struct vk_rmv_misc_token *token, FILE *output) +{ + uint64_t data = 0; + rmt_file_write_token_bits(&data, token->type, 8, 11); + fwrite(&data, 1, 1, output); +} + +static void +rmt_dump_cpu_map(struct vk_rmv_cpu_map_token *token, FILE *output) +{ + uint64_t data = 0; + rmt_file_write_token_bits(&data, token->address & 0xFFFFFFFFFFFF, 8, 55); + rmt_file_write_token_bits(&data, token->unmapped, 56, 56); + fwrite(&data, 7, 1, output); +} + +static void +rmt_dump_data(struct vk_memory_trace_data *data, FILE *output) +{ + struct rmt_file_header header = {0}; + struct rmt_file_chunk_system_info system_info_chunk = {0}; + struct rmt_file_chunk_adapter_info adapter_info_chunk = {0}; + struct rmt_file_chunk_rmt_data data_chunk = {0}; + + /* RMT header */ + rmt_fill_header(&header); + fwrite(&header, sizeof(header), 1, output); + + /* System info */ + rmt_fill_chunk_system_info(&system_info_chunk); + fwrite(&system_info_chunk, sizeof(system_info_chunk), 1, output); + + /* Segment info */ + for (int32_t i = 0; i < 3; ++i) { + struct rmt_file_chunk_segment_info segment_info_chunk = {0}; + + rmt_fill_chunk_segment_info(data, &data->device_info, &segment_info_chunk, i); + fwrite(&segment_info_chunk, sizeof(segment_info_chunk), 1, output); + } + + /* Adapter info */ + rmt_fill_chunk_adapter_info(&data->device_info, &adapter_info_chunk); + fwrite(&adapter_info_chunk, sizeof(adapter_info_chunk), 1, output); + + long chunk_start = 
ftell(output); + /* Write a dummy data chunk to reserve space */ + fwrite(&data_chunk, sizeof(data_chunk), 1, output); + + qsort(data->tokens.data, util_dynarray_num_elements(&data->tokens, struct vk_rmv_token), + sizeof(struct vk_rmv_token), vk_rmv_token_compare); + + uint64_t current_timestamp = 0; + if (util_dynarray_num_elements(&data->tokens, struct vk_rmv_token)) + current_timestamp = + util_dynarray_element(&data->tokens, struct vk_rmv_token, 0)->timestamp / RMT_TIMESTAMP_DIVISOR; + + long stream_start = ftell(output); + + struct vk_rmv_timestamp_token timestamp_token; + timestamp_token.value = 0; + rmt_dump_timestamp(&timestamp_token, output); + + util_dynarray_foreach (&data->tokens, struct vk_rmv_token, token) { + /* Only temporarily modify the token's timestamp in case of multiple traces */ + uint64_t old_timestamp = token->timestamp; + /* adjust timestamp to 1 MHz, see rmt_fill_chunk_system_info */ + token->timestamp /= RMT_TIMESTAMP_DIVISOR; + + int64_t delta = token->timestamp - current_timestamp; + + /* Time values are stored divided by 32 */ + delta /= 32; + + /* + * Each token can hold up to 4 bits of time delta. If the delta doesn't + * fit in 4 bits, an additional token containing more space for the delta + * has to be emitted. + */ + if (delta > 0xF) { + rmt_dump_time_delta(delta, output); + delta = 0; + } + + uint64_t token_header = 0; + rmt_file_write_bits(&token_header, token_type_to_rmt(token->type), 0, 3); + rmt_file_write_bits(&token_header, delta, 4, 7); + fwrite(&token_header, 1, 1, output); + + switch (token->type) { + case VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE: + rmt_dump_virtual_alloc(&token->data.virtual_allocate, output); + break; + case VK_RMV_TOKEN_TYPE_VIRTUAL_FREE: + rmt_dump_virtual_free(&token->data.virtual_free, output); + break; + case VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE: + rmt_dump_page_table_update(&token->data.page_table_update, output); + break; + case VK_RMV_TOKEN_TYPE_RESOURCE_CREATE: + rmt_dump_resource_create(&token->data.resource_create, output); + break; + case VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY: + rmt_dump_resource_destroy(&token->data.resource_destroy, output); + break; + case VK_RMV_TOKEN_TYPE_RESOURCE_BIND: + rmt_dump_resource_bind(&token->data.resource_bind, output); + break; + case VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE: + rmt_dump_resource_reference(&token->data.resource_reference, output); + break; + case VK_RMV_TOKEN_TYPE_USERDATA: + rmt_dump_userdata(&token->data.userdata, output); + break; + case VK_RMV_TOKEN_TYPE_MISC: + rmt_dump_misc(&token->data.misc, output); + break; + case VK_RMV_TOKEN_TYPE_CPU_MAP: + rmt_dump_cpu_map(&token->data.cpu_map, output); + break; + default: + unreachable("invalid token type"); + } + + current_timestamp = token->timestamp; + token->timestamp = old_timestamp; + } + long stream_end = ftell(output); + + /* Go back and write the correct chunk data. 
*/ + fseek(output, chunk_start, SEEK_SET); + rmt_fill_chunk_rmt_data(stream_end - stream_start, &data_chunk); + fwrite(&data_chunk, sizeof(data_chunk), 1, output); +} + +int +vk_dump_rmv_capture(struct vk_memory_trace_data *data) +{ + char filename[2048]; + struct tm now; + FILE *f; + + time_t t = time(NULL); + now = *localtime(&t); + + snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rmv", + util_get_process_name(), 1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, + now.tm_min, now.tm_sec); + + f = fopen(filename, "wb"); + if (!f) + return -1; + + rmt_dump_data(data, f); + + fprintf(stderr, "RMV capture saved to '%s'\n", filename); + + fclose(f); + return 0; +} diff --git a/src/vulkan/runtime/rmv/vk_rmv_tokens.h b/src/vulkan/runtime/rmv/vk_rmv_tokens.h new file mode 100644 index 00000000000..e16998b1184 --- /dev/null +++ b/src/vulkan/runtime/rmv/vk_rmv_tokens.h @@ -0,0 +1,304 @@ +/* + * Copyright © 2022 Friedrich Vock + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VK_RMV_TOKENS_H +#define VK_RMV_TOKENS_H + +#include <stdint.h> +#include <string.h> +#include "util/os_time.h" +#include <vulkan/vulkan_core.h> + +/* + * Implemented types of tokens. + */ +enum vk_rmv_token_type { + VK_RMV_TOKEN_TYPE_USERDATA, + VK_RMV_TOKEN_TYPE_MISC, + VK_RMV_TOKEN_TYPE_RESOURCE_BIND, + VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, + VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE, + VK_RMV_TOKEN_TYPE_CPU_MAP, + VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, + VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, + VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY +}; + +/* + * The type of miscellaneous event reported through a MISC token. 
+ */ +enum vk_rmv_misc_event_type { + VK_RMV_MISC_EVENT_TYPE_SUBMIT_GRAPHICS, + VK_RMV_MISC_EVENT_TYPE_SUBMIT_COMPUTE, + VK_RMV_MISC_EVENT_TYPE_SUBMIT_COPY, + VK_RMV_MISC_EVENT_TYPE_PRESENT, + VK_RMV_MISC_EVENT_TYPE_INVALIDATE_RANGES, + VK_RMV_MISC_EVENT_TYPE_FLUSH_MAPPED_RANGE, + VK_RMV_MISC_EVENT_TYPE_TRIM_MEMORY +}; + +enum vk_rmv_resource_type { + VK_RMV_RESOURCE_TYPE_IMAGE, + VK_RMV_RESOURCE_TYPE_BUFFER, + VK_RMV_RESOURCE_TYPE_GPU_EVENT, + VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE, + VK_RMV_RESOURCE_TYPE_INDIRECT_CMD_GENERATOR, + VK_RMV_RESOURCE_TYPE_MOTION_ESTIMATOR, + VK_RMV_RESOURCE_TYPE_PERF_EXPERIMENT, + VK_RMV_RESOURCE_TYPE_QUERY_HEAP, + VK_RMV_RESOURCE_TYPE_VIDEO_DECODER, + VK_RMV_RESOURCE_TYPE_VIDEO_ENCODER, + VK_RMV_RESOURCE_TYPE_TIMESTAMP, + VK_RMV_RESOURCE_TYPE_HEAP, + VK_RMV_RESOURCE_TYPE_PIPELINE, + VK_RMV_RESOURCE_TYPE_DESCRIPTOR_HEAP, + VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL, + VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR, + VK_RMV_RESOURCE_TYPE_MISC_INTERNAL +}; + +/* + * Token data for all types of tokens. + */ + +struct vk_rmv_timestamp_token { + uint64_t value; +}; + +struct vk_rmv_userdata_token { + char *name; + uint32_t resource_id; +}; + +struct vk_rmv_misc_token { + enum vk_rmv_misc_event_type type; +}; + +struct vk_rmv_resource_bind_token { + uint64_t address; + uint64_t size; + bool is_system_memory; + uint32_t resource_id; +}; + +struct vk_rmv_resource_reference_token { + uint64_t virtual_address; + bool residency_removed; +}; + +enum vk_rmv_page_table_update_type { + VK_RMV_PAGE_TABLE_UPDATE_TYPE_DISCARD, + VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE, + VK_RMV_PAGE_TABLE_UPDATE_TYPE_TRANSFER +}; + +struct vk_rmv_page_table_update_token { + uint64_t virtual_address; + uint64_t physical_address; + uint64_t page_count; + uint32_t page_size; + int pid; + bool is_unmap; + enum vk_rmv_page_table_update_type type; +}; + +struct vk_rmv_cpu_map_token { + uint64_t address; + bool unmapped; +}; + +struct vk_rmv_virtual_free_token { + uint64_t address; +}; + +enum vk_rmv_kernel_memory_domain { + VK_RMV_KERNEL_MEMORY_DOMAIN_CPU = 0x1, + VK_RMV_KERNEL_MEMORY_DOMAIN_GTT = 0x2, + VK_RMV_KERNEL_MEMORY_DOMAIN_VRAM = 0x4 +}; + +struct vk_rmv_virtual_allocate_token { + uint32_t page_count; + bool is_driver_internal; + bool is_in_invisible_vram; + uint64_t address; + enum vk_rmv_kernel_memory_domain preferred_domains; +}; + +struct vk_rmv_image_description { + VkImageCreateFlags create_flags; + VkImageUsageFlags usage_flags; + VkImageType type; + VkExtent3D extent; + VkFormat format; + uint32_t num_mips; + uint32_t num_slices; + VkImageTiling tiling; + + uint32_t log2_samples; + uint32_t log2_storage_samples; + + uint32_t alignment_log2; + uint32_t metadata_alignment_log2; + uint32_t image_alignment_log2; + + uint64_t size; + uint64_t metadata_size; + uint64_t metadata_header_size; + + uint64_t metadata_offset; + uint64_t metadata_header_offset; + + bool presentable; +}; + +struct vk_rmv_event_description { + VkEventCreateFlags flags; +}; + +struct vk_rmv_border_color_palette_description { + uint8_t num_entries; +}; + +struct vk_rmv_buffer_description { + VkBufferCreateFlags create_flags; + VkBufferUsageFlags usage_flags; + uint64_t size; +}; + +struct vk_rmv_query_pool_description { + VkQueryType type; + bool has_cpu_access; +}; + +/* The heap description refers to a VkDeviceMemory resource. 
*/ +struct vk_rmv_heap_description { + VkMemoryAllocateFlags alloc_flags; + uint64_t size; + uint32_t alignment; + uint32_t heap_index; +}; + +struct vk_rmv_pipeline_description { + bool is_internal; + uint64_t hash_lo; + uint64_t hash_hi; + VkShaderStageFlags shader_stages; + bool is_ngg; +}; + +struct vk_rmv_descriptor_pool_description { + uint32_t max_sets; + uint32_t pool_size_count; + VkDescriptorPoolSize *pool_sizes; +}; + +struct vk_rmv_command_buffer_description { + enum vk_rmv_kernel_memory_domain preferred_domain; + uint64_t executable_size; + uint64_t app_available_executable_size; + uint64_t embedded_data_size; + uint64_t app_available_embedded_data_size; + uint64_t scratch_size; + uint64_t app_available_scratch_size; +}; + +enum vk_rmv_misc_internal_type { + VK_RMV_MISC_INTERNAL_TYPE_PADDING, +}; + +struct vk_rmv_misc_internal_description { + enum vk_rmv_misc_internal_type type; +}; + +struct vk_rmv_resource_create_token { + uint32_t resource_id; + bool is_driver_internal; + enum vk_rmv_resource_type type; + union { + struct vk_rmv_event_description event; + struct vk_rmv_border_color_palette_description border_color_palette; + struct vk_rmv_image_description image; + struct vk_rmv_buffer_description buffer; + struct vk_rmv_query_pool_description query_pool; + struct vk_rmv_heap_description heap; + struct vk_rmv_pipeline_description pipeline; + struct vk_rmv_descriptor_pool_description descriptor_pool; + struct vk_rmv_command_buffer_description command_buffer; + struct vk_rmv_misc_internal_description misc_internal; + }; +}; + +struct vk_rmv_resource_destroy_token { + uint32_t resource_id; +}; + +struct vk_rmv_token { + enum vk_rmv_token_type type; + uint64_t timestamp; + union { + struct vk_rmv_timestamp_token timestamp; + struct vk_rmv_userdata_token userdata; + struct vk_rmv_misc_token misc; + struct vk_rmv_resource_bind_token resource_bind; + struct vk_rmv_resource_reference_token resource_reference; + struct vk_rmv_page_table_update_token page_table_update; + struct vk_rmv_cpu_map_token cpu_map; + struct vk_rmv_virtual_free_token virtual_free; + struct vk_rmv_virtual_allocate_token virtual_allocate; + struct vk_rmv_resource_create_token resource_create; + struct vk_rmv_resource_destroy_token resource_destroy; + } data; +}; + +static inline size_t +vk_rmv_token_size_from_type(enum vk_rmv_token_type type) +{ + switch (type) { + case VK_RMV_TOKEN_TYPE_USERDATA: + return sizeof(struct vk_rmv_userdata_token); + case VK_RMV_TOKEN_TYPE_MISC: + return sizeof(struct vk_rmv_misc_token); + case VK_RMV_TOKEN_TYPE_RESOURCE_BIND: + return sizeof(struct vk_rmv_resource_bind_token); + case VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE: + return sizeof(struct vk_rmv_resource_reference_token); + case VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE: + return sizeof(struct vk_rmv_page_table_update_token); + case VK_RMV_TOKEN_TYPE_CPU_MAP: + return sizeof(struct vk_rmv_cpu_map_token); + case VK_RMV_TOKEN_TYPE_VIRTUAL_FREE: + return sizeof(struct vk_rmv_virtual_free_token); + case VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE: + return sizeof(struct vk_rmv_virtual_allocate_token); + case VK_RMV_TOKEN_TYPE_RESOURCE_CREATE: + return sizeof(struct vk_rmv_resource_create_token); + case VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY: + return sizeof(struct vk_rmv_resource_destroy_token); + default: + unreachable("invalid token type"); + } +} + +#endif diff --git a/src/vulkan/runtime/vk_acceleration_structure.c b/src/vulkan/runtime/vk_acceleration_structure.c new file mode 100644 index 00000000000..074b94ea85c --- /dev/null +++ 
b/src/vulkan/runtime/vk_acceleration_structure.c @@ -0,0 +1,94 @@ +/* + * Copyright © 2021 Bas Nieuwenhuizen + * Copyright © 2023 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_acceleration_structure.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" + +VkDeviceAddress +vk_acceleration_structure_get_va(struct vk_acceleration_structure *accel_struct) +{ + VkBufferDeviceAddressInfo info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = accel_struct->buffer, + }; + + VkDeviceAddress base_addr = accel_struct->base.device->dispatch_table.GetBufferDeviceAddress( + vk_device_to_handle(accel_struct->base.device), &info); + + return base_addr + accel_struct->offset; +} + + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateAccelerationStructureKHR(VkDevice _device, + const VkAccelerationStructureCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkAccelerationStructureKHR *pAccelerationStructure) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + struct vk_acceleration_structure *accel_struct = vk_object_alloc( + device, pAllocator, sizeof(struct vk_acceleration_structure), + VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR); + + if (!accel_struct) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + accel_struct->buffer = pCreateInfo->buffer; + accel_struct->offset = pCreateInfo->offset; + accel_struct->size = pCreateInfo->size; + + if (pCreateInfo->deviceAddress && + vk_acceleration_structure_get_va(accel_struct) != pCreateInfo->deviceAddress) + return vk_error(device, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); + + *pAccelerationStructure = vk_acceleration_structure_to_handle(accel_struct); + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyAccelerationStructureKHR(VkDevice _device, + VkAccelerationStructureKHR accelerationStructure, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure); + + if (!accel_struct) + return; + + vk_object_free(device, pAllocator, accel_struct); +} + +VKAPI_ATTR VkDeviceAddress VKAPI_CALL +vk_common_GetAccelerationStructureDeviceAddressKHR( + VkDevice _device, const VkAccelerationStructureDeviceAddressInfoKHR *pInfo) +{ + VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfo->accelerationStructure); + return 
vk_acceleration_structure_get_va(accel_struct); +} diff --git a/src/vulkan/runtime/vk_acceleration_structure.h b/src/vulkan/runtime/vk_acceleration_structure.h new file mode 100644 index 00000000000..bcc2eff4660 --- /dev/null +++ b/src/vulkan/runtime/vk_acceleration_structure.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2021 Bas Nieuwenhuizen + * Copyright © 2023 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VK_ACCELERATION_STRUCTURE_H +#define VK_ACCELERATION_STRUCTURE_H + +#include "vk_object.h" + +struct vk_acceleration_structure { + struct vk_object_base base; + + VkBuffer buffer; + uint64_t offset; + uint64_t size; +}; + +VkDeviceAddress vk_acceleration_structure_get_va(struct vk_acceleration_structure *accel_struct); + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_acceleration_structure, base, VkAccelerationStructureKHR, + VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR) + +#endif diff --git a/src/vulkan/runtime/vk_android.c b/src/vulkan/runtime/vk_android.c new file mode 100644 index 00000000000..df4efae1b5f --- /dev/null +++ b/src/vulkan/runtime/vk_android.c @@ -0,0 +1,361 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_android.h" + +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_image.h" +#include "vk_log.h" +#include "vk_queue.h" +#include "vk_util.h" + +#include "util/libsync.h" + +#include <hardware/gralloc.h> + +#if ANDROID_API_LEVEL >= 26 +#include <hardware/gralloc1.h> +#endif + +#include <unistd.h> + +#if ANDROID_API_LEVEL >= 26 +#include <vndk/hardware_buffer.h> + +/* From the Android hardware_buffer.h header: + * + * "The buffer will be written to by the GPU as a framebuffer attachment. + * + * Note that the name of this flag is somewhat misleading: it does not + * imply that the buffer contains a color format. A buffer with depth or + * stencil format that will be used as a framebuffer attachment should + * also have this flag. Use the equivalent flag + * AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER to avoid this confusion." + * + * The flag was renamed from COLOR_OUTPUT to FRAMEBUFFER at Android API + * version 29. + */ +#if ANDROID_API_LEVEL < 29 +#define AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT +#endif + +/* Convert an AHB format to a VkFormat, based on the "AHardwareBuffer Format + * Equivalence" table in Vulkan spec. + * + * Note that this only covers a subset of AHB formats defined in NDK. Drivers + * can support more AHB formats, including private ones. + */ +VkFormat +vk_ahb_format_to_image_format(uint32_t ahb_format) +{ + switch (ahb_format) { + case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + return VK_FORMAT_R8G8B8A8_UNORM; + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + return VK_FORMAT_R8G8B8_UNORM; + case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + case AHARDWAREBUFFER_FORMAT_D16_UNORM: + return VK_FORMAT_D16_UNORM; + case AHARDWAREBUFFER_FORMAT_D24_UNORM: + return VK_FORMAT_X8_D24_UNORM_PACK32; + case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT: + return VK_FORMAT_D24_UNORM_S8_UINT; + case AHARDWAREBUFFER_FORMAT_D32_FLOAT: + return VK_FORMAT_D32_SFLOAT; + case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT: + return VK_FORMAT_D32_SFLOAT_S8_UINT; + case AHARDWAREBUFFER_FORMAT_S8_UINT: + return VK_FORMAT_S8_UINT; + default: + return VK_FORMAT_UNDEFINED; + } +} + +/* Convert a VkFormat to an AHB format, based on the "AHardwareBuffer Format + * Equivalence" table in Vulkan spec. + * + * Note that this only covers a subset of AHB formats defined in NDK. Drivers + * can support more AHB formats, including private ones. 
+ */ +uint32_t +vk_image_format_to_ahb_format(VkFormat vk_format) +{ + switch (vk_format) { + case VK_FORMAT_R8G8B8A8_UNORM: + return AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_R8G8B8_UNORM: + return AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + return AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + return AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM; + case VK_FORMAT_D16_UNORM: + return AHARDWAREBUFFER_FORMAT_D16_UNORM; + case VK_FORMAT_X8_D24_UNORM_PACK32: + return AHARDWAREBUFFER_FORMAT_D24_UNORM; + case VK_FORMAT_D24_UNORM_S8_UINT: + return AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT; + case VK_FORMAT_D32_SFLOAT: + return AHARDWAREBUFFER_FORMAT_D32_FLOAT; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT; + case VK_FORMAT_S8_UINT: + return AHARDWAREBUFFER_FORMAT_S8_UINT; + default: + return 0; + } +} + +/* Construct ahw usage mask from image usage bits, see + * 'AHardwareBuffer Usage Equivalence' in Vulkan spec. + */ +uint64_t +vk_image_usage_to_ahb_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage) +{ + uint64_t ahb_usage = 0; + if (vk_usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + ahb_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + if (vk_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) + ahb_usage |= AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER; + + if (vk_usage & VK_IMAGE_USAGE_STORAGE_BIT) + ahb_usage |= AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + ahb_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP; + + if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT) + ahb_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT; + + /* No usage bits set - set at least one GPU usage. */ + if (ahb_usage == 0) + ahb_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + return ahb_usage; +} + +struct AHardwareBuffer * +vk_alloc_ahardware_buffer(const VkMemoryAllocateInfo *pAllocateInfo) +{ + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, + MEMORY_DEDICATED_ALLOCATE_INFO); + + uint32_t w = 0; + uint32_t h = 1; + uint32_t layers = 1; + uint32_t format = 0; + uint64_t usage = 0; + + /* If caller passed dedicated information. 
*/ + if (dedicated_info && dedicated_info->image) { + VK_FROM_HANDLE(vk_image, image, dedicated_info->image); + + if (!image->ahb_format) + return NULL; + + w = image->extent.width; + h = image->extent.height; + layers = image->array_layers; + format = image->ahb_format; + usage = vk_image_usage_to_ahb_usage(image->create_flags, + image->usage); + } else { + /* AHB export allocation for VkBuffer requires a valid allocationSize */ + assert(pAllocateInfo->allocationSize); + w = pAllocateInfo->allocationSize; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER | + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } + + struct AHardwareBuffer_Desc desc = { + .width = w, + .height = h, + .layers = layers, + .format = format, + .usage = usage, + }; + + struct AHardwareBuffer *ahb; + if (AHardwareBuffer_allocate(&desc, &ahb) != 0) + return NULL; + + return ahb; +} +#endif /* ANDROID_API_LEVEL >= 26 */ + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_AcquireImageANDROID(VkDevice _device, + VkImage image, + int nativeFenceFd, + VkSemaphore semaphore, + VkFence fence) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VkResult result = VK_SUCCESS; + + /* From https://source.android.com/devices/graphics/implement-vulkan : + * + * "The driver takes ownership of the fence file descriptor and closes + * the fence file descriptor when no longer needed. The driver must do + * so even if neither a semaphore or fence object is provided, or even + * if vkAcquireImageANDROID fails and returns an error." + * + * The Vulkan spec for VkImportFence/SemaphoreFdKHR(), however, requires + * the file descriptor to be left alone on failure. + */ + int semaphore_fd = -1, fence_fd = -1; + if (nativeFenceFd >= 0) { + if (semaphore != VK_NULL_HANDLE && fence != VK_NULL_HANDLE) { + /* We have both so we have to import the sync file twice. One of + * them needs to be a dup. + */ + semaphore_fd = nativeFenceFd; + fence_fd = dup(nativeFenceFd); + if (fence_fd < 0) { + VkResult err = (errno == EMFILE) ? 
VK_ERROR_TOO_MANY_OBJECTS : + VK_ERROR_OUT_OF_HOST_MEMORY; + close(nativeFenceFd); + return vk_error(device, err); + } + } else if (semaphore != VK_NULL_HANDLE) { + semaphore_fd = nativeFenceFd; + } else if (fence != VK_NULL_HANDLE) { + fence_fd = nativeFenceFd; + } else { + /* Nothing to import into so we have to close the file */ + close(nativeFenceFd); + } + } + + if (semaphore != VK_NULL_HANDLE) { + const VkImportSemaphoreFdInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .semaphore = semaphore, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + .fd = semaphore_fd, + }; + result = device->dispatch_table.ImportSemaphoreFdKHR(_device, &info); + if (result == VK_SUCCESS) + semaphore_fd = -1; /* The driver took ownership */ + } + + if (result == VK_SUCCESS && fence != VK_NULL_HANDLE) { + const VkImportFenceFdInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, + .fence = fence, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, + .fd = fence_fd, + }; + result = device->dispatch_table.ImportFenceFdKHR(_device, &info); + if (result == VK_SUCCESS) + fence_fd = -1; /* The driver took ownership */ + } + + if (semaphore_fd >= 0) + close(semaphore_fd); + if (fence_fd >= 0) + close(fence_fd); + + return result; +} + +static VkResult +vk_anb_semaphore_init_once(struct vk_queue *queue, struct vk_device *device) +{ + if (queue->anb_semaphore != VK_NULL_HANDLE) + return VK_SUCCESS; + + const VkExportSemaphoreCreateInfo export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + const VkSemaphoreCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &export_info, + }; + return device->dispatch_table.CreateSemaphore(vk_device_to_handle(device), + &create_info, NULL, + &queue->anb_semaphore); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_QueueSignalReleaseImageANDROID(VkQueue _queue, + uint32_t waitSemaphoreCount, + const VkSemaphore *pWaitSemaphores, + VkImage image, + int *pNativeFenceFd) +{ + VK_FROM_HANDLE(vk_queue, queue, _queue); + struct vk_device *device = queue->base.device; + VkResult result = VK_SUCCESS; + + STACK_ARRAY(VkPipelineStageFlags, stage_flags, MAX2(1, waitSemaphoreCount)); + for (uint32_t i = 0; i < MAX2(1, waitSemaphoreCount); i++) + stage_flags[i] = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + + result = vk_anb_semaphore_init_once(queue, device); + if (result != VK_SUCCESS) { + STACK_ARRAY_FINISH(stage_flags); + return result; + } + + const VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .waitSemaphoreCount = waitSemaphoreCount, + .pWaitSemaphores = pWaitSemaphores, + .pWaitDstStageMask = stage_flags, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &queue->anb_semaphore, + }; + result = device->dispatch_table.QueueSubmit(_queue, 1, &submit_info, + VK_NULL_HANDLE); + STACK_ARRAY_FINISH(stage_flags); + if (result != VK_SUCCESS) + return result; + + const VkSemaphoreGetFdInfoKHR get_fd = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + .semaphore = queue->anb_semaphore, + }; + return device->dispatch_table.GetSemaphoreFdKHR(vk_device_to_handle(device), + &get_fd, pNativeFenceFd); +} diff --git a/src/vulkan/runtime/vk_android.h b/src/vulkan/runtime/vk_android.h new file mode 100644 index 00000000000..496b6c54751 
--- /dev/null +++ b/src/vulkan/runtime/vk_android.h @@ -0,0 +1,79 @@ +/* + * Copyright © 2023 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_ANDROID_H +#define VK_ANDROID_H + +#include "vulkan/vulkan_core.h" + +#include "util/detect_os.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 + +VkFormat vk_ahb_format_to_image_format(uint32_t ahb_format); + +uint32_t vk_image_format_to_ahb_format(VkFormat vk_format); + +uint64_t vk_image_usage_to_ahb_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage); + +struct AHardwareBuffer * +vk_alloc_ahardware_buffer(const VkMemoryAllocateInfo *pAllocateInfo); + +#else /* DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 */ + +static inline VkFormat +vk_ahb_format_to_image_format(uint32_t ahb_format) +{ + return VK_FORMAT_UNDEFINED; +} + +static inline uint32_t +vk_image_format_to_ahb_format(VkFormat vk_format) +{ + return 0; +} + +static inline uint64_t +vk_image_usage_to_ahb_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage) +{ + return 0; +} + +static inline struct AHardwareBuffer * +vk_alloc_ahardware_buffer(const VkMemoryAllocateInfo *pAllocateInfo) +{ + return NULL; +} + +#endif /* ANDROID_API_LEVEL >= 26 */ + +#ifdef __cplusplus +} +#endif + +#endif /* VK_ANDROID_H */ diff --git a/src/vulkan/runtime/vk_blend.c b/src/vulkan/runtime/vk_blend.c new file mode 100644 index 00000000000..b7253bb0ea7 --- /dev/null +++ b/src/vulkan/runtime/vk_blend.c @@ -0,0 +1,115 @@ +/* + * Copyright 2023 Valve Corporation + * Copyright 2021 Collabora Ltd. 
+ * SPDX-License-Identifier: MIT + */ + +#include "vk_blend.h" +#include "util/macros.h" + +enum pipe_logicop +vk_logic_op_to_pipe(VkLogicOp in) +{ + switch (in) { + case VK_LOGIC_OP_CLEAR: + return PIPE_LOGICOP_CLEAR; + case VK_LOGIC_OP_AND: + return PIPE_LOGICOP_AND; + case VK_LOGIC_OP_AND_REVERSE: + return PIPE_LOGICOP_AND_REVERSE; + case VK_LOGIC_OP_COPY: + return PIPE_LOGICOP_COPY; + case VK_LOGIC_OP_AND_INVERTED: + return PIPE_LOGICOP_AND_INVERTED; + case VK_LOGIC_OP_NO_OP: + return PIPE_LOGICOP_NOOP; + case VK_LOGIC_OP_XOR: + return PIPE_LOGICOP_XOR; + case VK_LOGIC_OP_OR: + return PIPE_LOGICOP_OR; + case VK_LOGIC_OP_NOR: + return PIPE_LOGICOP_NOR; + case VK_LOGIC_OP_EQUIVALENT: + return PIPE_LOGICOP_EQUIV; + case VK_LOGIC_OP_INVERT: + return PIPE_LOGICOP_INVERT; + case VK_LOGIC_OP_OR_REVERSE: + return PIPE_LOGICOP_OR_REVERSE; + case VK_LOGIC_OP_COPY_INVERTED: + return PIPE_LOGICOP_COPY_INVERTED; + case VK_LOGIC_OP_OR_INVERTED: + return PIPE_LOGICOP_OR_INVERTED; + case VK_LOGIC_OP_NAND: + return PIPE_LOGICOP_NAND; + case VK_LOGIC_OP_SET: + return PIPE_LOGICOP_SET; + default: + unreachable("Invalid logicop"); + } +} + +enum pipe_blend_func +vk_blend_op_to_pipe(VkBlendOp in) +{ + switch (in) { + case VK_BLEND_OP_ADD: + return PIPE_BLEND_ADD; + case VK_BLEND_OP_SUBTRACT: + return PIPE_BLEND_SUBTRACT; + case VK_BLEND_OP_REVERSE_SUBTRACT: + return PIPE_BLEND_REVERSE_SUBTRACT; + case VK_BLEND_OP_MIN: + return PIPE_BLEND_MIN; + case VK_BLEND_OP_MAX: + return PIPE_BLEND_MAX; + default: + unreachable("Invalid blend op"); + } +} + +enum pipe_blendfactor +vk_blend_factor_to_pipe(enum VkBlendFactor vk_factor) +{ + switch (vk_factor) { + case VK_BLEND_FACTOR_ZERO: + return PIPE_BLENDFACTOR_ZERO; + case VK_BLEND_FACTOR_ONE: + return PIPE_BLENDFACTOR_ONE; + case VK_BLEND_FACTOR_SRC_COLOR: + return PIPE_BLENDFACTOR_SRC_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + return PIPE_BLENDFACTOR_INV_SRC_COLOR; + case VK_BLEND_FACTOR_DST_COLOR: + return PIPE_BLENDFACTOR_DST_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + return PIPE_BLENDFACTOR_INV_DST_COLOR; + case VK_BLEND_FACTOR_SRC_ALPHA: + return PIPE_BLENDFACTOR_SRC_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + return PIPE_BLENDFACTOR_INV_SRC_ALPHA; + case VK_BLEND_FACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_DST_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + return PIPE_BLENDFACTOR_INV_DST_ALPHA; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_CONST_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_INV_CONST_COLOR; + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_CONST_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_INV_CONST_ALPHA; + case VK_BLEND_FACTOR_SRC1_COLOR: + return PIPE_BLENDFACTOR_SRC1_COLOR; + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: + return PIPE_BLENDFACTOR_INV_SRC1_COLOR; + case VK_BLEND_FACTOR_SRC1_ALPHA: + return PIPE_BLENDFACTOR_SRC1_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: + return PIPE_BLENDFACTOR_INV_SRC1_ALPHA; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; + default: + unreachable("Invalid blend factor"); + } +} diff --git a/src/vulkan/runtime/vk_blend.h b/src/vulkan/runtime/vk_blend.h new file mode 100644 index 00000000000..fb50c17eddc --- /dev/null +++ b/src/vulkan/runtime/vk_blend.h @@ -0,0 +1,25 @@ +/* + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef VK_BLEND_H +#define VK_BLEND_H + +#include <stdbool.h> 
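/* Usage sketch (illustrative): a Gallium-based Vulkan driver can translate a
 * color-blend attachment in one pass with these helpers, e.g.
 *
 *    rt->blend_enable     = att->blendEnable;
 *    rt->rgb_func         = vk_blend_op_to_pipe(att->colorBlendOp);
 *    rt->rgb_src_factor   = vk_blend_factor_to_pipe(att->srcColorBlendFactor);
 *    rt->rgb_dst_factor   = vk_blend_factor_to_pipe(att->dstColorBlendFactor);
 *    rt->alpha_func       = vk_blend_op_to_pipe(att->alphaBlendOp);
 *    rt->alpha_src_factor = vk_blend_factor_to_pipe(att->srcAlphaBlendFactor);
 *    rt->alpha_dst_factor = vk_blend_factor_to_pipe(att->dstAlphaBlendFactor);
 *
 * where 'rt' is assumed to point at a struct pipe_rt_blend_state and 'att' at
 * the VkPipelineColorBlendAttachmentState being translated.
 */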
+#include "util/blend.h" +#include "vulkan/vulkan_core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum pipe_logicop vk_logic_op_to_pipe(VkLogicOp in); +enum pipe_blend_func vk_blend_op_to_pipe(VkBlendOp in); +enum pipe_blendfactor vk_blend_factor_to_pipe(VkBlendFactor in); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/vulkan/runtime/vk_buffer.c b/src/vulkan/runtime/vk_buffer.c new file mode 100644 index 00000000000..023aafd2177 --- /dev/null +++ b/src/vulkan/runtime/vk_buffer.c @@ -0,0 +1,145 @@ +/* + * Copyright © 2022 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_buffer.h" + +#include "vk_common_entrypoints.h" +#include "vk_alloc.h" +#include "vk_device.h" +#include "vk_util.h" + +void +vk_buffer_init(struct vk_device *device, + struct vk_buffer *buffer, + const VkBufferCreateInfo *pCreateInfo) +{ + vk_object_base_init(device, &buffer->base, VK_OBJECT_TYPE_BUFFER); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + assert(pCreateInfo->size > 0); + + buffer->create_flags = pCreateInfo->flags; + buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + + const VkBufferUsageFlags2CreateInfoKHR *usage2_info = + vk_find_struct_const(pCreateInfo->pNext, + BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); + if (usage2_info != NULL) + buffer->usage = usage2_info->usage; +} + +void * +vk_buffer_create(struct vk_device *device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_buffer *buffer = + vk_zalloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (buffer == NULL) + return NULL; + + vk_buffer_init(device, buffer, pCreateInfo); + + return buffer; +} + +void +vk_buffer_finish(struct vk_buffer *buffer) +{ + vk_object_base_finish(&buffer->base); +} + +void +vk_buffer_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_buffer *buffer) +{ + vk_object_free(device, alloc, buffer); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetBufferMemoryRequirements(VkDevice _device, + VkBuffer buffer, + VkMemoryRequirements *pMemoryRequirements) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + VkBufferMemoryRequirementsInfo2 info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + .buffer = buffer, + }; + VkMemoryRequirements2 reqs = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + }; + 
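   /* The legacy Vulkan 1.0 entrypoint is implemented by forwarding to the
    * driver's GetBufferMemoryRequirements2, so a driver that layers on the
    * common runtime only needs to provide the ...2 form.  A hypothetical,
    * purely illustrative driver-side implementation might look like:
    *
    *    VKAPI_ATTR void VKAPI_CALL
    *    drv_GetBufferMemoryRequirements2(VkDevice device,
    *                                     const VkBufferMemoryRequirementsInfo2 *pInfo,
    *                                     VkMemoryRequirements2 *pMemoryRequirements)
    *    {
    *       VK_FROM_HANDLE(vk_buffer, buffer, pInfo->buffer);
    *       pMemoryRequirements->memoryRequirements.size = buffer->size;
    *       pMemoryRequirements->memoryRequirements.alignment = 256;
    *       pMemoryRequirements->memoryRequirements.memoryTypeBits = 0x1;
    *    }
    */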
device->dispatch_table.GetBufferMemoryRequirements2(_device, &info, &reqs); + + *pMemoryRequirements = reqs.memoryRequirements; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetBufferMemoryRequirements2(VkDevice _device, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_buffer, buffer, pInfo->buffer); + + VkBufferCreateInfo pCreateInfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = NULL, + .usage = buffer->usage, + .size = buffer->size, + .flags = buffer->create_flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = NULL, + }; + VkDeviceBufferMemoryRequirements info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS, + .pNext = NULL, + .pCreateInfo = &pCreateInfo, + }; + + device->dispatch_table.GetDeviceBufferMemoryRequirements(_device, &info, pMemoryRequirements); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_BindBufferMemory(VkDevice _device, + VkBuffer buffer, + VkDeviceMemory memory, + VkDeviceSize memoryOffset) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + VkBindBufferMemoryInfo bind = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, + .buffer = buffer, + .memory = memory, + .memoryOffset = memoryOffset, + }; + + return device->dispatch_table.BindBufferMemory2(_device, 1, &bind); +} diff --git a/src/vulkan/runtime/vk_buffer.h b/src/vulkan/runtime/vk_buffer.h new file mode 100644 index 00000000000..0bd9b19b6fb --- /dev/null +++ b/src/vulkan/runtime/vk_buffer.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2022 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_BUFFER_H +#define VK_BUFFER_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_buffer { + struct vk_object_base base; + + /** VkBufferCreateInfo::flags */ + VkBufferCreateFlags create_flags; + + /** VkBufferCreateInfo::size */ + VkDeviceSize size; + + /** VkBufferCreateInfo::usage or VkBufferUsageFlags2CreateInfoKHR::usage */ + VkBufferUsageFlags2KHR usage; +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_buffer, base, VkBuffer, + VK_OBJECT_TYPE_BUFFER); + +void vk_buffer_init(struct vk_device *device, + struct vk_buffer *buffer, + const VkBufferCreateInfo *pCreateInfo); +void vk_buffer_finish(struct vk_buffer *buffer); + +void *vk_buffer_create(struct vk_device *device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_buffer_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_buffer *buffer); + +static inline uint64_t +vk_buffer_range(const struct vk_buffer *buffer, + uint64_t offset, uint64_t range) +{ + assert(offset <= buffer->size); + if (range == VK_WHOLE_SIZE) { + return buffer->size - offset; + } else { + assert(range + offset >= range); + assert(range + offset <= buffer->size); + return range; + } +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_BUFFER_H */ diff --git a/src/vulkan/runtime/vk_buffer_view.c b/src/vulkan/runtime/vk_buffer_view.c new file mode 100644 index 00000000000..cea62d7de90 --- /dev/null +++ b/src/vulkan/runtime/vk_buffer_view.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2022 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_buffer_view.h" + +#include "vk_alloc.h" +#include "vk_buffer.h" +#include "vk_device.h" +#include "vk_format.h" + +void +vk_buffer_view_init(struct vk_device *device, + struct vk_buffer_view *buffer_view, + const VkBufferViewCreateInfo *pCreateInfo) +{ + VK_FROM_HANDLE(vk_buffer, buffer, pCreateInfo->buffer); + + vk_object_base_init(device, &buffer_view->base, VK_OBJECT_TYPE_BUFFER_VIEW); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); + assert(pCreateInfo->flags == 0); + assert(pCreateInfo->range > 0); + + buffer_view->buffer = buffer; + buffer_view->format = pCreateInfo->format; + buffer_view->offset = pCreateInfo->offset; + buffer_view->range = vk_buffer_range(buffer, pCreateInfo->offset, + pCreateInfo->range); + buffer_view->elements = buffer_view->range / + vk_format_get_blocksize(buffer_view->format); +} + +void * +vk_buffer_view_create(struct vk_device *device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_buffer_view *buffer_view; + + buffer_view = vk_zalloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!buffer_view) + return NULL; + + vk_buffer_view_init(device, buffer_view, pCreateInfo); + + return buffer_view; +} + +void +vk_buffer_view_finish(struct vk_buffer_view *buffer_view) +{ + vk_object_base_finish(&buffer_view->base); +} + +void +vk_buffer_view_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_buffer_view *buffer_view) +{ + vk_object_free(device, alloc, buffer_view); +} diff --git a/src/vulkan/runtime/vk_buffer_view.h b/src/vulkan/runtime/vk_buffer_view.h new file mode 100644 index 00000000000..aa9f4270175 --- /dev/null +++ b/src/vulkan/runtime/vk_buffer_view.h @@ -0,0 +1,75 @@ +/* + * Copyright © 2022 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_BUFFER_VIEW_H +#define VK_BUFFER_VIEW_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_buffer_view { + struct vk_object_base base; + + /** VkBufferViewCreateInfo::buffer */ + struct vk_buffer *buffer; + + /** VkBufferViewCreateInfo::format */ + VkFormat format; + + /** VkBufferViewCreateInfo::offset */ + VkDeviceSize offset; + + /** VkBufferViewCreateInfo::range + * + * This is asserted to be in-range for the attached buffer and will never + * be VK_WHOLE_SIZE. 
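+ *
+ * As an illustration (example numbers only): a view created with
+ * offset = 64 and range = VK_WHOLE_SIZE on a 256-byte buffer ends up with
+ * range = 256 - 64 = 192 stored here, computed by vk_buffer_range() in
+ * vk_buffer_view_init().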
+ */ + VkDeviceSize range; + + /* Number of elements in the buffer. This is range divided by the size of + * format, rounded down. + */ + VkDeviceSize elements; +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_buffer_view, base, VkBufferView, + VK_OBJECT_TYPE_BUFFER_VIEW); + +void vk_buffer_view_init(struct vk_device *device, + struct vk_buffer_view *buffer_view, + const VkBufferViewCreateInfo *pCreateInfo); +void vk_buffer_view_finish(struct vk_buffer_view *buffer_view); +void *vk_buffer_view_create(struct vk_device *device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_buffer_view_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_buffer_view *buffer_view); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_BUFFER_VIEW_H */ diff --git a/src/vulkan/runtime/vk_cmd_copy.c b/src/vulkan/runtime/vk_cmd_copy.c new file mode 100644 index 00000000000..5b4ef8a28d2 --- /dev/null +++ b/src/vulkan/runtime/vk_cmd_copy.c @@ -0,0 +1,277 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_util.h" + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdCopyBuffer(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy *pRegions) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. 
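+ *
+ * Like the other copy/blit/resolve entrypoints in this file, this simply
+ * repacks the Vulkan 1.0 region structs into their extensible *2 forms and
+ * forwards to the driver's CmdCopyBuffer2, so a driver relying on these
+ * common entrypoints only has to implement the *2 variants.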
+ */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + STACK_ARRAY(VkBufferCopy2, region2s, regionCount); + + for (uint32_t r = 0; r < regionCount; r++) { + region2s[r] = (VkBufferCopy2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2, + .srcOffset = pRegions[r].srcOffset, + .dstOffset = pRegions[r].dstOffset, + .size = pRegions[r].size, + }; + } + + VkCopyBufferInfo2 info = { + .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2, + .srcBuffer = srcBuffer, + .dstBuffer = dstBuffer, + .regionCount = regionCount, + .pRegions = region2s, + }; + + disp->device->dispatch_table.CmdCopyBuffer2(commandBuffer, &info); + + STACK_ARRAY_FINISH(region2s); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdCopyImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy *pRegions) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. + */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + STACK_ARRAY(VkImageCopy2, region2s, regionCount); + + for (uint32_t r = 0; r < regionCount; r++) { + region2s[r] = (VkImageCopy2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2, + .srcSubresource = pRegions[r].srcSubresource, + .srcOffset = pRegions[r].srcOffset, + .dstSubresource = pRegions[r].dstSubresource, + .dstOffset = pRegions[r].dstOffset, + .extent = pRegions[r].extent, + }; + } + + VkCopyImageInfo2 info = { + .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_INFO_2, + .srcImage = srcImage, + .srcImageLayout = srcImageLayout, + .dstImage = dstImage, + .dstImageLayout = dstImageLayout, + .regionCount = regionCount, + .pRegions = region2s, + }; + + disp->device->dispatch_table.CmdCopyImage2(commandBuffer, &info); + + STACK_ARRAY_FINISH(region2s); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. 
+ */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + STACK_ARRAY(VkBufferImageCopy2, region2s, regionCount); + + for (uint32_t r = 0; r < regionCount; r++) { + region2s[r] = (VkBufferImageCopy2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2, + .bufferOffset = pRegions[r].bufferOffset, + .bufferRowLength = pRegions[r].bufferRowLength, + .bufferImageHeight = pRegions[r].bufferImageHeight, + .imageSubresource = pRegions[r].imageSubresource, + .imageOffset = pRegions[r].imageOffset, + .imageExtent = pRegions[r].imageExtent, + }; + } + + VkCopyBufferToImageInfo2 info = { + .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2, + .srcBuffer = srcBuffer, + .dstImage = dstImage, + .dstImageLayout = dstImageLayout, + .regionCount = regionCount, + .pRegions = region2s, + }; + + disp->device->dispatch_table.CmdCopyBufferToImage2(commandBuffer, &info); + + STACK_ARRAY_FINISH(region2s); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. + */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + STACK_ARRAY(VkBufferImageCopy2, region2s, regionCount); + + for (uint32_t r = 0; r < regionCount; r++) { + region2s[r] = (VkBufferImageCopy2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2, + .bufferOffset = pRegions[r].bufferOffset, + .bufferRowLength = pRegions[r].bufferRowLength, + .bufferImageHeight = pRegions[r].bufferImageHeight, + .imageSubresource = pRegions[r].imageSubresource, + .imageOffset = pRegions[r].imageOffset, + .imageExtent = pRegions[r].imageExtent, + }; + } + + VkCopyImageToBufferInfo2 info = { + .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_BUFFER_INFO_2, + .srcImage = srcImage, + .srcImageLayout = srcImageLayout, + .dstBuffer = dstBuffer, + .regionCount = regionCount, + .pRegions = region2s, + }; + + disp->device->dispatch_table.CmdCopyImageToBuffer2(commandBuffer, &info); + + STACK_ARRAY_FINISH(region2s); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBlitImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageBlit *pRegions, + VkFilter filter) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. 
+ */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + STACK_ARRAY(VkImageBlit2, region2s, regionCount); + + for (uint32_t r = 0; r < regionCount; r++) { + region2s[r] = (VkImageBlit2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2, + .srcSubresource = pRegions[r].srcSubresource, + .srcOffsets = { + pRegions[r].srcOffsets[0], + pRegions[r].srcOffsets[1], + }, + .dstSubresource = pRegions[r].dstSubresource, + .dstOffsets = { + pRegions[r].dstOffsets[0], + pRegions[r].dstOffsets[1], + }, + }; + } + + VkBlitImageInfo2 info = { + .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2, + .srcImage = srcImage, + .srcImageLayout = srcImageLayout, + .dstImage = dstImage, + .dstImageLayout = dstImageLayout, + .regionCount = regionCount, + .pRegions = region2s, + .filter = filter, + }; + + disp->device->dispatch_table.CmdBlitImage2(commandBuffer, &info); + + STACK_ARRAY_FINISH(region2s); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdResolveImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageResolve *pRegions) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. + */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + STACK_ARRAY(VkImageResolve2, region2s, regionCount); + + for (uint32_t r = 0; r < regionCount; r++) { + region2s[r] = (VkImageResolve2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2, + .srcSubresource = pRegions[r].srcSubresource, + .srcOffset = pRegions[r].srcOffset, + .dstSubresource = pRegions[r].dstSubresource, + .dstOffset = pRegions[r].dstOffset, + .extent = pRegions[r].extent, + }; + } + + VkResolveImageInfo2 info = { + .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2, + .srcImage = srcImage, + .srcImageLayout = srcImageLayout, + .dstImage = dstImage, + .dstImageLayout = dstImageLayout, + .regionCount = regionCount, + .pRegions = region2s, + }; + + disp->device->dispatch_table.CmdResolveImage2(commandBuffer, &info); + + STACK_ARRAY_FINISH(region2s); +} diff --git a/src/vulkan/runtime/vk_cmd_enqueue.c b/src/vulkan/runtime/vk_cmd_enqueue.c new file mode 100644 index 00000000000..31ea5589d67 --- /dev/null +++ b/src/vulkan/runtime/vk_cmd_enqueue.c @@ -0,0 +1,471 @@ +/* + * Copyright © 2019 Red Hat. + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_alloc.h" +#include "vk_cmd_enqueue_entrypoints.h" +#include "vk_command_buffer.h" +#include "vk_device.h" +#include "vk_pipeline_layout.h" +#include "vk_util.h" + +VKAPI_ATTR void VKAPI_CALL +vk_cmd_enqueue_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, + uint32_t drawCount, + const VkMultiDrawInfoEXT *pVertexInfo, + uint32_t instanceCount, + uint32_t firstInstance, + uint32_t stride) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + struct vk_cmd_queue_entry *cmd = + vk_zalloc(cmd_buffer->cmd_queue.alloc, sizeof(*cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd) + return; + + cmd->type = VK_CMD_DRAW_MULTI_EXT; + list_addtail(&cmd->cmd_link, &cmd_buffer->cmd_queue.cmds); + + cmd->u.draw_multi_ext.draw_count = drawCount; + if (pVertexInfo) { + unsigned i = 0; + cmd->u.draw_multi_ext.vertex_info = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(*cmd->u.draw_multi_ext.vertex_info) * drawCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + vk_foreach_multi_draw(draw, i, pVertexInfo, drawCount, stride) { + memcpy(&cmd->u.draw_multi_ext.vertex_info[i], draw, + sizeof(*cmd->u.draw_multi_ext.vertex_info)); + } + } + cmd->u.draw_multi_ext.instance_count = instanceCount; + cmd->u.draw_multi_ext.first_instance = firstInstance; + cmd->u.draw_multi_ext.stride = stride; +} + +VKAPI_ATTR void VKAPI_CALL +vk_cmd_enqueue_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, + uint32_t drawCount, + const VkMultiDrawIndexedInfoEXT *pIndexInfo, + uint32_t instanceCount, + uint32_t firstInstance, + uint32_t stride, + const int32_t *pVertexOffset) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + struct vk_cmd_queue_entry *cmd = + vk_zalloc(cmd_buffer->cmd_queue.alloc, sizeof(*cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd) + return; + + cmd->type = VK_CMD_DRAW_MULTI_INDEXED_EXT; + list_addtail(&cmd->cmd_link, &cmd_buffer->cmd_queue.cmds); + + cmd->u.draw_multi_indexed_ext.draw_count = drawCount; + + if (pIndexInfo) { + unsigned i = 0; + cmd->u.draw_multi_indexed_ext.index_info = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(*cmd->u.draw_multi_indexed_ext.index_info) * drawCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + vk_foreach_multi_draw_indexed(draw, i, pIndexInfo, drawCount, stride) { + cmd->u.draw_multi_indexed_ext.index_info[i].firstIndex = draw->firstIndex; + cmd->u.draw_multi_indexed_ext.index_info[i].indexCount = draw->indexCount; + if (pVertexOffset == NULL) + cmd->u.draw_multi_indexed_ext.index_info[i].vertexOffset = draw->vertexOffset; + } + } + + cmd->u.draw_multi_indexed_ext.instance_count = instanceCount; + cmd->u.draw_multi_indexed_ext.first_instance = firstInstance; + cmd->u.draw_multi_indexed_ext.stride = stride; + + if (pVertexOffset) { + cmd->u.draw_multi_indexed_ext.vertex_offset = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(*cmd->u.draw_multi_indexed_ext.vertex_offset), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + memcpy(cmd->u.draw_multi_indexed_ext.vertex_offset, pVertexOffset, + sizeof(*cmd->u.draw_multi_indexed_ext.vertex_offset)); + } +} + +static void +push_descriptors_set_free(struct vk_cmd_queue *queue, + struct vk_cmd_queue_entry *cmd) +{ + struct vk_cmd_push_descriptor_set_khr *pds = 
&cmd->u.push_descriptor_set_khr; + for (unsigned i = 0; i < pds->descriptor_write_count; i++) { + VkWriteDescriptorSet *entry = &pds->descriptor_writes[i]; + switch (entry->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + vk_free(queue->alloc, (void *)entry->pImageInfo); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + vk_free(queue->alloc, (void *)entry->pTexelBufferView); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + default: + vk_free(queue->alloc, (void *)entry->pBufferInfo); + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_cmd_enqueue_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t set, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_cmd_push_descriptor_set_khr *pds; + + struct vk_cmd_queue_entry *cmd = + vk_zalloc(cmd_buffer->cmd_queue.alloc, sizeof(*cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd) + return; + + pds = &cmd->u.push_descriptor_set_khr; + + cmd->type = VK_CMD_PUSH_DESCRIPTOR_SET_KHR; + cmd->driver_free_cb = push_descriptors_set_free; + list_addtail(&cmd->cmd_link, &cmd_buffer->cmd_queue.cmds); + + pds->pipeline_bind_point = pipelineBindPoint; + pds->layout = layout; + pds->set = set; + pds->descriptor_write_count = descriptorWriteCount; + + if (pDescriptorWrites) { + pds->descriptor_writes = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(*pds->descriptor_writes) * descriptorWriteCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + memcpy(pds->descriptor_writes, + pDescriptorWrites, + sizeof(*pds->descriptor_writes) * descriptorWriteCount); + + for (unsigned i = 0; i < descriptorWriteCount; i++) { + switch (pds->descriptor_writes[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + pds->descriptor_writes[i].pImageInfo = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(VkDescriptorImageInfo) * pds->descriptor_writes[i].descriptorCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + memcpy((VkDescriptorImageInfo *)pds->descriptor_writes[i].pImageInfo, + pDescriptorWrites[i].pImageInfo, + sizeof(VkDescriptorImageInfo) * pds->descriptor_writes[i].descriptorCount); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + pds->descriptor_writes[i].pTexelBufferView = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(VkBufferView) * pds->descriptor_writes[i].descriptorCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + memcpy((VkBufferView *)pds->descriptor_writes[i].pTexelBufferView, + pDescriptorWrites[i].pTexelBufferView, + sizeof(VkBufferView) * pds->descriptor_writes[i].descriptorCount); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + default: + pds->descriptor_writes[i].pBufferInfo = + 
vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(VkDescriptorBufferInfo) * pds->descriptor_writes[i].descriptorCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + memcpy((VkDescriptorBufferInfo *)pds->descriptor_writes[i].pBufferInfo, + pDescriptorWrites[i].pBufferInfo, + sizeof(VkDescriptorBufferInfo) * pds->descriptor_writes[i].descriptorCount); + break; + } + } + } +} + +static void +unref_pipeline_layout(struct vk_cmd_queue *queue, + struct vk_cmd_queue_entry *cmd) +{ + struct vk_command_buffer *cmd_buffer = + container_of(queue, struct vk_command_buffer, cmd_queue); + VK_FROM_HANDLE(vk_pipeline_layout, layout, + cmd->u.bind_descriptor_sets.layout); + + assert(cmd->type == VK_CMD_BIND_DESCRIPTOR_SETS); + + vk_pipeline_layout_unref(cmd_buffer->base.device, layout); +} + +VKAPI_ATTR void VKAPI_CALL +vk_cmd_enqueue_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + struct vk_cmd_queue_entry *cmd = + vk_zalloc(cmd_buffer->cmd_queue.alloc, sizeof(*cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd) + return; + + cmd->type = VK_CMD_BIND_DESCRIPTOR_SETS; + list_addtail(&cmd->cmd_link, &cmd_buffer->cmd_queue.cmds); + + /* We need to hold a reference to the descriptor set as long as this + * command is in the queue. Otherwise, it may get deleted out from under + * us before the command is replayed. + */ + vk_pipeline_layout_ref(vk_pipeline_layout_from_handle(layout)); + cmd->u.bind_descriptor_sets.layout = layout; + cmd->driver_free_cb = unref_pipeline_layout; + + cmd->u.bind_descriptor_sets.pipeline_bind_point = pipelineBindPoint; + cmd->u.bind_descriptor_sets.first_set = firstSet; + cmd->u.bind_descriptor_sets.descriptor_set_count = descriptorSetCount; + if (pDescriptorSets) { + cmd->u.bind_descriptor_sets.descriptor_sets = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(*cmd->u.bind_descriptor_sets.descriptor_sets) * descriptorSetCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + memcpy(cmd->u.bind_descriptor_sets.descriptor_sets, pDescriptorSets, + sizeof(*cmd->u.bind_descriptor_sets.descriptor_sets) * descriptorSetCount); + } + cmd->u.bind_descriptor_sets.dynamic_offset_count = dynamicOffsetCount; + if (pDynamicOffsets) { + cmd->u.bind_descriptor_sets.dynamic_offsets = + vk_zalloc(cmd_buffer->cmd_queue.alloc, + sizeof(*cmd->u.bind_descriptor_sets.dynamic_offsets) * dynamicOffsetCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + memcpy(cmd->u.bind_descriptor_sets.dynamic_offsets, pDynamicOffsets, + sizeof(*cmd->u.bind_descriptor_sets.dynamic_offsets) * dynamicOffsetCount); + } +} + +#ifdef VK_ENABLE_BETA_EXTENSIONS +static void +dispatch_graph_amdx_free(struct vk_cmd_queue *queue, struct vk_cmd_queue_entry *cmd) +{ + VkDispatchGraphCountInfoAMDX *count_info = cmd->u.dispatch_graph_amdx.count_info; + void *infos = (void *)count_info->infos.hostAddress; + + for (uint32_t i = 0; i < count_info->count; i++) { + VkDispatchGraphInfoAMDX *info = (void *)((const uint8_t *)infos + i * count_info->stride); + vk_free(queue->alloc, (void *)info->payloads.hostAddress); + } + + vk_free(queue->alloc, infos); +} + +VKAPI_ATTR void VKAPI_CALL +vk_cmd_enqueue_CmdDispatchGraphAMDX(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + const VkDispatchGraphCountInfoAMDX *pCountInfo) +{ + 
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + if (vk_command_buffer_has_error(cmd_buffer)) + return; + + VkResult result = VK_SUCCESS; + const VkAllocationCallbacks *alloc = cmd_buffer->cmd_queue.alloc; + + struct vk_cmd_queue_entry *cmd = + vk_zalloc(alloc, sizeof(struct vk_cmd_queue_entry), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto err; + } + + cmd->type = VK_CMD_DISPATCH_GRAPH_AMDX; + cmd->driver_free_cb = dispatch_graph_amdx_free; + + cmd->u.dispatch_graph_amdx.scratch = scratch; + + cmd->u.dispatch_graph_amdx.count_info = + vk_zalloc(alloc, sizeof(VkDispatchGraphCountInfoAMDX), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd->u.dispatch_graph_amdx.count_info == NULL) + goto err; + + memcpy((void *)cmd->u.dispatch_graph_amdx.count_info, pCountInfo, + sizeof(VkDispatchGraphCountInfoAMDX)); + + uint32_t infos_size = pCountInfo->count * pCountInfo->stride; + void *infos = vk_zalloc(alloc, infos_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->u.dispatch_graph_amdx.count_info->infos.hostAddress = infos; + memcpy(infos, pCountInfo->infos.hostAddress, infos_size); + + for (uint32_t i = 0; i < pCountInfo->count; i++) { + VkDispatchGraphInfoAMDX *info = (void *)((const uint8_t *)infos + i * pCountInfo->stride); + + uint32_t payloads_size = info->payloadCount * info->payloadStride; + void *dst_payload = vk_zalloc(alloc, payloads_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + memcpy(dst_payload, info->payloads.hostAddress, payloads_size); + info->payloads.hostAddress = dst_payload; + } + + list_addtail(&cmd->cmd_link, &cmd_buffer->cmd_queue.cmds); + goto finish; +err: + if (cmd) { + vk_free(alloc, cmd); + dispatch_graph_amdx_free(&cmd_buffer->cmd_queue, cmd); + } + +finish: + if (unlikely(result != VK_SUCCESS)) + vk_command_buffer_set_error(cmd_buffer, result); +} +#endif + +static void +vk_cmd_build_acceleration_structures_khr_free(struct vk_cmd_queue *queue, + struct vk_cmd_queue_entry *cmd) +{ + struct vk_cmd_build_acceleration_structures_khr *build = + &cmd->u.build_acceleration_structures_khr; + + for (uint32_t i = 0; i < build->info_count; i++) { + vk_free(queue->alloc, (void *)build->infos[i].pGeometries); + vk_free(queue->alloc, (void *)build->pp_build_range_infos[i]); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_cmd_enqueue_CmdBuildAccelerationStructuresKHR( + VkCommandBuffer commandBuffer, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + if (vk_command_buffer_has_error(cmd_buffer)) + return; + + struct vk_cmd_queue *queue = &cmd_buffer->cmd_queue; + + struct vk_cmd_queue_entry *cmd = + vk_zalloc(queue->alloc, vk_cmd_queue_type_sizes[VK_CMD_BUILD_ACCELERATION_STRUCTURES_KHR], 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmd) + goto err; + + cmd->type = VK_CMD_BUILD_ACCELERATION_STRUCTURES_KHR; + cmd->driver_free_cb = vk_cmd_build_acceleration_structures_khr_free; + + struct vk_cmd_build_acceleration_structures_khr *build = + &cmd->u.build_acceleration_structures_khr; + + build->info_count = infoCount; + if (pInfos) { + build->infos = vk_zalloc(queue->alloc, sizeof(*build->infos) * infoCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!build->infos) + goto err; + + memcpy((VkAccelerationStructureBuildGeometryInfoKHR *)build->infos, pInfos, + sizeof(*build->infos) * (infoCount)); + + for (uint32_t i = 0; i < infoCount; i++) { + 
uint32_t geometries_size = + build->infos[i].geometryCount * sizeof(VkAccelerationStructureGeometryKHR); + VkAccelerationStructureGeometryKHR *geometries = + vk_zalloc(queue->alloc, geometries_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!geometries) + goto err; + + if (pInfos[i].pGeometries) { + memcpy(geometries, pInfos[i].pGeometries, geometries_size); + } else { + for (uint32_t j = 0; j < build->infos[i].geometryCount; j++) + memcpy(&geometries[j], pInfos[i].ppGeometries[j], sizeof(VkAccelerationStructureGeometryKHR)); + } + + build->infos[i].pGeometries = geometries; + } + } + if (ppBuildRangeInfos) { + build->pp_build_range_infos = + vk_zalloc(queue->alloc, sizeof(*build->pp_build_range_infos) * infoCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!build->pp_build_range_infos) + goto err; + + VkAccelerationStructureBuildRangeInfoKHR **pp_build_range_infos = + (void *)build->pp_build_range_infos; + + for (uint32_t i = 0; i < infoCount; i++) { + uint32_t build_range_size = + build->infos[i].geometryCount * sizeof(VkAccelerationStructureBuildRangeInfoKHR); + VkAccelerationStructureBuildRangeInfoKHR *p_build_range_infos = + vk_zalloc(queue->alloc, build_range_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!p_build_range_infos) + goto err; + + memcpy(p_build_range_infos, ppBuildRangeInfos[i], build_range_size); + + pp_build_range_infos[i] = p_build_range_infos; + } + } + + list_addtail(&cmd->cmd_link, &queue->cmds); + return; + +err: + if (cmd) + vk_cmd_build_acceleration_structures_khr_free(queue, cmd); + + vk_command_buffer_set_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); +} diff --git a/src/vulkan/runtime/vk_command_buffer.c b/src/vulkan/runtime/vk_command_buffer.c new file mode 100644 index 00000000000..f678d9bc0a1 --- /dev/null +++ b/src/vulkan/runtime/vk_command_buffer.c @@ -0,0 +1,372 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_command_buffer.h" + +#include "vk_command_pool.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" + +VkResult +vk_command_buffer_init(struct vk_command_pool *pool, + struct vk_command_buffer *command_buffer, + const struct vk_command_buffer_ops *ops, + VkCommandBufferLevel level) +{ + memset(command_buffer, 0, sizeof(*command_buffer)); + vk_object_base_init(pool->base.device, &command_buffer->base, + VK_OBJECT_TYPE_COMMAND_BUFFER); + + command_buffer->pool = pool; + command_buffer->level = level; + command_buffer->ops = ops; + vk_dynamic_graphics_state_init(&command_buffer->dynamic_graphics_state); + command_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_INITIAL; + command_buffer->record_result = VK_SUCCESS; + vk_cmd_queue_init(&command_buffer->cmd_queue, &pool->alloc); + vk_meta_object_list_init(&command_buffer->meta_objects); + util_dynarray_init(&command_buffer->labels, NULL); + command_buffer->region_begin = true; + + list_add(&command_buffer->pool_link, &pool->command_buffers); + + return VK_SUCCESS; +} + +void +vk_command_buffer_reset(struct vk_command_buffer *command_buffer) +{ + vk_dynamic_graphics_state_clear(&command_buffer->dynamic_graphics_state); + command_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_INITIAL; + command_buffer->record_result = VK_SUCCESS; + vk_command_buffer_reset_render_pass(command_buffer); + vk_cmd_queue_reset(&command_buffer->cmd_queue); + vk_meta_object_list_reset(command_buffer->base.device, + &command_buffer->meta_objects); + util_dynarray_clear(&command_buffer->labels); + command_buffer->region_begin = true; +} + +void +vk_command_buffer_begin(struct vk_command_buffer *command_buffer, + const VkCommandBufferBeginInfo *pBeginInfo) +{ + if (command_buffer->state != MESA_VK_COMMAND_BUFFER_STATE_INITIAL && + command_buffer->ops->reset != NULL) + command_buffer->ops->reset(command_buffer, 0); + + command_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_RECORDING; +} + +VkResult +vk_command_buffer_end(struct vk_command_buffer *command_buffer) +{ + assert(command_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_RECORDING); + + if (vk_command_buffer_has_error(command_buffer)) + command_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_INVALID; + else + command_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE; + + return vk_command_buffer_get_record_result(command_buffer); +} + +void +vk_command_buffer_finish(struct vk_command_buffer *command_buffer) +{ + list_del(&command_buffer->pool_link); + vk_command_buffer_reset_render_pass(command_buffer); + vk_cmd_queue_finish(&command_buffer->cmd_queue); + util_dynarray_fini(&command_buffer->labels); + vk_meta_object_list_finish(command_buffer->base.device, + &command_buffer->meta_objects); + vk_object_base_finish(&command_buffer->base); +} + +void +vk_command_buffer_recycle(struct vk_command_buffer *cmd_buffer) +{ + /* Reset, returning resources to the pool. The command buffer object + * itself will be recycled but, if the driver supports returning other + * resources such as batch buffers to the pool, it should do so so they're + * not tied up in recycled command buffer objects. 
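+ *
+ * With the common pool implementation, recycled command buffers are parked
+ * on vk_command_pool::free_command_buffers by
+ * vk_command_buffer_recycle_or_destroy() and handed back out again by
+ * vk_command_pool_find_free() on a later vkAllocateCommandBuffers() call.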
+ */ + cmd_buffer->ops->reset(cmd_buffer, + VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + + vk_object_base_recycle(&cmd_buffer->base); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_ResetCommandBuffer(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->state != MESA_VK_COMMAND_BUFFER_STATE_INITIAL) + cmd_buffer->ops->reset(cmd_buffer, flags); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) +{ + VK_FROM_HANDLE(vk_command_buffer, primary, commandBuffer); + const struct vk_device_dispatch_table *disp = + primary->base.device->command_dispatch_table; + + for (uint32_t i = 0; i < commandBufferCount; i++) { + VK_FROM_HANDLE(vk_command_buffer, secondary, pCommandBuffers[i]); + + vk_cmd_queue_execute(&secondary->cmd_queue, commandBuffer, disp); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + const struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + disp->CmdBindVertexBuffers2(commandBuffer, firstBinding, bindingCount, + pBuffers, pOffsets, NULL, NULL); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBindIndexBuffer( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + const struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + disp->CmdBindIndexBuffer2KHR(commandBuffer, buffer, offset, + VK_WHOLE_SIZE, indexType); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdDispatch(VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + const struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + disp->CmdDispatchBase(commandBuffer, 0, 0, 0, + groupCountX, groupCountY, groupCountZ); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) +{ + /* Nothing to do here since we only support a single device */ + assert(deviceMask == 0x1); +} + +VkShaderStageFlags +vk_shader_stages_from_bind_point(VkPipelineBindPoint pipelineBindPoint) +{ + switch (pipelineBindPoint) { +#ifdef VK_ENABLE_BETA_EXTENSIONS + case VK_PIPELINE_BIND_POINT_EXECUTION_GRAPH_AMDX: + return VK_SHADER_STAGE_COMPUTE_BIT | MESA_VK_SHADER_STAGE_WORKGRAPH_HACK_BIT_FIXME; +#endif + case VK_PIPELINE_BIND_POINT_COMPUTE: + return VK_SHADER_STAGE_COMPUTE_BIT; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + return VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT; + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: + return VK_SHADER_STAGE_RAYGEN_BIT_KHR | + VK_SHADER_STAGE_ANY_HIT_BIT_KHR | + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | + VK_SHADER_STAGE_MISS_BIT_KHR | + VK_SHADER_STAGE_INTERSECTION_BIT_KHR | + VK_SHADER_STAGE_CALLABLE_BIT_KHR; + default: + unreachable("unknown bind point!"); + } + return 0; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + 
uint32_t firstSet,
+    uint32_t descriptorSetCount,
+    const VkDescriptorSet* pDescriptorSets,
+    uint32_t dynamicOffsetCount,
+    const uint32_t* pDynamicOffsets)
+{
+   const VkBindDescriptorSetsInfoKHR two = {
+      .sType = VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_SETS_INFO_KHR,
+      .stageFlags = vk_shader_stages_from_bind_point(pipelineBindPoint),
+      .layout = layout,
+      .firstSet = firstSet,
+      .descriptorSetCount = descriptorSetCount,
+      .pDescriptorSets = pDescriptorSets,
+      .dynamicOffsetCount = dynamicOffsetCount,
+      .pDynamicOffsets = pDynamicOffsets
+   };
+
+   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
+   const struct vk_device_dispatch_table *disp =
+      &cmd_buffer->base.device->dispatch_table;
+
+   disp->CmdBindDescriptorSets2KHR(commandBuffer, &two);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_CmdPushConstants(
+    VkCommandBuffer commandBuffer,
+    VkPipelineLayout layout,
+    VkShaderStageFlags stageFlags,
+    uint32_t offset,
+    uint32_t size,
+    const void* pValues)
+{
+   const VkPushConstantsInfoKHR two = {
+      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
+      .layout = layout,
+      .stageFlags = stageFlags,
+      .offset = offset,
+      .size = size,
+      .pValues = pValues,
+   };
+
+   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
+   const struct vk_device_dispatch_table *disp =
+      &cmd_buffer->base.device->dispatch_table;
+
+   disp->CmdPushConstants2KHR(commandBuffer, &two);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_CmdPushDescriptorSetKHR(
+    VkCommandBuffer commandBuffer,
+    VkPipelineBindPoint pipelineBindPoint,
+    VkPipelineLayout layout,
+    uint32_t set,
+    uint32_t descriptorWriteCount,
+    const VkWriteDescriptorSet* pDescriptorWrites)
+{
+   const VkPushDescriptorSetInfoKHR two = {
+      .sType = VK_STRUCTURE_TYPE_PUSH_DESCRIPTOR_SET_INFO_KHR,
+      .stageFlags = vk_shader_stages_from_bind_point(pipelineBindPoint),
+      .layout = layout,
+      .set = set,
+      .descriptorWriteCount = descriptorWriteCount,
+      .pDescriptorWrites = pDescriptorWrites,
+   };
+
+   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
+   const struct vk_device_dispatch_table *disp =
+      &cmd_buffer->base.device->dispatch_table;
+
+   disp->CmdPushDescriptorSet2KHR(commandBuffer, &two);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_CmdPushDescriptorSetWithTemplateKHR(
+    VkCommandBuffer commandBuffer,
+    VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+    VkPipelineLayout layout,
+    uint32_t set,
+    const void* pData)
+{
+   const VkPushDescriptorSetWithTemplateInfoKHR two = {
+      .sType = VK_STRUCTURE_TYPE_PUSH_DESCRIPTOR_SET_WITH_TEMPLATE_INFO_KHR,
+      .descriptorUpdateTemplate = descriptorUpdateTemplate,
+      .layout = layout,
+      .set = set,
+      .pData = pData,
+   };
+
+   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
+   const struct vk_device_dispatch_table *disp =
+      &cmd_buffer->base.device->dispatch_table;
+
+   disp->CmdPushDescriptorSetWithTemplate2KHR(commandBuffer, &two);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_CmdSetDescriptorBufferOffsetsEXT(
+    VkCommandBuffer commandBuffer,
+    VkPipelineBindPoint pipelineBindPoint,
+    VkPipelineLayout layout,
+    uint32_t firstSet,
+    uint32_t setCount,
+    const uint32_t* pBufferIndices,
+    const VkDeviceSize* pOffsets)
+{
+   const VkSetDescriptorBufferOffsetsInfoEXT two = {
+      .sType = VK_STRUCTURE_TYPE_SET_DESCRIPTOR_BUFFER_OFFSETS_INFO_EXT,
+      .stageFlags = vk_shader_stages_from_bind_point(pipelineBindPoint),
+      .layout = layout,
+      .firstSet = firstSet,
+      .setCount = setCount,
+      .pBufferIndices = pBufferIndices,
+      .pOffsets = pOffsets
+   };
+
+   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
+   const struct vk_device_dispatch_table *disp =
+      &cmd_buffer->base.device->dispatch_table;
+
+   disp->CmdSetDescriptorBufferOffsets2EXT(commandBuffer, &two);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_CmdBindDescriptorBufferEmbeddedSamplersEXT(
+    VkCommandBuffer commandBuffer,
+    VkPipelineBindPoint pipelineBindPoint,
+    VkPipelineLayout layout,
+    uint32_t set)
+{
+   const VkBindDescriptorBufferEmbeddedSamplersInfoEXT two = {
+      .sType = VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_BUFFER_EMBEDDED_SAMPLERS_INFO_EXT,
+      .stageFlags = vk_shader_stages_from_bind_point(pipelineBindPoint),
+      .layout = layout,
+      .set = set
+   };
+
+   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
+   const struct vk_device_dispatch_table *disp =
+      &cmd_buffer->base.device->dispatch_table;
+
+   disp->CmdBindDescriptorBufferEmbeddedSamplers2EXT(commandBuffer, &two);
+}
diff --git a/src/vulkan/runtime/vk_command_buffer.h b/src/vulkan/runtime/vk_command_buffer.h
new file mode 100644
index 00000000000..5ff51bbf578
--- /dev/null
+++ b/src/vulkan/runtime/vk_command_buffer.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2021 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VK_COMMAND_BUFFER_H
+#define VK_COMMAND_BUFFER_H
+
+#include "vk_cmd_queue.h"
+#include "vk_graphics_state.h"
+#include "vk_log.h"
+#include "vk_meta.h"
+#include "vk_object.h"
+#include "util/list.h"
+#include "util/u_dynarray.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct vk_command_pool;
+struct vk_framebuffer;
+struct vk_image_view;
+struct vk_render_pass;
+
+struct vk_attachment_view_state {
+   VkImageLayout layout;
+   VkImageLayout stencil_layout;
+   const VkSampleLocationsInfoEXT *sample_locations;
+};
+
+struct vk_attachment_state {
+   struct vk_image_view *image_view;
+
+   /** A running tally of which views have been loaded */
+   uint32_t views_loaded;
+
+   /** Per-view state */
+   struct vk_attachment_view_state views[MESA_VK_MAX_MULTIVIEW_VIEW_COUNT];
+
+   /** VkRenderPassBeginInfo::pClearValues[i] */
+   VkClearValue clear_value;
+};
+
+/** Command buffer ops */
+struct vk_command_buffer_ops {
+   /** Creates a command buffer
+    *
+    * Used by the common command pool implementation.  This function MUST
+    * call `vk_command_buffer_init()`.  Notably, this function does not
+    * receive any additional parameters such as the level.
The level will be + * set by `vk_common_AllocateCommandBuffers()` and the driver must not rely + * on it until `vkBeginCommandBuffer()` time. + */ + VkResult (*create)(struct vk_command_pool *, VkCommandBufferLevel, + struct vk_command_buffer **); + + /** Resets the command buffer + * + * Used by the common command pool implementation. This function MUST + * call `vk_command_buffer_reset()`. Unlike `vkResetCommandBuffer()`, + * this function does not have a return value because it may be called on + * destruction paths. + */ + void (*reset)(struct vk_command_buffer *, VkCommandBufferResetFlags); + + /** Destroys the command buffer + * + * Used by the common command pool implementation. This function MUST + * call `vk_command_buffer_finish()`. + */ + void (*destroy)(struct vk_command_buffer *); +}; + +enum mesa_vk_command_buffer_state { + MESA_VK_COMMAND_BUFFER_STATE_INVALID, + MESA_VK_COMMAND_BUFFER_STATE_INITIAL, + MESA_VK_COMMAND_BUFFER_STATE_RECORDING, + MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE, + MESA_VK_COMMAND_BUFFER_STATE_PENDING, +}; + +/* this needs spec fixes */ +#define MESA_VK_SHADER_STAGE_WORKGRAPH_HACK_BIT_FIXME (1<<30) +VkShaderStageFlags vk_shader_stages_from_bind_point(VkPipelineBindPoint pipelineBindPoint); + +struct vk_command_buffer { + struct vk_object_base base; + + struct vk_command_pool *pool; + + /** VkCommandBufferAllocateInfo::level */ + VkCommandBufferLevel level; + + const struct vk_command_buffer_ops *ops; + + struct vk_dynamic_graphics_state dynamic_graphics_state; + + /** State of the command buffer */ + enum mesa_vk_command_buffer_state state; + + /** Command buffer recording error state. */ + VkResult record_result; + + /** Link in vk_command_pool::command_buffers if pool != NULL */ + struct list_head pool_link; + + /** Command list for emulated secondary command buffers */ + struct vk_cmd_queue cmd_queue; + + /** Object list for meta objects */ + struct vk_meta_object_list meta_objects; + + /** + * VK_EXT_debug_utils + * + * The next two fields represent debug labels storage. + * + * VK_EXT_debug_utils spec requires that upon triggering a debug message + * with a command buffer attached to it, all "active" labels will also be + * provided to the callback. The spec describes two distinct ways of + * attaching a debug label to the command buffer: opening a label region + * and inserting a single label. + * + * Label region is active between the corresponding `*BeginDebugUtilsLabel` + * and `*EndDebugUtilsLabel` calls. The spec doesn't mention any limits on + * nestedness of label regions. This implementation assumes that there + * aren't any. + * + * The spec, however, doesn't explain the lifetime of a label submitted by + * an `*InsertDebugUtilsLabel` call. The LunarG whitepaper [1] (pp 12-15) + * provides a more detailed explanation along with some examples. According + * to those, such label remains active until the next `*DebugUtilsLabel` + * call. This means that there can be no more than one such label at a + * time. + * + * ``labels`` contains all active labels at this point in order of + * submission ``region_begin`` denotes whether the most recent label opens + * a new region If ``labels`` is empty ``region_begin`` must be true. + * + * Anytime we modify labels, we first check for ``region_begin``. If it's + * false, it means that the most recent label was submitted by + * `*InsertDebugUtilsLabel` and we need to remove it before doing anything + * else. 
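+ *
+ * A rough sketch (illustrative only) of the resulting bookkeeping:
+ *
+ *    BeginDebugUtilsLabel("pass")  -> labels = {pass},       region_begin = true
+ *    InsertDebugUtilsLabel("draw") -> labels = {pass, draw}, region_begin = false
+ *    BeginDebugUtilsLabel("blit")  -> labels = {pass, blit}, region_begin = true
+ *    EndDebugUtilsLabel()          -> labels = {pass},       region_begin = true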
+ * + * See the discussion here: + * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10318#note_1061317 + * + * [1] https://www.lunarg.com/wp-content/uploads/2018/05/Vulkan-Debug-Utils_05_18_v1.pdf + */ + struct util_dynarray labels; + bool region_begin; + + struct vk_render_pass *render_pass; + uint32_t subpass_idx; + struct vk_framebuffer *framebuffer; + VkRect2D render_area; + + /** + * True if we are currently inside a CmdPipelineBarrier() is inserted by + * the runtime's vk_render_pass.c + */ + bool runtime_rp_barrier; + + /* This uses the same trick as STACK_ARRAY */ + struct vk_attachment_state *attachments; + struct vk_attachment_state _attachments[8]; + + VkRenderPassSampleLocationsBeginInfoEXT *pass_sample_locations; + + /** + * Bitmask of shader stages bound via a vk_pipeline since the last call to + * vkBindShadersEXT(). + * + * Used by the common vk_pipeline implementation + */ + VkShaderStageFlags pipeline_shader_stages; +}; + +VK_DEFINE_HANDLE_CASTS(vk_command_buffer, base, VkCommandBuffer, + VK_OBJECT_TYPE_COMMAND_BUFFER) + +VkResult MUST_CHECK +vk_command_buffer_init(struct vk_command_pool *pool, + struct vk_command_buffer *command_buffer, + const struct vk_command_buffer_ops *ops, + VkCommandBufferLevel level); + +void +vk_command_buffer_reset_render_pass(struct vk_command_buffer *cmd_buffer); + +void +vk_command_buffer_reset(struct vk_command_buffer *command_buffer); + +void +vk_command_buffer_recycle(struct vk_command_buffer *command_buffer); + +void +vk_command_buffer_begin(struct vk_command_buffer *command_buffer, + const VkCommandBufferBeginInfo *pBeginInfo); + +VkResult +vk_command_buffer_end(struct vk_command_buffer *command_buffer); + +void +vk_command_buffer_finish(struct vk_command_buffer *command_buffer); + +static inline VkResult +__vk_command_buffer_set_error(struct vk_command_buffer *command_buffer, + VkResult error, const char *file, int line) +{ + assert(error != VK_SUCCESS); + error = __vk_errorf(command_buffer, error, file, line, NULL); + if (command_buffer->record_result == VK_SUCCESS) + command_buffer->record_result = error; + return error; +} + +#define vk_command_buffer_set_error(command_buffer, error) \ + __vk_command_buffer_set_error(command_buffer, error, __FILE__, __LINE__) + +static inline VkResult +vk_command_buffer_get_record_result(struct vk_command_buffer *command_buffer) +{ + return command_buffer->record_result; +} + +#define vk_command_buffer_has_error(command_buffer) \ + unlikely((command_buffer)->record_result != VK_SUCCESS) + +#ifdef __cplusplus +} +#endif + +#endif /* VK_COMMAND_BUFFER_H */ diff --git a/src/vulkan/runtime/vk_command_pool.c b/src/vulkan/runtime/vk_command_pool.c new file mode 100644 index 00000000000..4481a52da80 --- /dev/null +++ b/src/vulkan/runtime/vk_command_pool.c @@ -0,0 +1,266 @@ +/* + * Copyright © 2015 Intel Corporation + * Copyright © 2022 Collabora, Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_command_pool.h" + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" + +static bool +should_recycle_command_buffers(struct vk_device *device) +{ + /* They have to be using the common allocation implementation, otherwise + * the recycled command buffers will never actually get re-used + */ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + if (disp->AllocateCommandBuffers != vk_common_AllocateCommandBuffers) + return false; + + /* We need to be able to reset command buffers */ + if (device->command_buffer_ops->reset == NULL) + return false; + + return true; +} + +VkResult MUST_CHECK +vk_command_pool_init(struct vk_device *device, + struct vk_command_pool *pool, + const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator) +{ + memset(pool, 0, sizeof(*pool)); + vk_object_base_init(device, &pool->base, + VK_OBJECT_TYPE_COMMAND_POOL); + + pool->flags = pCreateInfo->flags; + pool->queue_family_index = pCreateInfo->queueFamilyIndex; + pool->alloc = pAllocator ? *pAllocator : device->alloc; + pool->command_buffer_ops = device->command_buffer_ops; + pool->recycle_command_buffers = should_recycle_command_buffers(device); + list_inithead(&pool->command_buffers); + list_inithead(&pool->free_command_buffers); + + return VK_SUCCESS; +} + +void +vk_command_pool_finish(struct vk_command_pool *pool) +{ + list_for_each_entry_safe(struct vk_command_buffer, cmd_buffer, + &pool->command_buffers, pool_link) { + cmd_buffer->ops->destroy(cmd_buffer); + } + assert(list_is_empty(&pool->command_buffers)); + + list_for_each_entry_safe(struct vk_command_buffer, cmd_buffer, + &pool->free_command_buffers, pool_link) { + cmd_buffer->ops->destroy(cmd_buffer); + } + assert(list_is_empty(&pool->free_command_buffers)); + + vk_object_base_finish(&pool->base); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateCommandPool(VkDevice _device, + const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkCommandPool *pCommandPool) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_command_pool *pool; + VkResult result; + + pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = vk_command_pool_init(device, pool, pCreateInfo, pAllocator); + if (unlikely(result != VK_SUCCESS)) { + vk_free2(&device->alloc, pAllocator, pool); + return result; + } + + *pCommandPool = vk_command_pool_to_handle(pool); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyCommandPool(VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_command_pool, pool, commandPool); + + if (pool == NULL) + return; + + vk_command_pool_finish(pool); + vk_free2(&device->alloc, pAllocator, pool); +} + +VKAPI_ATTR VkResult VKAPI_CALL 
+vk_common_ResetCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + VK_FROM_HANDLE(vk_command_pool, pool, commandPool); + const struct vk_device_dispatch_table *disp = + &pool->base.device->dispatch_table; + +#define COPY_FLAG(flag) \ + if (flags & VK_COMMAND_POOL_RESET_##flag) \ + cb_flags |= VK_COMMAND_BUFFER_RESET_##flag + + VkCommandBufferResetFlags cb_flags = 0; + COPY_FLAG(RELEASE_RESOURCES_BIT); + +#undef COPY_FLAG + + list_for_each_entry_safe(struct vk_command_buffer, cmd_buffer, + &pool->command_buffers, pool_link) { + VkResult result = + disp->ResetCommandBuffer(vk_command_buffer_to_handle(cmd_buffer), + cb_flags); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +static void +vk_command_buffer_recycle_or_destroy(struct vk_command_pool *pool, + struct vk_command_buffer *cmd_buffer) +{ + assert(pool == cmd_buffer->pool); + + if (pool->recycle_command_buffers) { + vk_command_buffer_recycle(cmd_buffer); + + list_del(&cmd_buffer->pool_link); + list_add(&cmd_buffer->pool_link, &pool->free_command_buffers); + } else { + cmd_buffer->ops->destroy(cmd_buffer); + } +} + +static struct vk_command_buffer * +vk_command_pool_find_free(struct vk_command_pool *pool) +{ + if (list_is_empty(&pool->free_command_buffers)) + return NULL; + + struct vk_command_buffer *cmd_buffer = + list_first_entry(&pool->free_command_buffers, + struct vk_command_buffer, pool_link); + + list_del(&cmd_buffer->pool_link); + list_addtail(&cmd_buffer->pool_link, &pool->command_buffers); + + return cmd_buffer; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_AllocateCommandBuffers(VkDevice device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) +{ + VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool); + VkResult result; + uint32_t i; + + assert(device == vk_device_to_handle(pool->base.device)); + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + struct vk_command_buffer *cmd_buffer = vk_command_pool_find_free(pool); + if (cmd_buffer == NULL) { + result = pool->command_buffer_ops->create(pool, pAllocateInfo->level, &cmd_buffer); + if (unlikely(result != VK_SUCCESS)) + goto fail; + } + + cmd_buffer->level = pAllocateInfo->level; + + pCommandBuffers[i] = vk_command_buffer_to_handle(cmd_buffer); + } + + return VK_SUCCESS; + +fail: + while (i--) { + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]); + vk_command_buffer_recycle_or_destroy(pool, cmd_buffer); + } + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) + pCommandBuffers[i] = VK_NULL_HANDLE; + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_FreeCommandBuffers(VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) +{ + VK_FROM_HANDLE(vk_command_pool, pool, commandPool); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]); + + if (cmd_buffer == NULL) + continue; + + vk_command_buffer_recycle_or_destroy(pool, cmd_buffer); + } +} + +void +vk_command_pool_trim(struct vk_command_pool *pool, + VkCommandPoolTrimFlags flags) +{ + list_for_each_entry_safe(struct vk_command_buffer, cmd_buffer, + &pool->free_command_buffers, pool_link) { + cmd_buffer->ops->destroy(cmd_buffer); + } + assert(list_is_empty(&pool->free_command_buffers)); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_TrimCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlags flags) +{ + 
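+   /* Only command buffers sitting on the pool's free list are destroyed by
+    * vk_command_pool_trim(); command buffers still owned by the application
+    * are left alone.
+    */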
VK_FROM_HANDLE(vk_command_pool, pool, commandPool);
+
+   vk_command_pool_trim(pool, flags);
+}
diff --git a/src/vulkan/runtime/vk_command_pool.h b/src/vulkan/runtime/vk_command_pool.h
new file mode 100644
index 00000000000..104cfab7265
--- /dev/null
+++ b/src/vulkan/runtime/vk_command_pool.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2022 Collabora, Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VK_COMMAND_POOL_H
+#define VK_COMMAND_POOL_H
+
+#include "vk_object.h"
+#include "util/list.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Base object for implementing VkCommandPool */
+struct vk_command_pool {
+   struct vk_object_base base;
+
+   /** VkCommandPoolCreateInfo::flags */
+   VkCommandPoolCreateFlags flags;
+
+   /** VkCommandPoolCreateInfo::queueFamilyIndex */
+   uint32_t queue_family_index;
+
+   /** Allocator passed to vkCreateCommandPool() */
+   VkAllocationCallbacks alloc;
+
+   /** Command buffer vtable for command buffers allocated from this pool */
+   const struct vk_command_buffer_ops *command_buffer_ops;
+
+   /** True if we should recycle command buffers */
+   bool recycle_command_buffers;
+
+   /** List of all command buffers */
+   struct list_head command_buffers;
+
+   /** List of freed command buffers for trimming. */
+   struct list_head free_command_buffers;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(vk_command_pool, base, VkCommandPool,
+                               VK_OBJECT_TYPE_COMMAND_POOL);
+
+/** Initialize a vk_command_pool
+ *
+ * :param device: |in| The Vulkan device
+ * :param pool: |out| The command pool to initialize
+ * :param pCreateInfo: |in| VkCommandPoolCreateInfo pointer passed to
+ *                     `vkCreateCommandPool()`
+ * :param pAllocator: |in| Allocation callbacks passed to
+ *                     `vkCreateCommandPool()`
+ */
+VkResult MUST_CHECK
+vk_command_pool_init(struct vk_device *device,
+                     struct vk_command_pool *pool,
+                     const VkCommandPoolCreateInfo *pCreateInfo,
+                     const VkAllocationCallbacks *pAllocator);
+
+/** Tear down a vk_command_pool
+ *
+ * :param pool: |inout| The command pool to tear down
+ */
+void
+vk_command_pool_finish(struct vk_command_pool *pool);
+
+/** Trim a vk_command_pool
+ *
+ * This discards any resources that may be cached by the common
+ * vk_command_pool code. For driver-implemented command pools, drivers should
+ * call this function inside their `vkTrimCommandPool()` implementation. This
+ * should be called before doing any driver-specific trimming in case it ends
+ * up returning driver-internal resources to the pool.
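 *
 * (Editorial sketch, not part of this patch: a driver-implemented
 * vkTrimCommandPool() is assumed here to wrap this helper roughly as
 * follows; drv_command_pool, its embedded "vk" member, and
 * drv_trim_internal_caches are hypothetical driver names.)
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    drv_TrimCommandPool(VkDevice device, VkCommandPool commandPool,
 *                        VkCommandPoolTrimFlags flags)
 *    {
 *       VK_FROM_HANDLE(drv_command_pool, pool, commandPool);
 *
 *       vk_command_pool_trim(&pool->vk, flags);   // common caches first
 *       drv_trim_internal_caches(pool);           // then driver-side caches
 *    }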
+ * + * :param pool: |inout| The command pool to trim + * :param flags: |in| Flags controling the trim operation + */ +void +vk_command_pool_trim(struct vk_command_pool *pool, + VkCommandPoolTrimFlags flags); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_COMMAND_POOL_H */ diff --git a/src/vulkan/runtime/vk_debug_report.c b/src/vulkan/runtime/vk_debug_report.c new file mode 100644 index 00000000000..6712ba6d1c8 --- /dev/null +++ b/src/vulkan/runtime/vk_debug_report.c @@ -0,0 +1,158 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_debug_report.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_instance.h" +#include "vk_util.h" + +struct vk_debug_report_callback { + struct vk_object_base base; + + /* Link in the 'callbacks' list in anv_instance struct. */ + struct list_head link; + VkDebugReportFlagsEXT flags; + PFN_vkDebugReportCallbackEXT callback; + void * data; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_debug_report_callback, base, + VkDebugReportCallbackEXT, + VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT) + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateDebugReportCallbackEXT(VkInstance _instance, + const VkDebugReportCallbackCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugReportCallbackEXT *pCallback) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + + struct vk_debug_report_callback *cb = + vk_alloc2(&instance->alloc, pAllocator, + sizeof(struct vk_debug_report_callback), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!cb) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + vk_object_base_instance_init(instance, &cb->base, + VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT); + + cb->flags = pCreateInfo->flags; + cb->callback = pCreateInfo->pfnCallback; + cb->data = pCreateInfo->pUserData; + + mtx_lock(&instance->debug_report.callbacks_mutex); + list_addtail(&cb->link, &instance->debug_report.callbacks); + mtx_unlock(&instance->debug_report.callbacks_mutex); + + *pCallback = vk_debug_report_callback_to_handle(cb); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyDebugReportCallbackEXT(VkInstance _instance, + VkDebugReportCallbackEXT _callback, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + VK_FROM_HANDLE(vk_debug_report_callback, callback, _callback); + + if (callback == NULL) + return; + + /* Remove from list and destroy given callback. 
*/ + mtx_lock(&instance->debug_report.callbacks_mutex); + list_del(&callback->link); + vk_object_base_finish(&callback->base); + vk_free2(&instance->alloc, pAllocator, callback); + mtx_unlock(&instance->debug_report.callbacks_mutex); +} + +static void +debug_report(struct vk_instance *instance, + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT object_type, + uint64_t handle, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char *pMessage) +{ + /* Allow NULL for convinience, return if no callbacks registered. */ + if (!instance || list_is_empty(&instance->debug_report.callbacks)) + return; + + mtx_lock(&instance->debug_report.callbacks_mutex); + + /* Section 33.2 of the Vulkan 1.0.59 spec says: + * + * "callback is an externally synchronized object and must not be + * used on more than one thread at a time. This means that + * vkDestroyDebugReportCallbackEXT must not be called when a callback + * is active." + */ + list_for_each_entry(struct vk_debug_report_callback, cb, + &instance->debug_report.callbacks, link) { + if (cb->flags & flags) + cb->callback(flags, object_type, handle, location, messageCode, + pLayerPrefix, pMessage, cb->data); + } + + mtx_unlock(&instance->debug_report.callbacks_mutex); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DebugReportMessageEXT(VkInstance _instance, + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char* pMessage) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + debug_report(instance, flags, objectType, + object, location, messageCode, pLayerPrefix, pMessage); +} + +void +vk_debug_report(struct vk_instance *instance, + VkDebugReportFlagsEXT flags, + const struct vk_object_base *object, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char *pMessage) +{ + VkObjectType object_type = + object ? object->type : VK_OBJECT_TYPE_UNKNOWN; + debug_report(instance, flags, (VkDebugReportObjectTypeEXT)object_type, + (uint64_t)(uintptr_t)object, location, messageCode, + pLayerPrefix, pMessage); +} diff --git a/src/vulkan/runtime/vk_debug_report.h b/src/vulkan/runtime/vk_debug_report.h new file mode 100644 index 00000000000..ca208bb5f26 --- /dev/null +++ b/src/vulkan/runtime/vk_debug_report.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2018, Google Inc. + * + * based on the anv driver which is: + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef VK_DEBUG_REPORT_H
+#define VK_DEBUG_REPORT_H
+
+#include "vk_instance.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void
+vk_debug_report(struct vk_instance *instance,
+                VkDebugReportFlagsEXT flags,
+                const struct vk_object_base *object,
+                size_t location,
+                int32_t messageCode,
+                const char* pLayerPrefix,
+                const char *pMessage);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* VK_DEBUG_REPORT_H */
diff --git a/src/vulkan/runtime/vk_debug_utils.c b/src/vulkan/runtime/vk_debug_utils.c
new file mode 100644
index 00000000000..2c083ab8937
--- /dev/null
+++ b/src/vulkan/runtime/vk_debug_utils.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright © 2021 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vk_debug_utils.h"
+
+#include "vk_common_entrypoints.h"
+#include "vk_command_buffer.h"
+#include "vk_device.h"
+#include "vk_queue.h"
+#include "vk_object.h"
+#include "vk_alloc.h"
+#include "vk_util.h"
+#include "stdarg.h"
+#include "util/u_dynarray.h"
+
+void
+vk_debug_message(struct vk_instance *instance,
+                 VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+                 VkDebugUtilsMessageTypeFlagsEXT types,
+                 const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData)
+{
+   mtx_lock(&instance->debug_utils.callbacks_mutex);
+
+   list_for_each_entry(struct vk_debug_utils_messenger, messenger,
+                       &instance->debug_utils.callbacks, link) {
+      if ((messenger->severity & severity) &&
+          (messenger->type & types))
+         messenger->callback(severity, types, pCallbackData, messenger->data);
+   }
+
+   mtx_unlock(&instance->debug_utils.callbacks_mutex);
+}
+
+/* This function is intended to be used by drivers to report a
+ * message to the special messenger, provided in the pNext chain while
+ * creating an instance. It's only meant to be used during
+ * vkCreateInstance or vkDestroyInstance calls.
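 *
 * (Editorial sketch, not part of this patch: a driver that fails inside its
 * vkCreateInstance() implementation might report the failure like this,
 * where instance->vk is assumed to be the driver's embedded vk_instance and
 * the message text is made up.)
 *
 *    vk_debug_message_instance(&instance->vk,
 *                              VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
 *                              VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT,
 *                              "DRV", 0, "failed to open the DRM device");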
+ */ +void +vk_debug_message_instance(struct vk_instance *instance, + VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT types, + const char *pMessageIdName, + int32_t messageIdNumber, + const char *pMessage) +{ + if (list_is_empty(&instance->debug_utils.instance_callbacks)) + return; + + const VkDebugUtilsMessengerCallbackDataEXT cbData = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CALLBACK_DATA_EXT, + .pMessageIdName = pMessageIdName, + .messageIdNumber = messageIdNumber, + .pMessage = pMessage, + }; + + list_for_each_entry(struct vk_debug_utils_messenger, messenger, + &instance->debug_utils.instance_callbacks, link) { + if ((messenger->severity & severity) && + (messenger->type & types)) + messenger->callback(severity, types, &cbData, messenger->data); + } +} + +void +vk_address_binding_report(struct vk_instance *instance, + struct vk_object_base *object, + uint64_t base_address, + uint64_t size, + VkDeviceAddressBindingTypeEXT type) +{ + if (list_is_empty(&instance->debug_utils.callbacks)) + return; + + VkDeviceAddressBindingCallbackDataEXT addr_binding = { + .sType = VK_STRUCTURE_TYPE_DEVICE_ADDRESS_BINDING_CALLBACK_DATA_EXT, + .flags = object->client_visible ? 0 : VK_DEVICE_ADDRESS_BINDING_INTERNAL_OBJECT_BIT_EXT, + .baseAddress = base_address, + .size = size, + .bindingType = type, + }; + + VkDebugUtilsObjectNameInfoEXT object_name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .pNext = NULL, + .objectType = object->type, + .objectHandle = (uint64_t)(uintptr_t)object, + .pObjectName = object->object_name, + }; + + VkDebugUtilsMessengerCallbackDataEXT cb_data = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CALLBACK_DATA_EXT, + .pNext = &addr_binding, + .objectCount = 1, + .pObjects = &object_name_info, + }; + + vk_debug_message(instance, VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT, + &cb_data); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateDebugUtilsMessengerEXT( + VkInstance _instance, + const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugUtilsMessengerEXT *pMessenger) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + + struct vk_debug_utils_messenger *messenger = + vk_alloc2(&instance->alloc, pAllocator, + sizeof(struct vk_debug_utils_messenger), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!messenger) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + if (pAllocator) + messenger->alloc = *pAllocator; + else + messenger->alloc = instance->alloc; + + vk_object_base_init(NULL, &messenger->base, + VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT); + + messenger->severity = pCreateInfo->messageSeverity; + messenger->type = pCreateInfo->messageType; + messenger->callback = pCreateInfo->pfnUserCallback; + messenger->data = pCreateInfo->pUserData; + + mtx_lock(&instance->debug_utils.callbacks_mutex); + list_addtail(&messenger->link, &instance->debug_utils.callbacks); + mtx_unlock(&instance->debug_utils.callbacks_mutex); + + *pMessenger = vk_debug_utils_messenger_to_handle(messenger); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_SubmitDebugUtilsMessageEXT( + VkInstance _instance, + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageTypes, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + + vk_debug_message(instance, messageSeverity, messageTypes, pCallbackData); +} 
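
(Editorial note: the vk_address_binding_report() helper above implements the reporting side of VK_EXT_device_address_binding_report. The following is a minimal, hedged sketch of how a driver might call it when binding a buffer object; drv_bo_bind_report, gpu_addr, size, and the surrounding driver wiring are hypothetical, while the helper's signature and the enum value come from the code above.)

   static void
   drv_bo_bind_report(struct vk_instance *instance,
                      struct vk_object_base *obj,
                      uint64_t gpu_addr, uint64_t size)
   {
      /* Notify any registered debug-utils messengers that an address range
       * was just bound to this object; a matching call with
       * VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT would report the release.
       */
      vk_address_binding_report(instance, obj, gpu_addr, size,
                                VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT);
   }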
+ +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyDebugUtilsMessengerEXT( + VkInstance _instance, + VkDebugUtilsMessengerEXT _messenger, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + VK_FROM_HANDLE(vk_debug_utils_messenger, messenger, _messenger); + + if (messenger == NULL) + return; + + mtx_lock(&instance->debug_utils.callbacks_mutex); + list_del(&messenger->link); + mtx_unlock(&instance->debug_utils.callbacks_mutex); + + vk_object_base_finish(&messenger->base); + vk_free2(&instance->alloc, pAllocator, messenger); +} + +static VkResult +vk_common_set_object_name_locked( + struct vk_device *device, + const VkDebugUtilsObjectNameInfoEXT *pNameInfo) +{ + if (unlikely(device->swapchain_name == NULL)) { + /* Even though VkSwapchain/Surface are non-dispatchable objects, we know + * a priori that these are actually pointers so we can use + * the pointer hash table for them. + */ + device->swapchain_name = _mesa_pointer_hash_table_create(NULL); + if (device->swapchain_name == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + char *object_name = vk_strdup(&device->alloc, pNameInfo->pObjectName, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (object_name == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + struct hash_entry *entry = + _mesa_hash_table_search(device->swapchain_name, + (void *)(uintptr_t)pNameInfo->objectHandle); + if (unlikely(entry == NULL)) { + entry = _mesa_hash_table_insert(device->swapchain_name, + (void *)(uintptr_t)pNameInfo->objectHandle, + object_name); + if (entry == NULL) { + vk_free(&device->alloc, object_name); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } else { + vk_free(&device->alloc, entry->data); + entry->data = object_name; + } + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_DebugMarkerSetObjectNameEXT( + VkDevice _device, + const VkDebugMarkerObjectNameInfoEXT *pNameInfo) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + assert(pNameInfo->sType == VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT); + + VkObjectType object_type; + switch (pNameInfo->objectType) { + case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT: + object_type = VK_OBJECT_TYPE_SURFACE_KHR; + break; + case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT: + object_type = VK_OBJECT_TYPE_SWAPCHAIN_KHR; + break; + case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT: + object_type = VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT; + break; + case VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT: + object_type = VK_OBJECT_TYPE_DISPLAY_KHR; + break; + case VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT: + object_type = VK_OBJECT_TYPE_DISPLAY_MODE_KHR; + break; + case VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT: + object_type = VK_OBJECT_TYPE_VALIDATION_CACHE_EXT; + break; + default: + object_type = (VkObjectType)pNameInfo->objectType; + break; + } + + VkDebugUtilsObjectNameInfoEXT name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .objectType = object_type, + .objectHandle = pNameInfo->object, + .pObjectName = pNameInfo->pObjectName, + }; + + return device->dispatch_table.SetDebugUtilsObjectNameEXT(_device, &name_info); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_SetDebugUtilsObjectNameEXT( + VkDevice _device, + const VkDebugUtilsObjectNameInfoEXT *pNameInfo) +{ + VK_FROM_HANDLE(vk_device, device, _device); + +#if DETECT_OS_ANDROID + if (pNameInfo->objectType == VK_OBJECT_TYPE_SWAPCHAIN_KHR || + pNameInfo->objectType == VK_OBJECT_TYPE_SURFACE_KHR) { +#else + if (pNameInfo->objectType == 
VK_OBJECT_TYPE_SURFACE_KHR) { +#endif + mtx_lock(&device->swapchain_name_mtx); + VkResult res = vk_common_set_object_name_locked(device, pNameInfo); + mtx_unlock(&device->swapchain_name_mtx); + return res; + } + + struct vk_object_base *object = + vk_object_base_from_u64_handle(pNameInfo->objectHandle, + pNameInfo->objectType); + + assert(object->device != NULL || object->instance != NULL); + VkAllocationCallbacks *alloc = object->device != NULL ? + &object->device->alloc : &object->instance->alloc; + if (object->object_name) { + vk_free(alloc, object->object_name); + object->object_name = NULL; + } + object->object_name = vk_strdup(alloc, pNameInfo->pObjectName, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!object->object_name) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_SetDebugUtilsObjectTagEXT( + VkDevice _device, + const VkDebugUtilsObjectTagInfoEXT *pTagInfo) +{ + /* no-op */ + return VK_SUCCESS; +} + +static void +vk_common_append_debug_label(struct vk_device *device, + struct util_dynarray *labels, + const VkDebugUtilsLabelEXT *pLabelInfo) +{ + util_dynarray_append(labels, VkDebugUtilsLabelEXT, *pLabelInfo); + VkDebugUtilsLabelEXT *current_label = + util_dynarray_top_ptr(labels, VkDebugUtilsLabelEXT); + current_label->pLabelName = + vk_strdup(&device->alloc, current_label->pLabelName, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); +} + +static void +vk_common_pop_debug_label(struct vk_device *device, + struct util_dynarray *labels) +{ + if (labels->size == 0) + return; + + VkDebugUtilsLabelEXT previous_label = + util_dynarray_pop(labels, VkDebugUtilsLabelEXT); + vk_free(&device->alloc, (void *)previous_label.pLabelName); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBeginDebugUtilsLabelEXT( + VkCommandBuffer _commandBuffer, + const VkDebugUtilsLabelEXT *pLabelInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, command_buffer, _commandBuffer); + + /* If the latest label was submitted by CmdInsertDebugUtilsLabelEXT, we + * should remove it first. + */ + if (!command_buffer->region_begin) { + vk_common_pop_debug_label(command_buffer->base.device, + &command_buffer->labels); + } + + vk_common_append_debug_label(command_buffer->base.device, + &command_buffer->labels, + pLabelInfo); + command_buffer->region_begin = true; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer) +{ + VK_FROM_HANDLE(vk_command_buffer, command_buffer, _commandBuffer); + + /* If the latest label was submitted by CmdInsertDebugUtilsLabelEXT, we + * should remove it first. + */ + if (!command_buffer->region_begin) { + vk_common_pop_debug_label(command_buffer->base.device, + &command_buffer->labels); + } + + vk_common_pop_debug_label(command_buffer->base.device, + &command_buffer->labels); + command_buffer->region_begin = true; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdInsertDebugUtilsLabelEXT( + VkCommandBuffer _commandBuffer, + const VkDebugUtilsLabelEXT *pLabelInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, command_buffer, _commandBuffer); + + /* If the latest label was submitted by CmdInsertDebugUtilsLabelEXT, we + * should remove it first. 
+    */
+   if (!command_buffer->region_begin) {
+      vk_common_pop_debug_label(command_buffer->base.device,
+                                &command_buffer->labels);
+   }
+
+   vk_common_append_debug_label(command_buffer->base.device,
+                                &command_buffer->labels,
+                                pLabelInfo);
+   command_buffer->region_begin = false;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_QueueBeginDebugUtilsLabelEXT(
+   VkQueue _queue,
+   const VkDebugUtilsLabelEXT *pLabelInfo)
+{
+   VK_FROM_HANDLE(vk_queue, queue, _queue);
+
+   /* If the latest label was submitted by QueueInsertDebugUtilsLabelEXT, we
+    * should remove it first.
+    */
+   if (!queue->region_begin)
+      (void)util_dynarray_pop(&queue->labels, VkDebugUtilsLabelEXT);
+
+   vk_common_append_debug_label(queue->base.device,
+                                &queue->labels,
+                                pLabelInfo);
+   queue->region_begin = true;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_QueueEndDebugUtilsLabelEXT(VkQueue _queue)
+{
+   VK_FROM_HANDLE(vk_queue, queue, _queue);
+
+   /* If the latest label was submitted by QueueInsertDebugUtilsLabelEXT, we
+    * should remove it first.
+    */
+   if (!queue->region_begin)
+      vk_common_pop_debug_label(queue->base.device, &queue->labels);
+
+   vk_common_pop_debug_label(queue->base.device, &queue->labels);
+   queue->region_begin = true;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+vk_common_QueueInsertDebugUtilsLabelEXT(
+   VkQueue _queue,
+   const VkDebugUtilsLabelEXT *pLabelInfo)
+{
+   VK_FROM_HANDLE(vk_queue, queue, _queue);
+
+   /* If the latest label was submitted by QueueInsertDebugUtilsLabelEXT, we
+    * should remove it first.
+    */
+   if (!queue->region_begin)
+      vk_common_pop_debug_label(queue->base.device, &queue->labels);
+
+   vk_common_append_debug_label(queue->base.device,
+                                &queue->labels,
+                                pLabelInfo);
+   queue->region_begin = false;
+}
diff --git a/src/vulkan/runtime/vk_debug_utils.h b/src/vulkan/runtime/vk_debug_utils.h
new file mode 100644
index 00000000000..7f27be47461
--- /dev/null
+++ b/src/vulkan/runtime/vk_debug_utils.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2021 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#ifndef VK_DEBUG_UTILS_H +#define VK_DEBUG_UTILS_H + +#include "vk_instance.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_debug_utils_messenger { + struct vk_object_base base; + VkAllocationCallbacks alloc; + + struct list_head link; + + VkDebugUtilsMessageSeverityFlagsEXT severity; + VkDebugUtilsMessageTypeFlagsEXT type; + PFN_vkDebugUtilsMessengerCallbackEXT callback; + void *data; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_debug_utils_messenger, base, + VkDebugUtilsMessengerEXT, + VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT) + +void +vk_debug_message(struct vk_instance *instance, + VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT types, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData); + +void +vk_debug_message_instance(struct vk_instance *instance, + VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT types, + const char *pMessageIdName, + int32_t messageIdNumber, + const char *pMessage); + +void +vk_address_binding_report(struct vk_instance *instance, + struct vk_object_base *object, + uint64_t base_address, + uint64_t size, + VkDeviceAddressBindingTypeEXT type); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DEBUG_UTILS_H */ diff --git a/src/vulkan/runtime/vk_deferred_operation.c b/src/vulkan/runtime/vk_deferred_operation.c new file mode 100644 index 00000000000..a9f6e0d269b --- /dev/null +++ b/src/vulkan/runtime/vk_deferred_operation.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_deferred_operation.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateDeferredOperationKHR(VkDevice _device, + const VkAllocationCallbacks *pAllocator, + VkDeferredOperationKHR *pDeferredOperation) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + struct vk_deferred_operation *op = + vk_alloc2(&device->alloc, pAllocator, sizeof(*op), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (op == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + vk_object_base_init(device, &op->base, + VK_OBJECT_TYPE_DEFERRED_OPERATION_KHR); + + *pDeferredOperation = vk_deferred_operation_to_handle(op); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyDeferredOperationKHR(VkDevice _device, + VkDeferredOperationKHR operation, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_deferred_operation, op, operation); + + if (op == NULL) + return; + + vk_object_base_finish(&op->base); + vk_free2(&device->alloc, pAllocator, op); +} + +VKAPI_ATTR uint32_t VKAPI_CALL +vk_common_GetDeferredOperationMaxConcurrencyKHR(UNUSED VkDevice device, + UNUSED VkDeferredOperationKHR operation) +{ + return 1; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetDeferredOperationResultKHR(UNUSED VkDevice device, + UNUSED VkDeferredOperationKHR operation) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_DeferredOperationJoinKHR(UNUSED VkDevice device, + UNUSED VkDeferredOperationKHR operation) +{ + return VK_SUCCESS; +} diff --git a/src/vulkan/runtime/vk_deferred_operation.h b/src/vulkan/runtime/vk_deferred_operation.h new file mode 100644 index 00000000000..588db8085f2 --- /dev/null +++ b/src/vulkan/runtime/vk_deferred_operation.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_DEFERRED_OPERATION_H +#define VK_DEFERRED_OPERATION_H + +#include "vk_object.h" + +#include "c11/threads.h" +#include "util/list.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_deferred_operation { + struct vk_object_base base; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_deferred_operation, base, + VkDeferredOperationKHR, + VK_OBJECT_TYPE_DEFERRED_OPERATION_KHR) + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DEFERRED_OPERATION_H */ diff --git a/src/vulkan/runtime/vk_descriptor_set_layout.c b/src/vulkan/runtime/vk_descriptor_set_layout.c new file mode 100644 index 00000000000..9e657bec4c5 --- /dev/null +++ b/src/vulkan/runtime/vk_descriptor_set_layout.c @@ -0,0 +1,96 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_descriptor_set_layout.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" + +static void +vk_descriptor_set_layout_init(struct vk_device *device, + struct vk_descriptor_set_layout *layout) +{ + vk_object_base_init(device, &layout->base, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); + + layout->ref_cnt = 1; + layout->destroy = vk_descriptor_set_layout_destroy; +} + +void * +vk_descriptor_set_layout_zalloc(struct vk_device *device, size_t size) +{ + /* Because we're reference counting and lifetimes may not be what the + * client expects, these have to be allocated off the device and not as + * their own object. + */ + struct vk_descriptor_set_layout *layout = + vk_zalloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!layout) + return NULL; + + vk_descriptor_set_layout_init(device, layout); + + return layout; +} + +void * +vk_descriptor_set_layout_multizalloc(struct vk_device *device, + struct vk_multialloc *ma) +{ + /* Because we're reference counting and lifetimes may not be what the + * client expects, these have to be allocated off the device and not as + * their own object. 
+ */ + struct vk_descriptor_set_layout *layout = + vk_multialloc_zalloc(ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!layout) + return NULL; + + vk_descriptor_set_layout_init(device, layout); + + return layout; +} + +void +vk_descriptor_set_layout_destroy(struct vk_device *device, + struct vk_descriptor_set_layout *layout) +{ + vk_object_free(device, NULL, layout); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyDescriptorSetLayout(VkDevice _device, + VkDescriptorSetLayout descriptorSetLayout, + UNUSED const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_descriptor_set_layout, layout, descriptorSetLayout); + + if (layout == NULL) + return; + + vk_descriptor_set_layout_unref(device, layout); +} diff --git a/src/vulkan/runtime/vk_descriptor_set_layout.h b/src/vulkan/runtime/vk_descriptor_set_layout.h new file mode 100644 index 00000000000..b01f30157e4 --- /dev/null +++ b/src/vulkan/runtime/vk_descriptor_set_layout.h @@ -0,0 +1,99 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_DESCRIPTOR_SET_LAYOUT_H +#define VK_DESCRIPTOR_SET_LAYOUT_H + +#include "vk_object.h" + +#include "util/mesa-blake3.h" +#include "util/u_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_descriptor_set_layout { + struct vk_object_base base; + + /* BLAKE3 hash of the descriptor set layout. This is used by the common + * pipeline code to properly cache shaders, including handling pipeline + * layouts. It must be populated by the driver or you risk pipeline cache + * collisions. + */ + blake3_hash blake3; + + void (*destroy)(struct vk_device *device, + struct vk_descriptor_set_layout *layout); + + /** Reference count + * + * It's often necessary to store a pointer to the descriptor set layout in + * the descriptor so that any entrypoint which has access to a descriptor + * set also has the layout. While layouts are often passed into various + * entrypoints, they're notably missing from vkUpdateDescriptorSets(). In + * order to implement descriptor writes, you either need to stash a pointer + * to the descriptor set layout in the descriptor set or you need to copy + * all of the relevant information. Storing a pointer is a lot cheaper. + * + * Because descriptor set layout lifetimes and descriptor set lifetimes are + * not guaranteed to coincide, we have to reference count if we're going to + * do this. 
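 *
 * (Editorial sketch, not part of this patch: the intended pattern is for a
 * driver's descriptor set to take a reference when it is created and drop it
 * when it is destroyed; "set" is a hypothetical driver structure.)
 *
 *    set->layout = vk_descriptor_set_layout_ref(layout);
 *    ...
 *    vk_descriptor_set_layout_unref(device, set->layout);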
+ */ + uint32_t ref_cnt; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_descriptor_set_layout, base, + VkDescriptorSetLayout, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); + +void *vk_descriptor_set_layout_zalloc(struct vk_device *device, size_t size); + +void *vk_descriptor_set_layout_multizalloc(struct vk_device *device, + struct vk_multialloc *ma); + +void vk_descriptor_set_layout_destroy(struct vk_device *device, + struct vk_descriptor_set_layout *layout); + +static inline struct vk_descriptor_set_layout * +vk_descriptor_set_layout_ref(struct vk_descriptor_set_layout *layout) +{ + assert(layout && layout->ref_cnt >= 1); + p_atomic_inc(&layout->ref_cnt); + return layout; +} + +static inline void +vk_descriptor_set_layout_unref(struct vk_device *device, + struct vk_descriptor_set_layout *layout) +{ + assert(layout && layout->ref_cnt >= 1); + if (p_atomic_dec_zero(&layout->ref_cnt)) + layout->destroy(device, layout); +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DESCRIPTOR_SET_LAYOUT_H */ + diff --git a/src/vulkan/runtime/vk_descriptor_update_template.c b/src/vulkan/runtime/vk_descriptor_update_template.c new file mode 100644 index 00000000000..2d4ff52f25e --- /dev/null +++ b/src/vulkan/runtime/vk_descriptor_update_template.c @@ -0,0 +1,97 @@ +/* + * Copyright © 2017 Intel Corporation + * Copyright © 2022 Collabora, Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_descriptor_update_template.h" + +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateDescriptorUpdateTemplate(VkDevice _device, + const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_descriptor_update_template *template; + + uint32_t entry_count = 0; + for (uint32_t i = 0; i < pCreateInfo->descriptorUpdateEntryCount; i++) { + if (pCreateInfo->pDescriptorUpdateEntries[i].descriptorCount > 0) + entry_count++; + } + + size_t size = sizeof(*template) + entry_count * sizeof(template->entries[0]); + template = vk_object_alloc(device, pAllocator, size, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE); + if (template == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + template->type = pCreateInfo->templateType; + template->bind_point = pCreateInfo->pipelineBindPoint; + + if (template->type == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET) + template->set = pCreateInfo->set; + + uint32_t entry_idx = 0; + template->entry_count = entry_count; + for (uint32_t i = 0; i < pCreateInfo->descriptorUpdateEntryCount; i++) { + const VkDescriptorUpdateTemplateEntry *pEntry = + &pCreateInfo->pDescriptorUpdateEntries[i]; + + if (pEntry->descriptorCount == 0) + continue; + + template->entries[entry_idx++] = (struct vk_descriptor_template_entry) { + .type = pEntry->descriptorType, + .binding = pEntry->dstBinding, + .array_element = pEntry->dstArrayElement, + .array_count = pEntry->descriptorCount, + .offset = pEntry->offset, + .stride = pEntry->stride, + }; + } + assert(entry_idx == entry_count); + + *pDescriptorUpdateTemplate = + vk_descriptor_update_template_to_handle(template); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyDescriptorUpdateTemplate(VkDevice _device, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_descriptor_update_template, template, + descriptorUpdateTemplate); + + if (!template) + return; + + vk_object_free(device, pAllocator, template); +} diff --git a/src/vulkan/runtime/vk_descriptor_update_template.h b/src/vulkan/runtime/vk_descriptor_update_template.h new file mode 100644 index 00000000000..ee4ccdf3006 --- /dev/null +++ b/src/vulkan/runtime/vk_descriptor_update_template.h @@ -0,0 +1,90 @@ +/* + * Copyright © 2017 Intel Corporation + * Copyright © 2022 Collabora, Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_DESCRIPTOR_UPDATE_TEMPLATE_H +#define VK_DESCRIPTOR_UPDATE_TEMPLATE_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_descriptor_template_entry { + /** VkDescriptorUpdateTemplateEntry::descriptorType */ + VkDescriptorType type; + + /** VkDescriptorUpdateTemplateEntry::dstBinding */ + uint32_t binding; + + /** VkDescriptorUpdateTemplateEntry::dstArrayElement */ + uint32_t array_element; + + /** VkDescriptorUpdateTemplateEntry::descriptorCount */ + uint32_t array_count; + + /** VkDescriptorUpdateTemplateEntry::offset + * + * Offset into the user provided data */ + size_t offset; + + /** VkDescriptorUpdateTemplateEntry::stride + * + * Stride between elements into the user provided data + */ + size_t stride; +}; + +struct vk_descriptor_update_template { + struct vk_object_base base; + + /** VkDescriptorUpdateTemplateCreateInfo::templateType */ + VkDescriptorUpdateTemplateType type; + + /** VkDescriptorUpdateTemplateCreateInfo::pipelineBindPoint */ + VkPipelineBindPoint bind_point; + + /** VkDescriptorUpdateTemplateCreateInfo::set + * + * The descriptor set this template corresponds to. This value is only + * valid if the template was created with the templateType + * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET. + */ + uint8_t set; + + /** VkDescriptorUpdateTemplateCreateInfo::descriptorUpdateEntryCount */ + uint32_t entry_count; + + /** Entries of the template */ + struct vk_descriptor_template_entry entries[0]; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_descriptor_update_template, base, + VkDescriptorUpdateTemplate, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE) + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DESCRIPTOR_UPDATE_TEMPLATE_H */ diff --git a/src/vulkan/runtime/vk_descriptors.c b/src/vulkan/runtime/vk_descriptors.c new file mode 100644 index 00000000000..ff79db9b390 --- /dev/null +++ b/src/vulkan/runtime/vk_descriptors.c @@ -0,0 +1,103 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <string.h> +#include "vk_descriptors.h" +#include "vk_common_entrypoints.h" +#include "util/macros.h" + +static int +binding_compare(const void* av, const void *bv) +{ + const VkDescriptorSetLayoutBinding *a = (const VkDescriptorSetLayoutBinding*)av; + const VkDescriptorSetLayoutBinding *b = (const VkDescriptorSetLayoutBinding*)bv; + + return (a->binding < b->binding) ? -1 : (a->binding > b->binding) ? 1 : 0; +} + +VkResult +vk_create_sorted_bindings(const VkDescriptorSetLayoutBinding *bindings, unsigned count, + VkDescriptorSetLayoutBinding **sorted_bindings) +{ + if (!count) { + *sorted_bindings = NULL; + return VK_SUCCESS; + } + + *sorted_bindings = malloc(count * sizeof(VkDescriptorSetLayoutBinding)); + if (!*sorted_bindings) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + memcpy(*sorted_bindings, bindings, count * sizeof(VkDescriptorSetLayoutBinding)); + qsort(*sorted_bindings, count, sizeof(VkDescriptorSetLayoutBinding), binding_compare); + + return VK_SUCCESS; +} + +/* + * For drivers that don't have mutable state in buffers, images, image views, or + * samplers, there's no need to save/restore anything to get the same + * descriptor back as long as the user uses the same GPU virtual address. In + * this case, the following EXT_descriptor_buffer functions are trivial. + */ +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetBufferOpaqueCaptureDescriptorDataEXT(VkDevice device, + const VkBufferCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetImageOpaqueCaptureDescriptorDataEXT(VkDevice device, + const VkImageCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetImageViewOpaqueCaptureDescriptorDataEXT(VkDevice device, + const VkImageViewCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetSamplerOpaqueCaptureDescriptorDataEXT(VkDevice _device, + const VkSamplerCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetAccelerationStructureOpaqueCaptureDescriptorDataEXT(VkDevice device, + const VkAccelerationStructureCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} diff --git a/src/vulkan/runtime/vk_descriptors.h b/src/vulkan/runtime/vk_descriptors.h new file mode 100644 index 00000000000..220787ece37 --- /dev/null +++ b/src/vulkan/runtime/vk_descriptors.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VK_DESCRIPTORS_H +#define VK_DESCRIPTORS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <vulkan/vulkan_core.h> + +VkResult +vk_create_sorted_bindings(const VkDescriptorSetLayoutBinding *bindings, unsigned count, + VkDescriptorSetLayoutBinding **sorted_bindings); + +#ifdef __cplusplus +} +#endif + + + +#endif diff --git a/src/vulkan/runtime/vk_device.c b/src/vulkan/runtime/vk_device.c new file mode 100644 index 00000000000..31cb331e9ab --- /dev/null +++ b/src/vulkan/runtime/vk_device.c @@ -0,0 +1,812 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_device.h" + +#include "vk_common_entrypoints.h" +#include "vk_instance.h" +#include "vk_log.h" +#include "vk_physical_device.h" +#include "vk_queue.h" +#include "vk_sync.h" +#include "vk_sync_timeline.h" +#include "vk_util.h" +#include "util/u_debug.h" +#include "util/hash_table.h" +#include "util/perf/cpu_trace.h" +#include "util/ralloc.h" + +static enum vk_device_timeline_mode +get_timeline_mode(struct vk_physical_device *physical_device) +{ + if (physical_device->supported_sync_types == NULL) + return VK_DEVICE_TIMELINE_MODE_NONE; + + const struct vk_sync_type *timeline_type = NULL; + for (const struct vk_sync_type *const *t = + physical_device->supported_sync_types; *t; t++) { + if ((*t)->features & VK_SYNC_FEATURE_TIMELINE) { + /* We can only have one timeline mode */ + assert(timeline_type == NULL); + timeline_type = *t; + } + } + + if (timeline_type == NULL) + return VK_DEVICE_TIMELINE_MODE_NONE; + + if (vk_sync_type_is_vk_sync_timeline(timeline_type)) + return VK_DEVICE_TIMELINE_MODE_EMULATED; + + if (timeline_type->features & VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL) + return VK_DEVICE_TIMELINE_MODE_NATIVE; + + /* For assisted mode, we require a few additional things of all sync types + * which may be used as semaphores. 
+ */ + for (const struct vk_sync_type *const *t = + physical_device->supported_sync_types; *t; t++) { + if ((*t)->features & VK_SYNC_FEATURE_GPU_WAIT) { + assert((*t)->features & VK_SYNC_FEATURE_WAIT_PENDING); + if ((*t)->features & VK_SYNC_FEATURE_BINARY) + assert((*t)->features & VK_SYNC_FEATURE_CPU_RESET); + } + } + + return VK_DEVICE_TIMELINE_MODE_ASSISTED; +} + +static void +collect_enabled_features(struct vk_device *device, + const VkDeviceCreateInfo *pCreateInfo) +{ + if (pCreateInfo->pEnabledFeatures) + vk_set_physical_device_features_1_0(&device->enabled_features, pCreateInfo->pEnabledFeatures); + vk_set_physical_device_features(&device->enabled_features, pCreateInfo->pNext); +} + +VkResult +vk_device_init(struct vk_device *device, + struct vk_physical_device *physical_device, + const struct vk_device_dispatch_table *dispatch_table, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc) +{ + memset(device, 0, sizeof(*device)); + vk_object_base_init(device, &device->base, VK_OBJECT_TYPE_DEVICE); + if (alloc != NULL) + device->alloc = *alloc; + else + device->alloc = physical_device->instance->alloc; + + device->physical = physical_device; + + if (dispatch_table) { + device->dispatch_table = *dispatch_table; + + /* Add common entrypoints without overwriting driver-provided ones. */ + vk_device_dispatch_table_from_entrypoints( + &device->dispatch_table, &vk_common_device_entrypoints, false); + } + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + int idx; + for (idx = 0; idx < VK_DEVICE_EXTENSION_COUNT; idx++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + vk_device_extensions[idx].extensionName) == 0) + break; + } + + if (idx >= VK_DEVICE_EXTENSION_COUNT) + return vk_errorf(physical_device, VK_ERROR_EXTENSION_NOT_PRESENT, + "%s not supported", + pCreateInfo->ppEnabledExtensionNames[i]); + + if (!physical_device->supported_extensions.extensions[idx]) + return vk_errorf(physical_device, VK_ERROR_EXTENSION_NOT_PRESENT, + "%s not supported", + pCreateInfo->ppEnabledExtensionNames[i]); + +#ifdef ANDROID_STRICT + if (!vk_android_allowed_device_extensions.extensions[idx]) + return vk_errorf(physical_device, VK_ERROR_EXTENSION_NOT_PRESENT, + "%s not supported", + pCreateInfo->ppEnabledExtensionNames[i]); +#endif + + device->enabled_extensions.extensions[idx] = true; + } + + VkResult result = + vk_physical_device_check_device_features(physical_device, + pCreateInfo); + if (result != VK_SUCCESS) + return result; + + collect_enabled_features(device, pCreateInfo); + + p_atomic_set(&device->private_data_next_index, 0); + + list_inithead(&device->queues); + + device->drm_fd = -1; + device->mem_cache = NULL; + + device->timeline_mode = get_timeline_mode(physical_device); + + switch (device->timeline_mode) { + case VK_DEVICE_TIMELINE_MODE_NONE: + case VK_DEVICE_TIMELINE_MODE_NATIVE: + device->submit_mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE; + break; + + case VK_DEVICE_TIMELINE_MODE_EMULATED: + device->submit_mode = VK_QUEUE_SUBMIT_MODE_DEFERRED; + break; + + case VK_DEVICE_TIMELINE_MODE_ASSISTED: + if (debug_get_bool_option("MESA_VK_ENABLE_SUBMIT_THREAD", false)) { + device->submit_mode = VK_QUEUE_SUBMIT_MODE_THREADED; + } else { + device->submit_mode = VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND; + } + break; + + default: + unreachable("Invalid timeline mode"); + } + +#if DETECT_OS_ANDROID + mtx_init(&device->swapchain_private_mtx, mtx_plain); + device->swapchain_private = NULL; +#endif /* DETECT_OS_ANDROID */ + + 
simple_mtx_init(&device->trace_mtx, mtx_plain); + + return VK_SUCCESS; +} + +void +vk_device_finish(struct vk_device *device) +{ + /* Drivers should tear down their own queues */ + assert(list_is_empty(&device->queues)); + + vk_memory_trace_finish(device); + +#if DETECT_OS_ANDROID + if (device->swapchain_private) { + hash_table_foreach(device->swapchain_private, entry) + util_sparse_array_finish(entry->data); + ralloc_free(device->swapchain_private); + } +#endif /* DETECT_OS_ANDROID */ + + simple_mtx_destroy(&device->trace_mtx); + + vk_object_base_finish(&device->base); +} + +void +vk_device_enable_threaded_submit(struct vk_device *device) +{ + /* This must be called before any queues are created */ + assert(list_is_empty(&device->queues)); + + /* In order to use threaded submit, we need every sync type that can be + * used as a wait fence for vkQueueSubmit() to support WAIT_PENDING. + * It's required for cross-thread/process submit re-ordering. + */ + for (const struct vk_sync_type *const *t = + device->physical->supported_sync_types; *t; t++) { + if ((*t)->features & VK_SYNC_FEATURE_GPU_WAIT) + assert((*t)->features & VK_SYNC_FEATURE_WAIT_PENDING); + } + + /* Any binary vk_sync types which will be used as permanent semaphore + * payloads also need to support vk_sync_type::move, but that's a lot + * harder to assert since it only applies to permanent semaphore payloads. + */ + + if (device->submit_mode != VK_QUEUE_SUBMIT_MODE_THREADED) + device->submit_mode = VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND; +} + +VkResult +vk_device_flush(struct vk_device *device) +{ + if (device->submit_mode != VK_QUEUE_SUBMIT_MODE_DEFERRED) + return VK_SUCCESS; + + bool progress; + do { + progress = false; + + vk_foreach_queue(queue, device) { + uint32_t queue_submit_count; + VkResult result = vk_queue_flush(queue, &queue_submit_count); + if (unlikely(result != VK_SUCCESS)) + return result; + + if (queue_submit_count) + progress = true; + } + } while (progress); + + return VK_SUCCESS; +} + +static const char * +timeline_mode_str(struct vk_device *device) +{ + switch (device->timeline_mode) { +#define CASE(X) case VK_DEVICE_TIMELINE_MODE_##X: return #X; + CASE(NONE) + CASE(EMULATED) + CASE(ASSISTED) + CASE(NATIVE) +#undef CASE + default: return "UNKNOWN"; + } +} + +void +_vk_device_report_lost(struct vk_device *device) +{ + assert(p_atomic_read(&device->_lost.lost) > 0); + + device->_lost.reported = true; + + vk_foreach_queue(queue, device) { + if (queue->_lost.lost) { + __vk_errorf(queue, VK_ERROR_DEVICE_LOST, + queue->_lost.error_file, queue->_lost.error_line, + "%s", queue->_lost.error_msg); + } + } + + vk_logd(VK_LOG_OBJS(device), "Timeline mode is %s.", + timeline_mode_str(device)); +} + +VkResult +_vk_device_set_lost(struct vk_device *device, + const char *file, int line, + const char *msg, ...) 
+{ + /* This flushes out any per-queue device lost messages */ + if (vk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + p_atomic_inc(&device->_lost.lost); + device->_lost.reported = true; + + va_list ap; + va_start(ap, msg); + __vk_errorv(device, VK_ERROR_DEVICE_LOST, file, line, msg, ap); + va_end(ap); + + vk_logd(VK_LOG_OBJS(device), "Timeline mode is %s.", + timeline_mode_str(device)); + + if (debug_get_bool_option("MESA_VK_ABORT_ON_DEVICE_LOSS", false)) + abort(); + + return VK_ERROR_DEVICE_LOST; +} + +PFN_vkVoidFunction +vk_device_get_proc_addr(const struct vk_device *device, + const char *name) +{ + if (device == NULL || name == NULL) + return NULL; + + struct vk_instance *instance = device->physical->instance; + return vk_device_dispatch_table_get_if_supported(&device->dispatch_table, + name, + instance->app_info.api_version, + &instance->enabled_extensions, + &device->enabled_extensions); +} + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_common_GetDeviceProcAddr(VkDevice _device, + const char *pName) +{ + VK_FROM_HANDLE(vk_device, device, _device); + return vk_device_get_proc_addr(device, pName); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetDeviceQueue(VkDevice _device, + uint32_t queueFamilyIndex, + uint32_t queueIndex, + VkQueue *pQueue) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + const VkDeviceQueueInfo2 info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2, + .pNext = NULL, + /* flags = 0 because (Vulkan spec 1.2.170 - vkGetDeviceQueue): + * + * "vkGetDeviceQueue must only be used to get queues that were + * created with the flags parameter of VkDeviceQueueCreateInfo set + * to zero. To get queues that were created with a non-zero flags + * parameter use vkGetDeviceQueue2." + */ + .flags = 0, + .queueFamilyIndex = queueFamilyIndex, + .queueIndex = queueIndex, + }; + + device->dispatch_table.GetDeviceQueue2(_device, &info, pQueue); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetDeviceQueue2(VkDevice _device, + const VkDeviceQueueInfo2 *pQueueInfo, + VkQueue *pQueue) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + struct vk_queue *queue = NULL; + vk_foreach_queue(iter, device) { + if (iter->queue_family_index == pQueueInfo->queueFamilyIndex && + iter->index_in_family == pQueueInfo->queueIndex) { + queue = iter; + break; + } + } + + /* From the Vulkan 1.1.70 spec: + * + * "The queue returned by vkGetDeviceQueue2 must have the same flags + * value from this structure as that used at device creation time in a + * VkDeviceQueueCreateInfo instance. If no matching flags were specified + * at device creation time then pQueue will return VK_NULL_HANDLE." 
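As a concrete illustration of that rule (application-side, not part of this file): a queue created with VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT is only returned when the same flags are passed here, so vkGetDeviceQueue(), which always passes flags = 0, yields VK_NULL_HANDLE for it.

/* Hedged sketch; family_index is assumed to name a queue family whose
 * queue was created with the protected bit set.
 */
const VkDeviceQueueInfo2 queue_info = {
   .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
   .flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT,
   .queueFamilyIndex = family_index,
   .queueIndex = 0,
};

VkQueue protected_queue;
vkGetDeviceQueue2(device, &queue_info, &protected_queue);
/* With .flags = 0 the same lookup would return VK_NULL_HANDLE. */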
+ */ + if (queue && queue->flags == pQueueInfo->flags) + *pQueue = vk_queue_to_handle(queue); + else + *pQueue = VK_NULL_HANDLE; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_MapMemory(VkDevice _device, + VkDeviceMemory memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void **ppData) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + const VkMemoryMapInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_MAP_INFO_KHR, + .flags = flags, + .memory = memory, + .offset = offset, + .size = size, + }; + + return device->dispatch_table.MapMemory2KHR(_device, &info, ppData); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_UnmapMemory(VkDevice _device, + VkDeviceMemory memory) +{ + VK_FROM_HANDLE(vk_device, device, _device); + ASSERTED VkResult result; + + const VkMemoryUnmapInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR, + .memory = memory, + }; + + result = device->dispatch_table.UnmapMemory2KHR(_device, &info); + assert(result == VK_SUCCESS); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetDeviceGroupPeerMemoryFeatures( + VkDevice device, + uint32_t heapIndex, + uint32_t localDeviceIndex, + uint32_t remoteDeviceIndex, + VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) +{ + assert(localDeviceIndex == 0 && remoteDeviceIndex == 0); + *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | + VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetImageMemoryRequirements(VkDevice _device, + VkImage image, + VkMemoryRequirements *pMemoryRequirements) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + VkImageMemoryRequirementsInfo2 info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, + .image = image, + }; + VkMemoryRequirements2 reqs = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + }; + device->dispatch_table.GetImageMemoryRequirements2(_device, &info, &reqs); + + *pMemoryRequirements = reqs.memoryRequirements; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_BindImageMemory(VkDevice _device, + VkImage image, + VkDeviceMemory memory, + VkDeviceSize memoryOffset) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + VkBindImageMemoryInfo bind = { + .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, + .image = image, + .memory = memory, + .memoryOffset = memoryOffset, + }; + + return device->dispatch_table.BindImageMemory2(_device, 1, &bind); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetImageSparseMemoryRequirements(VkDevice _device, + VkImage image, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements *pSparseMemoryRequirements) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + VkImageSparseMemoryRequirementsInfo2 info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2, + .image = image, + }; + + if (!pSparseMemoryRequirements) { + device->dispatch_table.GetImageSparseMemoryRequirements2(_device, + &info, + pSparseMemoryRequirementCount, + NULL); + return; + } + + STACK_ARRAY(VkSparseImageMemoryRequirements2, mem_reqs2, *pSparseMemoryRequirementCount); + + for (unsigned i = 0; i < *pSparseMemoryRequirementCount; ++i) { + mem_reqs2[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2; + mem_reqs2[i].pNext = NULL; + } + + device->dispatch_table.GetImageSparseMemoryRequirements2(_device, + &info, + pSparseMemoryRequirementCount, + mem_reqs2); + + for (unsigned i = 0; i < *pSparseMemoryRequirementCount; ++i) + pSparseMemoryRequirements[i] = 
mem_reqs2[i].memoryRequirements;
+
+   STACK_ARRAY_FINISH(mem_reqs2);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+vk_common_DeviceWaitIdle(VkDevice _device)
+{
+   MESA_TRACE_FUNC();
+
+   VK_FROM_HANDLE(vk_device, device, _device);
+   const struct vk_device_dispatch_table *disp = &device->dispatch_table;
+
+   vk_foreach_queue(queue, device) {
+      VkResult result = disp->QueueWaitIdle(vk_queue_to_handle(queue));
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+#ifndef _WIN32
+
+uint64_t
+vk_clock_gettime(clockid_t clock_id)
+{
+   struct timespec current;
+   int ret;
+
+   ret = clock_gettime(clock_id, &current);
+#ifdef CLOCK_MONOTONIC_RAW
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+      ret = clock_gettime(CLOCK_MONOTONIC, &current);
+#endif
+   if (ret < 0)
+      return 0;
+
+   return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+#endif //!_WIN32
+
+#define CORE_RENAMED_PROPERTY(ext_property, core_property) \
+   memcpy(&properties->ext_property, &core->core_property, sizeof(core->core_property))
+
+#define CORE_PROPERTY(property) CORE_RENAMED_PROPERTY(property, property)
+
+bool
+vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext,
+                                             const VkPhysicalDeviceVulkan11Properties *core)
+{
+   switch (ext->sType) {
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
+      VkPhysicalDeviceIDProperties *properties = (void *)ext;
+      CORE_PROPERTY(deviceUUID);
+      CORE_PROPERTY(driverUUID);
+      CORE_PROPERTY(deviceLUID);
+      CORE_PROPERTY(deviceNodeMask);
+      CORE_PROPERTY(deviceLUIDValid);
+      return true;
+   }
+
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
+      VkPhysicalDeviceMaintenance3Properties *properties = (void *)ext;
+      CORE_PROPERTY(maxPerSetDescriptors);
+      CORE_PROPERTY(maxMemoryAllocationSize);
+      return true;
+   }
+
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
+      VkPhysicalDeviceMultiviewProperties *properties = (void *)ext;
+      CORE_PROPERTY(maxMultiviewViewCount);
+      CORE_PROPERTY(maxMultiviewInstanceIndex);
+      return true;
+   }
+
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
+      VkPhysicalDevicePointClippingProperties *properties = (void *) ext;
+      CORE_PROPERTY(pointClippingBehavior);
+      return true;
+   }
+
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+      VkPhysicalDeviceProtectedMemoryProperties *properties = (void *)ext;
+      CORE_PROPERTY(protectedNoFault);
+      return true;
+   }
+
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+      VkPhysicalDeviceSubgroupProperties *properties = (void *)ext;
+      CORE_PROPERTY(subgroupSize);
+      CORE_RENAMED_PROPERTY(supportedStages,
+                            subgroupSupportedStages);
+      CORE_RENAMED_PROPERTY(supportedOperations,
+                            subgroupSupportedOperations);
+      CORE_RENAMED_PROPERTY(quadOperationsInAllStages,
+                            subgroupQuadOperationsInAllStages);
+      return true;
+   }
+
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
+      vk_copy_struct_guts(ext, (void *)core, sizeof(*core));
+      return true;
+
+   default:
+      return false;
+   }
+}
+
+bool
+vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext,
+                                             const VkPhysicalDeviceVulkan12Properties *core)
+{
+   switch (ext->sType) {
+   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
+      VkPhysicalDeviceDepthStencilResolveProperties *properties = (void *)ext;
+      CORE_PROPERTY(supportedDepthResolveModes);
+      CORE_PROPERTY(supportedStencilResolveModes);
+      CORE_PROPERTY(independentResolveNone);
+      CORE_PROPERTY(independentResolve);
+      return true;
+   }
+
+   case
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: { + VkPhysicalDeviceDescriptorIndexingProperties *properties = (void *)ext; + CORE_PROPERTY(maxUpdateAfterBindDescriptorsInAllPools); + CORE_PROPERTY(shaderUniformBufferArrayNonUniformIndexingNative); + CORE_PROPERTY(shaderSampledImageArrayNonUniformIndexingNative); + CORE_PROPERTY(shaderStorageBufferArrayNonUniformIndexingNative); + CORE_PROPERTY(shaderStorageImageArrayNonUniformIndexingNative); + CORE_PROPERTY(shaderInputAttachmentArrayNonUniformIndexingNative); + CORE_PROPERTY(robustBufferAccessUpdateAfterBind); + CORE_PROPERTY(quadDivergentImplicitLod); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindSamplers); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindUniformBuffers); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindStorageBuffers); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindSampledImages); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindStorageImages); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindInputAttachments); + CORE_PROPERTY(maxPerStageUpdateAfterBindResources); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindSamplers); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindUniformBuffers); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindUniformBuffersDynamic); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindStorageBuffers); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindStorageBuffersDynamic); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindSampledImages); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindStorageImages); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindInputAttachments); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: { + VkPhysicalDeviceDriverProperties *properties = (void *) ext; + CORE_PROPERTY(driverID); + CORE_PROPERTY(driverName); + CORE_PROPERTY(driverInfo); + CORE_PROPERTY(conformanceVersion); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: { + VkPhysicalDeviceSamplerFilterMinmaxProperties *properties = (void *)ext; + CORE_PROPERTY(filterMinmaxImageComponentMapping); + CORE_PROPERTY(filterMinmaxSingleComponentFormats); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES : { + VkPhysicalDeviceFloatControlsProperties *properties = (void *)ext; + CORE_PROPERTY(denormBehaviorIndependence); + CORE_PROPERTY(roundingModeIndependence); + CORE_PROPERTY(shaderDenormFlushToZeroFloat16); + CORE_PROPERTY(shaderDenormPreserveFloat16); + CORE_PROPERTY(shaderRoundingModeRTEFloat16); + CORE_PROPERTY(shaderRoundingModeRTZFloat16); + CORE_PROPERTY(shaderSignedZeroInfNanPreserveFloat16); + CORE_PROPERTY(shaderDenormFlushToZeroFloat32); + CORE_PROPERTY(shaderDenormPreserveFloat32); + CORE_PROPERTY(shaderRoundingModeRTEFloat32); + CORE_PROPERTY(shaderRoundingModeRTZFloat32); + CORE_PROPERTY(shaderSignedZeroInfNanPreserveFloat32); + CORE_PROPERTY(shaderDenormFlushToZeroFloat64); + CORE_PROPERTY(shaderDenormPreserveFloat64); + CORE_PROPERTY(shaderRoundingModeRTEFloat64); + CORE_PROPERTY(shaderRoundingModeRTZFloat64); + CORE_PROPERTY(shaderSignedZeroInfNanPreserveFloat64); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: { + VkPhysicalDeviceTimelineSemaphoreProperties *properties = (void *) ext; + CORE_PROPERTY(maxTimelineSemaphoreValueDifference); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES: + vk_copy_struct_guts(ext, (void *)core, sizeof(*core)); + return true; + + default: + return false; + } 
+} + +bool +vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext, + const VkPhysicalDeviceVulkan13Properties *core) +{ + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES: { + VkPhysicalDeviceInlineUniformBlockProperties *properties = (void *)ext; + CORE_PROPERTY(maxInlineUniformBlockSize); + CORE_PROPERTY(maxPerStageDescriptorInlineUniformBlocks); + CORE_PROPERTY(maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks); + CORE_PROPERTY(maxDescriptorSetInlineUniformBlocks); + CORE_PROPERTY(maxDescriptorSetUpdateAfterBindInlineUniformBlocks); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: { + VkPhysicalDeviceMaintenance4Properties *properties = (void *)ext; + CORE_PROPERTY(maxBufferSize); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES: { + VkPhysicalDeviceShaderIntegerDotProductProperties *properties = (void *)ext; + +#define IDP_PROPERTY(x) CORE_PROPERTY(integerDotProduct##x) + IDP_PROPERTY(8BitUnsignedAccelerated); + IDP_PROPERTY(8BitSignedAccelerated); + IDP_PROPERTY(8BitMixedSignednessAccelerated); + IDP_PROPERTY(4x8BitPackedUnsignedAccelerated); + IDP_PROPERTY(4x8BitPackedSignedAccelerated); + IDP_PROPERTY(4x8BitPackedMixedSignednessAccelerated); + IDP_PROPERTY(16BitUnsignedAccelerated); + IDP_PROPERTY(16BitSignedAccelerated); + IDP_PROPERTY(16BitMixedSignednessAccelerated); + IDP_PROPERTY(32BitUnsignedAccelerated); + IDP_PROPERTY(32BitSignedAccelerated); + IDP_PROPERTY(32BitMixedSignednessAccelerated); + IDP_PROPERTY(64BitUnsignedAccelerated); + IDP_PROPERTY(64BitSignedAccelerated); + IDP_PROPERTY(64BitMixedSignednessAccelerated); + IDP_PROPERTY(AccumulatingSaturating8BitUnsignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating8BitSignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating8BitMixedSignednessAccelerated); + IDP_PROPERTY(AccumulatingSaturating4x8BitPackedUnsignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating4x8BitPackedSignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating4x8BitPackedMixedSignednessAccelerated); + IDP_PROPERTY(AccumulatingSaturating16BitUnsignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating16BitSignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating16BitMixedSignednessAccelerated); + IDP_PROPERTY(AccumulatingSaturating32BitUnsignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating32BitSignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating32BitMixedSignednessAccelerated); + IDP_PROPERTY(AccumulatingSaturating64BitUnsignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating64BitSignedAccelerated); + IDP_PROPERTY(AccumulatingSaturating64BitMixedSignednessAccelerated); +#undef IDP_PROPERTY + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES: { + VkPhysicalDeviceSubgroupSizeControlProperties *properties = (void *)ext; + CORE_PROPERTY(minSubgroupSize); + CORE_PROPERTY(maxSubgroupSize); + CORE_PROPERTY(maxComputeWorkgroupSubgroups); + CORE_PROPERTY(requiredSubgroupSizeStages); + return true; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES: { + VkPhysicalDeviceTexelBufferAlignmentProperties *properties = (void *)ext; + CORE_PROPERTY(storageTexelBufferOffsetAlignmentBytes); + CORE_PROPERTY(storageTexelBufferOffsetSingleTexelAlignment); + CORE_PROPERTY(uniformTexelBufferOffsetAlignmentBytes); + CORE_PROPERTY(uniformTexelBufferOffsetSingleTexelAlignment); + return true; + } + + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES: + vk_copy_struct_guts(ext, (void *)core, sizeof(*core)); + return true; + + default: + return false; + } +} + +#undef CORE_RENAMED_PROPERTY +#undef CORE_PROPERTY + diff --git a/src/vulkan/runtime/vk_device.h b/src/vulkan/runtime/vk_device.h new file mode 100644 index 00000000000..37e56771062 --- /dev/null +++ b/src/vulkan/runtime/vk_device.h @@ -0,0 +1,442 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_DEVICE_H +#define VK_DEVICE_H + +#include "rmv/vk_rmv_common.h" +#include "vk_dispatch_table.h" +#include "vk_extensions.h" +#include "vk_object.h" +#include "vk_physical_device_features.h" + +#include "util/list.h" +#include "util/simple_mtx.h" +#include "util/u_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_command_buffer_ops; +struct vk_device_shader_ops; +struct vk_sync; + +enum vk_queue_submit_mode { + /** Submits happen immediately + * + * `vkQueueSubmit()` and `vkQueueBindSparse()` call + * ``vk_queue::driver_submit`` directly for all submits and the last call to + * ``vk_queue::driver_submit`` will have completed by the time + * `vkQueueSubmit()` or `vkQueueBindSparse()` return. + */ + VK_QUEUE_SUBMIT_MODE_IMMEDIATE, + + /** Submits may be deferred until a future `vk_queue_flush()` + * + * Submits are added to the queue and `vk_queue_flush()` is called. + * However, any submits with unsatisfied dependencies will be left on the + * queue until a future `vk_queue_flush()` call. This is used for + * implementing emulated timeline semaphores without threading. + */ + VK_QUEUE_SUBMIT_MODE_DEFERRED, + + /** Submits will be added to the queue and handled later by a thread + * + * This places additional requirements on the vk_sync types used by the + * driver: + * + * 1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` also + * support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads can + * sort out when a given submit has all its dependencies resolved. + * + * 2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` + * also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset + * semaphores after waiting on them. + * + * 3. All vk_sync types used as permanent payloads of semaphores support + * ``vk_sync_type::move`` so that it can move the pending signal into a + * temporary vk_sync and reset the semaphore. 
+    *
+    * This is required for shared timeline semaphores where we need to handle
+    * wait-before-signal by threading in the driver if we ever see an
+    * unresolved dependency.
+    */
+   VK_QUEUE_SUBMIT_MODE_THREADED,
+
+   /** Threaded but only if we need it to resolve dependencies
+    *
+    * This imposes all the same requirements on `vk_sync` types as
+    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
+    */
+   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
+};
+
+/** Base struct for VkDevice */
+struct vk_device {
+   struct vk_object_base base;
+
+   /** Allocator used to create this device
+    *
+    * This is used as a fall-back for when a NULL pAllocator is passed into a
+    * device-level create function such as vkCreateImage().
+    */
+   VkAllocationCallbacks alloc;
+
+   /** Pointer to the physical device */
+   struct vk_physical_device *physical;
+
+   /** Table of enabled extensions */
+   struct vk_device_extension_table enabled_extensions;
+
+   /** Table of enabled features */
+   struct vk_features enabled_features;
+
+   /** Device-level dispatch table */
+   struct vk_device_dispatch_table dispatch_table;
+
+   /** Command dispatch table
+    *
+    * This is used for emulated secondary command buffer support. To use
+    * emulated (trace/replay) secondary command buffers:
+    *
+    * 1. Provide your "real" command buffer dispatch table here. Because
+    *    this doesn't get populated by vk_device_init(), the driver will have
+    *    to add the vk_common entrypoints to this table itself.
+    *
+    * 2. Add vk_enqueue_unless_primary_device_entrypoint_table to your device
+    *    level dispatch table.
+    */
+   const struct vk_device_dispatch_table *command_dispatch_table;
+
+   /** Command buffer vtable when using the common command pool */
+   const struct vk_command_buffer_ops *command_buffer_ops;
+
+   /** Shader vtable for VK_EXT_shader_object and common pipelines */
+   const struct vk_device_shader_ops *shader_ops;
+
+   /** Driver provided callback for capturing traces
+    *
+    * Triggers for this callback are:
+    *  - Keyboard input (F12)
+    *  - Creation of a trigger file
+    *  - Reaching the trace frame
+    */
+   VkResult (*capture_trace)(VkQueue queue);
+
+   uint32_t current_frame;
+   bool trace_hotkey_trigger;
+   simple_mtx_t trace_mtx;
+
+   /* For VK_EXT_private_data */
+   uint32_t private_data_next_index;
+
+   struct list_head queues;
+
+   struct {
+      int lost;
+      bool reported;
+   } _lost;
+
+   /** Checks the status of this device
+    *
+    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
+    * It is called before ``vk_queue::driver_submit`` and after every non-trivial
+    * wait operation to ensure the device is still around. This gives the
+    * driver a hook to ask the kernel if its device is still valid. If the
+    * kernel says the device has been lost, it MUST call vk_device_set_lost().
+    *
+    * This function may be called from any thread at any time.
+    */
+   VkResult (*check_status)(struct vk_device *device);
+
+   /** Creates a vk_sync that wraps a memory object
+    *
+    * This is always a one-shot object so it need not track any additional
+    * state. Since it's intended for synchronizing between processes using
+    * implicit synchronization mechanisms, no such tracking would be valid
+    * anyway.
+    *
+    * If `signal_memory` is set, the resulting vk_sync will be used to signal
+    * the memory object from a queue via ``vk_queue_submit::signals``. The
+    * common code guarantees that, by the time vkQueueSubmit() returns, the
+    * signal operation has been submitted to the kernel via the driver's
+    * ``vk_queue::driver_submit`` hook.
+    * This means that any vkQueueSubmit() call which needs implicit
+    * synchronization may block.
+    *
+    * If `signal_memory` is not set, it can be assumed that the memory object
+    * already has a signal operation pending from some other process and we
+    * need only wait on it.
+    */
+   VkResult (*create_sync_for_memory)(struct vk_device *device,
+                                      VkDeviceMemory memory,
+                                      bool signal_memory,
+                                      struct vk_sync **sync_out);
+
+   /* Set by vk_device_set_drm_fd() */
+   int drm_fd;
+
+   /** Implicit pipeline cache, or NULL */
+   struct vk_pipeline_cache *mem_cache;
+
+   /** An enum describing how timeline semaphores work */
+   enum vk_device_timeline_mode {
+      /** Timeline semaphores are not supported */
+      VK_DEVICE_TIMELINE_MODE_NONE,
+
+      /** Timeline semaphores are emulated with vk_timeline
+       *
+       * In this mode, timeline semaphores are emulated using vk_timeline
+       * which is a collection of binary semaphores, one per time point.
+       * These timeline semaphores cannot be shared because the data structure
+       * exists entirely in userspace. These timelines are virtually
+       * invisible to the driver; all it sees are the binary vk_syncs, one per
+       * time point.
+       *
+       * To handle wait-before-signal, we place all vk_queue_submits in the
+       * queue's submit list in vkQueueSubmit() and call vk_device_flush() at
+       * key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
+       * This ensures that, as soon as a given submit's dependencies are fully
+       * resolvable, it gets submitted to the driver.
+       */
+      VK_DEVICE_TIMELINE_MODE_EMULATED,
+
+      /** Timeline semaphores are a kernel-assisted emulation
+       *
+       * In this mode, timeline semaphores are still technically an emulation
+       * in the sense that they don't support wait-before-signal natively.
+       * Instead, all GPU-waitable objects support a CPU wait-for-pending
+       * operation which lets the userspace driver wait until a given event
+       * on the (possibly shared) vk_sync is pending. The event is "pending"
+       * if a job has been submitted to the kernel (possibly from a different
+       * process) which will signal it. In vkQueueSubmit(), we use this wait
+       * mode to detect waits which are not yet pending and, the first time we
+       * do, spawn a thread to manage the queue. That thread waits for each
+       * submit's waits to all be pending before submitting to the driver
+       * queue.
+       *
+       * We have to be a bit more careful about a few things in this mode.
+       * In particular, we can never assume that any given wait operation is
+       * pending. For instance, when we go to export a sync file from a
+       * binary semaphore, we need to first wait for it to be pending. The
+       * spec guarantees that the vast majority of these waits return almost
+       * immediately, but we do need to insert them for correctness.
+       */
+      VK_DEVICE_TIMELINE_MODE_ASSISTED,
+
+      /** Timeline semaphores are 100% native
+       *
+       * In this mode, wait-before-signal is natively supported by the
+       * underlying timeline implementation. We can submit-and-forget and
+       * assume that dependencies will get resolved for us by the kernel.
+       * Currently, this isn't supported by any Linux primitives.
+       */
+      VK_DEVICE_TIMELINE_MODE_NATIVE,
+   } timeline_mode;
+
+   /** Per-device submit mode
+    *
+    * This represents the device-wide submit strategy which may be different
+    * from the per-queue submit mode. See vk_queue.submit.mode for more
+    * details.
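As a hedged sketch of how a driver interacts with this (the names dev, pdev and needs_submit_thread are hypothetical driver code): vk_device_init() picks the device-wide mode from timeline_mode, and a driver that needs a submit thread for other reasons can upgrade it before creating any queues:

VkResult result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table,
                                 pCreateInfo, pAllocator);
if (result != VK_SUCCESS)
   return result;

/* Must be called before the first queue is created; it also asserts the
 * WAIT_PENDING requirement on every GPU-waitable sync type.
 */
if (pdev->needs_submit_thread)
   vk_device_enable_threaded_submit(&dev->vk);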
+ */ + enum vk_queue_submit_mode submit_mode; + + struct vk_memory_trace_data memory_trace_data; + + mtx_t swapchain_private_mtx; + struct hash_table *swapchain_private; + mtx_t swapchain_name_mtx; + struct hash_table *swapchain_name; +}; + +VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice, + VK_OBJECT_TYPE_DEVICE); + +/** Initialize a vk_device + * + * Along with initializing the data structures in `vk_device`, this function + * checks that every extension specified by + * ``VkInstanceCreateInfo::ppEnabledExtensionNames`` is actually supported by + * the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an + * unsupported extension is requested. It also checks all the feature struct + * chained into the `pCreateInfo->pNext` chain against the features returned + * by `vkGetPhysicalDeviceFeatures2` and returns + * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested. + * + * :param device: |out| The device to initialize + * :param physical_device: |in| The physical device + * :param dispatch_table: |in| Device-level dispatch table + * :param pCreateInfo: |in| VkDeviceCreateInfo pointer passed to + * `vkCreateDevice()` + * :param alloc: |in| Allocation callbacks passed to + * `vkCreateDevice()` + */ +VkResult MUST_CHECK +vk_device_init(struct vk_device *device, + struct vk_physical_device *physical_device, + const struct vk_device_dispatch_table *dispatch_table, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc); + +static inline void +vk_device_set_drm_fd(struct vk_device *device, int drm_fd) +{ + device->drm_fd = drm_fd; +} + +/** Tears down a vk_device + * + * :param device: |out| The device to tear down + */ +void +vk_device_finish(struct vk_device *device); + +/** Enables threaded submit on this device + * + * This doesn't ensure that threaded submit will be used. It just disables + * the deferred submit option for emulated timeline semaphores and forces them + * to always use the threaded path. It also does some checks that the vk_sync + * types used by the driver work for threaded submit. + * + * This must be called before any queues are created. + */ +void vk_device_enable_threaded_submit(struct vk_device *device); + +static inline bool +vk_device_supports_threaded_submit(const struct vk_device *device) +{ + return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED || + device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND; +} + +VkResult vk_device_flush(struct vk_device *device); + +VkResult PRINTFLIKE(4, 5) +_vk_device_set_lost(struct vk_device *device, + const char *file, int line, + const char *msg, ...); + +#define vk_device_set_lost(device, ...) 
\ + _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__) + +void _vk_device_report_lost(struct vk_device *device); + +static inline bool +vk_device_is_lost_no_report(struct vk_device *device) +{ + return p_atomic_read(&device->_lost.lost) > 0; +} + +static inline bool +vk_device_is_lost(struct vk_device *device) +{ + int lost = vk_device_is_lost_no_report(device); + if (unlikely(lost && !device->_lost.reported)) + _vk_device_report_lost(device); + return lost; +} + +static inline VkResult +vk_device_check_status(struct vk_device *device) +{ + if (vk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + if (!device->check_status) + return VK_SUCCESS; + + VkResult result = device->check_status(device); + + assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST); + if (result == VK_ERROR_DEVICE_LOST) + assert(vk_device_is_lost_no_report(device)); + + return result; +} + +#ifndef _WIN32 + +uint64_t +vk_clock_gettime(clockid_t clock_id); + +static inline uint64_t +vk_time_max_deviation(uint64_t begin, uint64_t end, uint64_t max_clock_period) +{ + /* + * The maximum deviation is the sum of the interval over which we + * perform the sampling and the maximum period of any sampled + * clock. That's because the maximum skew between any two sampled + * clock edges is when the sampled clock with the largest period is + * sampled at the end of that period but right at the beginning of the + * sampling interval and some other clock is sampled right at the + * beginning of its sampling period and right at the end of the + * sampling interval. Let's assume the GPU has the longest clock + * period and that the application is sampling GPU and monotonic: + * + * s e + * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f + * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + * + * g + * 0 1 2 3 + * GPU -----_____-----_____-----_____-----_____ + * + * m + * x y z 0 1 2 3 4 5 6 7 8 9 a b c + * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + * + * Interval <-----------------> + * Deviation <--------------------------> + * + * s = read(raw) 2 + * g = read(GPU) 1 + * m = read(monotonic) 2 + * e = read(raw) b + * + * We round the sample interval up by one tick to cover sampling error + * in the interval clock + */ + + uint64_t sample_interval = end - begin + 1; + + return sample_interval + max_clock_period; +} + +#endif //!_WIN32 + +PFN_vkVoidFunction +vk_device_get_proc_addr(const struct vk_device *device, + const char *name); + +bool vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext, + const VkPhysicalDeviceVulkan11Properties *core); +bool vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext, + const VkPhysicalDeviceVulkan12Properties *core); +bool vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext, + const VkPhysicalDeviceVulkan13Properties *core); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DEVICE_H */ diff --git a/src/vulkan/runtime/vk_device_memory.c b/src/vulkan/runtime/vk_device_memory.c new file mode 100644 index 00000000000..e0a742e198e --- /dev/null +++ b/src/vulkan/runtime/vk_device_memory.c @@ -0,0 +1,221 @@ +/* + * Copyright © 2023 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_device_memory.h" + +#include "vk_android.h" +#include "vk_common_entrypoints.h" +#include "vk_util.h" + +#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 +#include <vndk/hardware_buffer.h> +#endif + +void * +vk_device_memory_create(struct vk_device *device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_device_memory *mem = + vk_object_zalloc(device, alloc, size, VK_OBJECT_TYPE_DEVICE_MEMORY); + if (mem == NULL) + return NULL; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + mem->size = pAllocateInfo->allocationSize; + mem->memory_type_index = pAllocateInfo->memoryTypeIndex; + + vk_foreach_struct_const(ext, pAllocateInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO: { + const VkExportMemoryAllocateInfo *export_info = (void *)ext; + mem->export_handle_types = export_info->handleTypes; + break; + } + + case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID: { +#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 + const VkImportAndroidHardwareBufferInfoANDROID *ahb_info = (void *)ext; + + assert(mem->import_handle_type == 0); + mem->import_handle_type = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID; + + /* From the Vulkan 1.3.242 spec: + * + * "If the vkAllocateMemory command succeeds, the implementation + * must acquire a reference to the imported hardware buffer, which + * it must release when the device memory object is freed. If the + * command fails, the implementation must not retain a + * reference." + * + * We assume that if the driver fails to create its memory object, + * it will call vk_device_memory_destroy which will delete our + * reference. 
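On the driver side, that contract looks roughly like the following (a hedged sketch; my_AllocateMemory, struct my_device_memory and my_bo_alloc are hypothetical driver names):

VKAPI_ATTR VkResult VKAPI_CALL
my_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   VK_FROM_HANDLE(vk_device, device, _device);

   /* Allocates the driver's full memory object and parses the pNext chain
    * (imports, exports, allocation flags), including the AHardwareBuffer
    * reference handling described above.
    */
   struct my_device_memory *mem =
      vk_device_memory_create(device, pAllocateInfo, pAllocator, sizeof(*mem));
   if (mem == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   VkResult result = my_bo_alloc(device, &mem->vk, &mem->bo);
   if (result != VK_SUCCESS) {
      /* Drops any reference vk_device_memory_create() acquired. */
      vk_device_memory_destroy(device, pAllocator, &mem->vk);
      return result;
   }

   *pMem = vk_device_memory_to_handle(&mem->vk);
   return VK_SUCCESS;
}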
+ */ + AHardwareBuffer_acquire(ahb_info->buffer); + mem->ahardware_buffer = ahb_info->buffer; + break; +#else + unreachable("AHardwareBuffer import requires Android >= 26"); +#endif /* DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 */ + } + + case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR: { + const VkImportMemoryFdInfoKHR *fd_info = (void *)ext; + if (fd_info->handleType) { + assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + assert(mem->import_handle_type == 0); + mem->import_handle_type = fd_info->handleType; + } + break; + } + + case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT: { + const VkImportMemoryHostPointerInfoEXT *host_ptr_info = (void *)ext; + if (host_ptr_info->handleType) { + assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT || + host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT); + + assert(mem->import_handle_type == 0); + mem->import_handle_type = host_ptr_info->handleType; + mem->host_ptr = host_ptr_info->pHostPointer; + } + break; + } + + case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR: { +#ifdef VK_USE_PLATFORM_WIN32_KHR + const VkImportMemoryWin32HandleInfoKHR *w32h_info = (void *)ext; + if (w32h_info->handleType) { + assert(w32h_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT || + w32h_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT || + w32h_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT || + w32h_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT || + w32h_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT || + w32h_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT); + assert(mem->import_handle_type == 0); + mem->import_handle_type = w32h_info->handleType; + } + break; +#else + unreachable("Win32 platform support disabled"); +#endif + } + + case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: { + const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext; + mem->alloc_flags = flags_info->flags; + break; + } + + default: + break; + } + } + + /* From the Vulkan Specification 1.3.261: + * + * VUID-VkMemoryAllocateInfo-allocationSize-07897 + * + * "If the parameters do not define an import or export operation, + * allocationSize must be greater than 0." + */ + if (!mem->import_handle_type && !mem->export_handle_types) + assert(pAllocateInfo->allocationSize > 0); + + /* From the Vulkan Specification 1.3.261: + * + * VUID-VkMemoryAllocateInfo-allocationSize-07899 + * + * "If the parameters define an export operation and the handle type is + * not VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, + * allocationSize must be greater than 0." + */ + if (mem->export_handle_types && + mem->export_handle_types != + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) + assert(pAllocateInfo->allocationSize > 0); + + if ((mem->export_handle_types & + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) && + mem->ahardware_buffer == NULL) { + /* If we need to be able to export an Android hardware buffer but none + * is provided as an import, create a new one. 
+ */ + mem->ahardware_buffer = vk_alloc_ahardware_buffer(pAllocateInfo); + if (mem->ahardware_buffer == NULL) { + vk_device_memory_destroy(device, alloc, mem); + return NULL; + } + } + + return mem; +} + +void +vk_device_memory_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_device_memory *mem) +{ + +#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 + if (mem->ahardware_buffer) + AHardwareBuffer_release(mem->ahardware_buffer); +#endif /* DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 */ + + vk_object_free(device, alloc, mem); +} + +#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26 +VkResult +vk_common_GetMemoryAndroidHardwareBufferANDROID( + VkDevice _device, + const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, + struct AHardwareBuffer **pBuffer) +{ + VK_FROM_HANDLE(vk_device_memory, mem, pInfo->memory); + + /* Some quotes from Vulkan spec: + * + * "If the device memory was created by importing an Android hardware + * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same + * Android hardware buffer object." + * + * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID + * must have been included in VkExportMemoryAllocateInfo::handleTypes + * when memory was created." + */ + if (mem->ahardware_buffer) { + *pBuffer = mem->ahardware_buffer; + /* Increase refcount. */ + AHardwareBuffer_acquire(*pBuffer); + return VK_SUCCESS; + } + + return VK_ERROR_INVALID_EXTERNAL_HANDLE; +} +#endif diff --git a/src/vulkan/runtime/vk_device_memory.h b/src/vulkan/runtime/vk_device_memory.h new file mode 100644 index 00000000000..6e490172011 --- /dev/null +++ b/src/vulkan/runtime/vk_device_memory.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2023 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_DEVICE_MEMORY_H +#define VK_DEVICE_MEMORY_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct AHardwareBuffer; + +struct vk_device_memory { + struct vk_object_base base; + + /* VkMemoryAllocateFlagsInfo::flags */ + VkMemoryAllocateFlags alloc_flags; + + /* VkMemoryAllocateInfo::allocationSize */ + VkDeviceSize size; + + /* VkMemoryAllocateInfo::memoryTypeIndex */ + uint32_t memory_type_index; + + /* Import handle type (if any) */ + VkExternalMemoryHandleTypeFlags import_handle_type; + + /* VkExportMemoryAllocateInfo::handleTypes */ + VkExternalMemoryHandleTypeFlags export_handle_types; + + /* VkImportMemoryHostPointerInfoEXT::pHostPointer */ + void *host_ptr; + + /* VkImportAndroidHardwareBufferInfoANDROID::buffer */ + struct AHardwareBuffer *ahardware_buffer; +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_device_memory, base, VkDeviceMemory, + VK_OBJECT_TYPE_DEVICE_MEMORY); + +void *vk_device_memory_create(struct vk_device *device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_device_memory_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_device_memory *mem); + +static inline uint64_t +vk_device_memory_range(const struct vk_device_memory *mem, + uint64_t offset, uint64_t range) +{ + assert(offset <= mem->size); + if (range == VK_WHOLE_SIZE) { + return mem->size - offset; + } else { + assert(range + offset >= range); + assert(range + offset <= mem->size); + return range; + } +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DEVICE_MEMORY_H */ diff --git a/src/vulkan/runtime/vk_drm_syncobj.c b/src/vulkan/runtime/vk_drm_syncobj.c new file mode 100644 index 00000000000..38da5e123cb --- /dev/null +++ b/src/vulkan/runtime/vk_drm_syncobj.c @@ -0,0 +1,449 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_drm_syncobj.h" + +#include <sched.h> +#include <xf86drm.h> + +#include "drm-uapi/drm.h" + +#include "util/os_time.h" + +#include "vk_device.h" +#include "vk_log.h" +#include "vk_util.h" + +static struct vk_drm_syncobj * +to_drm_syncobj(struct vk_sync *sync) +{ + assert(vk_sync_type_is_drm_syncobj(sync->type)); + return container_of(sync, struct vk_drm_syncobj, base); +} + +static VkResult +vk_drm_syncobj_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + uint32_t flags = 0; + if (!(sync->flags & VK_SYNC_IS_TIMELINE) && initial_value) + flags |= DRM_SYNCOBJ_CREATE_SIGNALED; + + assert(device->drm_fd >= 0); + int err = drmSyncobjCreate(device->drm_fd, flags, &sobj->syncobj); + if (err < 0) { + return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, + "DRM_IOCTL_SYNCOBJ_CREATE failed: %m"); + } + + if ((sync->flags & VK_SYNC_IS_TIMELINE) && initial_value) { + err = drmSyncobjTimelineSignal(device->drm_fd, &sobj->syncobj, + &initial_value, 1); + if (err < 0) { + vk_drm_syncobj_finish(device, sync); + return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, + "DRM_IOCTL_SYNCOBJ_CREATE failed: %m"); + } + } + + return VK_SUCCESS; +} + +void +vk_drm_syncobj_finish(struct vk_device *device, + struct vk_sync *sync) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + ASSERTED int err = drmSyncobjDestroy(device->drm_fd, sobj->syncobj); + assert(err == 0); +} + +static VkResult +vk_drm_syncobj_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + int err; + if (sync->flags & VK_SYNC_IS_TIMELINE) + err = drmSyncobjTimelineSignal(device->drm_fd, &sobj->syncobj, &value, 1); + else + err = drmSyncobjSignal(device->drm_fd, &sobj->syncobj, 1); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_SIGNAL failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_get_value(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + int err = drmSyncobjQuery(device->drm_fd, &sobj->syncobj, value, 1); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_QUERY failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_reset(struct vk_device *device, + struct vk_sync *sync) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + int err = drmSyncobjReset(device->drm_fd, &sobj->syncobj, 1); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_RESET failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +sync_has_sync_file(struct vk_device *device, struct vk_sync *sync) +{ + uint32_t handle = to_drm_syncobj(sync)->syncobj; + + int fd = -1; + int err = drmSyncobjExportSyncFile(device->drm_fd, handle, &fd); + if (!err) { + close(fd); + return VK_SUCCESS; + } + + /* On the off chance the sync_file export repeatedly fails for some + * unexpected reason, we want to ensure this function will return success + * eventually. Do a zero-time syncobj wait if the export failed. 
+ */ + err = drmSyncobjWait(device->drm_fd, &handle, 1, 0 /* timeout */, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + NULL /* first_signaled */); + if (!err) { + return VK_SUCCESS; + } else if (errno == ETIME) { + return VK_TIMEOUT; + } else { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); + } +} + +static VkResult +spin_wait_for_sync_file(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + if (wait_flags & VK_SYNC_WAIT_ANY) { + while (1) { + for (uint32_t i = 0; i < wait_count; i++) { + VkResult result = sync_has_sync_file(device, waits[i].sync); + if (result != VK_TIMEOUT) + return result; + } + + if (os_time_get_nano() >= abs_timeout_ns) + return VK_TIMEOUT; + + sched_yield(); + } + } else { + for (uint32_t i = 0; i < wait_count; i++) { + while (1) { + VkResult result = sync_has_sync_file(device, waits[i].sync); + if (result != VK_TIMEOUT) + return result; + + if (os_time_get_nano() >= abs_timeout_ns) + return VK_TIMEOUT; + + sched_yield(); + } + } + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_wait_many(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + if ((wait_flags & VK_SYNC_WAIT_PENDING) && + !(waits[0].sync->type->features & VK_SYNC_FEATURE_TIMELINE)) { + /* Sadly, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE was never implemented + * for drivers that don't support timelines. Instead, we have to spin + * on DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE until it succeeds. + */ + return spin_wait_for_sync_file(device, wait_count, waits, + wait_flags, abs_timeout_ns); + } + + /* Syncobj timeouts are signed */ + abs_timeout_ns = MIN2(abs_timeout_ns, (uint64_t)INT64_MAX); + + STACK_ARRAY(uint32_t, handles, wait_count); + STACK_ARRAY(uint64_t, wait_values, wait_count); + + uint32_t j = 0; + bool has_timeline = false; + for (uint32_t i = 0; i < wait_count; i++) { + /* The syncobj API doesn't like wait values of 0 but it's safe to skip + * them because a wait for 0 is a no-op. + */ + if (waits[i].sync->flags & VK_SYNC_IS_TIMELINE) { + if (waits[i].wait_value == 0) + continue; + + has_timeline = true; + } + + handles[j] = to_drm_syncobj(waits[i].sync)->syncobj; + wait_values[j] = waits[i].wait_value; + j++; + } + assert(j <= wait_count); + wait_count = j; + + uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; + if (!(wait_flags & VK_SYNC_WAIT_ANY)) + syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL; + + assert(device->drm_fd >= 0); + int err; + if (wait_count == 0) { + err = 0; + } else if (wait_flags & VK_SYNC_WAIT_PENDING) { + /* We always use a timeline wait for WAIT_PENDING, even for binary + * syncobjs because the non-timeline wait doesn't support + * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE. 
+ */ + err = drmSyncobjTimelineWait(device->drm_fd, handles, wait_values, + wait_count, abs_timeout_ns, + syncobj_wait_flags | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, + NULL /* first_signaled */); + } else if (has_timeline) { + err = drmSyncobjTimelineWait(device->drm_fd, handles, wait_values, + wait_count, abs_timeout_ns, + syncobj_wait_flags, + NULL /* first_signaled */); + } else { + err = drmSyncobjWait(device->drm_fd, handles, + wait_count, abs_timeout_ns, + syncobj_wait_flags, + NULL /* first_signaled */); + } + + STACK_ARRAY_FINISH(handles); + STACK_ARRAY_FINISH(wait_values); + + if (err && errno == ETIME) { + return VK_TIMEOUT; + } else if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_import_opaque_fd(struct vk_device *device, + struct vk_sync *sync, + int fd) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + uint32_t new_handle; + int err = drmSyncobjFDToHandle(device->drm_fd, fd, &new_handle); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %m"); + } + + err = drmSyncobjDestroy(device->drm_fd, sobj->syncobj); + assert(!err); + + sobj->syncobj = new_handle; + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_export_opaque_fd(struct vk_device *device, + struct vk_sync *sync, + int *fd) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + int err = drmSyncobjHandleToFD(device->drm_fd, sobj->syncobj, fd); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_import_sync_file(struct vk_device *device, + struct vk_sync *sync, + int sync_file) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + int err = drmSyncobjImportSyncFile(device->drm_fd, sobj->syncobj, sync_file); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_export_sync_file(struct vk_device *device, + struct vk_sync *sync, + int *sync_file) +{ + struct vk_drm_syncobj *sobj = to_drm_syncobj(sync); + + assert(device->drm_fd >= 0); + int err = drmSyncobjExportSyncFile(device->drm_fd, sobj->syncobj, sync_file); + if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD failed: %m"); + } + + return VK_SUCCESS; +} + +static VkResult +vk_drm_syncobj_move(struct vk_device *device, + struct vk_sync *dst, + struct vk_sync *src) +{ + struct vk_drm_syncobj *dst_sobj = to_drm_syncobj(dst); + struct vk_drm_syncobj *src_sobj = to_drm_syncobj(src); + VkResult result; + + if (!(dst->flags & VK_SYNC_IS_SHARED) && + !(src->flags & VK_SYNC_IS_SHARED)) { + result = vk_drm_syncobj_reset(device, dst); + if (unlikely(result != VK_SUCCESS)) + return result; + + uint32_t tmp = dst_sobj->syncobj; + dst_sobj->syncobj = src_sobj->syncobj; + src_sobj->syncobj = tmp; + + return VK_SUCCESS; + } else { + int fd; + result = vk_drm_syncobj_export_sync_file(device, src, &fd); + if (result != VK_SUCCESS) + return result; + + result = vk_drm_syncobj_import_sync_file(device, dst, fd); + if (fd >= 0) + close(fd); + if (result != VK_SUCCESS) + return result; + + return vk_drm_syncobj_reset(device, src); + } +} + +struct vk_sync_type +vk_drm_syncobj_get_type(int drm_fd) +{ + uint32_t syncobj = 0; + int err = 
drmSyncobjCreate(drm_fd, DRM_SYNCOBJ_CREATE_SIGNALED, &syncobj); + if (err < 0) + return (struct vk_sync_type) { .features = 0 }; + + struct vk_sync_type type = { + .size = sizeof(struct vk_drm_syncobj), + .features = VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET | + VK_SYNC_FEATURE_CPU_SIGNAL | + VK_SYNC_FEATURE_WAIT_PENDING, + .init = vk_drm_syncobj_init, + .finish = vk_drm_syncobj_finish, + .signal = vk_drm_syncobj_signal, + .reset = vk_drm_syncobj_reset, + .move = vk_drm_syncobj_move, + .import_opaque_fd = vk_drm_syncobj_import_opaque_fd, + .export_opaque_fd = vk_drm_syncobj_export_opaque_fd, + .import_sync_file = vk_drm_syncobj_import_sync_file, + .export_sync_file = vk_drm_syncobj_export_sync_file, + }; + + err = drmSyncobjWait(drm_fd, &syncobj, 1, 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, + NULL /* first_signaled */); + if (err == 0) { + type.wait_many = vk_drm_syncobj_wait_many; + type.features |= VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_WAIT_ANY; + } + + uint64_t cap; + err = drmGetCap(drm_fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap); + if (err == 0 && cap != 0) { + type.get_value = vk_drm_syncobj_get_value; + type.features |= VK_SYNC_FEATURE_TIMELINE; + } + + err = drmSyncobjDestroy(drm_fd, syncobj); + assert(err == 0); + + return type; +} diff --git a/src/vulkan/runtime/vk_drm_syncobj.h b/src/vulkan/runtime/vk_drm_syncobj.h new file mode 100644 index 00000000000..d4987f403da --- /dev/null +++ b/src/vulkan/runtime/vk_drm_syncobj.h @@ -0,0 +1,63 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_DRM_SYNCOBJ_H +#define VK_DRM_SYNCOBJ_H + +#include "vk_sync.h" + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_drm_syncobj { + struct vk_sync base; + uint32_t syncobj; +}; + +void vk_drm_syncobj_finish(struct vk_device *device, + struct vk_sync *sync); + +static inline bool +vk_sync_type_is_drm_syncobj(const struct vk_sync_type *type) +{ + return type->finish == vk_drm_syncobj_finish; +} + +static inline struct vk_drm_syncobj * +vk_sync_as_drm_syncobj(struct vk_sync *sync) +{ + if (!vk_sync_type_is_drm_syncobj(sync->type)) + return NULL; + + return container_of(sync, struct vk_drm_syncobj, base); +} + +struct vk_sync_type vk_drm_syncobj_get_type(int drm_fd); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DRM_SYNCOBJ_H */ diff --git a/src/vulkan/runtime/vk_fence.c b/src/vulkan/runtime/vk_fence.c new file mode 100644 index 00000000000..77cb5a3a47f --- /dev/null +++ b/src/vulkan/runtime/vk_fence.c @@ -0,0 +1,491 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_fence.h" + +#include "util/os_time.h" +#include "util/perf/cpu_trace.h" + +#ifndef _WIN32 +#include <unistd.h> +#endif + +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" +#include "vk_physical_device.h" +#include "vk_util.h" + +static VkExternalFenceHandleTypeFlags +vk_sync_fence_import_types(const struct vk_sync_type *type) +{ + VkExternalFenceHandleTypeFlags handle_types = 0; + + if (type->import_opaque_fd) + handle_types |= VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT; + + if (type->import_sync_file) + handle_types |= VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; + + return handle_types; +} + +static VkExternalFenceHandleTypeFlags +vk_sync_fence_export_types(const struct vk_sync_type *type) +{ + VkExternalFenceHandleTypeFlags handle_types = 0; + + if (type->export_opaque_fd) + handle_types |= VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT; + + if (type->export_sync_file) + handle_types |= VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; + + return handle_types; +} + +static VkExternalFenceHandleTypeFlags +vk_sync_fence_handle_types(const struct vk_sync_type *type) +{ + return vk_sync_fence_export_types(type) & + vk_sync_fence_import_types(type); +} + +static const struct vk_sync_type * +get_fence_sync_type(struct vk_physical_device *pdevice, + VkExternalFenceHandleTypeFlags handle_types) +{ + static const enum vk_sync_features req_features = + VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET; + + for (const struct vk_sync_type *const *t = + pdevice->supported_sync_types; *t; t++) { + if (req_features & ~(*t)->features) + continue; + + if (handle_types & ~vk_sync_fence_handle_types(*t)) + continue; + + return *t; + } + + return NULL; +} + +VkResult +vk_fence_create(struct vk_device *device, + const VkFenceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + struct vk_fence **fence_out) +{ + struct vk_fence *fence; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + const VkExportFenceCreateInfo *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO); + VkExternalFenceHandleTypeFlags handle_types = + export ? export->handleTypes : 0; + + const struct vk_sync_type *sync_type = + get_fence_sync_type(device->physical, handle_types); + if (sync_type == NULL) { + /* We should always be able to get a fence type for internal */ + assert(get_fence_sync_type(device->physical, 0) != NULL); + return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE, + "Combination of external handle types is unsupported " + "for VkFence creation."); + } + + /* Allocate a vk_fence + vk_sync implementation. Because the permanent + * field of vk_fence is the base field of the vk_sync implementation, we + * can make the 2 structures overlap. 
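+ *
+ * Roughly, the single allocation ends up laid out as (sketch only,
+ * the actual size comes from the driver's sync_type):
+ *
+ *    [ vk_fence fields ... | driver's vk_sync impl, sync_type->size ]
+ *                            ^ fence->permanent is its base field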
+ */ + size_t size = offsetof(struct vk_fence, permanent) + sync_type->size; + fence = vk_object_zalloc(device, pAllocator, size, VK_OBJECT_TYPE_FENCE); + if (fence == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + enum vk_sync_flags sync_flags = 0; + if (handle_types) + sync_flags |= VK_SYNC_IS_SHAREABLE; + + bool signaled = pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT; + VkResult result = vk_sync_init(device, &fence->permanent, + sync_type, sync_flags, signaled); + if (result != VK_SUCCESS) { + vk_object_free(device, pAllocator, fence); + return result; + } + + *fence_out = fence; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateFence(VkDevice _device, + const VkFenceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFence *pFence) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_fence *fence = NULL; + + VkResult result = vk_fence_create(device, pCreateInfo, pAllocator, &fence); + if (result != VK_SUCCESS) + return result; + + *pFence = vk_fence_to_handle(fence); + + return VK_SUCCESS; +} + +void +vk_fence_reset_temporary(struct vk_device *device, + struct vk_fence *fence) +{ + if (fence->temporary == NULL) + return; + + vk_sync_destroy(device, fence->temporary); + fence->temporary = NULL; +} + +void +vk_fence_destroy(struct vk_device *device, + struct vk_fence *fence, + const VkAllocationCallbacks *pAllocator) +{ + vk_fence_reset_temporary(device, fence); + vk_sync_finish(device, &fence->permanent); + + vk_object_free(device, pAllocator, fence); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyFence(VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_fence, fence, _fence); + + if (fence == NULL) + return; + + vk_fence_destroy(device, fence, pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_ResetFences(VkDevice _device, + uint32_t fenceCount, + const VkFence *pFences) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + for (uint32_t i = 0; i < fenceCount; i++) { + VK_FROM_HANDLE(vk_fence, fence, pFences[i]); + + /* From the Vulkan 1.2.194 spec: + * + * "If any member of pFences currently has its payload imported with + * temporary permanence, that fence’s prior permanent payload is + * first restored. The remaining operations described therefore + * operate on the restored payload." 
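+ *
+ * Dropping any temporary payload first means the vk_sync_reset() below
+ * always operates on the (restored) permanent payload.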
+ */ + vk_fence_reset_temporary(device, fence); + + VkResult result = vk_sync_reset(device, &fence->permanent); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetFenceStatus(VkDevice _device, + VkFence _fence) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_fence, fence, _fence); + + if (vk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + VkResult result = vk_sync_wait(device, vk_fence_get_active_sync(fence), + 0 /* wait_value */, + VK_SYNC_WAIT_COMPLETE, + 0 /* abs_timeout_ns */); + if (result == VK_TIMEOUT) + return VK_NOT_READY; + else + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_WaitForFences(VkDevice _device, + uint32_t fenceCount, + const VkFence *pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + MESA_TRACE_FUNC(); + + VK_FROM_HANDLE(vk_device, device, _device); + + if (vk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + if (fenceCount == 0) + return VK_SUCCESS; + + uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout); + + STACK_ARRAY(struct vk_sync_wait, waits, fenceCount); + + for (uint32_t i = 0; i < fenceCount; i++) { + VK_FROM_HANDLE(vk_fence, fence, pFences[i]); + waits[i] = (struct vk_sync_wait) { + .sync = vk_fence_get_active_sync(fence), + .stage_mask = ~(VkPipelineStageFlags2)0, + }; + } + + enum vk_sync_wait_flags wait_flags = VK_SYNC_WAIT_COMPLETE; + if (!waitAll) + wait_flags |= VK_SYNC_WAIT_ANY; + + VkResult result = vk_sync_wait_many(device, fenceCount, waits, + wait_flags, abs_timeout_ns); + + STACK_ARRAY_FINISH(waits); + + VkResult device_status = vk_device_check_status(device); + if (device_status != VK_SUCCESS) + return device_status; + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceExternalFenceProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, + VkExternalFenceProperties *pExternalFenceProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + assert(pExternalFenceInfo->sType == + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO); + const VkExternalFenceHandleTypeFlagBits handle_type = + pExternalFenceInfo->handleType; + + const struct vk_sync_type *sync_type = + get_fence_sync_type(pdevice, handle_type); + if (sync_type == NULL) { + pExternalFenceProperties->exportFromImportedHandleTypes = 0; + pExternalFenceProperties->compatibleHandleTypes = 0; + pExternalFenceProperties->externalFenceFeatures = 0; + return; + } + + VkExternalFenceHandleTypeFlagBits import = + vk_sync_fence_import_types(sync_type); + VkExternalFenceHandleTypeFlagBits export = + vk_sync_fence_export_types(sync_type); + + if (handle_type != VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT) { + const struct vk_sync_type *opaque_sync_type = + get_fence_sync_type(pdevice, VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT); + + /* If we're a different vk_sync_type than the one selected when only + * OPAQUE_FD is set, then we can't import/export OPAQUE_FD. Put + * differently, there can only be one OPAQUE_FD sync type. 
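+ *
+ * As a hypothetical example: a SYNC_FD query may select a sync type
+ * whose opaque-fd hooks happen to exist, but if a different type would
+ * be selected when only OPAQUE_FD is requested, the two types' opaque
+ * fds are not interchangeable, so OPAQUE_FD has to be masked out of
+ * the reported handle types below.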
+ */ + if (sync_type != opaque_sync_type) { + import &= ~VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT; + export &= ~VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT; + } + } + + VkExternalFenceHandleTypeFlags compatible = import & export; + VkExternalFenceFeatureFlags features = 0; + if (handle_type & export) + features |= VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT; + if (handle_type & import) + features |= VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT; + + pExternalFenceProperties->exportFromImportedHandleTypes = export; + pExternalFenceProperties->compatibleHandleTypes = compatible; + pExternalFenceProperties->externalFenceFeatures = features; +} + +#ifndef _WIN32 + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_ImportFenceFdKHR(VkDevice _device, + const VkImportFenceFdInfoKHR *pImportFenceFdInfo) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_fence, fence, pImportFenceFdInfo->fence); + + assert(pImportFenceFdInfo->sType == + VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR); + + const int fd = pImportFenceFdInfo->fd; + const VkExternalFenceHandleTypeFlagBits handle_type = + pImportFenceFdInfo->handleType; + + struct vk_sync *temporary = NULL, *sync; + if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) { + const struct vk_sync_type *sync_type = + get_fence_sync_type(device->physical, handle_type); + + VkResult result = vk_sync_create(device, sync_type, 0 /* flags */, + 0 /* initial_value */, &temporary); + if (result != VK_SUCCESS) + return result; + + sync = temporary; + } else { + sync = &fence->permanent; + } + assert(handle_type & vk_sync_fence_handle_types(sync->type)); + + VkResult result; + switch (pImportFenceFdInfo->handleType) { + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: + result = vk_sync_import_opaque_fd(device, sync, fd); + break; + + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: + result = vk_sync_import_sync_file(device, sync, fd); + break; + + default: + result = vk_error(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + + if (result != VK_SUCCESS) { + if (temporary != NULL) + vk_sync_destroy(device, temporary); + return result; + } + + /* From the Vulkan 1.2.194 spec: + * + * "Importing a fence payload from a file descriptor transfers + * ownership of the file descriptor from the application to the + * Vulkan implementation. The application must not perform any + * operations on the file descriptor after a successful import." + * + * If the import fails, we leave the file descriptor open. + */ + if (fd != -1) + close(fd); + + if (temporary) { + vk_fence_reset_temporary(device, fence); + fence->temporary = temporary; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetFenceFdKHR(VkDevice _device, + const VkFenceGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_fence, fence, pGetFdInfo->fence); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR); + + struct vk_sync *sync = vk_fence_get_active_sync(fence); + + VkResult result; + switch (pGetFdInfo->handleType) { + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: + result = vk_sync_export_opaque_fd(device, sync, pFd); + if (unlikely(result != VK_SUCCESS)) + return result; + break; + + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: + /* There's no direct spec quote for this but the same rules as for + * semaphore export apply. We can't export a sync file from a fence + * if the fence event hasn't been submitted to the kernel yet. 
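+ *
+ * With threaded submit, the client's vkQueueSubmit() may still be
+ * sitting in the submit thread, so we first wait for the payload to
+ * become pending (submitted to the kernel) before asking for a sync
+ * file.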
+ */ + if (vk_device_supports_threaded_submit(device)) { + result = vk_sync_wait(device, sync, 0, + VK_SYNC_WAIT_PENDING, + UINT64_MAX); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + result = vk_sync_export_sync_file(device, sync, pFd); + if (unlikely(result != VK_SUCCESS)) + return result; + + /* From the Vulkan 1.2.194 spec: + * + * "Export operations have the same transference as the specified + * handle type’s import operations. Additionally, exporting a fence + * payload to a handle with copy transference has the same side + * effects on the source fence’s payload as executing a fence reset + * operation." + * + * In other words, exporting a sync file also resets the fence. We + * only care about this for the permanent payload because the temporary + * payload will be destroyed below. + */ + if (sync == &fence->permanent) { + result = vk_sync_reset(device, sync); + if (unlikely(result != VK_SUCCESS)) + return result; + } + break; + + default: + unreachable("Invalid fence export handle type"); + } + + /* From the Vulkan 1.2.194 spec: + * + * "Export operations have the same transference as the specified + * handle type’s import operations. [...] If the fence was using a + * temporarily imported payload, the fence’s prior permanent payload + * will be restored. + */ + vk_fence_reset_temporary(device, fence); + + return VK_SUCCESS; +} + +#endif /* !defined(_WIN32) */ diff --git a/src/vulkan/runtime/vk_fence.h b/src/vulkan/runtime/vk_fence.h new file mode 100644 index 00000000000..12cb1ab315a --- /dev/null +++ b/src/vulkan/runtime/vk_fence.h @@ -0,0 +1,82 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_FENCE_H +#define VK_FENCE_H + +#include "vk_object.h" +#include "vk_sync.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_sync; + +struct vk_fence { + struct vk_object_base base; + + /* Temporary fence state. + * + * A fence *may* have temporary state. That state is added to the fence by + * an import operation and is reset back to NULL when the fence is reset. + * A fence with temporary state cannot be signaled because the fence must + * already be signaled before the temporary state can be exported from the + * fence in the other process and imported here. + */ + struct vk_sync *temporary; + + /** Permanent fence state. + * + * Every fence has some form of permanent state. 
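+ *
+ * Because this is the base field of the driver's vk_sync
+ * implementation, drivers can recover their own sync type from it,
+ * e.g. (sketch for a drm-syncobj based driver):
+ *
+ *    struct vk_drm_syncobj *sobj =
+ *       vk_sync_as_drm_syncobj(&fence->permanent);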
+ * + * This field must be last + */ + alignas(8) struct vk_sync permanent; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_fence, base, VkFence, + VK_OBJECT_TYPE_FENCE); + +VkResult vk_fence_create(struct vk_device *device, + const VkFenceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + struct vk_fence **fence_out); + +void vk_fence_destroy(struct vk_device *device, + struct vk_fence *fence, + const VkAllocationCallbacks *pAllocator); + +void vk_fence_reset_temporary(struct vk_device *device, + struct vk_fence *fence); + +static inline struct vk_sync * +vk_fence_get_active_sync(struct vk_fence *fence) +{ + return fence->temporary ? fence->temporary : &fence->permanent; +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_FENCE_H */ diff --git a/src/vulkan/runtime/vk_format_info_gen.py b/src/vulkan/runtime/vk_format_info_gen.py new file mode 100644 index 00000000000..29cb4ebe65f --- /dev/null +++ b/src/vulkan/runtime/vk_format_info_gen.py @@ -0,0 +1,245 @@ +COPYRIGHT=u""" +/* Copyright © 2022 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +""" + +import argparse +import os +import re +from collections import namedtuple +import xml.etree.ElementTree as et + +from mako.template import Template + +TEMPLATE_H = Template(COPYRIGHT + """\ +/* This file generated from ${filename}, don't edit directly. */ + +#ifndef VK_FORMAT_INFO_H +#define VK_FORMAT_INFO_H + +#include <vulkan/vulkan_core.h> + +#ifdef __cplusplus +extern "C" { +#endif + +enum vk_format_class { + MESA_VK_FORMAT_CLASS_UNKNOWN, +% for name in format_classes: + ${to_enum_name('MESA_VK_FORMAT_CLASS_', name)}, +% endfor +}; + +struct vk_format_class_info { + const VkFormat *formats; + uint32_t format_count; +}; + +const struct vk_format_class_info * +vk_format_class_get_info(enum vk_format_class class); + +const struct vk_format_class_info * +vk_format_get_class_info(VkFormat format); + +#ifdef __cplusplus +} +#endif + +#endif +""") + +TEMPLATE_C = Template(COPYRIGHT + """ +/* This file generated from ${filename}, don't edit directly. 
*/ + +#include "${header}" + +#include "util/macros.h" + +#include "vk_format.h" + +struct vk_format_info { + enum vk_format_class class; +}; + +% for id, ext in extensions.items(): +static const struct vk_format_info ext${id}_format_infos[] = { +% for name, format in ext.formats.items(): + [${format.offset}] = { + .class = ${to_enum_name('MESA_VK_FORMAT_CLASS_', format.cls)}, + }, +% endfor +}; + +% endfor +static const struct vk_format_info * +vk_format_get_info(VkFormat format) +{ + uint32_t extnumber = + format < 1000000000 ? 0 : (((format % 1000000000) / 1000) + 1); + uint32_t offset = format % 1000; + + switch (extnumber) { +% for id, ext in extensions.items(): + case ${id}: + assert(offset < ARRAY_SIZE(ext${id}_format_infos)); + return &ext${id}_format_infos[offset]; +% endfor + default: + unreachable("Invalid extension"); + } +} + +% for clsname, cls in format_classes.items(): +% if len(cls.formats) > 0: +static const VkFormat ${to_enum_name('MESA_VK_FORMAT_CLASS_', clsname).lower() + '_formats'}[] = { +% for fname in cls.formats: + ${fname}, +% endfor +% endif +}; + +% endfor +static const struct vk_format_class_info class_infos[] = { +% for clsname, cls in format_classes.items(): + [${to_enum_name('MESA_VK_FORMAT_CLASS_', clsname)}] = { +% if len(cls.formats) > 0: + .formats = ${to_enum_name('MESA_VK_FORMAT_CLASS_', clsname).lower() + '_formats'}, + .format_count = ARRAY_SIZE(${to_enum_name('MESA_VK_FORMAT_CLASS_', clsname).lower() + '_formats'}), +% else: + 0 +% endif + }, +% endfor +}; + +const struct vk_format_class_info * +vk_format_class_get_info(enum vk_format_class class) +{ + assert(class < ARRAY_SIZE(class_infos)); + return &class_infos[class]; +} + +const struct vk_format_class_info * +vk_format_get_class_info(VkFormat format) +{ + const struct vk_format_info *format_info = vk_format_get_info(format); + return &class_infos[format_info->class]; +} +""") + +def to_enum_name(prefix, name): + return "%s" % prefix + re.sub('([^A-Za-z0-9_])', '_', name).upper() + +Format = namedtuple('Format', ['name', 'cls', 'ext', 'offset']) +FormatClass = namedtuple('FormatClass', ['name', 'formats']) +Extension = namedtuple('Extension', ['id', 'formats']) + +def get_formats(doc): + """Extract the formats from the registry.""" + formats = {} + + for fmt in doc.findall('./formats/format'): + xpath = './/enum[@name="{}"]'.format(fmt.attrib['name']) + enum = doc.find(xpath) + ext = None + if 'extends' in enum.attrib: + assert(enum.attrib['extends'] == 'VkFormat') + if 'extnumber' in enum.attrib: + ext = int(enum.attrib['extnumber']) + else: + xpath = xpath + '/..' + parent = doc.find(xpath) + while parent != None and ext == None: + if parent.tag == 'extension': + assert('number' in parent.attrib) + ext = parent.attrib['number'] + xpath = xpath + '/..' 
+ parent = doc.find(xpath) + offset = int(enum.attrib['offset']) + else: + ext = 0 + offset = int(enum.attrib['value']) + + assert(ext != None) + format = Format(fmt.attrib['name'], fmt.attrib['class'], ext, offset) + formats[format.name] = format + + return formats + +def get_formats_from_xml(xml_files): + formats = {} + + for filename in xml_files: + doc = et.parse(filename) + formats.update(get_formats(doc)) + + return formats + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--out-c', required=True, help='Output C file.') + parser.add_argument('--out-h', required=True, help='Output H file.') + parser.add_argument('--xml', + help='Vulkan API XML file.', + required=True, action='append', dest='xml_files') + args = parser.parse_args() + + formats = get_formats_from_xml(args.xml_files) + classes = {} + extensions = {} + for n, f in formats.items(): + if f.cls not in classes: + classes[f.cls] = FormatClass(f.cls, {}) + classes[f.cls].formats[f.name] = f + if f.ext not in extensions: + extensions[f.ext] = Extension(f.cls, {}) + extensions[f.ext].formats[f.name] = f + + assert os.path.dirname(args.out_c) == os.path.dirname(args.out_h) + + environment = { + 'header': os.path.basename(args.out_h), + 'formats': formats, + 'format_classes': classes, + 'extensions': extensions, + 'filename': os.path.basename(__file__), + 'to_enum_name': to_enum_name, + } + + try: + with open(args.out_h, 'w', encoding='utf-8') as f: + guard = os.path.basename(args.out_h).replace('.', '_').upper() + f.write(TEMPLATE_H.render(guard=guard, **environment)) + with open(args.out_c, 'w', encoding='utf-8') as f: + f.write(TEMPLATE_C.render(**environment)) + except Exception: + # In the event there's an error, this imports some helpers from mako + # to print a useful stack trace and prints it, then exits with + # status 1, if python is run with debug; otherwise it just raises + # the exception + import sys + from mako import exceptions + print(exceptions.text_error_template().render(), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main() diff --git a/src/vulkan/runtime/vk_framebuffer.c b/src/vulkan/runtime/vk_framebuffer.c new file mode 100644 index 00000000000..f28dce1ffb1 --- /dev/null +++ b/src/vulkan/runtime/vk_framebuffer.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_framebuffer.h" + +#include "vk_common_entrypoints.h" +#include "vk_device.h" + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateFramebuffer(VkDevice _device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer); + + /* VK_KHR_imageless_framebuffer extension says: + * + * If flags includes VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT, + * parameter pAttachments is ignored. + */ + if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) + size += sizeof(VkImageView) * pCreateInfo->attachmentCount; + + framebuffer = vk_object_alloc(device, pAllocator, size, + VK_OBJECT_TYPE_FRAMEBUFFER); + if (framebuffer == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + framebuffer->flags = pCreateInfo->flags; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) { + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) + framebuffer->attachments[i] = pCreateInfo->pAttachments[i]; + framebuffer->attachment_count = pCreateInfo->attachmentCount; + } + + *pFramebuffer = vk_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyFramebuffer(VkDevice _device, + VkFramebuffer _framebuffer, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_framebuffer, framebuffer, _framebuffer); + + if (!framebuffer) + return; + + vk_object_free(device, pAllocator, framebuffer); +} diff --git a/src/vulkan/runtime/vk_framebuffer.h b/src/vulkan/runtime/vk_framebuffer.h new file mode 100644 index 00000000000..a0f4b61a797 --- /dev/null +++ b/src/vulkan/runtime/vk_framebuffer.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_FRAMEBUFFER_H +#define VK_FRAMEBUFFER_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_framebuffer { + struct vk_object_base base; + + /** VkFramebufferCreateInfo::flags */ + VkFramebufferCreateFlags flags; + + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + VkImageView attachments[]; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_framebuffer, base, VkFramebuffer, + VK_OBJECT_TYPE_FRAMEBUFFER) + +#ifdef __cplusplus +} +#endif + +#endif /* VK_FRAMEBUFFER_H */ + diff --git a/src/vulkan/runtime/vk_graphics_state.c b/src/vulkan/runtime/vk_graphics_state.c new file mode 100644 index 00000000000..3f875a33d50 --- /dev/null +++ b/src/vulkan/runtime/vk_graphics_state.c @@ -0,0 +1,3280 @@ +#include "vk_graphics_state.h" + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" +#include "vk_pipeline.h" +#include "vk_render_pass.h" +#include "vk_standard_sample_locations.h" +#include "vk_util.h" + +#include <assert.h> + +enum mesa_vk_graphics_state_groups { + MESA_VK_GRAPHICS_STATE_VERTEX_INPUT_BIT = (1 << 0), + MESA_VK_GRAPHICS_STATE_INPUT_ASSEMBLY_BIT = (1 << 1), + MESA_VK_GRAPHICS_STATE_TESSELLATION_BIT = (1 << 2), + MESA_VK_GRAPHICS_STATE_VIEWPORT_BIT = (1 << 3), + MESA_VK_GRAPHICS_STATE_DISCARD_RECTANGLES_BIT = (1 << 4), + MESA_VK_GRAPHICS_STATE_RASTERIZATION_BIT = (1 << 5), + MESA_VK_GRAPHICS_STATE_FRAGMENT_SHADING_RATE_BIT = (1 << 6), + MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT = (1 << 7), + MESA_VK_GRAPHICS_STATE_DEPTH_STENCIL_BIT = (1 << 8), + MESA_VK_GRAPHICS_STATE_COLOR_BLEND_BIT = (1 << 9), + MESA_VK_GRAPHICS_STATE_INPUT_ATTACHMENT_MAP_BIT = (1 << 10), + MESA_VK_GRAPHICS_STATE_COLOR_ATTACHMENT_MAP_BIT = (1 << 11), + MESA_VK_GRAPHICS_STATE_RENDER_PASS_BIT = (1 << 12), +}; + +static void +clear_all_dynamic_state(BITSET_WORD *dynamic) +{ + /* Clear the whole array so there are no undefined bits at the top */ + memset(dynamic, 0, sizeof(*dynamic) * + BITSET_WORDS(MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX)); +} + +static void +get_dynamic_state_groups(BITSET_WORD *dynamic, + enum mesa_vk_graphics_state_groups groups) +{ + clear_all_dynamic_state(dynamic); + + if (groups & MESA_VK_GRAPHICS_STATE_VERTEX_INPUT_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VI); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VI_BINDINGS_VALID); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES); + } + + if (groups & MESA_VK_GRAPHICS_STATE_INPUT_ASSEMBLY_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE); + } + + if (groups & MESA_VK_GRAPHICS_STATE_TESSELLATION_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN); + } + + if (groups & MESA_VK_GRAPHICS_STATE_VIEWPORT_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VP_VIEWPORTS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VP_SCISSORS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE); + } + + if (groups & MESA_VK_GRAPHICS_STATE_DISCARD_RECTANGLES_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DR_RECTANGLES); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DR_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DR_MODE); + } + + if (groups & MESA_VK_GRAPHICS_STATE_RASTERIZATION_BIT) { + BITSET_SET(dynamic, 
MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_POLYGON_MODE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_CULL_MODE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_FRONT_FACE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_RASTERIZATION_ORDER_AMD); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_LINE_WIDTH); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_LINE_MODE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RS_LINE_STIPPLE); + } + + if (groups & MESA_VK_GRAPHICS_STATE_FRAGMENT_SHADING_RATE_BIT) + BITSET_SET(dynamic, MESA_VK_DYNAMIC_FSR); + + if (groups & MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_MASK); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS); + } + + if (groups & MESA_VK_GRAPHICS_STATE_DEPTH_STENCIL_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE); + } + + if (groups & MESA_VK_GRAPHICS_STATE_COLOR_BLEND_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_LOGIC_OP); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS); + } + + if (groups & MESA_VK_GRAPHICS_STATE_COLOR_ATTACHMENT_MAP_BIT) + BITSET_SET(dynamic, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP); + + if (groups & MESA_VK_GRAPHICS_STATE_INPUT_ATTACHMENT_MAP_BIT) + BITSET_SET(dynamic, MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP); + + if (groups & MESA_VK_GRAPHICS_STATE_RENDER_PASS_BIT) { + BITSET_SET(dynamic, MESA_VK_DYNAMIC_RP_ATTACHMENTS); + BITSET_SET(dynamic, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE); + } +} + +static enum mesa_vk_graphics_state_groups +fully_dynamic_state_groups(const BITSET_WORD *dynamic) +{ + enum mesa_vk_graphics_state_groups groups = 0; + + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_VI) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_VI_BINDINGS_VALID)) + groups |= MESA_VK_GRAPHICS_STATE_VERTEX_INPUT_BIT; + + if 
(BITSET_TEST(dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) + groups |= MESA_VK_GRAPHICS_STATE_TESSELLATION_BIT; + + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR)) + groups |= MESA_VK_GRAPHICS_STATE_FRAGMENT_SHADING_RATE_BIT; + + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) + groups |= MESA_VK_GRAPHICS_STATE_DEPTH_STENCIL_BIT; + + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_LOGIC_OP) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) + groups |= MESA_VK_GRAPHICS_STATE_COLOR_BLEND_BIT; + + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) + groups |= MESA_VK_GRAPHICS_STATE_COLOR_ATTACHMENT_MAP_BIT; + + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP)) + groups |= MESA_VK_GRAPHICS_STATE_INPUT_ATTACHMENT_MAP_BIT; + + return groups; +} + +static void +validate_dynamic_state_groups(const BITSET_WORD *dynamic, + enum mesa_vk_graphics_state_groups groups) +{ +#ifndef NDEBUG + BITSET_DECLARE(all_dynamic, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + get_dynamic_state_groups(all_dynamic, groups); + + for (uint32_t w = 0; w < ARRAY_SIZE(all_dynamic); w++) + assert(!(dynamic[w] & ~all_dynamic[w])); +#endif +} + +void +vk_get_dynamic_graphics_states(BITSET_WORD *dynamic, + const VkPipelineDynamicStateCreateInfo *info) +{ + clear_all_dynamic_state(dynamic); + + /* From the Vulkan 1.3.218 spec: + * + * "pDynamicState is a pointer to a VkPipelineDynamicStateCreateInfo + * structure defining which properties of the pipeline state object are + * dynamic and can be changed independently of the pipeline state. This + * can be NULL, which means no state in the pipeline is considered + * dynamic." 
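+ *
+ * clear_all_dynamic_state() above already left every bit unset, so a
+ * NULL pDynamicState just means we return with nothing marked dynamic.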
+ */ + if (info == NULL) + return; + +#define CASE(VK, MESA) \ + case VK_DYNAMIC_STATE_##VK: \ + BITSET_SET(dynamic, MESA_VK_DYNAMIC_##MESA); \ + break; + +#define CASE2(VK, MESA1, MESA2) \ + case VK_DYNAMIC_STATE_##VK: \ + BITSET_SET(dynamic, MESA_VK_DYNAMIC_##MESA1); \ + BITSET_SET(dynamic, MESA_VK_DYNAMIC_##MESA2); \ + break; + +#define CASE3(VK, MESA1, MESA2, MESA3) \ + case VK_DYNAMIC_STATE_##VK: \ + BITSET_SET(dynamic, MESA_VK_DYNAMIC_##MESA1); \ + BITSET_SET(dynamic, MESA_VK_DYNAMIC_##MESA2); \ + BITSET_SET(dynamic, MESA_VK_DYNAMIC_##MESA3); \ + break; + + for (uint32_t i = 0; i < info->dynamicStateCount; i++) { + switch (info->pDynamicStates[i]) { + CASE3(VERTEX_INPUT_EXT, VI, VI_BINDINGS_VALID, VI_BINDING_STRIDES) + CASE( VERTEX_INPUT_BINDING_STRIDE, VI_BINDING_STRIDES) + CASE( VIEWPORT, VP_VIEWPORTS) + CASE( SCISSOR, VP_SCISSORS) + CASE( LINE_WIDTH, RS_LINE_WIDTH) + CASE( DEPTH_BIAS, RS_DEPTH_BIAS_FACTORS) + CASE( BLEND_CONSTANTS, CB_BLEND_CONSTANTS) + CASE( DEPTH_BOUNDS, DS_DEPTH_BOUNDS_TEST_BOUNDS) + CASE( STENCIL_COMPARE_MASK, DS_STENCIL_COMPARE_MASK) + CASE( STENCIL_WRITE_MASK, DS_STENCIL_WRITE_MASK) + CASE( STENCIL_REFERENCE, DS_STENCIL_REFERENCE) + CASE( CULL_MODE, RS_CULL_MODE) + CASE( FRONT_FACE, RS_FRONT_FACE) + CASE( PRIMITIVE_TOPOLOGY, IA_PRIMITIVE_TOPOLOGY) + CASE2(VIEWPORT_WITH_COUNT, VP_VIEWPORT_COUNT, VP_VIEWPORTS) + CASE2(SCISSOR_WITH_COUNT, VP_SCISSOR_COUNT, VP_SCISSORS) + CASE( DEPTH_TEST_ENABLE, DS_DEPTH_TEST_ENABLE) + CASE( DEPTH_WRITE_ENABLE, DS_DEPTH_WRITE_ENABLE) + CASE( DEPTH_COMPARE_OP, DS_DEPTH_COMPARE_OP) + CASE( DEPTH_BOUNDS_TEST_ENABLE, DS_DEPTH_BOUNDS_TEST_ENABLE) + CASE( STENCIL_TEST_ENABLE, DS_STENCIL_TEST_ENABLE) + CASE( STENCIL_OP, DS_STENCIL_OP) + CASE( RASTERIZER_DISCARD_ENABLE, RS_RASTERIZER_DISCARD_ENABLE) + CASE( DEPTH_BIAS_ENABLE, RS_DEPTH_BIAS_ENABLE) + CASE( PRIMITIVE_RESTART_ENABLE, IA_PRIMITIVE_RESTART_ENABLE) + CASE( DISCARD_RECTANGLE_EXT, DR_RECTANGLES) + CASE( DISCARD_RECTANGLE_ENABLE_EXT, DR_ENABLE) + CASE( DISCARD_RECTANGLE_MODE_EXT, DR_MODE) + CASE( SAMPLE_LOCATIONS_EXT, MS_SAMPLE_LOCATIONS) + CASE( FRAGMENT_SHADING_RATE_KHR, FSR) + CASE( LINE_STIPPLE_EXT, RS_LINE_STIPPLE) + CASE( PATCH_CONTROL_POINTS_EXT, TS_PATCH_CONTROL_POINTS) + CASE( LOGIC_OP_EXT, CB_LOGIC_OP) + CASE( COLOR_WRITE_ENABLE_EXT, CB_COLOR_WRITE_ENABLES) + CASE( TESSELLATION_DOMAIN_ORIGIN_EXT, TS_DOMAIN_ORIGIN) + CASE( DEPTH_CLAMP_ENABLE_EXT, RS_DEPTH_CLAMP_ENABLE) + CASE( POLYGON_MODE_EXT, RS_POLYGON_MODE) + CASE( RASTERIZATION_SAMPLES_EXT, MS_RASTERIZATION_SAMPLES) + CASE( SAMPLE_MASK_EXT, MS_SAMPLE_MASK) + CASE( ALPHA_TO_COVERAGE_ENABLE_EXT, MS_ALPHA_TO_COVERAGE_ENABLE) + CASE( ALPHA_TO_ONE_ENABLE_EXT, MS_ALPHA_TO_ONE_ENABLE) + CASE( LOGIC_OP_ENABLE_EXT, CB_LOGIC_OP_ENABLE) + CASE( COLOR_BLEND_ENABLE_EXT, CB_BLEND_ENABLES) + CASE( COLOR_BLEND_EQUATION_EXT, CB_BLEND_EQUATIONS) + CASE( COLOR_WRITE_MASK_EXT, CB_WRITE_MASKS) + CASE( RASTERIZATION_STREAM_EXT, RS_RASTERIZATION_STREAM) + CASE( CONSERVATIVE_RASTERIZATION_MODE_EXT, RS_CONSERVATIVE_MODE) + CASE( DEPTH_CLIP_ENABLE_EXT, RS_DEPTH_CLIP_ENABLE) + CASE( SAMPLE_LOCATIONS_ENABLE_EXT, MS_SAMPLE_LOCATIONS_ENABLE) + CASE( PROVOKING_VERTEX_MODE_EXT, RS_PROVOKING_VERTEX) + CASE( LINE_RASTERIZATION_MODE_EXT, RS_LINE_MODE) + CASE( LINE_STIPPLE_ENABLE_EXT, RS_LINE_STIPPLE_ENABLE) + CASE( DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) + CASE( ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT, ATTACHMENT_FEEDBACK_LOOP_ENABLE) + default: + unreachable("Unsupported dynamic graphics state"); + } + } + + /* 
attachmentCount is ignored if all of the states using it are dyanmic. + * + * TODO: Handle advanced blending here when supported. + */ + if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) && + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS)) + BITSET_SET(dynamic, MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT); +} + +#define IS_DYNAMIC(STATE) \ + BITSET_TEST(dynamic, MESA_VK_DYNAMIC_##STATE) + +#define IS_NEEDED(STATE) \ + BITSET_TEST(needed, MESA_VK_DYNAMIC_##STATE) + +static void +vk_vertex_input_state_init(struct vk_vertex_input_state *vi, + const BITSET_WORD *dynamic, + const VkPipelineVertexInputStateCreateInfo *vi_info) +{ + assert(!IS_DYNAMIC(VI)); + + memset(vi, 0, sizeof(*vi)); + if (!vi_info) + return; + + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + assert(desc->binding < MESA_VK_MAX_VERTEX_BINDINGS); + assert(desc->stride <= MESA_VK_MAX_VERTEX_BINDING_STRIDE); + assert(desc->inputRate <= 1); + + const uint32_t b = desc->binding; + vi->bindings_valid |= BITFIELD_BIT(b); + vi->bindings[b].stride = desc->stride; + vi->bindings[b].input_rate = desc->inputRate; + vi->bindings[b].divisor = 1; + } + + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + assert(desc->location < MESA_VK_MAX_VERTEX_ATTRIBUTES); + assert(desc->binding < MESA_VK_MAX_VERTEX_BINDINGS); + assert(vi->bindings_valid & BITFIELD_BIT(desc->binding)); + + const uint32_t a = desc->location; + vi->attributes_valid |= BITFIELD_BIT(a); + vi->attributes[a].binding = desc->binding; + vi->attributes[a].format = desc->format; + vi->attributes[a].offset = desc->offset; + } + + const VkPipelineVertexInputDivisorStateCreateInfoKHR *vi_div_state = + vk_find_struct_const(vi_info->pNext, + PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_KHR); + if (vi_div_state) { + for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) { + const VkVertexInputBindingDivisorDescriptionKHR *desc = + &vi_div_state->pVertexBindingDivisors[i]; + + assert(desc->binding < MESA_VK_MAX_VERTEX_BINDINGS); + assert(vi->bindings_valid & BITFIELD_BIT(desc->binding)); + + const uint32_t b = desc->binding; + vi->bindings[b].divisor = desc->divisor; + } + } +} + +static void +vk_dynamic_graphics_state_init_vi(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_vertex_input_state *vi) +{ + if (IS_NEEDED(VI)) + *dst->vi = *vi; + + if (IS_NEEDED(VI_BINDINGS_VALID)) + dst->vi_bindings_valid = vi->bindings_valid; + + if (IS_NEEDED(VI_BINDING_STRIDES)) { + for (uint32_t b = 0; b < MESA_VK_MAX_VERTEX_BINDINGS; b++) { + if (vi->bindings_valid & BITFIELD_BIT(b)) + dst->vi_binding_strides[b] = vi->bindings[b].stride; + else + dst->vi_binding_strides[b] = 0; + } + } +} + +static void +vk_input_assembly_state_init(struct vk_input_assembly_state *ia, + const BITSET_WORD *dynamic, + const VkPipelineInputAssemblyStateCreateInfo *ia_info) +{ + memset(ia, 0, sizeof(*ia)); + if (!ia_info) + return; + + /* From the Vulkan 1.3.224 spec: + * + * "VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY specifies that the topology + * state in VkPipelineInputAssemblyStateCreateInfo only specifies the + * topology class, and the specific topology order and adjacency must be + * set dynamically with vkCmdSetPrimitiveTopology before any drawing + * 
commands." + */ + assert(ia_info->topology <= UINT8_MAX); + ia->primitive_topology = ia_info->topology; + + ia->primitive_restart_enable = ia_info->primitiveRestartEnable; +} + +static void +vk_dynamic_graphics_state_init_ia(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_input_assembly_state *ia) +{ + dst->ia = *ia; +} + +static void +vk_tessellation_state_init(struct vk_tessellation_state *ts, + const BITSET_WORD *dynamic, + const VkPipelineTessellationStateCreateInfo *ts_info) +{ + *ts = (struct vk_tessellation_state) { + .domain_origin = VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT, + }; + if (!ts_info) + return; + + if (!IS_DYNAMIC(TS_PATCH_CONTROL_POINTS)) { + assert(ts_info->patchControlPoints <= UINT8_MAX); + ts->patch_control_points = ts_info->patchControlPoints; + } + + if (!IS_DYNAMIC(TS_DOMAIN_ORIGIN)) { + const VkPipelineTessellationDomainOriginStateCreateInfo *ts_do_info = + vk_find_struct_const(ts_info->pNext, + PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); + if (ts_do_info != NULL) { + assert(ts_do_info->domainOrigin <= UINT8_MAX); + ts->domain_origin = ts_do_info->domainOrigin; + } + } +} + +static void +vk_dynamic_graphics_state_init_ts(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_tessellation_state *ts) +{ + dst->ts = *ts; +} + +static void +vk_viewport_state_init(struct vk_viewport_state *vp, + const BITSET_WORD *dynamic, + const VkPipelineViewportStateCreateInfo *vp_info) +{ + memset(vp, 0, sizeof(*vp)); + if (!vp_info) + return; + + if (!IS_DYNAMIC(VP_VIEWPORT_COUNT)) { + assert(vp_info->viewportCount <= MESA_VK_MAX_VIEWPORTS); + vp->viewport_count = vp_info->viewportCount; + } + + if (!IS_DYNAMIC(VP_VIEWPORTS)) { + assert(!IS_DYNAMIC(VP_VIEWPORT_COUNT)); + typed_memcpy(vp->viewports, vp_info->pViewports, + vp_info->viewportCount); + } + + if (!IS_DYNAMIC(VP_SCISSOR_COUNT)) { + assert(vp_info->scissorCount <= MESA_VK_MAX_SCISSORS); + vp->scissor_count = vp_info->scissorCount; + } + + if (!IS_DYNAMIC(VP_SCISSORS)) { + assert(!IS_DYNAMIC(VP_SCISSOR_COUNT)); + typed_memcpy(vp->scissors, vp_info->pScissors, + vp_info->scissorCount); + } + + if (!IS_DYNAMIC(VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) { + const VkPipelineViewportDepthClipControlCreateInfoEXT *vp_dcc_info = + vk_find_struct_const(vp_info->pNext, + PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT); + if (vp_dcc_info != NULL) + vp->depth_clip_negative_one_to_one = vp_dcc_info->negativeOneToOne; + } +} + +static void +vk_dynamic_graphics_state_init_vp(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_viewport_state *vp) +{ + dst->vp.viewport_count = vp->viewport_count; + if (IS_NEEDED(VP_VIEWPORTS)) + typed_memcpy(dst->vp.viewports, vp->viewports, vp->viewport_count); + + dst->vp.scissor_count = vp->scissor_count; + if (IS_NEEDED(VP_SCISSORS)) + typed_memcpy(dst->vp.scissors, vp->scissors, vp->scissor_count); + + dst->vp.depth_clip_negative_one_to_one = vp->depth_clip_negative_one_to_one; +} + +static void +vk_discard_rectangles_state_init(struct vk_discard_rectangles_state *dr, + const BITSET_WORD *dynamic, + const VkPipelineDiscardRectangleStateCreateInfoEXT *dr_info) +{ + memset(dr, 0, sizeof(*dr)); + + if (dr_info == NULL) + return; + + assert(dr_info->discardRectangleCount <= MESA_VK_MAX_DISCARD_RECTANGLES); + dr->mode = dr_info->discardRectangleMode; + dr->rectangle_count = dr_info->discardRectangleCount; + + if (!IS_DYNAMIC(DR_RECTANGLES)) { + typed_memcpy(dr->rectangles, 
dr_info->pDiscardRectangles, + dr_info->discardRectangleCount); + } +} + +static void +vk_dynamic_graphics_state_init_dr(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_discard_rectangles_state *dr) +{ + dst->dr.enable = dr->rectangle_count > 0; + dst->dr.mode = dr->mode; + dst->dr.rectangle_count = dr->rectangle_count; + typed_memcpy(dst->dr.rectangles, dr->rectangles, dr->rectangle_count); +} + +static void +vk_rasterization_state_init(struct vk_rasterization_state *rs, + const BITSET_WORD *dynamic, + const VkPipelineRasterizationStateCreateInfo *rs_info) +{ + *rs = (struct vk_rasterization_state) { + .rasterizer_discard_enable = false, + .conservative_mode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extra_primitive_overestimation_size = 0.0f, + .rasterization_order_amd = VK_RASTERIZATION_ORDER_STRICT_AMD, + .provoking_vertex = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + .line.mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR, + .depth_clip_enable = IS_DYNAMIC(RS_DEPTH_CLAMP_ENABLE) ? VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP : VK_MESA_DEPTH_CLIP_ENABLE_FALSE, + .depth_bias.representation = VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORMAT_EXT, + .depth_bias.exact = false, + }; + if (!rs_info) + return; + + if (!IS_DYNAMIC(RS_RASTERIZER_DISCARD_ENABLE)) + rs->rasterizer_discard_enable = rs_info->rasterizerDiscardEnable; + + /* From the Vulkan 1.3.218 spec: + * + * "If VkPipelineRasterizationDepthClipStateCreateInfoEXT is present in + * the graphics pipeline state then depth clipping is disabled if + * VkPipelineRasterizationDepthClipStateCreateInfoEXT::depthClipEnable + * is VK_FALSE. Otherwise, if + * VkPipelineRasterizationDepthClipStateCreateInfoEXT is not present, + * depth clipping is disabled when + * VkPipelineRasterizationStateCreateInfo::depthClampEnable is VK_TRUE. + */ + if (!IS_DYNAMIC(RS_DEPTH_CLAMP_ENABLE)) { + rs->depth_clamp_enable = rs_info->depthClampEnable; + rs->depth_clip_enable = rs_info->depthClampEnable ? + VK_MESA_DEPTH_CLIP_ENABLE_FALSE : + VK_MESA_DEPTH_CLIP_ENABLE_TRUE; + } + + rs->polygon_mode = rs_info->polygonMode; + + rs->cull_mode = rs_info->cullMode; + rs->front_face = rs_info->frontFace; + rs->depth_bias.enable = rs_info->depthBiasEnable; + if ((rs_info->depthBiasEnable || IS_DYNAMIC(RS_DEPTH_BIAS_ENABLE)) && + !IS_DYNAMIC(RS_DEPTH_BIAS_FACTORS)) { + rs->depth_bias.constant = rs_info->depthBiasConstantFactor; + rs->depth_bias.clamp = rs_info->depthBiasClamp; + rs->depth_bias.slope = rs_info->depthBiasSlopeFactor; + } + rs->line.width = rs_info->lineWidth; + + vk_foreach_struct_const(ext, rs_info->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT: { + const VkPipelineRasterizationConservativeStateCreateInfoEXT *rcs_info = + (const VkPipelineRasterizationConservativeStateCreateInfoEXT *)ext; + rs->conservative_mode = rcs_info->conservativeRasterizationMode; + rs->extra_primitive_overestimation_size = + rcs_info->extraPrimitiveOverestimationSize; + break; + } + + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT: { + const VkPipelineRasterizationDepthClipStateCreateInfoEXT *rdc_info = + (const VkPipelineRasterizationDepthClipStateCreateInfoEXT *)ext; + rs->depth_clip_enable = rdc_info->depthClipEnable ? 
+ VK_MESA_DEPTH_CLIP_ENABLE_TRUE : + VK_MESA_DEPTH_CLIP_ENABLE_FALSE; + break; + } + + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT: { + const VkPipelineRasterizationLineStateCreateInfoKHR *rl_info = + (const VkPipelineRasterizationLineStateCreateInfoKHR *)ext; + rs->line.mode = rl_info->lineRasterizationMode; + if (!IS_DYNAMIC(RS_LINE_STIPPLE_ENABLE)) + rs->line.stipple.enable = rl_info->stippledLineEnable; + if ((IS_DYNAMIC(RS_LINE_STIPPLE_ENABLE) || rs->line.stipple.enable) && !IS_DYNAMIC(RS_LINE_STIPPLE)) { + rs->line.stipple.factor = rl_info->lineStippleFactor; + rs->line.stipple.pattern = rl_info->lineStipplePattern; + } + break; + } + + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT: { + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rpv_info = + (const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *)ext; + rs->provoking_vertex = rpv_info->provokingVertexMode; + break; + } + + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD: { + const VkPipelineRasterizationStateRasterizationOrderAMD *rro_info = + (const VkPipelineRasterizationStateRasterizationOrderAMD *)ext; + rs->rasterization_order_amd = rro_info->rasterizationOrder; + break; + } + + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT: { + const VkPipelineRasterizationStateStreamCreateInfoEXT *rss_info = + (const VkPipelineRasterizationStateStreamCreateInfoEXT *)ext; + rs->rasterization_stream = rss_info->rasterizationStream; + break; + } + + case VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT: { + const VkDepthBiasRepresentationInfoEXT *dbr_info = + (const VkDepthBiasRepresentationInfoEXT *)ext; + if (!IS_DYNAMIC(RS_DEPTH_BIAS_FACTORS)) { + rs->depth_bias.representation = dbr_info->depthBiasRepresentation; + rs->depth_bias.exact = dbr_info->depthBiasExact; + } + break; + } + + default: + break; + } + } +} + +static void +vk_dynamic_graphics_state_init_rs(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_rasterization_state *rs) +{ + dst->rs = *rs; +} + +static void +vk_fragment_shading_rate_state_init( + struct vk_fragment_shading_rate_state *fsr, + const BITSET_WORD *dynamic, + const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info) +{ + if (fsr_info != NULL) { + fsr->fragment_size = fsr_info->fragmentSize; + fsr->combiner_ops[0] = fsr_info->combinerOps[0]; + fsr->combiner_ops[1] = fsr_info->combinerOps[1]; + } else { + fsr->fragment_size = (VkExtent2D) { 1, 1 }; + fsr->combiner_ops[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; + fsr->combiner_ops[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; + } +} + +static void +vk_dynamic_graphics_state_init_fsr( + struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_fragment_shading_rate_state *fsr) +{ + dst->fsr = *fsr; +} + +static void +vk_sample_locations_state_init(struct vk_sample_locations_state *sl, + const VkSampleLocationsInfoEXT *sl_info) +{ + sl->per_pixel = sl_info->sampleLocationsPerPixel; + sl->grid_size = sl_info->sampleLocationGridSize; + + /* From the Vulkan 1.3.218 spec: + * + * VUID-VkSampleLocationsInfoEXT-sampleLocationsCount-01527 + * + * "sampleLocationsCount must equal sampleLocationsPerPixel * + * sampleLocationGridSize.width * sampleLocationGridSize.height" + */ + assert(sl_info->sampleLocationsCount == + sl_info->sampleLocationsPerPixel * + sl_info->sampleLocationGridSize.width * + sl_info->sampleLocationGridSize.height); + 
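+ /* For example, a 2x2 custom sample grid at 4 samples per pixel must
+ * provide exactly 4 * 2 * 2 = 16 sample locations.
+ */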
+ assert(sl_info->sampleLocationsCount <= MESA_VK_MAX_SAMPLE_LOCATIONS); + typed_memcpy(sl->locations, sl_info->pSampleLocations, + sl_info->sampleLocationsCount); +} + +static void +vk_multisample_state_init(struct vk_multisample_state *ms, + const BITSET_WORD *dynamic, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + memset(ms, 0, sizeof(*ms)); + if (!ms_info) + return; + + if (!IS_DYNAMIC(MS_RASTERIZATION_SAMPLES)) { + assert(ms_info->rasterizationSamples <= MESA_VK_MAX_SAMPLES); + ms->rasterization_samples = ms_info->rasterizationSamples; + } + + ms->sample_shading_enable = ms_info->sampleShadingEnable; + ms->min_sample_shading = ms_info->minSampleShading; + + /* From the Vulkan 1.3.218 spec: + * + * "If pSampleMask is NULL, it is treated as if the mask has all bits + * set to 1." + */ + ms->sample_mask = ms_info->pSampleMask ? *ms_info->pSampleMask : ~0; + + ms->alpha_to_coverage_enable = ms_info->alphaToCoverageEnable; + ms->alpha_to_one_enable = ms_info->alphaToOneEnable; + + /* These get filled in by vk_multisample_sample_locations_state_init() */ + ms->sample_locations_enable = false; + ms->sample_locations = NULL; +} + +static bool +needs_sample_locations_state( + const BITSET_WORD *dynamic, + const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info) +{ + return !IS_DYNAMIC(MS_SAMPLE_LOCATIONS) && + (IS_DYNAMIC(MS_SAMPLE_LOCATIONS_ENABLE) || + (sl_info != NULL && sl_info->sampleLocationsEnable)); +} + +static void +vk_multisample_sample_locations_state_init( + struct vk_multisample_state *ms, + struct vk_sample_locations_state *sl, + const BITSET_WORD *dynamic, + const VkPipelineMultisampleStateCreateInfo *ms_info, + const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info) +{ + ms->sample_locations_enable = + IS_DYNAMIC(MS_SAMPLE_LOCATIONS_ENABLE) || + (sl_info != NULL && sl_info->sampleLocationsEnable); + + assert(ms->sample_locations == NULL); + if (!IS_DYNAMIC(MS_SAMPLE_LOCATIONS)) { + if (ms->sample_locations_enable) { + vk_sample_locations_state_init(sl, &sl_info->sampleLocationsInfo); + ms->sample_locations = sl; + } else if (!IS_DYNAMIC(MS_RASTERIZATION_SAMPLES)) { + /* Otherwise, pre-populate with the standard sample locations. If + * the driver doesn't support standard sample locations, it probably + * doesn't support custom locations either and can completely ignore + * this state. 
+ */ + ms->sample_locations = + vk_standard_sample_locations_state(ms_info->rasterizationSamples); + } + /* In the case that the rasterization samples are dynamic we cannot + * pre-populate with a specific set of standard sample locations + */ + } +} + +static void +vk_dynamic_graphics_state_init_ms(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_multisample_state *ms) +{ + dst->ms.rasterization_samples = ms->rasterization_samples; + dst->ms.sample_mask = ms->sample_mask; + dst->ms.alpha_to_coverage_enable = ms->alpha_to_coverage_enable; + dst->ms.alpha_to_one_enable = ms->alpha_to_one_enable; + dst->ms.sample_locations_enable = ms->sample_locations_enable; + + if (IS_NEEDED(MS_SAMPLE_LOCATIONS) && ms->sample_locations) + *dst->ms.sample_locations = *ms->sample_locations; +} + +static void +vk_stencil_test_face_state_init(struct vk_stencil_test_face_state *face, + const VkStencilOpState *info) +{ + face->op.fail = info->failOp; + face->op.pass = info->passOp; + face->op.depth_fail = info->depthFailOp; + face->op.compare = info->compareOp; + face->compare_mask = info->compareMask; + face->write_mask = info->writeMask; + face->reference = info->reference; +} + +static void +vk_depth_stencil_state_init(struct vk_depth_stencil_state *ds, + const BITSET_WORD *dynamic, + const VkPipelineDepthStencilStateCreateInfo *ds_info) +{ + *ds = (struct vk_depth_stencil_state) { + .stencil.write_enable = true, + }; + if (!ds_info) + return; + + ds->depth.test_enable = ds_info->depthTestEnable; + ds->depth.write_enable = ds_info->depthWriteEnable; + ds->depth.compare_op = ds_info->depthCompareOp; + ds->depth.bounds_test.enable = ds_info->depthBoundsTestEnable; + ds->depth.bounds_test.min = ds_info->minDepthBounds; + ds->depth.bounds_test.max = ds_info->maxDepthBounds; + ds->stencil.test_enable = ds_info->stencilTestEnable; + vk_stencil_test_face_state_init(&ds->stencil.front, &ds_info->front); + vk_stencil_test_face_state_init(&ds->stencil.back, &ds_info->back); +} + +static void +vk_dynamic_graphics_state_init_ds(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_depth_stencil_state *ds) +{ + dst->ds = *ds; +} + +static bool +optimize_stencil_face(struct vk_stencil_test_face_state *face, + VkCompareOp depthCompareOp, + bool consider_write_mask) +{ + /* If compareOp is ALWAYS then the stencil test will never fail and failOp + * will never happen. Set failOp to KEEP in this case. + */ + if (face->op.compare == VK_COMPARE_OP_ALWAYS) + face->op.fail = VK_STENCIL_OP_KEEP; + + /* If compareOp is NEVER or depthCompareOp is NEVER then one of the depth + * or stencil tests will fail and passOp will never happen. + */ + if (face->op.compare == VK_COMPARE_OP_NEVER || + depthCompareOp == VK_COMPARE_OP_NEVER) + face->op.pass = VK_STENCIL_OP_KEEP; + + /* If compareOp is NEVER or depthCompareOp is ALWAYS then either the + * stencil test will fail or the depth test will pass. In either case, + * depthFailOp will never happen. + */ + if (face->op.compare == VK_COMPARE_OP_NEVER || + depthCompareOp == VK_COMPARE_OP_ALWAYS) + face->op.depth_fail = VK_STENCIL_OP_KEEP; + + /* If the write mask is zero, nothing will be written to the stencil buffer + * so it's as if all operations are KEEP. 
+ */ + if (consider_write_mask && face->write_mask == 0) { + face->op.pass = VK_STENCIL_OP_KEEP; + face->op.fail = VK_STENCIL_OP_KEEP; + face->op.depth_fail = VK_STENCIL_OP_KEEP; + } + + return face->op.fail != VK_STENCIL_OP_KEEP || + face->op.depth_fail != VK_STENCIL_OP_KEEP || + face->op.pass != VK_STENCIL_OP_KEEP; +} + +void +vk_optimize_depth_stencil_state(struct vk_depth_stencil_state *ds, + VkImageAspectFlags ds_aspects, + bool consider_write_mask) +{ + /* stencil.write_enable is a dummy right now that should always be true */ + assert(ds->stencil.write_enable); + + /* From the Vulkan 1.3.221 spec: + * + * "If there is no depth attachment then the depth test is skipped." + */ + if (!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) + ds->depth.test_enable = false; + + /* From the Vulkan 1.3.221 spec: + * + * "...or if there is no stencil attachment, the coverage mask is + * unmodified by this operation." + */ + if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) + ds->stencil.test_enable = false; + + /* If the depth test is disabled, we won't be writing anything. Make sure we + * treat the test as always passing later on as well. + */ + if (!ds->depth.test_enable) { + ds->depth.write_enable = false; + ds->depth.compare_op = VK_COMPARE_OP_ALWAYS; + } + + /* If the stencil test is disabled, we won't be writing anything. Make sure + * we treat the test as always passing later on as well. + */ + if (!ds->stencil.test_enable) { + ds->stencil.write_enable = false; + ds->stencil.front.op.compare = VK_COMPARE_OP_ALWAYS; + ds->stencil.back.op.compare = VK_COMPARE_OP_ALWAYS; + } + + /* If the stencil test is enabled and always fails, then we will never get + * to the depth test so we can just disable the depth test entirely. + */ + if (ds->stencil.test_enable && + ds->stencil.front.op.compare == VK_COMPARE_OP_NEVER && + ds->stencil.back.op.compare == VK_COMPARE_OP_NEVER) { + ds->depth.test_enable = false; + ds->depth.write_enable = false; + } + + /* If depthCompareOp is EQUAL then the value we would be writing to the + * depth buffer is the same as the value that's already there so there's no + * point in writing it. + */ + if (ds->depth.compare_op == VK_COMPARE_OP_EQUAL) + ds->depth.write_enable = false; + + /* If the stencil ops are such that we don't actually ever modify the + * stencil buffer, we should disable writes. + */ + if (!optimize_stencil_face(&ds->stencil.front, ds->depth.compare_op, + consider_write_mask) && + !optimize_stencil_face(&ds->stencil.back, ds->depth.compare_op, + consider_write_mask)) + ds->stencil.write_enable = false; + + /* If the depth test always passes and we never write out depth, that's the + * same as if the depth test is disabled entirely. + */ + if (ds->depth.compare_op == VK_COMPARE_OP_ALWAYS && !ds->depth.write_enable) + ds->depth.test_enable = false; + + /* If the stencil test always passes and we never write out stencil, that's + * the same as if the stencil test is disabled entirely. 
+ */ + if (ds->stencil.front.op.compare == VK_COMPARE_OP_ALWAYS && + ds->stencil.back.op.compare == VK_COMPARE_OP_ALWAYS && + !ds->stencil.write_enable) + ds->stencil.test_enable = false; +} + +static void +vk_color_blend_state_init(struct vk_color_blend_state *cb, + const BITSET_WORD *dynamic, + const VkPipelineColorBlendStateCreateInfo *cb_info) +{ + *cb = (struct vk_color_blend_state) { + .color_write_enables = BITFIELD_MASK(MESA_VK_MAX_COLOR_ATTACHMENTS), + }; + if (!cb_info) + return; + + cb->logic_op_enable = cb_info->logicOpEnable; + cb->logic_op = cb_info->logicOp; + + assert(cb_info->attachmentCount <= MESA_VK_MAX_COLOR_ATTACHMENTS); + cb->attachment_count = cb_info->attachmentCount; + /* pAttachments is ignored if any of these is not set */ + bool full_dynamic = IS_DYNAMIC(CB_BLEND_ENABLES) && IS_DYNAMIC(CB_BLEND_EQUATIONS) && IS_DYNAMIC(CB_WRITE_MASKS); + for (uint32_t a = 0; a < cb_info->attachmentCount; a++) { + const VkPipelineColorBlendAttachmentState *att = full_dynamic ? NULL : &cb_info->pAttachments[a]; + + cb->attachments[a] = (struct vk_color_blend_attachment_state) { + .blend_enable = IS_DYNAMIC(CB_BLEND_ENABLES) || att->blendEnable, + .src_color_blend_factor = IS_DYNAMIC(CB_BLEND_EQUATIONS) ? 0 : att->srcColorBlendFactor, + .dst_color_blend_factor = IS_DYNAMIC(CB_BLEND_EQUATIONS) ? 0 : att->dstColorBlendFactor, + .src_alpha_blend_factor = IS_DYNAMIC(CB_BLEND_EQUATIONS) ? 0 : att->srcAlphaBlendFactor, + .dst_alpha_blend_factor = IS_DYNAMIC(CB_BLEND_EQUATIONS) ? 0 : att->dstAlphaBlendFactor, + .write_mask = IS_DYNAMIC(CB_WRITE_MASKS) ? 0xf : att->colorWriteMask, + .color_blend_op = IS_DYNAMIC(CB_BLEND_EQUATIONS) ? 0 : att->colorBlendOp, + .alpha_blend_op = IS_DYNAMIC(CB_BLEND_EQUATIONS) ? 0 : att->alphaBlendOp, + }; + } + + for (uint32_t i = 0; i < 4; i++) + cb->blend_constants[i] = cb_info->blendConstants[i]; + + const VkPipelineColorWriteCreateInfoEXT *cw_info = + vk_find_struct_const(cb_info->pNext, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); + if (!IS_DYNAMIC(CB_COLOR_WRITE_ENABLES) && cw_info != NULL) { + uint8_t color_write_enables = 0; + assert(cb_info->attachmentCount == cw_info->attachmentCount); + for (uint32_t a = 0; a < cw_info->attachmentCount; a++) { + if (cw_info->pColorWriteEnables[a]) + color_write_enables |= BITFIELD_BIT(a); + } + cb->color_write_enables = color_write_enables; + } else { + cb->color_write_enables = BITFIELD_MASK(MESA_VK_MAX_COLOR_ATTACHMENTS); + } +} + +static void +vk_input_attachment_location_state_init(struct vk_input_attachment_location_state *ial, + const BITSET_WORD *dynamic, + const VkRenderingInputAttachmentIndexInfoKHR *ial_info) +{ + *ial = (struct vk_input_attachment_location_state) { + .color_map = { 0, 1, 2, 3, 4, 5, 6, 7 }, + .depth_att = MESA_VK_ATTACHMENT_UNUSED, + .stencil_att = MESA_VK_ATTACHMENT_UNUSED, + }; + if (!ial_info) + return; + + for (uint32_t a = 0; a < MIN2(ial_info->colorAttachmentCount, + MESA_VK_MAX_COLOR_ATTACHMENTS); a++) { + ial->color_map[a] = + ial_info->pColorAttachmentInputIndices[a] == VK_ATTACHMENT_UNUSED ? + MESA_VK_ATTACHMENT_UNUSED : ial_info->pColorAttachmentInputIndices[a]; + } + ial->depth_att = ial_info->pDepthInputAttachmentIndex != NULL ? + *ial_info->pDepthInputAttachmentIndex : MESA_VK_ATTACHMENT_UNUSED; + ial->stencil_att = ial_info->pStencilInputAttachmentIndex != NULL ? 
+ *ial_info->pStencilInputAttachmentIndex : MESA_VK_ATTACHMENT_UNUSED; +} + +static void +vk_color_attachment_location_state_init(struct vk_color_attachment_location_state *cal, + const BITSET_WORD *dynamic, + const VkRenderingAttachmentLocationInfoKHR *cal_info) +{ + *cal = (struct vk_color_attachment_location_state) { + .color_map = { 0, 1, 2, 3, 4, 5, 6, 7 }, + }; + if (!cal_info) + return; + + for (uint32_t a = 0; a < MIN2(cal_info->colorAttachmentCount, + MESA_VK_MAX_COLOR_ATTACHMENTS); a++) { + cal->color_map[a] = + cal_info->pColorAttachmentLocations[a] == VK_ATTACHMENT_UNUSED ? + MESA_VK_ATTACHMENT_UNUSED : cal_info->pColorAttachmentLocations[a]; + } +} + +static void +vk_dynamic_graphics_state_init_cb(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_color_blend_state *cb) +{ + dst->cb.logic_op_enable = cb->logic_op_enable; + dst->cb.logic_op = cb->logic_op; + dst->cb.color_write_enables = cb->color_write_enables; + dst->cb.attachment_count = cb->attachment_count; + + if (IS_NEEDED(CB_BLEND_ENABLES) || + IS_NEEDED(CB_BLEND_EQUATIONS) || + IS_NEEDED(CB_WRITE_MASKS)) { + typed_memcpy(dst->cb.attachments, cb->attachments, cb->attachment_count); + } + + if (IS_NEEDED(CB_BLEND_CONSTANTS)) + typed_memcpy(dst->cb.blend_constants, cb->blend_constants, 4); +} + +static void +vk_dynamic_graphics_state_init_ial(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_input_attachment_location_state *ial) +{ + if (IS_NEEDED(INPUT_ATTACHMENT_MAP)) { + typed_memcpy(dst->ial.color_map, ial->color_map, MESA_VK_MAX_COLOR_ATTACHMENTS); + dst->ial.depth_att = ial->depth_att; + dst->ial.stencil_att = ial->stencil_att; + } +} + +static void +vk_dynamic_graphics_state_init_cal(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_color_attachment_location_state *cal) +{ + if (IS_NEEDED(COLOR_ATTACHMENT_MAP)) + typed_memcpy(dst->cal.color_map, cal->color_map, MESA_VK_MAX_COLOR_ATTACHMENTS); +} + +static void +vk_pipeline_flags_init(struct vk_graphics_pipeline_state *state, + VkPipelineCreateFlags2KHR driver_rp_flags, + bool has_driver_rp, + const VkGraphicsPipelineCreateInfo *info, + const BITSET_WORD *dynamic, + VkGraphicsPipelineLibraryFlagsEXT lib) +{ + VkPipelineCreateFlags2KHR valid_pipeline_flags = 0; + VkPipelineCreateFlags2KHR valid_renderpass_flags = 0; + if (lib & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + valid_renderpass_flags |= + VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | + VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT; + valid_pipeline_flags |= + VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | + VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT; + } + if (lib & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { + valid_renderpass_flags |= + VK_PIPELINE_CREATE_2_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT | + VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + if (!IS_DYNAMIC(ATTACHMENT_FEEDBACK_LOOP_ENABLE)) { + valid_pipeline_flags |= + VK_PIPELINE_CREATE_2_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT | + VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } + } + const VkPipelineCreateFlags2KHR renderpass_flags = + (has_driver_rp ? 
driver_rp_flags : + vk_get_pipeline_rendering_flags(info)) & valid_renderpass_flags; + + const VkPipelineCreateFlags2KHR pipeline_flags = + vk_graphics_pipeline_create_flags(info) & valid_pipeline_flags; + + bool pipeline_feedback_loop = pipeline_flags & + (VK_PIPELINE_CREATE_2_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT | + VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT); + + bool renderpass_feedback_loop = renderpass_flags & + (VK_PIPELINE_CREATE_2_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT | + VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT); + + state->pipeline_flags |= renderpass_flags | pipeline_flags; + state->feedback_loop_not_input_only |= + pipeline_feedback_loop || (!has_driver_rp && renderpass_feedback_loop); +} + +static void +vk_render_pass_state_init(struct vk_render_pass_state *rp, + const struct vk_render_pass_state *old_rp, + const struct vk_render_pass_state *driver_rp, + const VkGraphicsPipelineCreateInfo *info, + VkGraphicsPipelineLibraryFlagsEXT lib) +{ + /* If we already have render pass state and it has attachment info, then + * it's complete and we don't need a new one. The one caveat here is that + * we may need to add in some rendering flags. + */ + if (old_rp != NULL && vk_render_pass_state_has_attachment_info(old_rp)) { + *rp = *old_rp; + return; + } + + *rp = (struct vk_render_pass_state) { + .depth_attachment_format = VK_FORMAT_UNDEFINED, + .stencil_attachment_format = VK_FORMAT_UNDEFINED, + }; + + if (info->renderPass != VK_NULL_HANDLE && driver_rp != NULL) { + *rp = *driver_rp; + return; + } + + const VkPipelineRenderingCreateInfo *r_info = + vk_get_pipeline_rendering_create_info(info); + + if (r_info == NULL) + return; + + rp->view_mask = r_info->viewMask; + + /* From the Vulkan 1.3.218 spec description of pre-rasterization state: + * + * "Fragment shader state is defined by: + * ... + * * VkRenderPass and subpass parameter + * * The viewMask parameter of VkPipelineRenderingCreateInfo (formats + * are ignored)" + * + * The description of fragment shader state contains identical text. + * + * If we have a render pass then we have full information. Even if we're + * dynamic-rendering-only, the presence of a render pass means the + * rendering info came from a vk_render_pass and is therefore complete. + * Otherwise, all we can grab is the view mask and we have to leave the + * rest for later. 
+ */ + if (info->renderPass == VK_NULL_HANDLE && + !(lib & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) { + rp->attachments = MESA_VK_RP_ATTACHMENT_INFO_INVALID; + return; + } + + assert(r_info->colorAttachmentCount <= MESA_VK_MAX_COLOR_ATTACHMENTS); + rp->color_attachment_count = r_info->colorAttachmentCount; + for (uint32_t i = 0; i < r_info->colorAttachmentCount; i++) { + rp->color_attachment_formats[i] = r_info->pColorAttachmentFormats[i]; + if (r_info->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED) + rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i); + } + + rp->depth_attachment_format = r_info->depthAttachmentFormat; + if (r_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED) + rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT; + + rp->stencil_attachment_format = r_info->stencilAttachmentFormat; + if (r_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) + rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT; + + const VkAttachmentSampleCountInfoAMD *asc_info = + vk_get_pipeline_sample_count_info_amd(info); + if (asc_info != NULL) { + assert(asc_info->colorAttachmentCount == rp->color_attachment_count); + for (uint32_t i = 0; i < asc_info->colorAttachmentCount; i++) { + rp->color_attachment_samples[i] = asc_info->pColorAttachmentSamples[i]; + } + + rp->depth_stencil_attachment_samples = asc_info->depthStencilAttachmentSamples; + } +} + +static void +vk_dynamic_graphics_state_init_rp(struct vk_dynamic_graphics_state *dst, + const BITSET_WORD *needed, + const struct vk_render_pass_state *rp) +{ + dst->rp.attachments = rp->attachments; +} + +#define FOREACH_STATE_GROUP(f) \ + f(MESA_VK_GRAPHICS_STATE_VERTEX_INPUT_BIT, \ + vk_vertex_input_state, vi); \ + f(MESA_VK_GRAPHICS_STATE_INPUT_ASSEMBLY_BIT, \ + vk_input_assembly_state, ia); \ + f(MESA_VK_GRAPHICS_STATE_TESSELLATION_BIT, \ + vk_tessellation_state, ts); \ + f(MESA_VK_GRAPHICS_STATE_VIEWPORT_BIT, \ + vk_viewport_state, vp); \ + f(MESA_VK_GRAPHICS_STATE_DISCARD_RECTANGLES_BIT, \ + vk_discard_rectangles_state, dr); \ + f(MESA_VK_GRAPHICS_STATE_RASTERIZATION_BIT, \ + vk_rasterization_state, rs); \ + f(MESA_VK_GRAPHICS_STATE_FRAGMENT_SHADING_RATE_BIT, \ + vk_fragment_shading_rate_state, fsr); \ + f(MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT, \ + vk_multisample_state, ms); \ + f(MESA_VK_GRAPHICS_STATE_DEPTH_STENCIL_BIT, \ + vk_depth_stencil_state, ds); \ + f(MESA_VK_GRAPHICS_STATE_COLOR_BLEND_BIT, \ + vk_color_blend_state, cb); \ + f(MESA_VK_GRAPHICS_STATE_INPUT_ATTACHMENT_MAP_BIT, \ + vk_input_attachment_location_state, ial); \ + f(MESA_VK_GRAPHICS_STATE_COLOR_ATTACHMENT_MAP_BIT, \ + vk_color_attachment_location_state, cal); \ + f(MESA_VK_GRAPHICS_STATE_RENDER_PASS_BIT, \ + vk_render_pass_state, rp); + +static enum mesa_vk_graphics_state_groups +vk_graphics_pipeline_state_groups(const struct vk_graphics_pipeline_state *state) +{ + /* For now, we just validate dynamic state */ + enum mesa_vk_graphics_state_groups groups = 0; + +#define FILL_HAS(STATE, type, s) \ + if (state->s != NULL) groups |= STATE + + FOREACH_STATE_GROUP(FILL_HAS) + +#undef FILL_HAS + + return groups | fully_dynamic_state_groups(state->dynamic); +} + +void +vk_graphics_pipeline_get_state(const struct vk_graphics_pipeline_state *state, + BITSET_WORD *set_state_out) +{ + /* For now, we just validate dynamic state */ + enum mesa_vk_graphics_state_groups groups = 0; + +#define FILL_HAS(STATE, type, s) \ + if (state->s != NULL) groups |= STATE + + FOREACH_STATE_GROUP(FILL_HAS) + +#undef FILL_HAS + + BITSET_DECLARE(set_state, 
MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + get_dynamic_state_groups(set_state, groups); + BITSET_ANDNOT(set_state, set_state, state->dynamic); + memcpy(set_state_out, set_state, sizeof(set_state)); +} + +static void +vk_graphics_pipeline_state_validate(const struct vk_graphics_pipeline_state *state) +{ +#ifndef NDEBUG + /* For now, we just validate dynamic state */ + enum mesa_vk_graphics_state_groups groups = + vk_graphics_pipeline_state_groups(state); + validate_dynamic_state_groups(state->dynamic, groups); +#endif +} + +static bool +may_have_rasterization(const struct vk_graphics_pipeline_state *state, + const BITSET_WORD *dynamic, + const VkGraphicsPipelineCreateInfo *info) +{ + if (state->rs) { + /* We default rasterizer_discard_enable to false when dynamic */ + return !state->rs->rasterizer_discard_enable; + } else { + return IS_DYNAMIC(RS_RASTERIZER_DISCARD_ENABLE) || + !info->pRasterizationState->rasterizerDiscardEnable; + } +} + +VkResult +vk_graphics_pipeline_state_fill(const struct vk_device *device, + struct vk_graphics_pipeline_state *state, + const VkGraphicsPipelineCreateInfo *info, + const struct vk_render_pass_state *driver_rp, + VkPipelineCreateFlags2KHR driver_rp_flags, + struct vk_graphics_pipeline_all_state *all, + const VkAllocationCallbacks *alloc, + VkSystemAllocationScope scope, + void **alloc_ptr_out) +{ + vk_graphics_pipeline_state_validate(state); + + BITSET_DECLARE(dynamic, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + vk_get_dynamic_graphics_states(dynamic, info->pDynamicState); + + /* + * First, figure out which library-level shader/state groups we need + */ + + VkGraphicsPipelineLibraryFlagsEXT lib; + const VkGraphicsPipelineLibraryCreateInfoEXT *gpl_info = + vk_find_struct_const(info->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT); + const VkPipelineLibraryCreateInfoKHR *lib_info = + vk_find_struct_const(info->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR); + + VkPipelineCreateFlags2KHR pipeline_flags = vk_graphics_pipeline_create_flags(info); + + VkShaderStageFlagBits allowed_stages; + if (!(pipeline_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) { + allowed_stages = VK_SHADER_STAGE_ALL_GRAPHICS | + VK_SHADER_STAGE_TASK_BIT_EXT | + VK_SHADER_STAGE_MESH_BIT_EXT; + } else if (gpl_info) { + allowed_stages = 0; + + /* If we're creating a pipeline library without pre-rasterization, + * discard all the associated stages. + */ + if (gpl_info->flags & + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + allowed_stages |= (VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT | + VK_SHADER_STAGE_TASK_BIT_EXT | + VK_SHADER_STAGE_MESH_BIT_EXT); + } + + /* If we're creating a pipeline library without fragment shader, + * discard that stage. + */ + if (gpl_info->flags & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + allowed_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; + } else { + /* VkGraphicsPipelineLibraryCreateInfoEXT was omitted, flags should + * be assumed to be empty and therefore no shader stage should be + * considered. 
+ */ + allowed_stages = 0; + } + + for (uint32_t i = 0; i < info->stageCount; i++) { + state->shader_stages |= info->pStages[i].stage & allowed_stages; + } + + /* In case we return early */ + if (alloc_ptr_out != NULL) + *alloc_ptr_out = NULL; + + if (gpl_info) { + lib = gpl_info->flags; + } else if ((lib_info && lib_info->libraryCount > 0) || + (pipeline_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) { + /* + * From the Vulkan 1.3.210 spec: + * "If this structure is omitted, and either VkGraphicsPipelineCreateInfo::flags + * includes VK_PIPELINE_CREATE_LIBRARY_BIT_KHR or the + * VkGraphicsPipelineCreateInfo::pNext chain includes a + * VkPipelineLibraryCreateInfoKHR structure with a libraryCount greater than 0, + * it is as if flags is 0. Otherwise if this structure is omitted, it is as if + * flags includes all possible subsets of the graphics pipeline." + */ + lib = 0; + } else { + /* We're building a complete pipeline. From the Vulkan 1.3.218 spec: + * + * "A complete graphics pipeline always includes pre-rasterization + * shader state, with other subsets included depending on that state. + * If the pre-rasterization shader state includes a vertex shader, + * then vertex input state is included in a complete graphics + * pipeline. If the value of + * VkPipelineRasterizationStateCreateInfo::rasterizerDiscardEnable in + * the pre-rasterization shader state is VK_FALSE or the + * VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE dynamic state is + * enabled fragment shader state and fragment output interface state + * is included in a complete graphics pipeline." + */ + lib = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT; + + if (state->shader_stages & VK_SHADER_STAGE_VERTEX_BIT) + lib |= VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT; + + if (may_have_rasterization(state, dynamic, info)) { + lib |= VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; + lib |= VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT; + } + } + + /* + * Next, turn those into individual states. Among other things, this + * de-duplicates things like FSR and multisample state which appear in + * multiple library groups. + */ + + enum mesa_vk_graphics_state_groups needs = 0; + if (lib & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { + needs |= MESA_VK_GRAPHICS_STATE_VERTEX_INPUT_BIT; + needs |= MESA_VK_GRAPHICS_STATE_INPUT_ASSEMBLY_BIT; + } + + /* Other stuff potentially depends on this so gather it early */ + struct vk_render_pass_state rp; + if (lib & (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) { + vk_render_pass_state_init(&rp, state->rp, driver_rp, info, lib); + + needs |= MESA_VK_GRAPHICS_STATE_RENDER_PASS_BIT; + + /* If the old state was incomplete but the new one isn't, set state->rp + * to NULL so it gets replaced with the new version. 
+ */ + if (state->rp != NULL && + !vk_render_pass_state_has_attachment_info(state->rp) && + !vk_render_pass_state_has_attachment_info(&rp)) + state->rp = NULL; + } + + if (lib & (VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) { + vk_pipeline_flags_init(state, driver_rp_flags, !!driver_rp, info, dynamic, lib); + } + + if (lib & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + /* From the Vulkan 1.3.218 spec: + * + * VUID-VkGraphicsPipelineCreateInfo-stage-02096 + * + * "If the pipeline is being created with pre-rasterization shader + * state the stage member of one element of pStages must be either + * VK_SHADER_STAGE_VERTEX_BIT or VK_SHADER_STAGE_MESH_BIT_EXT" + */ + assert(state->shader_stages & (VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_MESH_BIT_EXT)); + + if (state->shader_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + needs |= MESA_VK_GRAPHICS_STATE_TESSELLATION_BIT; + + if (may_have_rasterization(state, dynamic, info)) + needs |= MESA_VK_GRAPHICS_STATE_VIEWPORT_BIT; + + needs |= MESA_VK_GRAPHICS_STATE_DISCARD_RECTANGLES_BIT; + needs |= MESA_VK_GRAPHICS_STATE_RASTERIZATION_BIT; + needs |= MESA_VK_GRAPHICS_STATE_FRAGMENT_SHADING_RATE_BIT; + } + + if (lib & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + needs |= MESA_VK_GRAPHICS_STATE_FRAGMENT_SHADING_RATE_BIT; + + /* From the Vulkan 1.3.218 spec: + * + * "Fragment shader state is defined by: + * ... + * - VkPipelineMultisampleStateCreateInfo if sample shading is + * enabled or renderpass is not VK_NULL_HANDLE" + * + * and + * + * VUID-VkGraphicsPipelineCreateInfo-pMultisampleState-06629 + * + * "If the pipeline is being created with fragment shader state + * pMultisampleState must be NULL or a valid pointer to a valid + * VkPipelineMultisampleStateCreateInfo structure" + * + * so we can reliably detect when to include it based on the + * pMultisampleState pointer. + */ + if (info->pMultisampleState != NULL) + needs |= MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT; + + /* From the Vulkan 1.3.218 spec: + * + * VUID-VkGraphicsPipelineCreateInfo-renderPass-06043 + * + * "If renderPass is not VK_NULL_HANDLE, the pipeline is being + * created with fragment shader state, and subpass uses a + * depth/stencil attachment, pDepthStencilState must be a valid + * pointer to a valid VkPipelineDepthStencilStateCreateInfo + * structure" + * + * VUID-VkGraphicsPipelineCreateInfo-renderPass-06053 + * + * "If renderPass is VK_NULL_HANDLE, the pipeline is being created + * with fragment shader state and fragment output interface state, + * and either of VkPipelineRenderingCreateInfo::depthAttachmentFormat + * or VkPipelineRenderingCreateInfo::stencilAttachmentFormat are not + * VK_FORMAT_UNDEFINED, pDepthStencilState must be a valid pointer to + * a valid VkPipelineDepthStencilStateCreateInfo structure" + * + * VUID-VkGraphicsPipelineCreateInfo-renderPass-06590 + * + * "If renderPass is VK_NULL_HANDLE and the pipeline is being created + * with fragment shader state but not fragment output interface + * state, pDepthStencilState must be a valid pointer to a valid + * VkPipelineDepthStencilStateCreateInfo structure" + * + * In the first case, we'll have a real set of aspects in rp. In the + * second case, where we have both fragment shader and fragment output + * state, we will also have a valid set of aspects. 
In the third case + * where we only have fragment shader state and no render pass, the + * vk_render_pass_state will be incomplete. + */ + if (!vk_render_pass_state_has_attachment_info(&rp) || + (rp.attachments & (MESA_VK_RP_ATTACHMENT_DEPTH_BIT | + MESA_VK_RP_ATTACHMENT_STENCIL_BIT))) + needs |= MESA_VK_GRAPHICS_STATE_DEPTH_STENCIL_BIT; + + needs |= MESA_VK_GRAPHICS_STATE_INPUT_ATTACHMENT_MAP_BIT; + } + + if (lib & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { + if (rp.attachments & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS) + needs |= MESA_VK_GRAPHICS_STATE_COLOR_BLEND_BIT; + + needs |= MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT; + + needs |= MESA_VK_GRAPHICS_STATE_COLOR_ATTACHMENT_MAP_BIT; + } + + /* + * Next, Filter off any states we already have. + */ + +#define FILTER_NEEDS(STATE, type, s) \ + if (state->s != NULL) needs &= ~STATE + + FOREACH_STATE_GROUP(FILTER_NEEDS) + +#undef FILTER_NEEDS + + /* Filter dynamic state down to just what we're adding */ + BITSET_DECLARE(dynamic_filter, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + get_dynamic_state_groups(dynamic_filter, needs); + + /* Attachment feedback loop state is part of the renderpass state in mesa + * because attachment feedback loops can also come from the render pass, + * but in Vulkan it is part of the fragment output interface. The + * renderpass state also exists, possibly in an incomplete state, in other + * stages for things like the view mask, but it does not contain the + * feedback loop flags. In those other stages we have to ignore + * VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT, even though it is + * part of a state group that exists in those stages. + */ + if (!(lib & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) { + BITSET_CLEAR(dynamic_filter, + MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE); + } + + BITSET_AND(dynamic, dynamic, dynamic_filter); + + /* And add it in */ + BITSET_OR(state->dynamic, state->dynamic, dynamic); + + /* + * If a state is fully dynamic, we don't need to even allocate them. Do + * this after we've filtered dynamic state because we still want them to + * show up in the dynamic state but don't want the actual state. + */ + needs &= ~fully_dynamic_state_groups(state->dynamic); + + /* If we don't need to set up any new states, bail early */ + if (needs == 0) + return VK_SUCCESS; + + /* + * Now, ensure that we have space for each of the states we're going to + * fill. If all != NULL, we'll pull from that. Otherwise, we need to + * allocate memory. + */ + + VK_MULTIALLOC(ma); + +#define ENSURE_STATE_IF_NEEDED(STATE, type, s) \ + struct type *new_##s = NULL; \ + if (needs & STATE) { \ + if (all == NULL) { \ + vk_multialloc_add(&ma, &new_##s, struct type, 1); \ + } else { \ + new_##s = &all->s; \ + } \ + } + + FOREACH_STATE_GROUP(ENSURE_STATE_IF_NEEDED) + +#undef ENSURE_STATE_IF_NEEDED + + /* Sample locations are a bit special. We don't want to waste the memory + * for 64 floats if we don't need to. Also, we set up standard sample + * locations if no user-provided sample locations are available. 
+ */
+ const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = NULL;
+ struct vk_sample_locations_state *new_sl = NULL;
+ if (needs & MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT) {
+ if (info->pMultisampleState)
+ sl_info = vk_find_struct_const(info->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+ if (needs_sample_locations_state(dynamic, sl_info)) {
+ if (all == NULL) {
+ vk_multialloc_add(&ma, &new_sl, struct vk_sample_locations_state, 1);
+ } else {
+ new_sl = &all->ms_sample_locations;
+ }
+ }
+ }
+
+ /*
+ * Allocate memory, if needed
+ */
+
+ if (ma.size > 0) {
+ assert(all == NULL);
+ *alloc_ptr_out = vk_multialloc_alloc2(&ma, &device->alloc, alloc, scope);
+ if (*alloc_ptr_out == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ /*
+ * Create aliases for various input infos so we can use our FOREACH macro
+ */
+
+#define INFO_ALIAS(_State, s) \
+ const VkPipeline##_State##StateCreateInfo *s##_info = info->p##_State##State
+
+ INFO_ALIAS(VertexInput, vi);
+ INFO_ALIAS(InputAssembly, ia);
+ INFO_ALIAS(Tessellation, ts);
+ INFO_ALIAS(Viewport, vp);
+ INFO_ALIAS(Rasterization, rs);
+ INFO_ALIAS(Multisample, ms);
+ INFO_ALIAS(DepthStencil, ds);
+ INFO_ALIAS(ColorBlend, cb);
+
+#undef INFO_ALIAS
+
+ const VkPipelineDiscardRectangleStateCreateInfoEXT *dr_info =
+ vk_find_struct_const(info->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
+
+ const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info =
+ vk_find_struct_const(info->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
+
+ const VkRenderingInputAttachmentIndexInfoKHR *ial_info =
+ vk_find_struct_const(info->pNext, RENDERING_INPUT_ATTACHMENT_INDEX_INFO_KHR);
+ const VkRenderingAttachmentLocationInfoKHR *cal_info =
+ vk_find_struct_const(info->pNext, RENDERING_ATTACHMENT_LOCATION_INFO_KHR);
+
+ /*
+ * Finally, fill out all the states
+ */
+
+#define INIT_STATE_IF_NEEDED(STATE, type, s) \
+ if (needs & STATE) { \
+ type##_init(new_##s, dynamic, s##_info); \
+ state->s = new_##s; \
+ }
+
+ /* render pass state is special and we just copy it */
+#define vk_render_pass_state_init(s, d, i) *s = rp
+
+ FOREACH_STATE_GROUP(INIT_STATE_IF_NEEDED)
+
+#undef vk_render_pass_state_init
+#undef INIT_STATE_IF_NEEDED
+
+ if (needs & MESA_VK_GRAPHICS_STATE_MULTISAMPLE_BIT) {
+ vk_multisample_sample_locations_state_init(new_ms, new_sl, dynamic,
+ ms_info, sl_info);
+ }
+
+ return VK_SUCCESS;
+}
+
+#undef IS_DYNAMIC
+#undef IS_NEEDED
+
+void
+vk_graphics_pipeline_state_merge(struct vk_graphics_pipeline_state *dst,
+ const struct vk_graphics_pipeline_state *src)
+{
+ vk_graphics_pipeline_state_validate(dst);
+ vk_graphics_pipeline_state_validate(src);
+
+ BITSET_OR(dst->dynamic, dst->dynamic, src->dynamic);
+
+ dst->shader_stages |= src->shader_stages;
+
+ dst->pipeline_flags |= src->pipeline_flags;
+ dst->feedback_loop_not_input_only |= src->feedback_loop_not_input_only;
+
+ /* Render pass state needs special care because a render pass state may be
+ * incomplete (view mask only). See vk_render_pass_state_init().
+ */ + if (dst->rp != NULL && src->rp != NULL && + !vk_render_pass_state_has_attachment_info(dst->rp) && + vk_render_pass_state_has_attachment_info(src->rp)) + dst->rp = src->rp; + +#define MERGE(STATE, type, state) \ + if (dst->state == NULL && src->state != NULL) dst->state = src->state; + + FOREACH_STATE_GROUP(MERGE) + +#undef MERGE +} + +static bool +is_group_all_dynamic(const struct vk_graphics_pipeline_state *state, + enum mesa_vk_graphics_state_groups group) +{ + /* Render pass is a bit special, because it contains always-static state + * (e.g. the view mask). It's never all dynamic. + */ + if (group == MESA_VK_GRAPHICS_STATE_RENDER_PASS_BIT) + return false; + + BITSET_DECLARE(group_state, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + BITSET_DECLARE(dynamic_state, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + get_dynamic_state_groups(group_state, group); + BITSET_AND(dynamic_state, group_state, state->dynamic); + return BITSET_EQUAL(dynamic_state, group_state); +} + +VkResult +vk_graphics_pipeline_state_copy(const struct vk_device *device, + struct vk_graphics_pipeline_state *state, + const struct vk_graphics_pipeline_state *old_state, + const VkAllocationCallbacks *alloc, + VkSystemAllocationScope scope, + void **alloc_ptr_out) +{ + vk_graphics_pipeline_state_validate(old_state); + + VK_MULTIALLOC(ma); + +#define ENSURE_STATE_IF_NEEDED(STATE, type, s) \ + struct type *new_##s = NULL; \ + if (old_state->s && !is_group_all_dynamic(state, STATE)) { \ + vk_multialloc_add(&ma, &new_##s, struct type, 1); \ + } + + FOREACH_STATE_GROUP(ENSURE_STATE_IF_NEEDED) + +#undef ENSURE_STATE_IF_NEEDED + + /* Sample locations are a bit special. */ + struct vk_sample_locations_state *new_sample_locations = NULL; + if (old_state->ms && old_state->ms->sample_locations && + !BITSET_TEST(old_state->dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS)) { + assert(old_state->ms->sample_locations); + vk_multialloc_add(&ma, &new_sample_locations, + struct vk_sample_locations_state, 1); + } + + if (ma.size > 0) { + *alloc_ptr_out = vk_multialloc_alloc2(&ma, &device->alloc, alloc, scope); + if (*alloc_ptr_out == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (new_sample_locations) { + *new_sample_locations = *old_state->ms->sample_locations; + } + +#define COPY_STATE_IF_NEEDED(STATE, type, s) \ + if (new_##s) { \ + *new_##s = *old_state->s; \ + } \ + state->s = new_##s; + + FOREACH_STATE_GROUP(COPY_STATE_IF_NEEDED) + + if (new_ms) { + new_ms->sample_locations = new_sample_locations; + } + + state->shader_stages = old_state->shader_stages; + BITSET_COPY(state->dynamic, old_state->dynamic); + +#undef COPY_STATE_IF_NEEDED + + state->pipeline_flags = old_state->pipeline_flags; + state->feedback_loop_not_input_only = + old_state->feedback_loop_not_input_only; + + vk_graphics_pipeline_state_validate(state); + return VK_SUCCESS; +} + +static const struct vk_dynamic_graphics_state vk_default_dynamic_graphics_state = { + .rs = { + .line = { + .width = 1.0f, + }, + }, + .fsr = { + .fragment_size = {1u, 1u}, + .combiner_ops = { + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, + }, + }, + .ds = { + .depth = { + .bounds_test = { + .min = 0.0f, + .max = 1.0f, + }, + }, + .stencil = { + .write_enable = true, + .front = { + .compare_mask = -1, + .write_mask = -1, + }, + .back = { + .compare_mask = -1, + .write_mask = -1, + }, + }, + }, + .cb = { + .color_write_enables = 0xffu, + .attachment_count = MESA_VK_MAX_COLOR_ATTACHMENTS, + }, + .ial = { + .color_map = { 0, 
1, 2, 3, 4, 5, 6, 7 },
+ .depth_att = MESA_VK_ATTACHMENT_UNUSED,
+ .stencil_att = MESA_VK_ATTACHMENT_UNUSED,
+ },
+ .cal = {
+ .color_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
+ },
+};
+
+void
+vk_dynamic_graphics_state_init(struct vk_dynamic_graphics_state *dyn)
+{
+ *dyn = vk_default_dynamic_graphics_state;
+}
+
+void
+vk_dynamic_graphics_state_clear(struct vk_dynamic_graphics_state *dyn)
+{
+ struct vk_vertex_input_state *vi = dyn->vi;
+ struct vk_sample_locations_state *sl = dyn->ms.sample_locations;
+
+ *dyn = vk_default_dynamic_graphics_state;
+
+ if (vi != NULL) {
+ memset(vi, 0, sizeof(*vi));
+ dyn->vi = vi;
+ }
+
+ if (sl != NULL) {
+ memset(sl, 0, sizeof(*sl));
+ dyn->ms.sample_locations = sl;
+ }
+}
+
+void
+vk_dynamic_graphics_state_fill(struct vk_dynamic_graphics_state *dyn,
+ const struct vk_graphics_pipeline_state *p)
+{
+ /* This function (and the individual vk_dynamic_graphics_state_init_*
+ * functions it calls) are a bit sloppy. Instead of checking every single
+ * bit, we just copy everything and set the bits the right way at the end
+ * based on what groups we actually had.
+ */
+ enum mesa_vk_graphics_state_groups groups = 0;
+
+ BITSET_DECLARE(needed, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
+ BITSET_COPY(needed, p->dynamic);
+ BITSET_NOT(needed);
+
+ /* We only want to copy these if the driver has filled out the relevant
+ * pointer in the dynamic state struct. If not, they don't support them
+ * as dynamic state and we should leave them alone.
+ */
+ if (dyn->vi == NULL)
+ BITSET_CLEAR(needed, MESA_VK_DYNAMIC_VI);
+ if (dyn->ms.sample_locations == NULL)
+ BITSET_CLEAR(needed, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
+
+#define INIT_DYNAMIC_STATE(STATE, type, s) \
+ if (p->s != NULL) { \
+ vk_dynamic_graphics_state_init_##s(dyn, needed, p->s); \
+ groups |= STATE; \
+ }
+
+ FOREACH_STATE_GROUP(INIT_DYNAMIC_STATE);
+
+#undef INIT_DYNAMIC_STATE
+
+ /* Feedback loop state is weird: implicit feedback loops from the
+ * renderpass and dynamically-enabled feedback loops can in theory both be
+ * enabled independently, so we can't just use one field; instead drivers
+ * have to OR the pipeline state (in vk_graphics_pipeline_state::pipeline_flags)
+ * and dynamic state. Due to this it isn't worth tracking
+ * implicit render pass flags vs. pipeline flags in the pipeline state, and
+ * we just combine the two in vk_pipeline_flags_init() and don't bother
+ * setting the dynamic state from the pipeline here, instead just making
+ * sure the dynamic state is reset to 0 when feedback loop state is static.
+ */
+ dyn->feedback_loops = 0;
+
+ get_dynamic_state_groups(dyn->set, groups);
+
+ /* Vertex input state is always included in a complete pipeline. If p->vi
+ * is NULL, that means that it has been precompiled by the driver, but we
+ * should still track vi_bindings_valid.
+ */
+ BITSET_SET(dyn->set, MESA_VK_DYNAMIC_VI_BINDINGS_VALID);
+
+ /* If the pipeline doesn't render any color attachments, we should still
+ * keep track of the fact that it writes 0 attachments, even though none of
+ * the other blend states will be initialized. Normally this would be
+ * initialized with the other blend states.
+ */ + if (!p->rp || !(p->rp->attachments & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS)) { + dyn->cb.attachment_count = 0; + BITSET_SET(dyn->set, MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT); + } + + /* Mask off all but the groups we actually found */ + BITSET_AND(dyn->set, dyn->set, needed); +} + +#define SET_DYN_VALUE(dst, STATE, state, value) do { \ + if (!BITSET_TEST((dst)->set, MESA_VK_DYNAMIC_##STATE) || \ + (dst)->state != (value)) { \ + (dst)->state = (value); \ + assert((dst)->state == (value)); \ + BITSET_SET(dst->set, MESA_VK_DYNAMIC_##STATE); \ + BITSET_SET(dst->dirty, MESA_VK_DYNAMIC_##STATE); \ + } \ +} while(0) + +#define SET_DYN_BOOL(dst, STATE, state, value) \ + SET_DYN_VALUE(dst, STATE, state, (bool)value); + +#define SET_DYN_ARRAY(dst, STATE, state, start, count, src) do { \ + assert(start + count <= ARRAY_SIZE((dst)->state)); \ + STATIC_ASSERT(sizeof(*(dst)->state) == sizeof(*(src))); \ + const size_t __state_size = sizeof(*(dst)->state) * (count); \ + if (!BITSET_TEST((dst)->set, MESA_VK_DYNAMIC_##STATE) || \ + memcmp((dst)->state + start, src, __state_size)) { \ + memcpy((dst)->state + start, src, __state_size); \ + BITSET_SET(dst->set, MESA_VK_DYNAMIC_##STATE); \ + BITSET_SET(dst->dirty, MESA_VK_DYNAMIC_##STATE); \ + } \ +} while(0) + +void +vk_dynamic_graphics_state_copy(struct vk_dynamic_graphics_state *dst, + const struct vk_dynamic_graphics_state *src) +{ +#define IS_SET_IN_SRC(STATE) \ + BITSET_TEST(src->set, MESA_VK_DYNAMIC_##STATE) + +#define COPY_MEMBER(STATE, state) \ + SET_DYN_VALUE(dst, STATE, state, src->state) + +#define COPY_ARRAY(STATE, state, count) \ + SET_DYN_ARRAY(dst, STATE, state, 0, count, src->state) + +#define COPY_IF_SET(STATE, state) \ + if (IS_SET_IN_SRC(STATE)) SET_DYN_VALUE(dst, STATE, state, src->state) + + if (IS_SET_IN_SRC(VI)) { + assert(dst->vi != NULL); + COPY_MEMBER(VI, vi->bindings_valid); + u_foreach_bit(b, src->vi->bindings_valid) { + COPY_MEMBER(VI, vi->bindings[b].stride); + COPY_MEMBER(VI, vi->bindings[b].input_rate); + COPY_MEMBER(VI, vi->bindings[b].divisor); + } + COPY_MEMBER(VI, vi->attributes_valid); + u_foreach_bit(a, src->vi->attributes_valid) { + COPY_MEMBER(VI, vi->attributes[a].binding); + COPY_MEMBER(VI, vi->attributes[a].format); + COPY_MEMBER(VI, vi->attributes[a].offset); + } + } + + if (IS_SET_IN_SRC(VI_BINDINGS_VALID)) + COPY_MEMBER(VI_BINDINGS_VALID, vi_bindings_valid); + + if (IS_SET_IN_SRC(VI_BINDING_STRIDES)) { + assert(IS_SET_IN_SRC(VI_BINDINGS_VALID)); + u_foreach_bit(a, src->vi_bindings_valid) { + COPY_MEMBER(VI_BINDING_STRIDES, vi_binding_strides[a]); + } + } + + COPY_IF_SET(IA_PRIMITIVE_TOPOLOGY, ia.primitive_topology); + COPY_IF_SET(IA_PRIMITIVE_RESTART_ENABLE, ia.primitive_restart_enable); + COPY_IF_SET(TS_PATCH_CONTROL_POINTS, ts.patch_control_points); + COPY_IF_SET(TS_DOMAIN_ORIGIN, ts.domain_origin); + + COPY_IF_SET(VP_VIEWPORT_COUNT, vp.viewport_count); + if (IS_SET_IN_SRC(VP_VIEWPORTS)) { + assert(IS_SET_IN_SRC(VP_VIEWPORT_COUNT)); + COPY_ARRAY(VP_VIEWPORTS, vp.viewports, src->vp.viewport_count); + } + + COPY_IF_SET(VP_SCISSOR_COUNT, vp.scissor_count); + if (IS_SET_IN_SRC(VP_SCISSORS)) { + assert(IS_SET_IN_SRC(VP_SCISSOR_COUNT)); + COPY_ARRAY(VP_SCISSORS, vp.scissors, src->vp.scissor_count); + } + + COPY_IF_SET(VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE, + vp.depth_clip_negative_one_to_one); + + COPY_IF_SET(DR_ENABLE, dr.enable); + COPY_IF_SET(DR_MODE, dr.mode); + if (IS_SET_IN_SRC(DR_RECTANGLES)) { + COPY_MEMBER(DR_RECTANGLES, dr.rectangle_count); + COPY_ARRAY(DR_RECTANGLES, dr.rectangles, src->dr.rectangle_count); 
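+ /* Both the rectangle count and the rectangles themselves live under the
+ * single DR_RECTANGLES bit, so the copies above mark that one bit set and
+ * dirty whenever either of them changes.
+ */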
+ } + + COPY_IF_SET(RS_RASTERIZER_DISCARD_ENABLE, rs.rasterizer_discard_enable); + COPY_IF_SET(RS_DEPTH_CLAMP_ENABLE, rs.depth_clamp_enable); + COPY_IF_SET(RS_DEPTH_CLIP_ENABLE, rs.depth_clip_enable); + COPY_IF_SET(RS_POLYGON_MODE, rs.polygon_mode); + COPY_IF_SET(RS_CULL_MODE, rs.cull_mode); + COPY_IF_SET(RS_FRONT_FACE, rs.front_face); + COPY_IF_SET(RS_CONSERVATIVE_MODE, rs.conservative_mode); + COPY_IF_SET(RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE, + rs.extra_primitive_overestimation_size); + COPY_IF_SET(RS_RASTERIZATION_ORDER_AMD, rs.rasterization_order_amd); + COPY_IF_SET(RS_PROVOKING_VERTEX, rs.provoking_vertex); + COPY_IF_SET(RS_RASTERIZATION_STREAM, rs.rasterization_stream); + COPY_IF_SET(RS_DEPTH_BIAS_ENABLE, rs.depth_bias.enable); + COPY_IF_SET(RS_DEPTH_BIAS_FACTORS, rs.depth_bias.constant); + COPY_IF_SET(RS_DEPTH_BIAS_FACTORS, rs.depth_bias.clamp); + COPY_IF_SET(RS_DEPTH_BIAS_FACTORS, rs.depth_bias.slope); + COPY_IF_SET(RS_DEPTH_BIAS_FACTORS, rs.depth_bias.representation); + COPY_IF_SET(RS_DEPTH_BIAS_FACTORS, rs.depth_bias.exact); + COPY_IF_SET(RS_LINE_WIDTH, rs.line.width); + COPY_IF_SET(RS_LINE_MODE, rs.line.mode); + COPY_IF_SET(RS_LINE_STIPPLE_ENABLE, rs.line.stipple.enable); + COPY_IF_SET(RS_LINE_STIPPLE, rs.line.stipple.factor); + COPY_IF_SET(RS_LINE_STIPPLE, rs.line.stipple.pattern); + + COPY_IF_SET(FSR, fsr.fragment_size.width); + COPY_IF_SET(FSR, fsr.fragment_size.height); + COPY_IF_SET(FSR, fsr.combiner_ops[0]); + COPY_IF_SET(FSR, fsr.combiner_ops[1]); + + COPY_IF_SET(MS_RASTERIZATION_SAMPLES, ms.rasterization_samples); + COPY_IF_SET(MS_SAMPLE_MASK, ms.sample_mask); + COPY_IF_SET(MS_ALPHA_TO_COVERAGE_ENABLE, ms.alpha_to_coverage_enable); + COPY_IF_SET(MS_ALPHA_TO_ONE_ENABLE, ms.alpha_to_one_enable); + COPY_IF_SET(MS_SAMPLE_LOCATIONS_ENABLE, ms.sample_locations_enable); + + if (IS_SET_IN_SRC(MS_SAMPLE_LOCATIONS)) { + assert(dst->ms.sample_locations != NULL); + COPY_MEMBER(MS_SAMPLE_LOCATIONS, ms.sample_locations->per_pixel); + COPY_MEMBER(MS_SAMPLE_LOCATIONS, ms.sample_locations->grid_size.width); + COPY_MEMBER(MS_SAMPLE_LOCATIONS, ms.sample_locations->grid_size.height); + const uint32_t sl_count = src->ms.sample_locations->per_pixel * + src->ms.sample_locations->grid_size.width * + src->ms.sample_locations->grid_size.height; + COPY_ARRAY(MS_SAMPLE_LOCATIONS, ms.sample_locations->locations, sl_count); + } + + COPY_IF_SET(DS_DEPTH_TEST_ENABLE, ds.depth.test_enable); + COPY_IF_SET(DS_DEPTH_WRITE_ENABLE, ds.depth.write_enable); + COPY_IF_SET(DS_DEPTH_COMPARE_OP, ds.depth.compare_op); + COPY_IF_SET(DS_DEPTH_BOUNDS_TEST_ENABLE, ds.depth.bounds_test.enable); + if (IS_SET_IN_SRC(DS_DEPTH_BOUNDS_TEST_BOUNDS)) { + COPY_MEMBER(DS_DEPTH_BOUNDS_TEST_BOUNDS, ds.depth.bounds_test.min); + COPY_MEMBER(DS_DEPTH_BOUNDS_TEST_BOUNDS, ds.depth.bounds_test.max); + } + + COPY_IF_SET(DS_STENCIL_TEST_ENABLE, ds.stencil.test_enable); + if (IS_SET_IN_SRC(DS_STENCIL_OP)) { + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.front.op.fail); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.front.op.pass); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.front.op.depth_fail); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.front.op.compare); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.back.op.fail); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.back.op.pass); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.back.op.depth_fail); + COPY_MEMBER(DS_STENCIL_OP, ds.stencil.back.op.compare); + } + if (IS_SET_IN_SRC(DS_STENCIL_COMPARE_MASK)) { + COPY_MEMBER(DS_STENCIL_COMPARE_MASK, ds.stencil.front.compare_mask); + COPY_MEMBER(DS_STENCIL_COMPARE_MASK, 
ds.stencil.back.compare_mask); + } + if (IS_SET_IN_SRC(DS_STENCIL_WRITE_MASK)) { + COPY_MEMBER(DS_STENCIL_WRITE_MASK, ds.stencil.front.write_mask); + COPY_MEMBER(DS_STENCIL_WRITE_MASK, ds.stencil.back.write_mask); + } + if (IS_SET_IN_SRC(DS_STENCIL_REFERENCE)) { + COPY_MEMBER(DS_STENCIL_REFERENCE, ds.stencil.front.reference); + COPY_MEMBER(DS_STENCIL_REFERENCE, ds.stencil.back.reference); + } + + COPY_IF_SET(CB_LOGIC_OP_ENABLE, cb.logic_op_enable); + COPY_IF_SET(CB_LOGIC_OP, cb.logic_op); + COPY_IF_SET(CB_ATTACHMENT_COUNT, cb.attachment_count); + COPY_IF_SET(CB_COLOR_WRITE_ENABLES, cb.color_write_enables); + if (IS_SET_IN_SRC(CB_BLEND_ENABLES)) { + for (uint32_t a = 0; a < src->cb.attachment_count; a++) + COPY_MEMBER(CB_BLEND_ENABLES, cb.attachments[a].blend_enable); + } + if (IS_SET_IN_SRC(CB_BLEND_EQUATIONS)) { + for (uint32_t a = 0; a < src->cb.attachment_count; a++) { + COPY_MEMBER(CB_BLEND_EQUATIONS, + cb.attachments[a].src_color_blend_factor); + COPY_MEMBER(CB_BLEND_EQUATIONS, + cb.attachments[a].dst_color_blend_factor); + COPY_MEMBER(CB_BLEND_EQUATIONS, + cb.attachments[a].src_alpha_blend_factor); + COPY_MEMBER(CB_BLEND_EQUATIONS, + cb.attachments[a].dst_alpha_blend_factor); + COPY_MEMBER(CB_BLEND_EQUATIONS, cb.attachments[a].color_blend_op); + COPY_MEMBER(CB_BLEND_EQUATIONS, cb.attachments[a].alpha_blend_op); + } + } + if (IS_SET_IN_SRC(CB_WRITE_MASKS)) { + for (uint32_t a = 0; a < src->cb.attachment_count; a++) + COPY_MEMBER(CB_WRITE_MASKS, cb.attachments[a].write_mask); + } + if (IS_SET_IN_SRC(CB_BLEND_CONSTANTS)) + COPY_ARRAY(CB_BLEND_CONSTANTS, cb.blend_constants, 4); + + COPY_IF_SET(RP_ATTACHMENTS, rp.attachments); + + if (IS_SET_IN_SRC(COLOR_ATTACHMENT_MAP)) { + COPY_ARRAY(COLOR_ATTACHMENT_MAP, cal.color_map, + MESA_VK_MAX_COLOR_ATTACHMENTS); + } + + COPY_IF_SET(ATTACHMENT_FEEDBACK_LOOP_ENABLE, feedback_loops); + +#undef IS_SET_IN_SRC +#undef MARK_DIRTY +#undef COPY_MEMBER +#undef COPY_ARRAY +#undef COPY_IF_SET + + for (uint32_t w = 0; w < ARRAY_SIZE(dst->dirty); w++) { + /* If it's in the source but isn't set in the destination at all, mark + * it dirty. It's possible that the default values just happen to equal + * the value from src. 
+ */ + dst->dirty[w] |= src->set[w] & ~dst->set[w]; + + /* Everything that was in the source is now in the destination */ + dst->set[w] |= src->set[w]; + } +} + +void +vk_cmd_set_dynamic_graphics_state(struct vk_command_buffer *cmd, + const struct vk_dynamic_graphics_state *state) +{ + vk_dynamic_graphics_state_copy(&cmd->dynamic_graphics_state, state); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, + uint32_t vertexBindingDescriptionCount, + const VkVertexInputBindingDescription2EXT* pVertexBindingDescriptions, + uint32_t vertexAttributeDescriptionCount, + const VkVertexInputAttributeDescription2EXT* pVertexAttributeDescriptions) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + uint32_t bindings_valid = 0; + for (uint32_t i = 0; i < vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription2EXT *desc = + &pVertexBindingDescriptions[i]; + + assert(desc->binding < MESA_VK_MAX_VERTEX_BINDINGS); + assert(desc->stride <= MESA_VK_MAX_VERTEX_BINDING_STRIDE); + assert(desc->inputRate <= UINT8_MAX); + + const uint32_t b = desc->binding; + bindings_valid |= BITFIELD_BIT(b); + dyn->vi->bindings[b].stride = desc->stride; + dyn->vi->bindings[b].input_rate = desc->inputRate; + dyn->vi->bindings[b].divisor = desc->divisor; + + /* Also set bindings_strides in case a driver is keying off that */ + dyn->vi_binding_strides[b] = desc->stride; + } + + dyn->vi->bindings_valid = bindings_valid; + SET_DYN_VALUE(dyn, VI_BINDINGS_VALID, vi_bindings_valid, bindings_valid); + + uint32_t attributes_valid = 0; + for (uint32_t i = 0; i < vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription2EXT *desc = + &pVertexAttributeDescriptions[i]; + + assert(desc->location < MESA_VK_MAX_VERTEX_ATTRIBUTES); + assert(desc->binding < MESA_VK_MAX_VERTEX_BINDINGS); + assert(bindings_valid & BITFIELD_BIT(desc->binding)); + + const uint32_t a = desc->location; + attributes_valid |= BITFIELD_BIT(a); + dyn->vi->attributes[a].binding = desc->binding; + dyn->vi->attributes[a].format = desc->format; + dyn->vi->attributes[a].offset = desc->offset; + } + dyn->vi->attributes_valid = attributes_valid; + + BITSET_SET(dyn->set, MESA_VK_DYNAMIC_VI); + BITSET_SET(dyn->set, MESA_VK_DYNAMIC_VI_BINDING_STRIDES); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_VI); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES); +} + +void +vk_cmd_set_vertex_binding_strides(struct vk_command_buffer *cmd, + uint32_t first_binding, + uint32_t binding_count, + const VkDeviceSize *strides) +{ + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + for (uint32_t i = 0; i < binding_count; i++) { + SET_DYN_VALUE(dyn, VI_BINDING_STRIDES, + vi_binding_strides[first_binding + i], strides[i]); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetPrimitiveTopology(VkCommandBuffer commandBuffer, + VkPrimitiveTopology primitiveTopology) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, IA_PRIMITIVE_TOPOLOGY, + ia.primitive_topology, primitiveTopology); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetPrimitiveRestartEnable(VkCommandBuffer commandBuffer, + VkBool32 primitiveRestartEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, IA_PRIMITIVE_RESTART_ENABLE, + 
ia.primitive_restart_enable, primitiveRestartEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetPatchControlPointsEXT(VkCommandBuffer commandBuffer, + uint32_t patchControlPoints) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, TS_PATCH_CONTROL_POINTS, + ts.patch_control_points, patchControlPoints); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer, + VkTessellationDomainOrigin domainOrigin) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, TS_DOMAIN_ORIGIN, ts.domain_origin, domainOrigin); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetViewport(VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport *pViewports) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_ARRAY(dyn, VP_VIEWPORTS, vp.viewports, + firstViewport, viewportCount, pViewports); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport *pViewports) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, VP_VIEWPORT_COUNT, vp.viewport_count, viewportCount); + SET_DYN_ARRAY(dyn, VP_VIEWPORTS, vp.viewports, 0, viewportCount, pViewports); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetScissor(VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D *pScissors) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_ARRAY(dyn, VP_SCISSORS, vp.scissors, + firstScissor, scissorCount, pScissors); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D *pScissors) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, VP_SCISSOR_COUNT, vp.scissor_count, scissorCount); + SET_DYN_ARRAY(dyn, VP_SCISSORS, vp.scissors, 0, scissorCount, pScissors); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthClipNegativeOneToOneEXT(VkCommandBuffer commandBuffer, + VkBool32 negativeOneToOne) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE, + vp.depth_clip_negative_one_to_one, negativeOneToOne); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, + uint32_t firstDiscardRectangle, + uint32_t discardRectangleCount, + const VkRect2D *pDiscardRectangles) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, DR_RECTANGLES, dr.rectangle_count, discardRectangleCount); + SET_DYN_ARRAY(dyn, DR_RECTANGLES, dr.rectangles, firstDiscardRectangle, + discardRectangleCount, pDiscardRectangles); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetRasterizerDiscardEnable(VkCommandBuffer commandBuffer, + VkBool32 rasterizerDiscardEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, 
cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, RS_RASTERIZER_DISCARD_ENABLE, + rs.rasterizer_discard_enable, rasterizerDiscardEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthClampEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 depthClampEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, RS_DEPTH_CLAMP_ENABLE, + rs.depth_clamp_enable, depthClampEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthClipEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 depthClipEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_DEPTH_CLIP_ENABLE, rs.depth_clip_enable, + depthClipEnable ? VK_MESA_DEPTH_CLIP_ENABLE_TRUE : + VK_MESA_DEPTH_CLIP_ENABLE_FALSE); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetPolygonModeEXT(VkCommandBuffer commandBuffer, + VkPolygonMode polygonMode) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_POLYGON_MODE, rs.polygon_mode, polygonMode); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCullMode(VkCommandBuffer commandBuffer, + VkCullModeFlags cullMode) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_CULL_MODE, rs.cull_mode, cullMode); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetFrontFace(VkCommandBuffer commandBuffer, + VkFrontFace frontFace) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_FRONT_FACE, rs.front_face, frontFace); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetConservativeRasterizationModeEXT( + VkCommandBuffer commandBuffer, + VkConservativeRasterizationModeEXT conservativeRasterizationMode) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_CONSERVATIVE_MODE, rs.conservative_mode, + conservativeRasterizationMode); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetExtraPrimitiveOverestimationSizeEXT( + VkCommandBuffer commandBuffer, + float extraPrimitiveOverestimationSize) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE, + rs.extra_primitive_overestimation_size, + extraPrimitiveOverestimationSize); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetProvokingVertexModeEXT(VkCommandBuffer commandBuffer, + VkProvokingVertexModeEXT provokingVertexMode) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_PROVOKING_VERTEX, + rs.provoking_vertex, provokingVertexMode); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer, + VkImageAspectFlags aspectMask) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, ATTACHMENT_FEEDBACK_LOOP_ENABLE, + feedback_loops, aspectMask); +} + +VKAPI_ATTR void VKAPI_CALL 
+vk_common_CmdSetRasterizationStreamEXT(VkCommandBuffer commandBuffer, + uint32_t rasterizationStream) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_RASTERIZATION_STREAM, + rs.rasterization_stream, rasterizationStream); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthBiasEnable(VkCommandBuffer commandBuffer, + VkBool32 depthBiasEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, RS_DEPTH_BIAS_ENABLE, + rs.depth_bias.enable, depthBiasEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthBias(VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + + VkDepthBiasInfoEXT depth_bias_info = { + .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_INFO_EXT, + .depthBiasConstantFactor = depthBiasConstantFactor, + .depthBiasClamp = depthBiasClamp, + .depthBiasSlopeFactor = depthBiasSlopeFactor, + }; + + cmd->base.device->dispatch_table.CmdSetDepthBias2EXT(commandBuffer, + &depth_bias_info); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetLineWidth(VkCommandBuffer commandBuffer, + float lineWidth) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_LINE_WIDTH, rs.line.width, lineWidth); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer, + VkLineRasterizationModeKHR lineRasterizationMode) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_LINE_MODE, rs.line.mode, lineRasterizationMode); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetLineStippleEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 stippledLineEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, RS_LINE_STIPPLE_ENABLE, + rs.line.stipple.enable, stippledLineEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetLineStippleKHR(VkCommandBuffer commandBuffer, + uint32_t lineStippleFactor, + uint16_t lineStipplePattern) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_LINE_STIPPLE, + rs.line.stipple.factor, lineStippleFactor); + SET_DYN_VALUE(dyn, RS_LINE_STIPPLE, + rs.line.stipple.pattern, lineStipplePattern); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetFragmentShadingRateKHR(VkCommandBuffer commandBuffer, + const VkExtent2D *pFragmentSize, + const VkFragmentShadingRateCombinerOpKHR combinerOps[2]) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, FSR, fsr.fragment_size.width, pFragmentSize->width); + SET_DYN_VALUE(dyn, FSR, fsr.fragment_size.height, pFragmentSize->height); + SET_DYN_VALUE(dyn, FSR, fsr.combiner_ops[0], combinerOps[0]); + SET_DYN_VALUE(dyn, FSR, fsr.combiner_ops[1], combinerOps[1]); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, + VkSampleCountFlagBits rasterizationSamples) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); 
+ struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + assert(rasterizationSamples <= MESA_VK_MAX_SAMPLES); + + SET_DYN_VALUE(dyn, MS_RASTERIZATION_SAMPLES, + ms.rasterization_samples, rasterizationSamples); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, + VkSampleCountFlagBits samples, + const VkSampleMask *pSampleMask) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + VkSampleMask sample_mask = *pSampleMask & BITFIELD_MASK(MESA_VK_MAX_SAMPLES); + + SET_DYN_VALUE(dyn, MS_SAMPLE_MASK, ms.sample_mask, sample_mask); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetAlphaToCoverageEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 alphaToCoverageEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, MS_ALPHA_TO_COVERAGE_ENABLE, + ms.alpha_to_coverage_enable, alphaToCoverageEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetAlphaToOneEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 alphaToOneEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, MS_ALPHA_TO_ONE_ENABLE, + ms.alpha_to_one_enable, alphaToOneEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, + const VkSampleLocationsInfoEXT *pSampleLocationsInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, MS_SAMPLE_LOCATIONS, + ms.sample_locations->per_pixel, + pSampleLocationsInfo->sampleLocationsPerPixel); + SET_DYN_VALUE(dyn, MS_SAMPLE_LOCATIONS, + ms.sample_locations->grid_size.width, + pSampleLocationsInfo->sampleLocationGridSize.width); + SET_DYN_VALUE(dyn, MS_SAMPLE_LOCATIONS, + ms.sample_locations->grid_size.height, + pSampleLocationsInfo->sampleLocationGridSize.height); + + assert(pSampleLocationsInfo->sampleLocationsCount == + pSampleLocationsInfo->sampleLocationsPerPixel * + pSampleLocationsInfo->sampleLocationGridSize.width * + pSampleLocationsInfo->sampleLocationGridSize.height); + + assert(pSampleLocationsInfo->sampleLocationsCount <= + MESA_VK_MAX_SAMPLE_LOCATIONS); + + SET_DYN_ARRAY(dyn, MS_SAMPLE_LOCATIONS, + ms.sample_locations->locations, + 0, pSampleLocationsInfo->sampleLocationsCount, + pSampleLocationsInfo->pSampleLocations); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetSampleLocationsEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 sampleLocationsEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, MS_SAMPLE_LOCATIONS_ENABLE, + ms.sample_locations_enable, sampleLocationsEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthTestEnable(VkCommandBuffer commandBuffer, + VkBool32 depthTestEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, DS_DEPTH_TEST_ENABLE, + ds.depth.test_enable, depthTestEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthWriteEnable(VkCommandBuffer commandBuffer, + VkBool32 depthWriteEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = 
&cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, DS_DEPTH_WRITE_ENABLE, + ds.depth.write_enable, depthWriteEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthCompareOp(VkCommandBuffer commandBuffer, + VkCompareOp depthCompareOp) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, DS_DEPTH_COMPARE_OP, ds.depth.compare_op, + depthCompareOp); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthBoundsTestEnable(VkCommandBuffer commandBuffer, + VkBool32 depthBoundsTestEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, DS_DEPTH_BOUNDS_TEST_ENABLE, + ds.depth.bounds_test.enable, depthBoundsTestEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthBounds(VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, DS_DEPTH_BOUNDS_TEST_BOUNDS, + ds.depth.bounds_test.min, minDepthBounds); + SET_DYN_VALUE(dyn, DS_DEPTH_BOUNDS_TEST_BOUNDS, + ds.depth.bounds_test.max, maxDepthBounds); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetStencilTestEnable(VkCommandBuffer commandBuffer, + VkBool32 stencilTestEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, DS_STENCIL_TEST_ENABLE, + ds.stencil.test_enable, stencilTestEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetStencilOp(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.front.op.fail, failOp); + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.front.op.pass, passOp); + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.front.op.depth_fail, depthFailOp); + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.front.op.compare, compareOp); + } + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.back.op.fail, failOp); + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.back.op.pass, passOp); + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.back.op.depth_fail, depthFailOp); + SET_DYN_VALUE(dyn, DS_STENCIL_OP, ds.stencil.back.op.compare, compareOp); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + /* We assume 8-bit stencil always */ + STATIC_ASSERT(sizeof(dyn->ds.stencil.front.write_mask) == 1); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_COMPARE_MASK, + ds.stencil.front.compare_mask, (uint8_t)compareMask); + } + if (faceMask & VK_STENCIL_FACE_BACK_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_COMPARE_MASK, + ds.stencil.back.compare_mask, (uint8_t)compareMask); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ 
+ VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + /* We assume 8-bit stencil always */ + STATIC_ASSERT(sizeof(dyn->ds.stencil.front.write_mask) == 1); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_WRITE_MASK, + ds.stencil.front.write_mask, (uint8_t)writeMask); + } + if (faceMask & VK_STENCIL_FACE_BACK_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_WRITE_MASK, + ds.stencil.back.write_mask, (uint8_t)writeMask); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetStencilReference(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + /* We assume 8-bit stencil always */ + STATIC_ASSERT(sizeof(dyn->ds.stencil.front.write_mask) == 1); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_REFERENCE, + ds.stencil.front.reference, (uint8_t)reference); + } + if (faceMask & VK_STENCIL_FACE_BACK_BIT) { + SET_DYN_VALUE(dyn, DS_STENCIL_REFERENCE, + ds.stencil.back.reference, (uint8_t)reference); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetLogicOpEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 logicOpEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_BOOL(dyn, CB_LOGIC_OP_ENABLE, cb.logic_op_enable, logicOpEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetLogicOpEXT(VkCommandBuffer commandBuffer, + VkLogicOp logicOp) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, CB_LOGIC_OP, cb.logic_op, logicOp); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkBool32 *pColorWriteEnables) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + assert(attachmentCount <= MESA_VK_MAX_COLOR_ATTACHMENTS); + + uint8_t color_write_enables = 0; + for (uint32_t a = 0; a < attachmentCount; a++) { + if (pColorWriteEnables[a]) + color_write_enables |= BITFIELD_BIT(a); + } + + SET_DYN_VALUE(dyn, CB_COLOR_WRITE_ENABLES, + cb.color_write_enables, color_write_enables); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkBool32 *pColorBlendEnables) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + for (uint32_t i = 0; i < attachmentCount; i++) { + uint32_t a = firstAttachment + i; + assert(a < ARRAY_SIZE(dyn->cb.attachments)); + + SET_DYN_BOOL(dyn, CB_BLEND_ENABLES, + cb.attachments[a].blend_enable, pColorBlendEnables[i]); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorBlendEquationEXT *pColorBlendEquations) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + for (uint32_t i = 0; i < attachmentCount; i++) { + uint32_t a = firstAttachment + i; + assert(a < ARRAY_SIZE(dyn->cb.attachments)); + + SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + 
cb.attachments[a].src_color_blend_factor, + pColorBlendEquations[i].srcColorBlendFactor); + + SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + cb.attachments[a].dst_color_blend_factor, + pColorBlendEquations[i].dstColorBlendFactor); + + SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + cb.attachments[a].color_blend_op, + pColorBlendEquations[i].colorBlendOp); + + SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + cb.attachments[a].src_alpha_blend_factor, + pColorBlendEquations[i].srcAlphaBlendFactor); + + SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + cb.attachments[a].dst_alpha_blend_factor, + pColorBlendEquations[i].dstAlphaBlendFactor); + + SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + cb.attachments[a].alpha_blend_op, + pColorBlendEquations[i].alphaBlendOp); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorComponentFlags *pColorWriteMasks) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + for (uint32_t i = 0; i < attachmentCount; i++) { + uint32_t a = firstAttachment + i; + assert(a < ARRAY_SIZE(dyn->cb.attachments)); + + SET_DYN_VALUE(dyn, CB_WRITE_MASKS, + cb.attachments[a].write_mask, pColorWriteMasks[i]); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetBlendConstants(VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_ARRAY(dyn, CB_BLEND_CONSTANTS, cb.blend_constants, + 0, 4, blendConstants); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetColorBlendAdvancedEXT(VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorBlendAdvancedEXT* pColorBlendAdvanced) +{ + unreachable("VK_EXT_blend_operation_advanced unsupported"); +} + +void +vk_cmd_set_cb_attachment_count(struct vk_command_buffer *cmd, + uint32_t attachment_count) +{ + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, CB_ATTACHMENT_COUNT, cb.attachment_count, attachment_count); +} + +void +vk_cmd_set_rp_attachments(struct vk_command_buffer *cmd, + enum vk_rp_attachment_flags attachments) +{ + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RP_ATTACHMENTS, rp.attachments, attachments); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDiscardRectangleEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 discardRectangleEnable) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, DR_ENABLE, dr.enable, discardRectangleEnable); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer, + VkDiscardRectangleModeEXT discardRectangleMode) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, DR_MODE, dr.mode, discardRectangleMode); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetDepthBias2EXT( + VkCommandBuffer commandBuffer, + const VkDepthBiasInfoEXT* pDepthBiasInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.constant, pDepthBiasInfo->depthBiasConstantFactor); + 
SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.clamp, pDepthBiasInfo->depthBiasClamp); + SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.slope, pDepthBiasInfo->depthBiasSlopeFactor); + + /** From the Vulkan 1.3.254 spec: + * + * "If pNext does not contain a VkDepthBiasRepresentationInfoEXT + * structure, then this command is equivalent to including a + * VkDepthBiasRepresentationInfoEXT with depthBiasExact set to VK_FALSE + * and depthBiasRepresentation set to + * VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORMAT_EXT." + */ + const VkDepthBiasRepresentationInfoEXT *dbr_info = + vk_find_struct_const(pDepthBiasInfo->pNext, DEPTH_BIAS_REPRESENTATION_INFO_EXT); + if (dbr_info) { + SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.representation, dbr_info->depthBiasRepresentation); + SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.exact, dbr_info->depthBiasExact); + } else { + SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.representation, + VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORMAT_EXT); + SET_DYN_VALUE(dyn, RS_DEPTH_BIAS_FACTORS, + rs.depth_bias.exact, false); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetRenderingAttachmentLocationsKHR( + VkCommandBuffer commandBuffer, + const VkRenderingAttachmentLocationInfoKHR* pLocationInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + assert(pLocationInfo->colorAttachmentCount <= MESA_VK_MAX_COLOR_ATTACHMENTS); + for (uint32_t i = 0; i < pLocationInfo->colorAttachmentCount; i++) { + uint8_t val = + pLocationInfo->pColorAttachmentLocations[i] == VK_ATTACHMENT_UNUSED ? + MESA_VK_ATTACHMENT_UNUSED : pLocationInfo->pColorAttachmentLocations[i]; + SET_DYN_VALUE(dyn, COLOR_ATTACHMENT_MAP, cal.color_map[i], val); + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetRenderingInputAttachmentIndicesKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInputAttachmentIndexInfoKHR* pLocationInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + assert(pLocationInfo->colorAttachmentCount <= MESA_VK_MAX_COLOR_ATTACHMENTS); + for (uint32_t i = 0; i < pLocationInfo->colorAttachmentCount; i++) { + uint8_t val = + pLocationInfo->pColorAttachmentInputIndices[i] == VK_ATTACHMENT_UNUSED ? + MESA_VK_ATTACHMENT_UNUSED : pLocationInfo->pColorAttachmentInputIndices[i]; + SET_DYN_VALUE(dyn, INPUT_ATTACHMENT_MAP, + ial.color_map[i], val); + } + + uint8_t depth_att = + (pLocationInfo->pDepthInputAttachmentIndex == NULL || + *pLocationInfo->pDepthInputAttachmentIndex == VK_ATTACHMENT_UNUSED) ? + MESA_VK_ATTACHMENT_UNUSED : *pLocationInfo->pDepthInputAttachmentIndex; + uint8_t stencil_att = + (pLocationInfo->pStencilInputAttachmentIndex == NULL || + *pLocationInfo->pStencilInputAttachmentIndex == VK_ATTACHMENT_UNUSED) ? 
+ MESA_VK_ATTACHMENT_UNUSED : *pLocationInfo->pStencilInputAttachmentIndex; + SET_DYN_VALUE(dyn, INPUT_ATTACHMENT_MAP, ial.depth_att, depth_att); + SET_DYN_VALUE(dyn, INPUT_ATTACHMENT_MAP, ial.stencil_att, stencil_att); +} + +/* These are stubs required by VK_EXT_shader_object */ + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetViewportWScalingEnableNV( + VkCommandBuffer commandBuffer, + VkBool32 viewportWScalingEnable) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCoverageReductionModeNV( + VkCommandBuffer commandBuffer, + VkCoverageReductionModeNV coverageReductionMode) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCoverageToColorEnableNV( + VkCommandBuffer commandBuffer, + VkBool32 coverageToColorEnable) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCoverageToColorLocationNV( + VkCommandBuffer commandBuffer, + uint32_t coverageToColorLocation) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCoverageModulationModeNV( + VkCommandBuffer commandBuffer, + VkCoverageModulationModeNV coverageModulationMode) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCoverageModulationTableEnableNV( + VkCommandBuffer commandBuffer, + VkBool32 coverageModulationTableEnable) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetCoverageModulationTableNV( + VkCommandBuffer commandBuffer, + uint32_t coverageModulationTableCount, + const float* pCoverageModulationTable) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetRepresentativeFragmentTestEnableNV( + VkCommandBuffer commandBuffer, + VkBool32 representativeFragmentTestEnable) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetShadingRateImageEnableNV( + VkCommandBuffer commandBuffer, + VkBool32 shadingRateImageEnable) +{ +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetViewportSwizzleNV( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewportSwizzleNV* pViewportSwizzles) +{ +} + +const char * +vk_dynamic_graphic_state_to_str(enum mesa_vk_dynamic_graphics_state state) +{ +#define NAME(name) \ + case MESA_VK_DYNAMIC_##name: return #name + + switch (state) { + NAME(VI); + NAME(VI_BINDINGS_VALID); + NAME(VI_BINDING_STRIDES); + NAME(IA_PRIMITIVE_TOPOLOGY); + NAME(IA_PRIMITIVE_RESTART_ENABLE); + NAME(TS_PATCH_CONTROL_POINTS); + NAME(TS_DOMAIN_ORIGIN); + NAME(VP_VIEWPORT_COUNT); + NAME(VP_VIEWPORTS); + NAME(VP_SCISSOR_COUNT); + NAME(VP_SCISSORS); + NAME(VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE); + NAME(DR_RECTANGLES); + NAME(DR_MODE); + NAME(DR_ENABLE); + NAME(RS_RASTERIZER_DISCARD_ENABLE); + NAME(RS_DEPTH_CLAMP_ENABLE); + NAME(RS_DEPTH_CLIP_ENABLE); + NAME(RS_POLYGON_MODE); + NAME(RS_CULL_MODE); + NAME(RS_FRONT_FACE); + NAME(RS_CONSERVATIVE_MODE); + NAME(RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE); + NAME(RS_RASTERIZATION_ORDER_AMD); + NAME(RS_PROVOKING_VERTEX); + NAME(RS_RASTERIZATION_STREAM); + NAME(RS_DEPTH_BIAS_ENABLE); + NAME(RS_DEPTH_BIAS_FACTORS); + NAME(RS_LINE_WIDTH); + NAME(RS_LINE_MODE); + NAME(RS_LINE_STIPPLE_ENABLE); + NAME(RS_LINE_STIPPLE); + NAME(FSR); + NAME(MS_RASTERIZATION_SAMPLES); + NAME(MS_SAMPLE_MASK); + NAME(MS_ALPHA_TO_COVERAGE_ENABLE); + NAME(MS_ALPHA_TO_ONE_ENABLE); + NAME(MS_SAMPLE_LOCATIONS_ENABLE); + NAME(MS_SAMPLE_LOCATIONS); + NAME(DS_DEPTH_TEST_ENABLE); + NAME(DS_DEPTH_WRITE_ENABLE); + NAME(DS_DEPTH_COMPARE_OP); + NAME(DS_DEPTH_BOUNDS_TEST_ENABLE); + NAME(DS_DEPTH_BOUNDS_TEST_BOUNDS); + NAME(DS_STENCIL_TEST_ENABLE); + NAME(DS_STENCIL_OP); + NAME(DS_STENCIL_COMPARE_MASK); + NAME(DS_STENCIL_WRITE_MASK); + NAME(DS_STENCIL_REFERENCE); + NAME(CB_LOGIC_OP_ENABLE); + 
+ NAME(CB_LOGIC_OP);
+ NAME(CB_ATTACHMENT_COUNT);
+ NAME(CB_COLOR_WRITE_ENABLES);
+ NAME(CB_BLEND_ENABLES);
+ NAME(CB_BLEND_EQUATIONS);
+ NAME(CB_WRITE_MASKS);
+ NAME(CB_BLEND_CONSTANTS);
+ NAME(ATTACHMENT_FEEDBACK_LOOP_ENABLE);
+ NAME(COLOR_ATTACHMENT_MAP);
+ default: unreachable("Invalid state");
+ }
+
+#undef NAME
+}
diff --git a/src/vulkan/runtime/vk_graphics_state.h b/src/vulkan/runtime/vk_graphics_state.h
new file mode 100644
index 00000000000..9fa4bf7b638
--- /dev/null
+++ b/src/vulkan/runtime/vk_graphics_state.h
@@ -0,0 +1,1274 @@
+/*
+ * Copyright © 2022 Collabora, Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VK_GRAPHICS_STATE_H
+#define VK_GRAPHICS_STATE_H
+
+#include "vulkan/vulkan_core.h"
+
+#include "vk_limits.h"
+
+#include "util/bitset.h"
+#include "util/enum_operators.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct vk_command_buffer;
+struct vk_device;
+
+/** Enumeration of all Vulkan dynamic graphics states
+ *
+ * Enumerants are named with both the abbreviation of the state group to
+ * which the state belongs and the name of the state itself. These are
+ * intended to match the VkDynamicState enum fairly closely, but they may not
+ * match perfectly in all cases.
+ */ +enum mesa_vk_dynamic_graphics_state { + MESA_VK_DYNAMIC_VI, + MESA_VK_DYNAMIC_VI_BINDINGS_VALID, + MESA_VK_DYNAMIC_VI_BINDING_STRIDES, + MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY, + MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE, + MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS, + MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN, + MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT, + MESA_VK_DYNAMIC_VP_VIEWPORTS, + MESA_VK_DYNAMIC_VP_SCISSOR_COUNT, + MESA_VK_DYNAMIC_VP_SCISSORS, + MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE, + MESA_VK_DYNAMIC_DR_RECTANGLES, + MESA_VK_DYNAMIC_DR_MODE, + MESA_VK_DYNAMIC_DR_ENABLE, + MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE, + MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE, + MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE, + MESA_VK_DYNAMIC_RS_POLYGON_MODE, + MESA_VK_DYNAMIC_RS_CULL_MODE, + MESA_VK_DYNAMIC_RS_FRONT_FACE, + MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE, + MESA_VK_DYNAMIC_RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE, + MESA_VK_DYNAMIC_RS_RASTERIZATION_ORDER_AMD, + MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX, + MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM, + MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE, + MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS, + MESA_VK_DYNAMIC_RS_LINE_WIDTH, + MESA_VK_DYNAMIC_RS_LINE_MODE, + MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE, + MESA_VK_DYNAMIC_RS_LINE_STIPPLE, + MESA_VK_DYNAMIC_FSR, + MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES, + MESA_VK_DYNAMIC_MS_SAMPLE_MASK, + MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE, + MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE, + MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE, + MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS, + MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE, + MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE, + MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP, + MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE, + MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS, + MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE, + MESA_VK_DYNAMIC_DS_STENCIL_OP, + MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK, + MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK, + MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE, + MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE, + MESA_VK_DYNAMIC_CB_LOGIC_OP, + MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT, + MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES, + MESA_VK_DYNAMIC_CB_BLEND_ENABLES, + MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS, + MESA_VK_DYNAMIC_CB_WRITE_MASKS, + MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS, + MESA_VK_DYNAMIC_RP_ATTACHMENTS, + MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE, + MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP, + MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP, + + /* Must be left at the end */ + MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX, +}; + +#define MESA_VK_ATTACHMENT_UNUSED (0xff) + +/** Populate a bitset with dynamic states + * + * This function maps a VkPipelineDynamicStateCreateInfo to a bitset indexed + * by mesa_vk_dynamic_graphics_state enumerants. 
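+ *
+ * As a rough usage sketch (not part of this header; `pipeline` and its
+ * `line_width_is_dynamic` flag are hypothetical driver-side names), a driver
+ * compiling a graphics pipeline might do:
+ *
+ *    BITSET_DECLARE(dynamic, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
+ *    vk_get_dynamic_graphics_states(dynamic, pCreateInfo->pDynamicState);
+ *    if (BITSET_TEST(dynamic, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
+ *       pipeline->line_width_is_dynamic = true;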
+ * + * :param dynamic: |out| Bitset to populate + * :param info: |in| VkPipelineDynamicStateCreateInfo or NULL + */ +void +vk_get_dynamic_graphics_states(BITSET_WORD *dynamic, + const VkPipelineDynamicStateCreateInfo *info); + +/***/ +struct vk_vertex_binding_state { + /** VkVertexInputBindingDescription::stride */ + uint16_t stride; + + /** VkVertexInputBindingDescription::inputRate */ + uint16_t input_rate; + + /** VkVertexInputBindingDivisorDescriptionKHR::divisor or 1 */ + uint32_t divisor; +}; + +/***/ +struct vk_vertex_attribute_state { + /** VkVertexInputAttributeDescription::binding */ + uint32_t binding; + + /** VkVertexInputAttributeDescription::format */ + VkFormat format; + + /** VkVertexInputAttributeDescription::offset */ + uint32_t offset; +}; + +/***/ +struct vk_vertex_input_state { + /** Bitset of which bindings are valid, indexed by binding */ + uint32_t bindings_valid; + struct vk_vertex_binding_state bindings[MESA_VK_MAX_VERTEX_BINDINGS]; + + /** Bitset of which attributes are valid, indexed by location */ + uint32_t attributes_valid; + struct vk_vertex_attribute_state attributes[MESA_VK_MAX_VERTEX_ATTRIBUTES]; +}; + +/***/ +struct vk_input_assembly_state { + /** VkPipelineInputAssemblyStateCreateInfo::topology + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_IA_PRIMITIVE_TOPOLOGY + */ + uint8_t primitive_topology; + + /** VkPipelineInputAssemblyStateCreateInfo::primitiveRestartEnable + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_IA_PRIMITIVE_RESTART_ENABLE + */ + bool primitive_restart_enable; +}; + +/***/ +struct vk_tessellation_state { + /** VkPipelineTessellationStateCreateInfo::patchControlPoints + * + * MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS + */ + uint8_t patch_control_points; + + /** VkPipelineTessellationDomainOriginStateCreateInfo::domainOrigin + * + * MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN + */ + uint8_t domain_origin; +}; + +/***/ +struct vk_viewport_state { + /** VkPipelineViewportDepthClipControlCreateInfoEXT::negativeOneToOne + */ + bool depth_clip_negative_one_to_one; + + /** VkPipelineViewportStateCreateInfo::viewportCount + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VP_VIEWPORT_COUNT + */ + uint8_t viewport_count; + + /** VkPipelineViewportStateCreateInfo::scissorCount + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VP_SCISSOR_COUNT + */ + uint8_t scissor_count; + + /** VkPipelineViewportStateCreateInfo::pViewports + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VP_VIEWPORTS + */ + VkViewport viewports[MESA_VK_MAX_VIEWPORTS]; + + /** VkPipelineViewportStateCreateInfo::pScissors + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VP_SCISSORS + */ + VkRect2D scissors[MESA_VK_MAX_SCISSORS]; +}; + +/***/ +struct vk_discard_rectangles_state { + /** VkPipelineDiscardRectangleStateCreateInfoEXT::discardRectangleMode */ + VkDiscardRectangleModeEXT mode; + + /** VkPipelineDiscardRectangleStateCreateInfoEXT::discardRectangleCount */ + uint32_t rectangle_count; + + /** VkPipelineDiscardRectangleStateCreateInfoEXT::pDiscardRectangles */ + VkRect2D rectangles[MESA_VK_MAX_DISCARD_RECTANGLES]; +}; + +enum ENUM_PACKED vk_mesa_depth_clip_enable { + /** Depth clipping should be disabled */ + VK_MESA_DEPTH_CLIP_ENABLE_FALSE = 0, + + /** Depth clipping should be enabled */ + VK_MESA_DEPTH_CLIP_ENABLE_TRUE = 1, + + /** Depth clipping should be enabled iff depth clamping is disabled */ + VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP, +}; + +/***/ +struct vk_rasterization_state { + /** VkPipelineRasterizationStateCreateInfo::rasterizerDiscardEnable + * + * This will be false if rasterizer discard is dynamic + * + * 
MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE
+ */
+ bool rasterizer_discard_enable;
+
+ /** VkPipelineRasterizationStateCreateInfo::depthClampEnable
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE
+ */
+ bool depth_clamp_enable;
+
+ /** VkPipelineRasterizationDepthClipStateCreateInfoEXT::depthClipEnable
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE
+ */
+ enum vk_mesa_depth_clip_enable depth_clip_enable;
+
+ /** VkPipelineRasterizationStateCreateInfo::polygonMode
+ *
+ * MESA_VK_DYNAMIC_RS_POLYGON_MODE
+ */
+ VkPolygonMode polygon_mode;
+
+ /** VkPipelineRasterizationStateCreateInfo::cullMode
+ *
+ * MESA_VK_DYNAMIC_RS_CULL_MODE
+ */
+ VkCullModeFlags cull_mode;
+
+ /** VkPipelineRasterizationStateCreateInfo::frontFace
+ *
+ * MESA_VK_DYNAMIC_RS_FRONT_FACE
+ */
+ VkFrontFace front_face;
+
+ /** VkPipelineRasterizationConservativeStateCreateInfoEXT::conservativeRasterizationMode
+ *
+ * MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE
+ */
+ VkConservativeRasterizationModeEXT conservative_mode;
+
+ /** VkPipelineRasterizationConservativeStateCreateInfoEXT::extraPrimitiveOverestimationSize
+ *
+ * MESA_VK_DYNAMIC_RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE
+ */
+ float extra_primitive_overestimation_size;
+
+ /** VkPipelineRasterizationStateRasterizationOrderAMD::rasterizationOrder */
+ VkRasterizationOrderAMD rasterization_order_amd;
+
+ /** VkPipelineRasterizationProvokingVertexStateCreateInfoEXT::provokingVertexMode
+ *
+ * MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX
+ */
+ VkProvokingVertexModeEXT provoking_vertex;
+
+ /** VkPipelineRasterizationStateStreamCreateInfoEXT::rasterizationStream
+ *
+ * MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM
+ */
+ uint32_t rasterization_stream;
+
+ struct {
+ /** VkPipelineRasterizationStateCreateInfo::depthBiasEnable
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE
+ */
+ bool enable;
+
+ /** VkPipelineRasterizationStateCreateInfo::depthBiasConstantFactor
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS
+ */
+ float constant;
+
+ /** VkPipelineRasterizationStateCreateInfo::depthBiasClamp
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS
+ */
+ float clamp;
+
+ /** VkPipelineRasterizationStateCreateInfo::depthBiasSlopeFactor
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS
+ */
+ float slope;
+
+ /** VkDepthBiasRepresentationInfoEXT::depthBiasRepresentation
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS
+ */
+ VkDepthBiasRepresentationEXT representation;
+
+ /** VkDepthBiasRepresentationInfoEXT::depthBiasExact
+ *
+ * MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS
+ */
+ bool exact;
+ } depth_bias;
+
+ struct {
+ /** VkPipelineRasterizationStateCreateInfo::lineWidth
+ *
+ * MESA_VK_DYNAMIC_RS_LINE_WIDTH
+ */
+ float width;
+
+ /** VkPipelineRasterizationLineStateCreateInfoKHR::lineRasterizationMode
+ *
+ * Will be set to VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR if
+ * VkPipelineRasterizationLineStateCreateInfoKHR is not provided.
+ * + * MESA_VK_DYNAMIC_RS_LINE_MODE + */ + VkLineRasterizationModeKHR mode; + + struct { + /** VkPipelineRasterizationLineStateCreateInfoKHR::stippledLineEnable + * + * MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE + */ + bool enable; + + /** VkPipelineRasterizationLineStateCreateInfoKHR::lineStippleFactor + * + * MESA_VK_DYNAMIC_RS_LINE_STIPPLE + */ + uint32_t factor; + + /** VkPipelineRasterizationLineStateCreateInfoKHR::lineStipplePattern + * + * MESA_VK_DYNAMIC_RS_LINE_STIPPLE + */ + uint16_t pattern; + } stipple; + } line; +}; + +static inline bool +vk_rasterization_state_depth_clip_enable(const struct vk_rasterization_state *rs) +{ + switch (rs->depth_clip_enable) { + case VK_MESA_DEPTH_CLIP_ENABLE_FALSE: return false; + case VK_MESA_DEPTH_CLIP_ENABLE_TRUE: return true; + case VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP: return !rs->depth_clamp_enable; + } + unreachable("Invalid depth clip enable"); +} + +/***/ +struct vk_fragment_shading_rate_state { + /** VkPipelineFragmentShadingRateStateCreateInfoKHR::fragmentSize + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_FSR + */ + VkExtent2D fragment_size; + + /** VkPipelineFragmentShadingRateStateCreateInfoKHR::combinerOps + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_FSR + */ + VkFragmentShadingRateCombinerOpKHR combiner_ops[2]; +}; + +/***/ +struct vk_sample_locations_state { + /** VkSampleLocationsInfoEXT::sampleLocationsPerPixel */ + VkSampleCountFlagBits per_pixel; + + /** VkSampleLocationsInfoEXT::sampleLocationGridSize */ + VkExtent2D grid_size; + + /** VkSampleLocationsInfoEXT::sampleLocations */ + VkSampleLocationEXT locations[MESA_VK_MAX_SAMPLE_LOCATIONS]; +}; + +/***/ +struct vk_multisample_state { + /** VkPipelineMultisampleStateCreateInfo::rasterizationSamples */ + VkSampleCountFlagBits rasterization_samples; + + /** VkPipelineMultisampleStateCreateInfo::sampleShadingEnable */ + bool sample_shading_enable; + + /** VkPipelineMultisampleStateCreateInfo::minSampleShading */ + float min_sample_shading; + + /** VkPipelineMultisampleStateCreateInfo::pSampleMask */ + uint16_t sample_mask; + + /** VkPipelineMultisampleStateCreateInfo::alphaToCoverageEnable */ + bool alpha_to_coverage_enable; + + /** VkPipelineMultisampleStateCreateInfo::alphaToOneEnable */ + bool alpha_to_one_enable; + + /** VkPipelineSampleLocationsStateCreateInfoEXT::sampleLocationsEnable + * + * This will be true if sample locations enable dynamic. + */ + bool sample_locations_enable; + + /** VkPipelineSampleLocationsStateCreateInfoEXT::sampleLocationsInfo + * + * May be NULL for dynamic sample locations. 
+ */
+ const struct vk_sample_locations_state *sample_locations;
+};
+
+/** Represents the stencil test state for a face */
+struct vk_stencil_test_face_state {
+ /*
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_STENCIL_OP
+ */
+ struct {
+ /** VkStencilOpState::failOp */
+ uint8_t fail;
+
+ /** VkStencilOpState::passOp */
+ uint8_t pass;
+
+ /** VkStencilOpState::depthFailOp */
+ uint8_t depth_fail;
+
+ /** VkStencilOpState::compareOp */
+ uint8_t compare;
+ } op;
+
+ /** VkStencilOpState::compareMask
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_STENCIL_COMPARE_MASK
+ */
+ uint8_t compare_mask;
+
+ /** VkStencilOpState::writeMask
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_STENCIL_WRITE_MASK
+ */
+ uint8_t write_mask;
+
+ /** VkStencilOpState::reference
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_STENCIL_REFERENCE
+ */
+ uint8_t reference;
+};
+
+/***/
+struct vk_depth_stencil_state {
+ struct {
+ /** VkPipelineDepthStencilStateCreateInfo::depthTestEnable
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_DEPTH_TEST_ENABLE
+ */
+ bool test_enable;
+
+ /** VkPipelineDepthStencilStateCreateInfo::depthWriteEnable
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_DEPTH_WRITE_ENABLE
+ */
+ bool write_enable;
+
+ /** VkPipelineDepthStencilStateCreateInfo::depthCompareOp
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_DEPTH_COMPARE_OP
+ */
+ VkCompareOp compare_op;
+
+ struct {
+ /** VkPipelineDepthStencilStateCreateInfo::depthBoundsTestEnable
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_DEPTH_BOUNDS_TEST_ENABLE
+ */
+ bool enable;
+
+ /** VkPipelineDepthStencilStateCreateInfo::min/maxDepthBounds
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_DEPTH_BOUNDS_TEST_BOUNDS
+ */
+ float min, max;
+ } bounds_test;
+ } depth;
+
+ struct {
+ /** VkPipelineDepthStencilStateCreateInfo::stencilTestEnable
+ *
+ * MESA_VK_DYNAMIC_GRAPHICS_STATE_DS_STENCIL_TEST_ENABLE
+ */
+ bool test_enable;
+
+ /** Whether or not stencil should be written
+ *
+ * This does not map directly to any particular Vulkan API state and is
+ * initialized to true. If independent stencil disable ever becomes a
+ * thing, it will use this state. vk_optimize_depth_stencil_state() may
+ * set this to false if it can prove that the stencil test will never
+ * alter the stencil value.
+ */
+ bool write_enable;
+
+ /** VkPipelineDepthStencilStateCreateInfo::front */
+ struct vk_stencil_test_face_state front;
+
+ /** VkPipelineDepthStencilStateCreateInfo::back */
+ struct vk_stencil_test_face_state back;
+ } stencil;
+};
+
+/** Optimize a depth/stencil state
+ *
+ * The way depth and stencil testing is specified, there are many cases where,
+ * regardless of depth/stencil writes being enabled, nothing actually gets
+ * written due to some other bit of state being set. In the presence of
+ * discards, it's fairly easy to get into cases where early depth/stencil
+ * testing is disabled on some hardware, leading to a fairly big performance
+ * hit. This function attempts to optimize the depth/stencil state, disabling
+ * writes and sometimes even testing whenever possible.
+ *
+ * :param ds: |inout| The depth stencil state to optimize
+ * :param ds_aspects: |in| Which image aspects are present in the
+ * render pass.
+ * :param consider_write_mask: |in| If true, the write mask will be taken
+ * into account when optimizing. If
+ * false, it will be ignored.
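+ *
+ * Illustrative call only (the `state` and `ds` locals below are made-up
+ * driver-side values, not part of this API): for a render pass with a
+ * depth-only attachment a driver might do
+ *
+ *    struct vk_depth_stencil_state ds = *state->ds;
+ *    vk_optimize_depth_stencil_state(&ds, VK_IMAGE_ASPECT_DEPTH_BIT, true);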
+ */ +void vk_optimize_depth_stencil_state(struct vk_depth_stencil_state *ds, + VkImageAspectFlags ds_aspects, + bool consider_write_mask); + +struct vk_color_blend_attachment_state { + /** VkPipelineColorBlendAttachmentState::blendEnable + * + * This will be true if blend enables are dynamic + * + * MESA_VK_DYNAMIC_CB_BLEND_ENABLES + */ + bool blend_enable; + + /** VkPipelineColorBlendAttachmentState::srcColorBlendFactor + * + * MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS + */ + uint8_t src_color_blend_factor; + + /** VkPipelineColorBlendAttachmentState::dstColorBlendFactor + * + * MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS + */ + uint8_t dst_color_blend_factor; + + /** VkPipelineColorBlendAttachmentState::srcAlphaBlendFactor + * + * MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS + */ + uint8_t src_alpha_blend_factor; + + /** VkPipelineColorBlendAttachmentState::dstAlphaBlendFactor + * + * MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS + */ + uint8_t dst_alpha_blend_factor; + + /** VkPipelineColorBlendAttachmentState::colorWriteMask + * + * MESA_VK_DYNAMIC_CB_WRITE_MASKS + */ + uint8_t write_mask; + + /** VkPipelineColorBlendAttachmentState::colorBlendOp + * + * MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS + */ + VkBlendOp color_blend_op; + + /** VkPipelineColorBlendAttachmentState::alphaBlendOp + * + * MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS + */ + VkBlendOp alpha_blend_op; +}; + +/***/ +struct vk_color_blend_state { + /** VkPipelineColorBlendStateCreateInfo::logicOpEnable + * + * MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE, + */ + bool logic_op_enable; + + /** VkPipelineColorBlendStateCreateInfo::logicOp + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_CB_LOGIC_OP, + */ + uint8_t logic_op; + + /** VkPipelineColorBlendStateCreateInfo::attachmentCount + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_CB_ATTACHMENT_COUNT, + */ + uint8_t attachment_count; + + /** VkPipelineColorWriteCreateInfoEXT::pColorWriteEnables + * + * Bitmask of color write enables, indexed by color attachment index. 
+ * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_CB_COLOR_WRITE_ENABLES, + */ + uint8_t color_write_enables; + + /** VkPipelineColorBlendStateCreateInfo::pAttachments */ + struct vk_color_blend_attachment_state attachments[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** VkPipelineColorBlendStateCreateInfo::blendConstants + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_CB_BLEND_CONSTANTS, + */ + float blend_constants[4]; +}; + +enum vk_rp_attachment_flags { + MESA_VK_RP_ATTACHMENT_NONE = 0, + + MESA_VK_RP_ATTACHMENT_COLOR_0_BIT = (1 << 0), + MESA_VK_RP_ATTACHMENT_COLOR_1_BIT = (1 << 1), + MESA_VK_RP_ATTACHMENT_COLOR_2_BIT = (1 << 2), + MESA_VK_RP_ATTACHMENT_COLOR_3_BIT = (1 << 3), + MESA_VK_RP_ATTACHMENT_COLOR_4_BIT = (1 << 4), + MESA_VK_RP_ATTACHMENT_COLOR_5_BIT = (1 << 5), + MESA_VK_RP_ATTACHMENT_COLOR_6_BIT = (1 << 6), + MESA_VK_RP_ATTACHMENT_COLOR_7_BIT = (1 << 7), + MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS = 0xff, + + MESA_VK_RP_ATTACHMENT_DEPTH_BIT = (1 << 8), + MESA_VK_RP_ATTACHMENT_STENCIL_BIT = (1 << 9), + + MESA_VK_RP_ATTACHMENT_INFO_INVALID = 0xffff, +}; +MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(vk_rp_attachment_flags) +static_assert(MESA_VK_MAX_COLOR_ATTACHMENTS == 8, + "This enum must match the global runtime limit"); + +#define MESA_VK_RP_ATTACHMENT_COLOR_BIT(n) \ + ((enum vk_rp_attachment_flags)(MESA_VK_RP_ATTACHMENT_COLOR_0_BIT << (n))) + +/***/ +struct vk_input_attachment_location_state { + /** VkRenderingInputAttachmentIndexInfoKHR::pColorAttachmentLocations + * + * MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP + */ + uint8_t color_map[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** VkRenderingInputAttachmentIndexInfoKHR::pDepthInputAttachmentIndex + * + * MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP + */ + uint8_t depth_att; + + /** VkRenderingInputAttachmentIndexInfoKHR::pStencilInputAttachmentIndex + * + * MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP + */ + uint8_t stencil_att; +}; + +/***/ +struct vk_color_attachment_location_state { + /** VkRenderingAttachmentLocationInfoKHR::pColorAttachmentLocations + * + * MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP + */ + uint8_t color_map[MESA_VK_MAX_COLOR_ATTACHMENTS]; +}; + +/***/ +struct vk_render_pass_state { + /** Set of image aspects bound as color/depth/stencil attachments + * + * Set to MESA_VK_RP_ATTACHMENT_INFO_INVALID to indicate that attachment + * info is invalid. 
+ */ + enum vk_rp_attachment_flags attachments; + + /** VkPipelineRenderingCreateInfo::viewMask */ + uint32_t view_mask; + + /** VkPipelineRenderingCreateInfo::colorAttachmentCount */ + uint8_t color_attachment_count; + + /** VkPipelineRenderingCreateInfo::pColorAttachmentFormats */ + VkFormat color_attachment_formats[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** VkPipelineRenderingCreateInfo::depthAttachmentFormat */ + VkFormat depth_attachment_format; + + /** VkPipelineRenderingCreateInfo::stencilAttachmentFormat */ + VkFormat stencil_attachment_format; + + /** VkAttachmentSampleCountInfoAMD::pColorAttachmentSamples */ + uint8_t color_attachment_samples[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** VkAttachmentSampleCountInfoAMD::depthStencilAttachmentSamples */ + uint8_t depth_stencil_attachment_samples; +}; + +static inline bool +vk_render_pass_state_has_attachment_info(const struct vk_render_pass_state *rp) +{ + return rp->attachments != MESA_VK_RP_ATTACHMENT_INFO_INVALID; +} + +static inline VkImageAspectFlags +vk_pipeline_flags_feedback_loops(VkPipelineCreateFlags2KHR flags) +{ + VkImageAspectFlags feedback_loops = 0; + if (flags & + VK_PIPELINE_CREATE_2_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) + feedback_loops |= VK_IMAGE_ASPECT_COLOR_BIT; + if (flags & + VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) + feedback_loops |= VK_IMAGE_ASPECT_DEPTH_BIT; + return feedback_loops; +} + +/** Struct representing all dynamic graphics state + * + * Before invoking any core functions, the driver must properly populate + * initialize this struct: + * + * - Initialize using vk_default_dynamic_graphics_state, if desired + * - Set vi to a driver-allocated vk_vertex_input_state struct + * - Set ms.sample_locations to a driver-allocated + * vk_sample_locations_state struct + */ +struct vk_dynamic_graphics_state { + /** Vertex input state + * + * Must be provided by the driver if VK_EXT_vertex_input_dynamic_state is + * supported. + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VI + */ + struct vk_vertex_input_state *vi; + + /* This is a copy of vi->bindings_valid, used when the vertex input state + * is precompiled in the pipeline (so that vi is NULL) but the strides are + * set dynamically. 
+ * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VI_BINDINGS_VALID + */ + uint32_t vi_bindings_valid; + + /** Vertex binding strides + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_VI_BINDING_STRIDES + */ + uint16_t vi_binding_strides[MESA_VK_MAX_VERTEX_BINDINGS]; + + /** Input assembly state */ + struct vk_input_assembly_state ia; + + /** Tessellation state */ + struct vk_tessellation_state ts; + + /** Viewport state */ + struct vk_viewport_state vp; + + /** Discard rectangles state */ + struct { + /** Custom enable + * + * MESA_VK_DYNAMIC_DR_ENABLE + */ + bool enable; + + /** Mode + * + * MESA_VK_DYNAMIC_DR_MODE + */ + VkDiscardRectangleModeEXT mode; + + /** Rectangles + * + * MESA_VK_DYNAMIC_DR_RECTANGLES + */ + VkRect2D rectangles[MESA_VK_MAX_DISCARD_RECTANGLES]; + + /** Number of rectangles + * + * MESA_VK_DYNAMIC_GRAPHICS_STATE_DR_RECTANGLES + */ + uint32_t rectangle_count; + } dr; + + /** Rasterization state */ + struct vk_rasterization_state rs; + + /* Fragment shading rate state */ + struct vk_fragment_shading_rate_state fsr; + + /** Multisample state */ + struct { + /** Rasterization samples + * + * MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES + */ + VkSampleCountFlagBits rasterization_samples; + + /** Sample mask + * + * MESA_VK_DYNAMIC_MS_SAMPLE_MASK + */ + uint16_t sample_mask; + + /** Alpha to coverage enable + * + * MESA_VK_DYNAMIC_MS_ALPHA_TO_CONVERAGE_ENABLE + */ + bool alpha_to_coverage_enable; + + /** Alpha to one enable + * + * MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE + */ + bool alpha_to_one_enable; + + /** Custom sample locations enable + * + * MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE + */ + bool sample_locations_enable; + + /** Sample locations + * + * Must be provided by the driver if VK_EXT_sample_locations is + * supported. + * + * MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS + */ + struct vk_sample_locations_state *sample_locations; + } ms; + + /** Depth stencil state */ + struct vk_depth_stencil_state ds; + + /** Color blend state */ + struct vk_color_blend_state cb; + + struct { + enum vk_rp_attachment_flags attachments; + } rp; + + /** MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE */ + VkImageAspectFlags feedback_loops; + + /** MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP */ + struct vk_input_attachment_location_state ial; + + /** MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP */ + struct vk_color_attachment_location_state cal; + + /** For pipelines, which bits of dynamic state are set */ + BITSET_DECLARE(set, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + + /** For command buffers, which bits of dynamic state have changed */ + BITSET_DECLARE(dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); +}; + +/***/ +struct vk_graphics_pipeline_all_state { + struct vk_vertex_input_state vi; + struct vk_input_assembly_state ia; + struct vk_tessellation_state ts; + struct vk_viewport_state vp; + struct vk_discard_rectangles_state dr; + struct vk_rasterization_state rs; + struct vk_fragment_shading_rate_state fsr; + struct vk_multisample_state ms; + struct vk_sample_locations_state ms_sample_locations; + struct vk_depth_stencil_state ds; + struct vk_color_blend_state cb; + struct vk_input_attachment_location_state ial; + struct vk_color_attachment_location_state cal; + struct vk_render_pass_state rp; +}; + +/***/ +struct vk_graphics_pipeline_state { + /** Bitset of which states are dynamic */ + BITSET_DECLARE(dynamic, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + + VkShaderStageFlags shader_stages; + + /** Flags from VkGraphicsPipelineCreateInfo::flags that are considered part + * of a stage and need to be merged when linking 
libraries. + * + * For drivers which use vk_render_pass, this will also include flags + * generated based on subpass self-dependencies and fragment density map. + */ + VkPipelineCreateFlags2KHR pipeline_flags; + + /* True if there are feedback loops that do not involve input attachments + * managed by the driver. This is set to true by the runtime if there + * are loops indicated by a pipeline flag (which may involve any image + * rather than only input attachments under the control of the driver) or + * there was no driver-provided render pass info struct (because input + * attachments for emulated renderpasses cannot be managed by the driver). + */ + bool feedback_loop_not_input_only; + + /** Vertex input state */ + const struct vk_vertex_input_state *vi; + + /** Input assembly state */ + const struct vk_input_assembly_state *ia; + + /** Tessellation state */ + const struct vk_tessellation_state *ts; + + /** Viewport state */ + const struct vk_viewport_state *vp; + + /** Discard Rectangles state */ + const struct vk_discard_rectangles_state *dr; + + /** Rasterization state */ + const struct vk_rasterization_state *rs; + + /** Fragment shading rate state */ + const struct vk_fragment_shading_rate_state *fsr; + + /** Multiesample state */ + const struct vk_multisample_state *ms; + + /** Depth stencil state */ + const struct vk_depth_stencil_state *ds; + + /** Color blend state */ + const struct vk_color_blend_state *cb; + + /** Input attachment mapping state */ + const struct vk_input_attachment_location_state *ial; + + /** Color attachment mapping state */ + const struct vk_color_attachment_location_state *cal; + + /** Render pass state */ + const struct vk_render_pass_state *rp; +}; + +/** Populate a vk_graphics_pipeline_state from VkGraphicsPipelineCreateInfo + * + * This function crawls the provided VkGraphicsPipelineCreateInfo and uses it + * to populate the vk_graphics_pipeline_state. Upon returning from this + * function, all pointers in `state` will either be `NULL` or point to a valid + * sub-state structure. Whenever an extension struct is missing, a reasonable + * default value is provided whenever possible. Some states may be left NULL + * if the state does not exist (such as when rasterizer discard is enabled) or + * if all of the corresponding states are dynamic. + * + * This function assumes that the vk_graphics_pipeline_state is already valid + * (i.e., all pointers are NULL or point to valid states). Any states already + * present are assumed to be identical to how we would populate them from + * VkGraphicsPipelineCreateInfo. + * + * This function can operate in one of two modes with respect to how the + * memory for states is allocated. If a `vk_graphics_pipeline_all_state` + * struct is provided, any newly populated states will point to the relevant + * field in `all`. If `all == NULL`, it attempts to dynamically allocate any + * newly required states using the provided allocator and scope. The pointer + * to this new blob of memory is returned via `alloc_ptr_out` and must + * eventually be freed by the driver. + * + * :param device: |in| The Vulkan device + * :param state: |out| The graphics pipeline state to populate + * :param info: |in| The pCreateInfo from vkCreateGraphicsPipelines + * :param driver_rp: |in| Renderpass state if the driver implements render + * passes itself. This should be NULL for drivers + * that use the common render pass infrastructure + * built on top of dynamic rendering. 
+ * :param driver_rp_flags: |in| Pipeline create flags implied by the + * renderpass or subpass if the driver implements + * render passes itself. This is only used if + * driver_rp is non-NULL. + * :param all: |in| The vk_graphics_pipeline_all_state to use to + * back any newly needed states. If NULL, newly + * needed states will be dynamically allocated + * instead. + * :param alloc: |in| Allocation callbacks for dynamically allocating + * new state memory. + * :param scope: |in| Allocation scope for dynamically allocating new + * state memory. + * :param alloc_ptr_out: |out| Will be populated with a pointer to any newly + * allocated state. The driver is responsible for + * freeing this pointer. + */ +VkResult +vk_graphics_pipeline_state_fill(const struct vk_device *device, + struct vk_graphics_pipeline_state *state, + const VkGraphicsPipelineCreateInfo *info, + const struct vk_render_pass_state *driver_rp, + VkPipelineCreateFlags2KHR driver_rp_flags, + struct vk_graphics_pipeline_all_state *all, + const VkAllocationCallbacks *alloc, + VkSystemAllocationScope scope, + void **alloc_ptr_out); + +/** Populate a vk_graphics_pipeline_state from another one. + * + * This allocates space for graphics pipeline state and copies it from another + * pipeline state. It ignores state in `old_state` which is not set and does + * not allocate memory if the entire group is unused. The intended use-case is + * for drivers that may be able to precompile some state ahead of time, to + * avoid allocating memory for it in pipeline libraries. The workflow looks + * something like this: + * + * struct vk_graphics_pipeline_all_state all; + * struct vk_graphics_pipeline_state state; + * vk_graphics_pipeline_state_fill(dev, &state, ..., &all, NULL, 0, NULL); + * + * ... + * + * BITSET_DECLARE(set_state, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX); + * vk_graphics_pipeline_get_state(&state, &set_state); + * + * ... + * + * if (BITSET_TEST(set_state, MESA_VK_DYNAMIC_FOO)) { + * emit_foo(&state.foo, ...); + * BITSET_SET(state.dynamic, MESA_VK_DYNAMIC_FOO); + * } + * + * ... + * + * if (pipeline->is_library) { + * library = pipeline_to_library(pipeline); + * vk_graphics_pipeline_state_copy(dev, &library->state, &state, ...); + * } + * + * In this case we will avoid allocating memory for `library->state.foo`. + * + * :param device: |in| The Vulkan device + * :param state: |out| The graphics pipeline state to populate + * :param old_state: |in| The graphics pipeline state to copy from + * :param alloc: |in| Allocation callbacks for dynamically allocating + * new state memory. + * :param scope: |in| Allocation scope for dynamically allocating new + * state memory. + * :param alloc_ptr_out: |out| Will be populated with a pointer to any newly + * allocated state. The driver is responsible for + * freeing this pointer. + */ +VkResult +vk_graphics_pipeline_state_copy(const struct vk_device *device, + struct vk_graphics_pipeline_state *state, + const struct vk_graphics_pipeline_state *old_state, + const VkAllocationCallbacks *alloc, + VkSystemAllocationScope scope, + void **alloc_ptr_out); + +/** Merge one vk_graphics_pipeline_state into another + * + * Both the destination and source states are assumed to be valid (i.e., all + * pointers are NULL or point to valid states). Any states which exist in + * both are expected to be identical and the state already in dst is used. + * The only exception here is render pass state which may be only partially + * defined in which case the fully defined one (if any) is used. 
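+ *
+ * As a rough sketch (the drv_* and link_info names are hypothetical, not
+ * part of this header), a driver linking pipeline libraries might
+ * accumulate state with:
+ *
+ *    struct vk_graphics_pipeline_state state = { 0 };
+ *    for (uint32_t i = 0; i < link_info->libraryCount; i++) {
+ *       struct drv_pipeline_library *lib =
+ *          drv_pipeline_library_from_handle(link_info->pLibraries[i]);
+ *       vk_graphics_pipeline_state_merge(&state, &lib->graphics_state);
+ *    }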
+ *
+ * :param dst:   |out| The destination state. When the function returns, this
+ *                     will be the union of the original dst and src.
+ * :param src:   |in|  The source state
+ */
+void
+vk_graphics_pipeline_state_merge(struct vk_graphics_pipeline_state *dst,
+                                 const struct vk_graphics_pipeline_state *src);
+
+/** Get the states which will be set for a given vk_graphics_pipeline_state
+ *
+ * Return which states should be set when the pipeline is bound.
+ */
+void
+vk_graphics_pipeline_get_state(const struct vk_graphics_pipeline_state *state,
+                               BITSET_WORD *set_state_out);
+
+/** Initialize a vk_dynamic_graphics_state with defaults
+ *
+ * :param dyn: |out| Dynamic graphics state to initialize
+ */
+void
+vk_dynamic_graphics_state_init(struct vk_dynamic_graphics_state *dyn);
+
+/** Clear a vk_dynamic_graphics_state to defaults
+ *
+ * :param dyn: |out| Dynamic graphics state to initialize
+ */
+void
+vk_dynamic_graphics_state_clear(struct vk_dynamic_graphics_state *dyn);
+
+/** Initialize a vk_dynamic_graphics_state for a pipeline
+ *
+ * :param dyn: |out| Dynamic graphics state to initialize
+ * :param p:   |in|  The pipeline state from which to initialize the
+ *                   dynamic state.
+ */
+void
+vk_dynamic_graphics_state_fill(struct vk_dynamic_graphics_state *dyn,
+                               const struct vk_graphics_pipeline_state *p);
+
+/** Mark all states in the given vk_dynamic_graphics_state dirty
+ *
+ * :param d: |out| Dynamic graphics state struct
+ */
+static inline void
+vk_dynamic_graphics_state_dirty_all(struct vk_dynamic_graphics_state *d)
+{
+   BITSET_SET_RANGE(d->dirty, 0, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX - 1);
+}
+
+/** Mark all states in the given vk_dynamic_graphics_state not dirty
+ *
+ * :param d: |out| Dynamic graphics state struct
+ */
+static inline void
+vk_dynamic_graphics_state_clear_dirty(struct vk_dynamic_graphics_state *d)
+{
+   BITSET_ZERO(d->dirty);
+}
+
+/** Test if any states in the given vk_dynamic_graphics_state are dirty
+ *
+ * :param d: |in| Dynamic graphics state struct to test
+ * :returns: true if any state is dirty
+ */
+static inline bool
+vk_dynamic_graphics_state_any_dirty(const struct vk_dynamic_graphics_state *d)
+{
+   return BITSET_TEST_RANGE(d->dirty,
+                            0, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX - 1);
+}
+
+/** Copies all set state from src to dst
+ *
+ * Both src and dst are assumed to be properly initialized dynamic state
+ * structs. Anything not set in src, as indicated by src->set, is ignored and
+ * those bits of dst are left untouched.
+ *
+ * :param dst: |out| Copy destination
+ * :param src: |in|  Copy source
+ */
+void
+vk_dynamic_graphics_state_copy(struct vk_dynamic_graphics_state *dst,
+                               const struct vk_dynamic_graphics_state *src);
+
+/** Set all of the state in src on a command buffer
+ *
+ * Anything not set, as indicated by src->set, is ignored and those states in
+ * the command buffer are left untouched.
+ *
+ * :param cmd: |inout| Command buffer to update
+ * :param src: |in|    State to set
+ */
+void
+vk_cmd_set_dynamic_graphics_state(struct vk_command_buffer *cmd,
+                                  const struct vk_dynamic_graphics_state *src);
+
+/** Set vertex binding strides on a command buffer
+ *
+ * This is the dynamic state part of vkCmdBindVertexBuffers2().
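+ *
+ * A minimal sketch of a driver entry point using it (the drv_* names are
+ * hypothetical, not part of the runtime):
+ *
+ *    VKAPI_ATTR void VKAPI_CALL
+ *    drv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer,
+ *                              uint32_t firstBinding, uint32_t bindingCount,
+ *                              const VkBuffer *pBuffers,
+ *                              const VkDeviceSize *pOffsets,
+ *                              const VkDeviceSize *pSizes,
+ *                              const VkDeviceSize *pStrides)
+ *    {
+ *       VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer);
+ *
+ *       // Bind the buffers with driver-specific code, then record the
+ *       // dynamic strides, which may be NULL per the Vulkan spec.
+ *       drv_bind_vertex_buffers(cmd, firstBinding, bindingCount,
+ *                               pBuffers, pOffsets, pSizes);
+ *       if (pStrides != NULL)
+ *          vk_cmd_set_vertex_binding_strides(cmd, firstBinding,
+ *                                            bindingCount, pStrides);
+ *    }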
+ * + * :param cmd: |inout| Command buffer to update + * :param first_binding: |in| First binding to update + * :param binding_count: |in| Number of bindings to update + * :param strides: |in| binding_count many stride values to set + */ +void +vk_cmd_set_vertex_binding_strides(struct vk_command_buffer *cmd, + uint32_t first_binding, + uint32_t binding_count, + const VkDeviceSize *strides); + +/* Set color attachment count for blending on a command buffer. + * + * This is an implicit part of starting a subpass or a secondary command + * buffer in a subpass. + */ +void +vk_cmd_set_cb_attachment_count(struct vk_command_buffer *cmd, + uint32_t attachment_count); + +/* Set render pass attachments on a command buffer. + * + * This is required for VK_EXT_shader_object in order to disable attachments + * based on bound shaders. + */ +void +vk_cmd_set_rp_attachments(struct vk_command_buffer *cmd, + enum vk_rp_attachment_flags attachments); + +const char * +vk_dynamic_graphic_state_to_str(enum mesa_vk_dynamic_graphics_state state); + +/** Check whether the color attachment location map is the identity + * + * :param cal: |in| Color attachment location state + */ +static inline bool +vk_color_attachment_location_state_is_identity( + const struct vk_color_attachment_location_state *cal) +{ + for (unsigned i = 0; i < ARRAY_SIZE(cal->color_map); i++) { + if (cal->color_map[i] != i) + return false; + } + return true; +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_GRAPHICS_STATE_H */ diff --git a/src/vulkan/runtime/vk_image.c b/src/vulkan/runtime/vk_image.c new file mode 100644 index 00000000000..cada2dd6761 --- /dev/null +++ b/src/vulkan/runtime/vk_image.c @@ -0,0 +1,1040 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_image.h" + +#if DETECT_OS_LINUX || DETECT_OS_BSD +#include <drm-uapi/drm_fourcc.h> +#endif + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_format_info.h" +#include "vk_log.h" +#include "vk_physical_device.h" +#include "vk_render_pass.h" +#include "vk_util.h" +#include "vulkan/wsi/wsi_common.h" + +#if DETECT_OS_ANDROID +#include "vk_android.h" +#include <vulkan/vulkan_android.h> +#endif + +void +vk_image_init(struct vk_device *device, + struct vk_image *image, + const VkImageCreateInfo *pCreateInfo) +{ + vk_object_base_init(device, &image->base, VK_OBJECT_TYPE_IMAGE); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + assert(pCreateInfo->mipLevels > 0); + assert(pCreateInfo->arrayLayers > 0); + assert(pCreateInfo->samples > 0); + assert(pCreateInfo->extent.width > 0); + assert(pCreateInfo->extent.height > 0); + assert(pCreateInfo->extent.depth > 0); + + if (pCreateInfo->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); + if (pCreateInfo->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) + assert(pCreateInfo->imageType == VK_IMAGE_TYPE_3D); + + image->create_flags = pCreateInfo->flags; + image->image_type = pCreateInfo->imageType; + vk_image_set_format(image, pCreateInfo->format); + image->extent = vk_image_sanitize_extent(image, pCreateInfo->extent); + image->mip_levels = pCreateInfo->mipLevels; + image->array_layers = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; + image->tiling = pCreateInfo->tiling; + image->usage = pCreateInfo->usage; + image->sharing_mode = pCreateInfo->sharingMode; + + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + const VkImageStencilUsageCreateInfo *stencil_usage_info = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_STENCIL_USAGE_CREATE_INFO); + image->stencil_usage = + stencil_usage_info ? 
stencil_usage_info->stencilUsage : + pCreateInfo->usage; + } else { + image->stencil_usage = 0; + } + + const VkExternalMemoryImageCreateInfo *ext_mem_info = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO); + if (ext_mem_info) + image->external_handle_types = ext_mem_info->handleTypes; + else + image->external_handle_types = 0; + + const struct wsi_image_create_info *wsi_info = + vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); + image->wsi_legacy_scanout = wsi_info && wsi_info->scanout; + +#if DETECT_OS_LINUX || DETECT_OS_BSD + image->drm_format_mod = ((1ULL << 56) - 1) /* DRM_FORMAT_MOD_INVALID */; +#endif + +#if DETECT_OS_ANDROID + const VkExternalFormatANDROID *ext_format = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_FORMAT_ANDROID); + if (ext_format && ext_format->externalFormat != 0) { + assert(image->format == VK_FORMAT_UNDEFINED); + assert(image->external_handle_types & + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID); + vk_image_set_format(image, (VkFormat)ext_format->externalFormat); + } + + image->ahb_format = vk_image_format_to_ahb_format(image->format); +#endif + + const VkImageCompressionControlEXT *compr_info = + vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT); + if (compr_info) + image->compr_flags = compr_info->flags; +} + +void * +vk_image_create(struct vk_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_image *image = + vk_zalloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (image == NULL) + return NULL; + + vk_image_init(device, image, pCreateInfo); + + return image; +} + +void +vk_image_finish(struct vk_image *image) +{ + vk_object_base_finish(&image->base); +} + +void +vk_image_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_image *image) +{ + vk_object_free(device, alloc, image); +} + +#if DETECT_OS_LINUX || DETECT_OS_BSD +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetImageDrmFormatModifierPropertiesEXT(UNUSED VkDevice device, + VkImage _image, + VkImageDrmFormatModifierPropertiesEXT *pProperties) +{ + VK_FROM_HANDLE(vk_image, image, _image); + + assert(pProperties->sType == + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT); + + assert(image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT); + pProperties->drmFormatModifier = image->drm_format_mod; + + return VK_SUCCESS; +} +#endif + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetImageSubresourceLayout(VkDevice _device, VkImage _image, + const VkImageSubresource *pSubresource, + VkSubresourceLayout *pLayout) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + const VkImageSubresource2KHR subresource = { + .sType = VK_STRUCTURE_TYPE_IMAGE_SUBRESOURCE_2_KHR, + .imageSubresource = *pSubresource, + }; + + VkSubresourceLayout2KHR layout = { + .sType = VK_STRUCTURE_TYPE_SUBRESOURCE_LAYOUT_2_KHR + }; + + device->dispatch_table.GetImageSubresourceLayout2KHR(_device, _image, + &subresource, &layout); + + *pLayout = layout.subresourceLayout; +} + +void +vk_image_set_format(struct vk_image *image, VkFormat format) +{ + image->format = format; + image->aspects = vk_format_aspects(format); +} + +VkImageUsageFlags +vk_image_usage(const struct vk_image *image, + VkImageAspectFlags aspect_mask) +{ + /* From the Vulkan 1.2.131 spec: + * + * "If the image was has a depth-stencil format and was created with + * a VkImageStencilUsageCreateInfo structure included in the pNext + * chain 
of VkImageCreateInfo, the usage is calculated based on the + * subresource.aspectMask provided: + * + * - If aspectMask includes only VK_IMAGE_ASPECT_STENCIL_BIT, the + * implicit usage is equal to + * VkImageStencilUsageCreateInfo::stencilUsage. + * + * - If aspectMask includes only VK_IMAGE_ASPECT_DEPTH_BIT, the + * implicit usage is equal to VkImageCreateInfo::usage. + * + * - If both aspects are included in aspectMask, the implicit usage + * is equal to the intersection of VkImageCreateInfo::usage and + * VkImageStencilUsageCreateInfo::stencilUsage. + */ + if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { + return image->stencil_usage; + } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) { + return image->usage & image->stencil_usage; + } else { + /* This also handles the color case */ + return image->usage; + } +} + +#define VK_IMAGE_ASPECT_ANY_COLOR_MASK_MESA ( \ + VK_IMAGE_ASPECT_COLOR_BIT | \ + VK_IMAGE_ASPECT_PLANE_0_BIT | \ + VK_IMAGE_ASPECT_PLANE_1_BIT | \ + VK_IMAGE_ASPECT_PLANE_2_BIT) + +/** Expands the given aspect mask relative to the image + * + * If the image has color plane aspects VK_IMAGE_ASPECT_COLOR_BIT has been + * requested, this returns the aspects of the underlying image. + * + * For example, + * + * VK_IMAGE_ASPECT_COLOR_BIT + * + * will be converted to + * + * VK_IMAGE_ASPECT_PLANE_0_BIT | + * VK_IMAGE_ASPECT_PLANE_1_BIT | + * VK_IMAGE_ASPECT_PLANE_2_BIT + * + * for an image of format VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM. + */ +VkImageAspectFlags +vk_image_expand_aspect_mask(const struct vk_image *image, + VkImageAspectFlags aspect_mask) +{ + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_MASK_MESA); + return image->aspects; + } else { + assert(aspect_mask && !(aspect_mask & ~image->aspects)); + return aspect_mask; + } +} + +VkExtent3D +vk_image_extent_to_elements(const struct vk_image *image, VkExtent3D extent) +{ + const struct util_format_description *fmt = + vk_format_description(image->format); + + extent = vk_image_sanitize_extent(image, extent); + extent.width = DIV_ROUND_UP(extent.width, fmt->block.width); + extent.height = DIV_ROUND_UP(extent.height, fmt->block.height); + extent.depth = DIV_ROUND_UP(extent.depth, fmt->block.depth); + + return extent; +} + +VkOffset3D +vk_image_offset_to_elements(const struct vk_image *image, VkOffset3D offset) +{ + const struct util_format_description *fmt = + vk_format_description(image->format); + + offset = vk_image_sanitize_offset(image, offset); + + assert(offset.x % fmt->block.width == 0); + assert(offset.y % fmt->block.height == 0); + assert(offset.z % fmt->block.depth == 0); + + offset.x /= fmt->block.width; + offset.y /= fmt->block.height; + offset.z /= fmt->block.depth; + + return offset; +} + +struct vk_image_buffer_layout +vk_image_buffer_copy_layout(const struct vk_image *image, + const VkBufferImageCopy2* region) +{ + VkExtent3D extent = vk_image_sanitize_extent(image, region->imageExtent); + + const uint32_t row_length = region->bufferRowLength ? + region->bufferRowLength : extent.width; + const uint32_t image_height = region->bufferImageHeight ? 
+ region->bufferImageHeight : extent.height; + + const VkImageAspectFlags aspect = region->imageSubresource.aspectMask; + VkFormat format = vk_format_get_aspect_format(image->format, aspect); + const struct util_format_description *fmt = vk_format_description(format); + + assert(fmt->block.bits % 8 == 0); + const uint32_t element_size_B = fmt->block.bits / 8; + + const uint32_t row_stride_B = + DIV_ROUND_UP(row_length, fmt->block.width) * element_size_B; + const uint64_t image_stride_B = + DIV_ROUND_UP(image_height, fmt->block.height) * (uint64_t)row_stride_B; + + return (struct vk_image_buffer_layout) { + .row_length = row_length, + .image_height = image_height, + .element_size_B = element_size_B, + .row_stride_B = row_stride_B, + .image_stride_B = image_stride_B, + }; +} + +struct vk_image_buffer_layout +vk_memory_to_image_copy_layout(const struct vk_image *image, + const VkMemoryToImageCopyEXT* region) +{ + const VkBufferImageCopy2 bic = { + .bufferOffset = 0, + .bufferRowLength = region->memoryRowLength, + .bufferImageHeight = region->memoryImageHeight, + .imageSubresource = region->imageSubresource, + .imageOffset = region->imageOffset, + .imageExtent = region->imageExtent, + }; + return vk_image_buffer_copy_layout(image, &bic); +} + +struct vk_image_buffer_layout +vk_image_to_memory_copy_layout(const struct vk_image *image, + const VkImageToMemoryCopyEXT* region) +{ + const VkBufferImageCopy2 bic = { + .bufferOffset = 0, + .bufferRowLength = region->memoryRowLength, + .bufferImageHeight = region->memoryImageHeight, + .imageSubresource = region->imageSubresource, + .imageOffset = region->imageOffset, + .imageExtent = region->imageExtent, + }; + return vk_image_buffer_copy_layout(image, &bic); +} + +static VkComponentSwizzle +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) +{ + return swizzle == VK_COMPONENT_SWIZZLE_IDENTITY ? 
component : swizzle; +} + +void +vk_image_view_init(struct vk_device *device, + struct vk_image_view *image_view, + bool driver_internal, + const VkImageViewCreateInfo *pCreateInfo) +{ + vk_object_base_init(device, &image_view->base, VK_OBJECT_TYPE_IMAGE_VIEW); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + VK_FROM_HANDLE(vk_image, image, pCreateInfo->image); + + image_view->create_flags = pCreateInfo->flags; + image_view->image = image; + image_view->view_type = pCreateInfo->viewType; + + image_view->format = pCreateInfo->format; + if (image_view->format == VK_FORMAT_UNDEFINED) + image_view->format = image->format; + + if (!driver_internal) { + switch (image_view->view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->image_type == VK_IMAGE_TYPE_1D); + break; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + if (image->create_flags & (VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT | + VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT)) + assert(image->image_type == VK_IMAGE_TYPE_3D); + else + assert(image->image_type == VK_IMAGE_TYPE_2D); + break; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->image_type == VK_IMAGE_TYPE_3D); + break; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + assert(image->image_type == VK_IMAGE_TYPE_2D); + assert(image->create_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT); + break; + default: + unreachable("Invalid image view type"); + } + } + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + + if (driver_internal) { + image_view->aspects = range->aspectMask; + image_view->view_format = image_view->format; + } else { + image_view->aspects = + vk_image_expand_aspect_mask(image, range->aspectMask); + + assert(!(image_view->aspects & ~image->aspects)); + + /* From the Vulkan 1.2.184 spec: + * + * "If the image has a multi-planar format and + * subresourceRange.aspectMask is VK_IMAGE_ASPECT_COLOR_BIT, and image + * has been created with a usage value not containing any of the + * VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR, + * VK_IMAGE_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + * VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR, + * VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR, + * VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR, and + * VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR flags, then the format must + * be identical to the image format, and the sampler to be used with the + * image view must enable sampler Y′CBCR conversion." + * + * Since no one implements video yet, we can ignore the bits about video + * create flags and assume YCbCr formats match. + */ + if ((image->aspects & VK_IMAGE_ASPECT_PLANE_1_BIT) && + (range->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)) + assert(image_view->format == image->format); + + /* From the Vulkan 1.2.184 spec: + * + * "Each depth/stencil format is only compatible with itself." + */ + if (image_view->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) + assert(image_view->format == image->format); + + if (!(image->create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) + assert(image_view->format == image->format); + + /* Restrict the format to only the planes chosen. + * + * For combined depth and stencil images, this means the depth-only or + * stencil-only format if only one aspect is chosen and the full + * combined format if both aspects are chosen. + * + * For single-plane color images, we just take the format as-is. For + * multi-plane views of multi-plane images, this means we want the full + * multi-plane format. 
For single-plane views of multi-plane images, we + * want a format compatible with the one plane. Fortunately, this is + * already what the client gives us. The Vulkan 1.2.184 spec says: + * + * "If image was created with the VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT + * and the image has a multi-planar format, and if + * subresourceRange.aspectMask is VK_IMAGE_ASPECT_PLANE_0_BIT, + * VK_IMAGE_ASPECT_PLANE_1_BIT, or VK_IMAGE_ASPECT_PLANE_2_BIT, + * format must be compatible with the corresponding plane of the + * image, and the sampler to be used with the image view must not + * enable sampler Y′CBCR conversion." + */ + if (image_view->aspects == VK_IMAGE_ASPECT_STENCIL_BIT) { + image_view->view_format = vk_format_stencil_only(image_view->format); + } else if (image_view->aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { + image_view->view_format = vk_format_depth_only(image_view->format); + } else { + image_view->view_format = image_view->format; + } + } + + image_view->swizzle = (VkComponentMapping) { + .r = remap_swizzle(pCreateInfo->components.r, VK_COMPONENT_SWIZZLE_R), + .g = remap_swizzle(pCreateInfo->components.g, VK_COMPONENT_SWIZZLE_G), + .b = remap_swizzle(pCreateInfo->components.b, VK_COMPONENT_SWIZZLE_B), + .a = remap_swizzle(pCreateInfo->components.a, VK_COMPONENT_SWIZZLE_A), + }; + + assert(range->layerCount > 0); + assert(range->baseMipLevel < image->mip_levels); + + image_view->base_mip_level = range->baseMipLevel; + image_view->level_count = vk_image_subresource_level_count(image, range); + image_view->base_array_layer = range->baseArrayLayer; + image_view->layer_count = vk_image_subresource_layer_count(image, range); + + const VkImageViewMinLodCreateInfoEXT *min_lod_info = + vk_find_struct_const(pCreateInfo, IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT); + image_view->min_lod = min_lod_info ? min_lod_info->minLod : 0.0f; + + /* From the Vulkan 1.3.215 spec: + * + * VUID-VkImageViewMinLodCreateInfoEXT-minLod-06456 + * + * "minLod must be less or equal to the index of the last mipmap level + * accessible to the view." + */ + assert(image_view->min_lod <= image_view->base_mip_level + + image_view->level_count - 1); + + image_view->extent = + vk_image_mip_level_extent(image, image_view->base_mip_level); + + /* By default storage uses the same as the image properties, but it can be + * overriden with VkImageViewSlicedCreateInfoEXT. 
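+ *
+ * For reference, a VK_EXT_sliced_view_of_3d client requests a slice range by
+ * chaining the struct into VkImageViewCreateInfo::pNext, e.g. (the values
+ * here are purely illustrative):
+ *
+ *    const VkImageViewSlicedCreateInfoEXT sliced = {
+ *       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_SLICED_CREATE_INFO_EXT,
+ *       .sliceOffset = 4,
+ *       .sliceCount = VK_REMAINING_3D_SLICES_EXT,
+ *    };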
+ */ + image_view->storage.z_slice_offset = 0; + image_view->storage.z_slice_count = image_view->extent.depth; + + const VkImageViewSlicedCreateInfoEXT *sliced_info = + vk_find_struct_const(pCreateInfo, IMAGE_VIEW_SLICED_CREATE_INFO_EXT); + assert(image_view->base_mip_level + image_view->level_count + <= image->mip_levels); + switch (image->image_type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(image_view->base_array_layer + image_view->layer_count + <= image->array_layers); + break; + case VK_IMAGE_TYPE_3D: + if (sliced_info && image_view->view_type == VK_IMAGE_VIEW_TYPE_3D) { + unsigned total = image_view->extent.depth; + image_view->storage.z_slice_offset = sliced_info->sliceOffset; + assert(image_view->storage.z_slice_offset < total); + if (sliced_info->sliceCount == VK_REMAINING_3D_SLICES_EXT) { + image_view->storage.z_slice_count = total - image_view->storage.z_slice_offset; + } else { + image_view->storage.z_slice_count = sliced_info->sliceCount; + } + } else if (image_view->view_type != VK_IMAGE_VIEW_TYPE_3D) { + image_view->storage.z_slice_offset = image_view->base_array_layer; + image_view->storage.z_slice_count = image_view->layer_count; + } + assert(image_view->storage.z_slice_offset + image_view->storage.z_slice_count + <= image->extent.depth); + assert(image_view->base_array_layer + image_view->layer_count + <= image_view->extent.depth); + break; + } + + /* If we are creating a color view from a depth/stencil image we compute + * usage from the underlying depth/stencil aspects. + */ + const VkImageUsageFlags image_usage = + vk_image_usage(image, image_view->aspects); + const VkImageViewUsageCreateInfo *usage_info = + vk_find_struct_const(pCreateInfo, IMAGE_VIEW_USAGE_CREATE_INFO); + image_view->usage = usage_info ? 
usage_info->usage : image_usage; + assert(driver_internal || !(image_view->usage & ~image_usage)); +} + +void +vk_image_view_finish(struct vk_image_view *image_view) +{ + vk_object_base_finish(&image_view->base); +} + +void * +vk_image_view_create(struct vk_device *device, + bool driver_internal, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_image_view *image_view = + vk_zalloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (image_view == NULL) + return NULL; + + vk_image_view_init(device, image_view, driver_internal, pCreateInfo); + + return image_view; +} + +void +vk_image_view_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_image_view *image_view) +{ + vk_object_free(device, alloc, image_view); +} + +bool +vk_image_layout_is_read_only(VkImageLayout layout, + VkImageAspectFlagBits aspect) +{ + assert(util_bitcount(aspect) == 1); + + switch (layout) { + case VK_IMAGE_LAYOUT_UNDEFINED: + case VK_IMAGE_LAYOUT_PREINITIALIZED: + return true; /* These are only used for layout transitions */ + + case VK_IMAGE_LAYOUT_GENERAL: + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT: + case VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR: + return false; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + case VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR: + case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: + return true; + + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + return aspect == VK_IMAGE_ASPECT_DEPTH_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + return aspect == VK_IMAGE_ASPECT_STENCIL_BIT; + + case VK_IMAGE_LAYOUT_MAX_ENUM: + case VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR: + case VK_IMAGE_LAYOUT_VIDEO_DECODE_SRC_KHR: + case VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR: + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DST_KHR: + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR: + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR: + unreachable("Invalid image layout."); + } + + unreachable("Invalid image layout."); +} + +bool +vk_image_layout_is_depth_only(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: + return true; + + default: + return false; + } +} + +static VkResult +vk_image_create_get_format_list_uncompressed(struct vk_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFormat **formats, + uint32_t *format_count) +{ + const struct vk_format_class_info *class = + vk_format_get_class_info(pCreateInfo->format); + + *formats = NULL; + *format_count = 0; + + if (class->format_count < 2) + return VK_SUCCESS; + + *formats = vk_alloc2(&device->alloc, pAllocator, + sizeof(VkFormat) * class->format_count, + alignof(VkFormat), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (*formats == NULL) + 
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + memcpy(*formats, class->formats, sizeof(VkFormat) * class->format_count); + *format_count = class->format_count; + + return VK_SUCCESS; +} + +static VkResult +vk_image_create_get_format_list_compressed(struct vk_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFormat **formats, + uint32_t *format_count) +{ + if ((pCreateInfo->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) == 0) { + return vk_image_create_get_format_list_uncompressed(device, + pCreateInfo, + pAllocator, + formats, + format_count); + } + + const struct vk_format_class_info *class = + vk_format_get_class_info(pCreateInfo->format); + const struct vk_format_class_info *uncompr_class = NULL; + + switch (vk_format_get_blocksizebits(pCreateInfo->format)) { + case 64: + uncompr_class = vk_format_class_get_info(MESA_VK_FORMAT_CLASS_64_BIT); + break; + case 128: + uncompr_class = vk_format_class_get_info(MESA_VK_FORMAT_CLASS_128_BIT); + break; + } + + if (!uncompr_class) + return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED); + + uint32_t fmt_count = class->format_count + uncompr_class->format_count; + + *formats = vk_alloc2(&device->alloc, pAllocator, + sizeof(VkFormat) * fmt_count, + alignof(VkFormat), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (*formats == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + memcpy(*formats, class->formats, sizeof(VkFormat) * class->format_count); + memcpy(*formats + class->format_count, uncompr_class->formats, + sizeof(VkFormat) * uncompr_class->format_count); + *format_count = class->format_count + uncompr_class->format_count; + + return VK_SUCCESS; +} + +/* Get a list of compatible formats when VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT + * or VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT is set. This list is + * either retrieved from a VkImageFormatListCreateInfo passed to the creation + * chain, or forged from the default compatible list specified in the + * "formats-compatibility-classes" section of the spec. + * + * The value returned in *formats must be freed with + * vk_free2(&device->alloc, pAllocator), and should not live past the + * vkCreateImage() call (allocated in the COMMAND scope). + */ +VkResult +vk_image_create_get_format_list(struct vk_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFormat **formats, + uint32_t *format_count) +{ + *formats = NULL; + *format_count = 0; + + if (!(pCreateInfo->flags & + (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | + VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT))) { + return VK_SUCCESS; + } + + /* "Each depth/stencil format is only compatible with itself." 
*/ + if (vk_format_is_depth_or_stencil(pCreateInfo->format)) + return VK_SUCCESS; + + const VkImageFormatListCreateInfo *format_list = (const VkImageFormatListCreateInfo *) + vk_find_struct_const(pCreateInfo->pNext, IMAGE_FORMAT_LIST_CREATE_INFO); + + if (format_list) { + if (!format_list->viewFormatCount) + return VK_SUCCESS; + + *formats = vk_alloc2(&device->alloc, pAllocator, + sizeof(VkFormat) * format_list->viewFormatCount, + alignof(VkFormat), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (*formats == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + memcpy(*formats, format_list->pViewFormats, sizeof(VkFormat) * format_list->viewFormatCount); + *format_count = format_list->viewFormatCount; + return VK_SUCCESS; + } + + if (vk_format_is_compressed(pCreateInfo->format)) + return vk_image_create_get_format_list_compressed(device, + pCreateInfo, + pAllocator, + formats, + format_count); + + return vk_image_create_get_format_list_uncompressed(device, + pCreateInfo, + pAllocator, + formats, + format_count); +} + +/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2: + * + * "If layout only specifies the layout of the depth aspect of the + * attachment, the layout of the stencil aspect is specified by the + * stencilLayout member of a VkAttachmentReferenceStencilLayout structure + * included in the pNext chain. Otherwise, layout describes the layout for + * all relevant image aspects." + */ +VkImageLayout +vk_att_ref_stencil_layout(const VkAttachmentReference2 *att_ref, + const VkAttachmentDescription2 *attachments) +{ + /* From VUID-VkAttachmentReference2-attachment-04755: + * "If attachment is not VK_ATTACHMENT_UNUSED, and the format of the + * referenced attachment is a depth/stencil format which includes both + * depth and stencil aspects [...] + */ + if (att_ref->attachment == VK_ATTACHMENT_UNUSED || + !vk_format_has_stencil(attachments[att_ref->attachment].format)) + return VK_IMAGE_LAYOUT_UNDEFINED; + + const VkAttachmentReferenceStencilLayout *stencil_ref = + vk_find_struct_const(att_ref->pNext, ATTACHMENT_REFERENCE_STENCIL_LAYOUT); + + if (stencil_ref) + return stencil_ref->stencilLayout; + + /* From VUID-VkAttachmentReference2-attachment-04755: + * "If attachment is not VK_ATTACHMENT_UNUSED, and the format of the + * referenced attachment is a depth/stencil format which includes both + * depth and stencil aspects, and layout is + * VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL or + * VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, the pNext chain must include + * a VkAttachmentReferenceStencilLayout structure." + */ + assert(!vk_image_layout_is_depth_only(att_ref->layout)); + + return att_ref->layout; +} + +/* From the Vulkan Specification 1.2.184: + * + * "If the pNext chain includes a VkAttachmentDescriptionStencilLayout + * structure, then the stencilInitialLayout and stencilFinalLayout members + * specify the initial and final layouts of the stencil aspect of a + * depth/stencil format, and initialLayout and finalLayout only apply to the + * depth aspect. For depth-only formats, the + * VkAttachmentDescriptionStencilLayout structure is ignored. For + * stencil-only formats, the initial and final layouts of the stencil aspect + * are taken from the VkAttachmentDescriptionStencilLayout structure if + * present, or initialLayout and finalLayout if not present." 
+ * + * "If format is a depth/stencil format, and either initialLayout or + * finalLayout does not specify a layout for the stencil aspect, then the + * application must specify the initial and final layouts of the stencil + * aspect by including a VkAttachmentDescriptionStencilLayout structure in + * the pNext chain." + */ +VkImageLayout +vk_att_desc_stencil_layout(const VkAttachmentDescription2 *att_desc, bool final) +{ + if (!vk_format_has_stencil(att_desc->format)) + return VK_IMAGE_LAYOUT_UNDEFINED; + + const VkAttachmentDescriptionStencilLayout *stencil_desc = + vk_find_struct_const(att_desc->pNext, ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT); + + if (stencil_desc) { + return final ? + stencil_desc->stencilFinalLayout : + stencil_desc->stencilInitialLayout; + } + + const VkImageLayout main_layout = + final ? att_desc->finalLayout : att_desc->initialLayout; + + /* From VUID-VkAttachmentDescription2-format-03302/03303: + * "If format is a depth/stencil format which includes both depth and + * stencil aspects, and initial/finalLayout is + * VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL or + * VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, the pNext chain must include + * a VkAttachmentDescriptionStencilLayout structure." + */ + assert(!vk_image_layout_is_depth_only(main_layout)); + + return main_layout; +} + +VkImageUsageFlags +vk_image_layout_to_usage_flags(VkImageLayout layout, + VkImageAspectFlagBits aspect) +{ + assert(util_bitcount(aspect) == 1); + + switch (layout) { + case VK_IMAGE_LAYOUT_UNDEFINED: + case VK_IMAGE_LAYOUT_PREINITIALIZED: + return 0u; + + case VK_IMAGE_LAYOUT_GENERAL: + return ~0u; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + assert(aspect & VK_IMAGE_ASPECT_ANY_COLOR_MASK_MESA); + return VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + assert(aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + return VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: + assert(aspect & VK_IMAGE_ASPECT_DEPTH_BIT); + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, aspect); + + case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL: + assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, aspect); + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + assert(aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + return VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: + assert(aspect & VK_IMAGE_ASPECT_DEPTH_BIT); + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, aspect); + + case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL: + assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, aspect); + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, aspect); + } 
else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, aspect); + } else { + assert(!"Must be a depth/stencil aspect"); + return 0; + } + + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, aspect); + } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + return vk_image_layout_to_usage_flags( + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, aspect); + } else { + assert(!"Must be a depth/stencil aspect"); + return 0; + } + + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + /* This needs to be handled specially by the caller */ + return 0; + + case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + return vk_image_layout_to_usage_flags(VK_IMAGE_LAYOUT_GENERAL, aspect); + + case VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR: + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + return VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + + case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT: + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + return VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT; + + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT || + aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + return VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } else { + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + return VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: + return VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + + case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT: + case VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR: + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT || + aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + return VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | + VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } else { + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + return VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | + VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } + + case VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR: + return VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + case VK_IMAGE_LAYOUT_VIDEO_DECODE_SRC_KHR: + return VK_IMAGE_USAGE_VIDEO_DECODE_SRC_BIT_KHR; + case VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR: + return VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DST_KHR: + return VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR; + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR: + return VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR: + return VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR; + case VK_IMAGE_LAYOUT_MAX_ENUM: + unreachable("Invalid image layout."); + } + + unreachable("Invalid image layout."); +} diff --git a/src/vulkan/runtime/vk_image.h b/src/vulkan/runtime/vk_image.h new file mode 100644 index 00000000000..d69009a4abb --- /dev/null +++ b/src/vulkan/runtime/vk_image.h @@ -0,0 +1,382 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, 
copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_IMAGE_H +#define VK_IMAGE_H + +#include "vk_object.h" + +#include "util/detect_os.h" +#include "util/u_math.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_image { + struct vk_object_base base; + + VkImageCreateFlags create_flags; + VkImageType image_type; + + /* format is from VkImageCreateInfo::format or + * VkExternalFormatANDROID::externalFormat. This works because only one of + * them can be defined and the runtime uses VkFormat for external formats. + */ + VkFormat format; + + VkExtent3D extent; + uint32_t mip_levels; + uint32_t array_layers; + VkSampleCountFlagBits samples; + VkImageTiling tiling; + VkImageUsageFlags usage; + VkSharingMode sharing_mode; + + /* Derived from format */ + VkImageAspectFlags aspects; + + /* VK_EXT_separate_stencil_usage */ + VkImageUsageFlags stencil_usage; + + /* VK_KHR_external_memory */ + VkExternalMemoryHandleTypeFlags external_handle_types; + + /* VK_EXT_image_compression_control */ + VkImageCompressionFlagsEXT compr_flags; + + /* wsi_image_create_info::scanout */ + bool wsi_legacy_scanout; + +#if DETECT_OS_LINUX || DETECT_OS_BSD + /* VK_EXT_drm_format_modifier + * + * Initialized by vk_image_create/init() to DRM_FORMAT_MOD_INVALID. It's + * the job of the driver to parse the VK_EXT_drm_format_modifier extension + * structs and choose the actual modifier. + * + * Must be DRM_FORMAT_MOD_INVALID unless tiling is + * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT. + */ + uint64_t drm_format_mod; +#endif + +#if DETECT_OS_ANDROID + /* AHARDWAREBUFFER_FORMAT for this image or 0 + * + * A default is provided by the Vulkan runtime code based on the VkFormat + * but it may be overridden by the driver as needed. 
+ */ + uint32_t ahb_format; +#endif +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_image, base, VkImage, + VK_OBJECT_TYPE_IMAGE); + +void vk_image_init(struct vk_device *device, + struct vk_image *image, + const VkImageCreateInfo *pCreateInfo); +void vk_image_finish(struct vk_image *image); + +void *vk_image_create(struct vk_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_image_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_image *image); + +VkResult +vk_image_create_get_format_list(struct vk_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFormat **formats, + uint32_t *format_count); + +void vk_image_set_format(struct vk_image *image, VkFormat format); + +VkImageUsageFlags vk_image_usage(const struct vk_image *image, + VkImageAspectFlags aspect_mask); + +VkImageAspectFlags vk_image_expand_aspect_mask(const struct vk_image *image, + VkImageAspectFlags aspect_mask); + +static inline VkExtent3D +vk_image_mip_level_extent(const struct vk_image *image, + uint32_t mip_level) +{ + const VkExtent3D extent = { + u_minify(image->extent.width, mip_level), + u_minify(image->extent.height, mip_level), + u_minify(image->extent.depth, mip_level), + }; + return extent; +} + +/* This is defined as a macro so that it works for both + * VkImageSubresourceRange and VkImageSubresourceLayers + */ +#define vk_image_subresource_layer_count(_image, _range) \ + ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \ + (_image)->array_layers - (_range)->baseArrayLayer : (_range)->layerCount) + +static inline uint32_t +vk_image_subresource_level_count(const struct vk_image *image, + const VkImageSubresourceRange *range) +{ + return range->levelCount == VK_REMAINING_MIP_LEVELS ? + image->mip_levels - range->baseMipLevel : range->levelCount; +} + +static inline VkExtent3D +vk_image_sanitize_extent(const struct vk_image *image, + const VkExtent3D imageExtent) +{ + switch (image->image_type) { + case VK_IMAGE_TYPE_1D: + return (VkExtent3D) { imageExtent.width, 1, 1 }; + case VK_IMAGE_TYPE_2D: + return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 }; + case VK_IMAGE_TYPE_3D: + return imageExtent; + default: + unreachable("invalid image type"); + } +} + +VkExtent3D +vk_image_extent_to_elements(const struct vk_image *image, VkExtent3D extent); + +static inline VkOffset3D +vk_image_sanitize_offset(const struct vk_image *image, + const VkOffset3D imageOffset) +{ + switch (image->image_type) { + case VK_IMAGE_TYPE_1D: + return (VkOffset3D) { imageOffset.x, 0, 0 }; + case VK_IMAGE_TYPE_2D: + return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 }; + case VK_IMAGE_TYPE_3D: + return imageOffset; + default: + unreachable("invalid image type"); + } +} + +VkOffset3D +vk_image_offset_to_elements(const struct vk_image *image, VkOffset3D offset); + +struct vk_image_buffer_layout { + /** + * VkBufferImageCopy2::bufferRowLength or + * VkBufferImageCopy2::extent::width as needed. + */ + uint32_t row_length; + + /** + * VkBufferImageCopy2::bufferImageHeight or + * VkBufferImageCopy2::extent::height as needed. + */ + uint32_t image_height; + + /** Size of a single element (pixel or compressed block) in bytes */ + uint32_t element_size_B; + + /** Row stride in bytes */ + uint32_t row_stride_B; + + /** Image (or layer) stride in bytes + * + * For 1D or 2D array images, this is the stride in bytes between array + * slices. 
For 3D images, this is the stride in bytes between fixed-Z + * slices. + */ + uint64_t image_stride_B; +}; + +struct vk_image_buffer_layout +vk_image_buffer_copy_layout(const struct vk_image *image, + const VkBufferImageCopy2* region); + +struct vk_image_buffer_layout +vk_memory_to_image_copy_layout(const struct vk_image *image, + const VkMemoryToImageCopyEXT* region); + +struct vk_image_buffer_layout +vk_image_to_memory_copy_layout(const struct vk_image *image, + const VkImageToMemoryCopyEXT* region); + +struct vk_image_view { + struct vk_object_base base; + + VkImageViewCreateFlags create_flags; + struct vk_image *image; + VkImageViewType view_type; + + /** VkImageViewCreateInfo::format or vk_image::format */ + VkFormat format; + + /** Image view format, relative to the selected aspects + * + * For a depth/stencil image: + * + * - If vk_image_view::aspects contains both depth and stencil, this will + * be the full depth/stencil format of the image. + * + * - If only one aspect is selected, this will be the depth-only or + * stencil-only format, as per the selected aspect. + * + * For color images, we have three cases: + * + * 1. It's a single-plane image in which case this is the unmodified + * format provided to VkImageViewCreateInfo::format or + * vk_image::format. + * + * 2. It's a YCbCr view of a multi-plane image in which case the + * client will have asked for VK_IMAGE_ASPECT_COLOR_BIT and the + * format provided will be the full planar format. In this case, + * the format will be the full format containing all the planes. + * + * 3. It's a single-plane view of a multi-plane image in which case + * the client will have asked for VK_IMAGE_ASPECT_PLANE_N_BIT and + * will have provided a format compatible with that specific + * plane of the multi-planar format. In this case, the format will be + * the plane-compatible format requested by the client. + */ + VkFormat view_format; + + /* Component mapping, aka swizzle + * + * Unlike the swizzle provided via VkImageViewCreateInfo::components, this + * will never contain VK_COMPONENT_SWIZZLE_IDENTITY. It will be resolved + * to VK_COMPONENT_SWIZZLE_R/G/B/A, as appropriate. + */ + VkComponentMapping swizzle; + + /** Aspects from the image represented by this view + * + * For depth/stencil images, this is the aspectMask provided by + * VkImageViewCreateinfo::subresourceRange::aspectMask. + * + * For color images, we have three cases: + * + * 1. It's a single-plane image in which case this only aspect is + * VK_IMAGE_ASPECT_COLOR_BIT. + * + * 2. It's a YCbCr view of a multi-plane image in which case the + * client will have asked for VK_IMAGE_ASPECT_COLOR_BIT and the + * format provided will be the full planar format. In this case, + * aspects will be the full set of plane aspects in the image. + * + * 3. It's a single-plane view of a multi-plane image in which case + * the client will have asked for VK_IMAGE_ASPECT_PLANE_N_BIT and + * will have provided a format compatible with that specific + * plane of the multi-planar format. In this case, aspects will be + * VK_IMAGE_ASPECT_PLANE_N_BIT where N is the selected plane. + * + * This seems almost backwards from the API but ensures that + * vk_image_view::aspects is always a subset of vk_image::aspects. 
+ */ + VkImageAspectFlags aspects; + + uint32_t base_mip_level; + uint32_t level_count; + uint32_t base_array_layer; + uint32_t layer_count; + + /* VK_EXT_sliced_view_of_3d */ + struct { + /* VkImageViewSlicedCreateInfoEXT::sliceOffset + * + * This field will be 0 for 1D and 2D images, 2D views of 3D images, or + * when no VkImageViewSlicedCreateInfoEXT is provided. + */ + uint32_t z_slice_offset; + + /* VkImageViewSlicedCreateInfoEXT::sliceCount + * + * This field will be 1 for 1D and 2D images or 2D views of 3D images. + * For 3D views, it will be VkImageViewSlicedCreateInfoEXT::sliceCount + * or image view depth (see vk_image_view::extent) when no + * VkImageViewSlicedCreateInfoEXT is provided. + */ + uint32_t z_slice_count; + } storage; + + /* VK_EXT_image_view_min_lod */ + float min_lod; + + /* Image extent at LOD 0 */ + VkExtent3D extent; + + /* VK_KHR_maintenance2 */ + VkImageUsageFlags usage; +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_image_view, base, VkImageView, + VK_OBJECT_TYPE_IMAGE_VIEW); + +void vk_image_view_init(struct vk_device *device, + struct vk_image_view *image_view, + bool driver_internal, + const VkImageViewCreateInfo *pCreateInfo); +void vk_image_view_finish(struct vk_image_view *image_view); + +void *vk_image_view_create(struct vk_device *device, + bool driver_internal, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_image_view_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_image_view *image_view); + +static inline VkImageSubresourceRange +vk_image_view_subresource_range(const struct vk_image_view *view) +{ + VkImageSubresourceRange range = { + .aspectMask = view->aspects, + .baseMipLevel = view->base_mip_level, + .levelCount = view->level_count, + .baseArrayLayer = view->base_array_layer, + .layerCount = view->layer_count, + }; + + return range; +} + +bool vk_image_layout_is_read_only(VkImageLayout layout, + VkImageAspectFlagBits aspect); +bool vk_image_layout_is_depth_only(VkImageLayout layout); + +VkImageUsageFlags vk_image_layout_to_usage_flags(VkImageLayout layout, + VkImageAspectFlagBits aspect); + +VkImageLayout vk_att_ref_stencil_layout(const VkAttachmentReference2 *att_ref, + const VkAttachmentDescription2 *attachments); +VkImageLayout vk_att_desc_stencil_layout(const VkAttachmentDescription2 *att_desc, + bool final); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_IMAGE_H */ diff --git a/src/vulkan/runtime/vk_instance.c b/src/vulkan/runtime/vk_instance.c new file mode 100644 index 00000000000..186452d16a4 --- /dev/null +++ b/src/vulkan/runtime/vk_instance.c @@ -0,0 +1,644 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_instance.h" + +#include "util/libdrm.h" +#include "util/perf/cpu_trace.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_dispatch_trampolines.h" +#include "vk_log.h" +#include "vk_util.h" +#include "vk_debug_utils.h" +#include "vk_physical_device.h" + +#if !VK_LITE_RUNTIME_INSTANCE +#include "compiler/glsl_types.h" +#endif + +#define VERSION_IS_1_0(version) \ + (VK_API_VERSION_MAJOR(version) == 1 && VK_API_VERSION_MINOR(version) == 0) + +static const struct debug_control trace_options[] = { + {"rmv", VK_TRACE_MODE_RMV}, + {NULL, 0}, +}; + +VkResult +vk_instance_init(struct vk_instance *instance, + const struct vk_instance_extension_table *supported_extensions, + const struct vk_instance_dispatch_table *dispatch_table, + const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc) +{ + memset(instance, 0, sizeof(*instance)); + vk_object_base_instance_init(instance, &instance->base, VK_OBJECT_TYPE_INSTANCE); + instance->alloc = *alloc; + + util_cpu_trace_init(); + + /* VK_EXT_debug_utils */ + /* These messengers will only be used during vkCreateInstance or + * vkDestroyInstance calls. We do this first so that it's safe to use + * vk_errorf and friends. + */ + list_inithead(&instance->debug_utils.instance_callbacks); + vk_foreach_struct_const(ext, pCreateInfo->pNext) { + if (ext->sType == + VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT) { + const VkDebugUtilsMessengerCreateInfoEXT *debugMessengerCreateInfo = + (const VkDebugUtilsMessengerCreateInfoEXT *)ext; + struct vk_debug_utils_messenger *messenger = + vk_alloc2(alloc, alloc, sizeof(struct vk_debug_utils_messenger), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!messenger) + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_instance_init(instance, &messenger->base, + VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT); + + messenger->alloc = *alloc; + messenger->severity = debugMessengerCreateInfo->messageSeverity; + messenger->type = debugMessengerCreateInfo->messageType; + messenger->callback = debugMessengerCreateInfo->pfnUserCallback; + messenger->data = debugMessengerCreateInfo->pUserData; + + list_addtail(&messenger->link, + &instance->debug_utils.instance_callbacks); + } + } + + uint32_t instance_version = VK_API_VERSION_1_0; + if (dispatch_table->EnumerateInstanceVersion) + dispatch_table->EnumerateInstanceVersion(&instance_version); + + instance->app_info = (struct vk_app_info) { .api_version = 0 }; + if (pCreateInfo->pApplicationInfo) { + const VkApplicationInfo *app = pCreateInfo->pApplicationInfo; + + instance->app_info.app_name = + vk_strdup(&instance->alloc, app->pApplicationName, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + instance->app_info.app_version = app->applicationVersion; + + instance->app_info.engine_name = + vk_strdup(&instance->alloc, app->pEngineName, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + instance->app_info.engine_version = app->engineVersion; + + instance->app_info.api_version = app->apiVersion; + } + + /* From the Vulkan 1.2.199 spec: + * + * "Note: + * + * Providing a NULL VkInstanceCreateInfo::pApplicationInfo or providing + * an apiVersion of 0 is equivalent to providing an apiVersion of + * VK_MAKE_API_VERSION(0,1,0,0)." 
+ */ + if (instance->app_info.api_version == 0) + instance->app_info.api_version = VK_API_VERSION_1_0; + + /* From the Vulkan 1.2.199 spec: + * + * VUID-VkApplicationInfo-apiVersion-04010 + * + * "If apiVersion is not 0, then it must be greater than or equal to + * VK_API_VERSION_1_0" + */ + assert(instance->app_info.api_version >= VK_API_VERSION_1_0); + + /* From the Vulkan 1.2.199 spec: + * + * "Vulkan 1.0 implementations were required to return + * VK_ERROR_INCOMPATIBLE_DRIVER if apiVersion was larger than 1.0. + * Implementations that support Vulkan 1.1 or later must not return + * VK_ERROR_INCOMPATIBLE_DRIVER for any value of apiVersion." + */ + if (VERSION_IS_1_0(instance_version) && + !VERSION_IS_1_0(instance->app_info.api_version)) + return VK_ERROR_INCOMPATIBLE_DRIVER; + + instance->supported_extensions = supported_extensions; + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + int idx; + for (idx = 0; idx < VK_INSTANCE_EXTENSION_COUNT; idx++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + vk_instance_extensions[idx].extensionName) == 0) + break; + } + + if (idx >= VK_INSTANCE_EXTENSION_COUNT) + return vk_errorf(instance, VK_ERROR_EXTENSION_NOT_PRESENT, + "%s not supported", + pCreateInfo->ppEnabledExtensionNames[i]); + + if (!supported_extensions->extensions[idx]) + return vk_errorf(instance, VK_ERROR_EXTENSION_NOT_PRESENT, + "%s not supported", + pCreateInfo->ppEnabledExtensionNames[i]); + +#ifdef ANDROID_STRICT + if (!vk_android_allowed_instance_extensions.extensions[idx]) + return vk_errorf(instance, VK_ERROR_EXTENSION_NOT_PRESENT, + "%s not supported", + pCreateInfo->ppEnabledExtensionNames[i]); +#endif + + instance->enabled_extensions.extensions[idx] = true; + } + + instance->dispatch_table = *dispatch_table; + + /* Add common entrypoints without overwriting driver-provided ones. 
*/ + vk_instance_dispatch_table_from_entrypoints( + &instance->dispatch_table, &vk_common_instance_entrypoints, false); + + if (mtx_init(&instance->debug_report.callbacks_mutex, mtx_plain) != 0) + return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + + list_inithead(&instance->debug_report.callbacks); + + if (mtx_init(&instance->debug_utils.callbacks_mutex, mtx_plain) != 0) { + mtx_destroy(&instance->debug_report.callbacks_mutex); + return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + } + + list_inithead(&instance->debug_utils.callbacks); + + list_inithead(&instance->physical_devices.list); + + if (mtx_init(&instance->physical_devices.mutex, mtx_plain) != 0) { + mtx_destroy(&instance->debug_report.callbacks_mutex); + mtx_destroy(&instance->debug_utils.callbacks_mutex); + return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + } + + instance->trace_mode = parse_debug_string(getenv("MESA_VK_TRACE"), trace_options); + instance->trace_frame = (uint32_t)debug_get_num_option("MESA_VK_TRACE_FRAME", 0xFFFFFFFF); + instance->trace_trigger_file = secure_getenv("MESA_VK_TRACE_TRIGGER"); + +#if !VK_LITE_RUNTIME_INSTANCE + glsl_type_singleton_init_or_ref(); +#endif + + return VK_SUCCESS; +} + +static void +destroy_physical_devices(struct vk_instance *instance) +{ + list_for_each_entry_safe(struct vk_physical_device, pdevice, + &instance->physical_devices.list, link) { + list_del(&pdevice->link); + instance->physical_devices.destroy(pdevice); + } +} + +void +vk_instance_finish(struct vk_instance *instance) +{ + destroy_physical_devices(instance); + +#if !VK_LITE_RUNTIME_INSTANCE + glsl_type_singleton_decref(); +#endif + + if (unlikely(!list_is_empty(&instance->debug_utils.callbacks))) { + list_for_each_entry_safe(struct vk_debug_utils_messenger, messenger, + &instance->debug_utils.callbacks, link) { + list_del(&messenger->link); + vk_object_base_finish(&messenger->base); + vk_free2(&instance->alloc, &messenger->alloc, messenger); + } + } + if (unlikely(!list_is_empty(&instance->debug_utils.instance_callbacks))) { + list_for_each_entry_safe(struct vk_debug_utils_messenger, messenger, + &instance->debug_utils.instance_callbacks, + link) { + list_del(&messenger->link); + vk_object_base_finish(&messenger->base); + vk_free2(&instance->alloc, &messenger->alloc, messenger); + } + } + mtx_destroy(&instance->debug_report.callbacks_mutex); + mtx_destroy(&instance->debug_utils.callbacks_mutex); + mtx_destroy(&instance->physical_devices.mutex); + vk_free(&instance->alloc, (char *)instance->app_info.app_name); + vk_free(&instance->alloc, (char *)instance->app_info.engine_name); + vk_object_base_finish(&instance->base); +} + +VkResult +vk_enumerate_instance_extension_properties( + const struct vk_instance_extension_table *supported_extensions, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties, pPropertyCount); + + for (int i = 0; i < VK_INSTANCE_EXTENSION_COUNT; i++) { + if (!supported_extensions->extensions[i]) + continue; + +#ifdef ANDROID_STRICT + if (!vk_android_allowed_instance_extensions.extensions[i]) + continue; +#endif + + vk_outarray_append_typed(VkExtensionProperties, &out, prop) { + *prop = vk_instance_extensions[i]; + } + } + + return vk_outarray_status(&out); +} + +PFN_vkVoidFunction +vk_instance_get_proc_addr(const struct vk_instance *instance, + const struct vk_instance_entrypoint_table *entrypoints, + const char *name) +{ + PFN_vkVoidFunction func; + + /* The Vulkan 1.0 spec for 
vkGetInstanceProcAddr has a table of exactly + * when we have to return valid function pointers, NULL, or it's left + * undefined. See the table for exact details. + */ + if (name == NULL) + return NULL; + +#define LOOKUP_VK_ENTRYPOINT(entrypoint) \ + if (strcmp(name, "vk" #entrypoint) == 0) \ + return (PFN_vkVoidFunction)entrypoints->entrypoint + + LOOKUP_VK_ENTRYPOINT(EnumerateInstanceExtensionProperties); + LOOKUP_VK_ENTRYPOINT(EnumerateInstanceLayerProperties); + LOOKUP_VK_ENTRYPOINT(EnumerateInstanceVersion); + LOOKUP_VK_ENTRYPOINT(CreateInstance); + + /* GetInstanceProcAddr() can also be called with a NULL instance. + * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057 + */ + LOOKUP_VK_ENTRYPOINT(GetInstanceProcAddr); + +#undef LOOKUP_VK_ENTRYPOINT + + /* Beginning with ICD interface v7, the following functions can also be + * retrieved via vk_icdGetInstanceProcAddr. + */ + + if (strcmp(name, "vk_icdNegotiateLoaderICDInterfaceVersion") == 0) + return (PFN_vkVoidFunction)vk_icdNegotiateLoaderICDInterfaceVersion; + if (strcmp(name, "vk_icdGetPhysicalDeviceProcAddr") == 0) + return (PFN_vkVoidFunction)vk_icdGetPhysicalDeviceProcAddr; +#ifdef _WIN32 + if (strcmp(name, "vk_icdEnumerateAdapterPhysicalDevices") == 0) + return (PFN_vkVoidFunction)vk_icdEnumerateAdapterPhysicalDevices; +#endif + + if (instance == NULL) + return NULL; + + func = vk_instance_dispatch_table_get_if_supported(&instance->dispatch_table, + name, + instance->app_info.api_version, + &instance->enabled_extensions); + if (func != NULL) + return func; + + func = vk_physical_device_dispatch_table_get_if_supported(&vk_physical_device_trampolines, + name, + instance->app_info.api_version, + &instance->enabled_extensions); + if (func != NULL) + return func; + + func = vk_device_dispatch_table_get_if_supported(&vk_device_trampolines, + name, + instance->app_info.api_version, + &instance->enabled_extensions, + NULL); + if (func != NULL) + return func; + + return NULL; +} + +PFN_vkVoidFunction +vk_instance_get_proc_addr_unchecked(const struct vk_instance *instance, + const char *name) +{ + PFN_vkVoidFunction func; + + if (instance == NULL || name == NULL) + return NULL; + + func = vk_instance_dispatch_table_get(&instance->dispatch_table, name); + if (func != NULL) + return func; + + func = vk_physical_device_dispatch_table_get( + &vk_physical_device_trampolines, name); + if (func != NULL) + return func; + + func = vk_device_dispatch_table_get(&vk_device_trampolines, name); + if (func != NULL) + return func; + + return NULL; +} + +PFN_vkVoidFunction +vk_instance_get_physical_device_proc_addr(const struct vk_instance *instance, + const char *name) +{ + if (instance == NULL || name == NULL) + return NULL; + + return vk_physical_device_dispatch_table_get_if_supported(&vk_physical_device_trampolines, + name, + instance->app_info.api_version, + &instance->enabled_extensions); +} + +void +vk_instance_add_driver_trace_modes(struct vk_instance *instance, + const struct debug_control *modes) +{ + instance->trace_mode |= parse_debug_string(getenv("MESA_VK_TRACE"), modes); +} + +static VkResult +enumerate_drm_physical_devices_locked(struct vk_instance *instance) +{ + /* libdrm returns a maximum of 256 devices (see MAX_DRM_NODES in libdrm) */ + drmDevicePtr devices[256]; + int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); + + if (max_devices < 1) + return VK_SUCCESS; + + VkResult result; + for (uint32_t i = 0; i < (uint32_t)max_devices; i++) { + struct vk_physical_device *pdevice; + result = 
instance->physical_devices.try_create_for_drm(instance, devices[i], &pdevice); + + /* Incompatible DRM device, skip. */ + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { + result = VK_SUCCESS; + continue; + } + + /* Error creating the physical device, report the error. */ + if (result != VK_SUCCESS) + break; + + list_addtail(&pdevice->link, &instance->physical_devices.list); + } + + drmFreeDevices(devices, max_devices); + return result; +} + +static VkResult +enumerate_physical_devices_locked(struct vk_instance *instance) +{ + if (instance->physical_devices.enumerate) { + VkResult result = instance->physical_devices.enumerate(instance); + if (result != VK_ERROR_INCOMPATIBLE_DRIVER) + return result; + } + + VkResult result = VK_SUCCESS; + + if (instance->physical_devices.try_create_for_drm) { + result = enumerate_drm_physical_devices_locked(instance); + if (result != VK_SUCCESS) { + destroy_physical_devices(instance); + return result; + } + } + + return result; +} + +static VkResult +enumerate_physical_devices(struct vk_instance *instance) +{ + VkResult result = VK_SUCCESS; + + mtx_lock(&instance->physical_devices.mutex); + if (!instance->physical_devices.enumerated) { + result = enumerate_physical_devices_locked(instance); + if (result == VK_SUCCESS) + instance->physical_devices.enumerated = true; + } + mtx_unlock(&instance->physical_devices.mutex); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount); + + VkResult result = enumerate_physical_devices(instance); + if (result != VK_SUCCESS) + return result; + + list_for_each_entry(struct vk_physical_device, pdevice, + &instance->physical_devices.list, link) { + vk_outarray_append_typed(VkPhysicalDevice, &out, element) { + *element = vk_physical_device_to_handle(pdevice); + } + } + + return vk_outarray_status(&out); +} + +#ifdef _WIN32 +/* Note: This entrypoint is not exported from ICD DLLs, and is only exposed via + * vk_icdGetInstanceProcAddr for loaders with interface v7. This is to avoid + * a design flaw in the original loader implementation, which prevented enumeration + * of physical devices that didn't have a LUID. This flaw was fixed prior to the + * implementation of v7, so v7 loaders are unaffected, and it's safe to support this. 
+ */ +VKAPI_ATTR VkResult VKAPI_CALL +vk_icdEnumerateAdapterPhysicalDevices(VkInstance _instance, LUID adapterLUID, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount); + + VkResult result = enumerate_physical_devices(instance); + if (result != VK_SUCCESS) + return result; + + list_for_each_entry(struct vk_physical_device, pdevice, + &instance->physical_devices.list, link) { + if (pdevice->properties.deviceLUIDValid && + memcmp(pdevice->properties.deviceLUID, &adapterLUID, sizeof(adapterLUID)) == 0) { + vk_outarray_append_typed(VkPhysicalDevice, &out, element) { + *element = vk_physical_device_to_handle(pdevice); + } + } + } + + return vk_outarray_status(&out); +} +#endif + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pGroupCount, + VkPhysicalDeviceGroupProperties *pGroupProperties) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pGroupProperties, + pGroupCount); + + VkResult result = enumerate_physical_devices(instance); + if (result != VK_SUCCESS) + return result; + + list_for_each_entry(struct vk_physical_device, pdevice, + &instance->physical_devices.list, link) { + vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) { + p->physicalDeviceCount = 1; + memset(p->physicalDevices, 0, sizeof(p->physicalDevices)); + p->physicalDevices[0] = vk_physical_device_to_handle(pdevice); + p->subsetAllocation = false; + } + } + + return vk_outarray_status(&out); +} + +/* For Windows, PUBLIC is default-defined to __declspec(dllexport) to automatically export the + * public entrypoints from a DLL. However, this declspec needs to match between declaration and + * definition, and this attribute is not present on the prototypes specified in vk_icd.h. Instead, + * we'll use a .def file to manually export these entrypoints on Windows. + */ +#ifdef _WIN32 +#undef PUBLIC +#define PUBLIC +#endif + +/* With version 4+ of the loader interface the ICD should expose + * vk_icdGetPhysicalDeviceProcAddr() + */ +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, + const char *pName) +{ + VK_FROM_HANDLE(vk_instance, instance, _instance); + return vk_instance_get_physical_device_proc_addr(instance, pName); +} + +static uint32_t vk_icd_version = 7; + +uint32_t +vk_get_negotiated_icd_version(void) +{ + return vk_icd_version; +} + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) +{ + /* For the full details on loader interface versioning, see + * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. + * What follows is a condensed summary, to help you navigate the large and + * confusing official doc. + * + * - Loader interface v0 is incompatible with later versions. We don't + * support it. + * + * - In loader interface v1: + * - The first ICD entrypoint called by the loader is + * vk_icdGetInstanceProcAddr(). The ICD must statically expose this + * entrypoint. + * - The ICD must statically expose no other Vulkan symbol unless it is + * linked with -Bsymbolic. + * - Each dispatchable Vulkan handle created by the ICD must be + * a pointer to a struct whose first member is VK_LOADER_DATA. 
The + * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. + * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and + * vkDestroySurfaceKHR(). The ICD must be capable of working with + * such loader-managed surfaces. + * + * - Loader interface v2 differs from v1 in: + * - The first ICD entrypoint called by the loader is + * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must + * statically expose this entrypoint. + * + * - Loader interface v3 differs from v2 in: + * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), + * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, + * because the loader no longer does so. + * + * - Loader interface v4 differs from v3 in: + * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr(). + * + * - Loader interface v5 differs from v4 in: + * - The ICD must support Vulkan API version 1.1 and must not return + * VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a + * Vulkan Loader with interface v4 or smaller is being used and the + * application provides an API version that is greater than 1.0. + * + * - Loader interface v6 differs from v5 in: + * - Windows ICDs may export vk_icdEnumerateAdapterPhysicalDevices, + * to tie a physical device to a WDDM adapter LUID. This allows the + * loader to sort physical devices according to the same policy as other + * graphics APIs. + * - Note: A design flaw in the loader implementation of v6 means we do + * not actually support returning this function to v6 loaders. See the + * comments around the implementation above. It's still fine to report + * version number 6 without this method being implemented, however. + * + * - Loader interface v7 differs from v6 in: + * - If implemented, the ICD must return the following functions via + * vk_icdGetInstanceProcAddr: + * - vk_icdNegotiateLoaderICDInterfaceVersion + * - vk_icdGetPhysicalDeviceProcAddr + * - vk_icdEnumerateAdapterPhysicalDevices + * Exporting these functions from the ICD is optional. If + * vk_icdNegotiateLoaderICDInterfaceVersion is not exported from the + * module, or if VK_LUNARG_direct_driver_loading is being used, then + * vk_icdGetInstanceProcAddr will be the first method called, to query + * for vk_icdNegotiateLoaderICDInterfaceVersion. + */ + vk_icd_version = MIN2(vk_icd_version, *pSupportedVersion); + *pSupportedVersion = vk_icd_version; + return VK_SUCCESS; +} diff --git a/src/vulkan/runtime/vk_instance.h b/src/vulkan/runtime/vk_instance.h new file mode 100644 index 00000000000..f0e6a3fdb99 --- /dev/null +++ b/src/vulkan/runtime/vk_instance.h @@ -0,0 +1,253 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_INSTANCE_H +#define VK_INSTANCE_H + +#include "vk_dispatch_table.h" +#include "vk_extensions.h" +#include "vk_object.h" + +#include "c11/threads.h" +#include "util/list.h" +#include "util/u_debug.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_app_info { + /** VkApplicationInfo::pApplicationName */ + const char* app_name; + + /** VkApplicationInfo::applicationVersion */ + uint32_t app_version; + + /** VkApplicationInfo::pEngineName */ + const char* engine_name; + + /** VkApplicationInfo::engineVersion */ + uint32_t engine_version; + + /** VkApplicationInfo::apiVersion or `VK_API_VERSION_1_0` + * + * If the application does not provide a `pApplicationInfo` or the + * `apiVersion` field is 0, this is set to `VK_API_VERSION_1_0`. + */ + uint32_t api_version; +}; + +struct _drmDevice; +struct vk_physical_device; + +enum vk_trace_mode { + /** Radeon Memory Visualizer */ + VK_TRACE_MODE_RMV = 1 << 0, + + /** Number of common trace modes. */ + VK_TRACE_MODE_COUNT = 1, +}; + +/** Base struct for all `VkInstance` implementations + * + * This contains data structures necessary for detecting enabled extensions, + * handling entrypoint dispatch, and implementing `vkGetInstanceProcAddr()`. + * It also contains data copied from the `VkInstanceCreateInfo` such as the + * application information. + */ +struct vk_instance { + struct vk_object_base base; + + /** Allocator used when creating this instance + * + * This is used as a fall-back for when a NULL pAllocator is passed into a + * device-level create function such as vkCreateImage(). + */ + VkAllocationCallbacks alloc; + + /** VkInstanceCreateInfo::pApplicationInfo */ + struct vk_app_info app_info; + + /** Table of all supported instance extensions + * + * This is the static const struct passed by the driver as the + * `supported_extensions` parameter to `vk_instance_init()`. + */ + const struct vk_instance_extension_table *supported_extensions; + + /** Table of all enabled instance extensions + * + * This is generated automatically as part of `vk_instance_init()` from + * VkInstanceCreateInfo::ppEnabledExtensionNames. + */ + struct vk_instance_extension_table enabled_extensions; + + /** Instance-level dispatch table */ + struct vk_instance_dispatch_table dispatch_table; + + /* VK_EXT_debug_report debug callbacks */ + struct { + mtx_t callbacks_mutex; + struct list_head callbacks; + } debug_report; + + /* VK_EXT_debug_utils */ + struct { + /* These callbacks are only used while creating or destroying an + * instance + */ + struct list_head instance_callbacks; + mtx_t callbacks_mutex; + /* Persistent callbacks */ + struct list_head callbacks; + } debug_utils; + + /** List of all physical devices and callbacks + * + * This is used for automatic physical device creation, + * deletion and enumeration. + */ + struct { + struct list_head list; + bool enumerated; + + /** Enumerate physical devices for this instance + * + * The driver can implement this callback for custom physical device + * enumeration. The returned value must be a valid return code of + * vkEnumeratePhysicalDevices. + * + * Note that the loader calls vkEnumeratePhysicalDevices of all + * installed ICDs and fails device enumeration when any of the calls + * fails. 
The driver should return VK_SUCCESS when it does not find any
+ * compatible device.
+ *
+ * If this callback is not set, try_create_for_drm will be used for
+ * enumeration.
+ */
+ VkResult (*enumerate)(struct vk_instance *instance);
+
+ /** Try to create a physical device for a drm device
+ *
+ * The returned value must be a valid return code of
+ * vkEnumeratePhysicalDevices, or VK_ERROR_INCOMPATIBLE_DRIVER. When
+ * VK_ERROR_INCOMPATIBLE_DRIVER is returned, the error and the drm
+ * device are silently ignored.
+ */
+ VkResult (*try_create_for_drm)(struct vk_instance *instance,
+ struct _drmDevice *device,
+ struct vk_physical_device **out);
+
+ /** Handle the destruction of a physical device
+ *
+ * This callback has to be implemented when using common physical device
+ * management. The device pointer and any resource allocated for the
+ * device should be freed here.
+ */
+ void (*destroy)(struct vk_physical_device *pdevice);
+
+ mtx_t mutex;
+ } physical_devices;
+
+ /** Enabled tracing modes */
+ uint64_t trace_mode;
+
+ uint32_t trace_frame;
+ char *trace_trigger_file;
+};
+
+VK_DEFINE_HANDLE_CASTS(vk_instance, base, VkInstance,
+ VK_OBJECT_TYPE_INSTANCE);
+
+/** Initialize a vk_instance
+ *
+ * Along with initializing the data structures in `vk_instance`, this function
+ * validates the Vulkan version number provided by the client and checks that
+ * every extension specified by
+ * ``VkInstanceCreateInfo::ppEnabledExtensionNames`` is actually supported by
+ * the implementation and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
+ * unsupported extension is requested.
+ *
+ * :param instance: |out| The instance to initialize
+ * :param supported_extensions: |in| Table of all instance extensions supported
+ * by this instance
+ * :param dispatch_table: |in| Instance-level dispatch table
+ * :param pCreateInfo: |in| VkInstanceCreateInfo pointer passed to
+ * `vkCreateInstance()`
+ * :param alloc: |in| Allocation callbacks used to create this
+ * instance; must not be `NULL`
+ */
+VkResult MUST_CHECK
+vk_instance_init(struct vk_instance *instance,
+ const struct vk_instance_extension_table *supported_extensions,
+ const struct vk_instance_dispatch_table *dispatch_table,
+ const VkInstanceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *alloc);
+
+/** Tears down a vk_instance
+ *
+ * :param instance: |out| The instance to tear down
+ */
+void
+vk_instance_finish(struct vk_instance *instance);
+
+/** Implementation of vkEnumerateInstanceExtensionProperties() */
+VkResult
+vk_enumerate_instance_extension_properties(
+ const struct vk_instance_extension_table *supported_extensions,
+ uint32_t *pPropertyCount,
+ VkExtensionProperties *pProperties);
+
+/** Implementation of vkGetInstanceProcAddr() */
+PFN_vkVoidFunction
+vk_instance_get_proc_addr(const struct vk_instance *instance,
+ const struct vk_instance_entrypoint_table *entrypoints,
+ const char *name);
+
+/** Unchecked version of vk_instance_get_proc_addr
+ *
+ * This is identical to `vk_instance_get_proc_addr()` except that it doesn't
+ * check whether extensions are enabled before returning function pointers.
+ * This is useful in window-system code where we may use extensions without
+ * the client explicitly enabling them.
+ */
+PFN_vkVoidFunction
+vk_instance_get_proc_addr_unchecked(const struct vk_instance *instance,
+ const char *name);
+
+/** Implementation of vk_icdGetPhysicalDeviceProcAddr() */
+PFN_vkVoidFunction
+vk_instance_get_physical_device_proc_addr(const struct vk_instance *instance,
+ const char *name);
+
+void
+vk_instance_add_driver_trace_modes(struct vk_instance *instance,
+ const struct debug_control *modes);
+
+uint32_t
+vk_get_negotiated_icd_version(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* VK_INSTANCE_H */ diff --git a/src/vulkan/runtime/vk_limits.h b/src/vulkan/runtime/vk_limits.h new file mode 100644 index 00000000000..50bfde0c0eb --- /dev/null +++ b/src/vulkan/runtime/vk_limits.h @@ -0,0 +1,99 @@
+/*
+ * Copyright © 2022 Collabora, LTD
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VK_LIMITS_H
+#define VK_LIMITS_H
+
+/* Maximum number of shader stages in a single graphics pipeline */
+#define MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES 5
+
+#define MESA_VK_MAX_DESCRIPTOR_SETS 32
+
+/* From the Vulkan 1.3.274 spec:
+ *
+ * VUID-VkPipelineLayoutCreateInfo-pPushConstantRanges-00292
+ *
+ * "Any two elements of pPushConstantRanges must not include the same
+ * stage in stageFlags"
+ *
+ * and
+ *
+ * VUID-VkPushConstantRange-stageFlags-requiredbitmask
+ *
+ * "stageFlags must not be 0"
+ *
+ * This means that the number of push constant ranges is effectively bounded
+ * by the number of possible shader stages. Not the number of stages that can
+ * be compiled together (a pipeline layout can be used in multiple pipelines
+ * with different sets of shaders) but the total number of stage bits supported
+ * by the implementation.
Currently, those are + * + * - VK_SHADER_STAGE_VERTEX_BIT + * - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT + * - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT + * - VK_SHADER_STAGE_GEOMETRY_BIT + * - VK_SHADER_STAGE_FRAGMENT_BIT + * - VK_SHADER_STAGE_COMPUTE_BIT + * - VK_SHADER_STAGE_RAYGEN_BIT_KHR + * - VK_SHADER_STAGE_ANY_HIT_BIT_KHR + * - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR + * - VK_SHADER_STAGE_MISS_BIT_KHR + * - VK_SHADER_STAGE_INTERSECTION_BIT_KHR + * - VK_SHADER_STAGE_CALLABLE_BIT_KHR + * - VK_SHADER_STAGE_TASK_BIT_EXT + * - VK_SHADER_STAGE_MESH_BIT_EXT + */ +#define MESA_VK_MAX_PUSH_CONSTANT_RANGES 14 + +#define MESA_VK_MAX_VERTEX_BINDINGS 32 +#define MESA_VK_MAX_VERTEX_ATTRIBUTES 32 + +/* As of June 29, 2022, according to vulkan.gpuinfo.org, 99% of all reports + * listed a max vertex stride that fits in 16 bits. + */ +#define MESA_VK_MAX_VERTEX_BINDING_STRIDE UINT16_MAX + +#define MESA_VK_MAX_VIEWPORTS 16 +#define MESA_VK_MAX_SCISSORS 16 +#define MESA_VK_MAX_DISCARD_RECTANGLES 4 + +/* As of June 29, 2022, according to vulkan.gpuinfo.org, no reports list more + * than 16 samples for framebufferColorSampleCounts except one layer running + * on top of WARP on Windows. + */ +#define MESA_VK_MAX_SAMPLES 16 + +/* As of June 29, 2022, according to vulkan.gpuinfo.org, the only GPUs + * claiming support for maxSampleLocationGridSize greater than 1x1 is AMD + * which supports 2x2 but only up to 8 samples. + */ +#define MESA_VK_MAX_SAMPLE_LOCATIONS 32 + +#define MESA_VK_MAX_COLOR_ATTACHMENTS 8 + +/* Since VkSubpassDescription2::viewMask is a 32-bit integer, there are a + * maximum of 32 possible views. + */ +#define MESA_VK_MAX_MULTIVIEW_VIEW_COUNT 32 + +#endif /* VK_LIMITS_H */ diff --git a/src/vulkan/runtime/vk_log.c b/src/vulkan/runtime/vk_log.c new file mode 100644 index 00000000000..afe42aedcd7 --- /dev/null +++ b/src/vulkan/runtime/vk_log.c @@ -0,0 +1,357 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_log.h" +#include "vk_debug_utils.h" +#include "vk_debug_report.h" + +#include "vk_command_buffer.h" +#include "vk_enum_to_str.h" +#include "vk_queue.h" +#include "vk_device.h" +#include "vk_physical_device.h" + +#include "util/ralloc.h" +#include "util/log.h" + +static struct vk_device * +vk_object_to_device(struct vk_object_base *obj) +{ + assert(obj->device); + return obj->device; +} + +static struct vk_physical_device * +vk_object_to_physical_device(struct vk_object_base *obj) +{ + switch (obj->type) { + case VK_OBJECT_TYPE_INSTANCE: + unreachable("Unsupported object type"); + case VK_OBJECT_TYPE_PHYSICAL_DEVICE: + return container_of(obj, struct vk_physical_device, base); + case VK_OBJECT_TYPE_SURFACE_KHR: + case VK_OBJECT_TYPE_DISPLAY_KHR: + case VK_OBJECT_TYPE_DISPLAY_MODE_KHR: + case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: + case VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT: + unreachable("Unsupported object type"); + default: + return vk_object_to_device(obj)->physical; + } +} + +static struct vk_instance * +vk_object_to_instance(struct vk_object_base *obj) +{ + if (obj == NULL) + return NULL; + + if (obj->type == VK_OBJECT_TYPE_INSTANCE) { + return container_of(obj, struct vk_instance, base); + } else { + return vk_object_to_physical_device(obj)->instance; + } +} + +void +__vk_log_impl(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT types, + int object_count, + const void **objects_or_instance, + const char *file, + int line, + const char *format, + ...) +{ + struct vk_instance *instance = NULL; + struct vk_object_base **objects = NULL; + if (object_count == 0) { + instance = (struct vk_instance *) objects_or_instance; + } else { + objects = (struct vk_object_base **) objects_or_instance; + for (unsigned i = 0; i < object_count; i++) { + if (unlikely(objects[i] == NULL)) { + mesa_logw("vk_log*() called with NULL object\n"); + continue; + } + + if (unlikely(!objects[i]->client_visible)) { + mesa_logw("vk_log*() called with client-invisible object %p " + "of type %s", objects[i], + vk_ObjectType_to_str(objects[i]->type)); + } + + if (!instance) { + instance = vk_object_to_instance(objects[i]); + assert(instance->base.client_visible); + } else { + assert(vk_object_to_instance(objects[i]) == instance); + } + break; + } + } + +#if !MESA_DEBUG + if (unlikely(!instance) || + (likely(list_is_empty(&instance->debug_utils.callbacks)) && + likely(list_is_empty(&instance->debug_report.callbacks)))) + return; +#endif + + va_list va; + char *message = NULL; + + va_start(va, format); + message = ralloc_vasprintf(NULL, format, va); + va_end(va); + + char *message_idname = ralloc_asprintf(NULL, "%s:%d", file, line); + +#if MESA_DEBUG + switch (severity) { + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: + mesa_logd("%s: %s", message_idname, message); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: + mesa_logi("%s: %s", message_idname, message); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: + if (types & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT) + mesa_logw("%s: PERF: %s", message_idname, message); + else + mesa_logw("%s: %s", message_idname, message); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: + mesa_loge("%s: %s", message_idname, message); + break; + default: + unreachable("Invalid debug message severity"); + break; + } + + if (!instance) { + ralloc_free(message); + ralloc_free(message_idname); + return; + } +#endif + + if (!instance->base.client_visible) { + 
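+ /* The instance itself is not client-visible here (we are inside instance
+ * creation or teardown), so deliver the message through the messengers
+ * chained into VkInstanceCreateInfo::pNext rather than through the
+ * regular registered callbacks.
+ */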
vk_debug_message_instance(instance, severity, types, + message_idname, 0, message); + ralloc_free(message); + ralloc_free(message_idname); + return; + } + + /* If VK_EXT_debug_utils messengers have been set up, form the + * message */ + if (!list_is_empty(&instance->debug_utils.callbacks)) { + VkDebugUtilsMessengerCallbackDataEXT cb_data = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CALLBACK_DATA_EXT, + .pMessageIdName = message_idname, + .messageIdNumber = 0, + .pMessage = message, + }; + + VkDebugUtilsObjectNameInfoEXT *object_name_infos = + ralloc_array(NULL, VkDebugUtilsObjectNameInfoEXT, object_count); + + ASSERTED int cmdbuf_n = 0, queue_n = 0, obj_n = 0; + for (int i = 0; i < object_count; i++) { + struct vk_object_base *base = objects[i]; + if (base == NULL || !base->client_visible) + continue; + + switch (base->type) { + case VK_OBJECT_TYPE_COMMAND_BUFFER: { + /* We allow at most one command buffer to be submitted at a time */ + assert(++cmdbuf_n <= 1); + struct vk_command_buffer *cmd_buffer = + (struct vk_command_buffer *)base; + if (cmd_buffer->labels.size > 0) { + cb_data.cmdBufLabelCount = util_dynarray_num_elements( + &cmd_buffer->labels, VkDebugUtilsLabelEXT); + cb_data.pCmdBufLabels = cmd_buffer->labels.data; + } + break; + } + + case VK_OBJECT_TYPE_QUEUE: { + /* We allow at most one queue to be submitted at a time */ + assert(++queue_n <= 1); + struct vk_queue *queue = (struct vk_queue *)base; + if (queue->labels.size > 0) { + cb_data.queueLabelCount = + util_dynarray_num_elements(&queue->labels, VkDebugUtilsLabelEXT); + cb_data.pQueueLabels = queue->labels.data; + } + break; + } + default: + break; + } + + object_name_infos[obj_n++] = (VkDebugUtilsObjectNameInfoEXT){ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .pNext = NULL, + .objectType = base->type, + .objectHandle = (uint64_t)(uintptr_t)base, + .pObjectName = base->object_name, + }; + } + cb_data.objectCount = obj_n; + cb_data.pObjects = object_name_infos; + + vk_debug_message(instance, severity, types, &cb_data); + + ralloc_free(object_name_infos); + } + + /* If VK_EXT_debug_report callbacks also have been set up, forward + * the message there as well */ + if (!list_is_empty(&instance->debug_report.callbacks)) { + VkDebugReportFlagsEXT flags = 0; + + switch (severity) { + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: + flags |= VK_DEBUG_REPORT_DEBUG_BIT_EXT; + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: + flags |= VK_DEBUG_REPORT_INFORMATION_BIT_EXT; + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: + if (types & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT) + flags |= VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT; + else + flags |= VK_DEBUG_REPORT_WARNING_BIT_EXT; + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: + flags |= VK_DEBUG_REPORT_ERROR_BIT_EXT; + break; + default: + unreachable("Invalid debug message severity"); + break; + } + + /* VK_EXT_debug_report-provided callback accepts only one object + * related to the message. Since they are given to us in + * decreasing order of importance, we're forwarding the first + * one. + */ + vk_debug_report(instance, flags, object_count ? 
objects[0] : NULL, 0, + 0, message_idname, message); + } + + ralloc_free(message); + ralloc_free(message_idname); +} + +static struct vk_object_base * +vk_object_for_error(struct vk_object_base *obj, VkResult error) +{ + if (obj == NULL) + return NULL; + + switch (error) { + case VK_ERROR_OUT_OF_HOST_MEMORY: + case VK_ERROR_LAYER_NOT_PRESENT: + case VK_ERROR_EXTENSION_NOT_PRESENT: + case VK_ERROR_UNKNOWN: + return &vk_object_to_instance(obj)->base; + case VK_ERROR_FEATURE_NOT_PRESENT: + return &vk_object_to_physical_device(obj)->base; + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + case VK_ERROR_MEMORY_MAP_FAILED: + case VK_ERROR_TOO_MANY_OBJECTS: + return &vk_object_to_device(obj)->base; + default: + return obj; + } +} + +VkResult +__vk_errorv(const void *_obj, VkResult error, + const char *file, int line, + const char *format, va_list va) +{ + struct vk_object_base *object = (struct vk_object_base *)_obj; + struct vk_instance *instance = vk_object_to_instance(object); + object = vk_object_for_error(object, error); + + /* If object->client_visible isn't set then the object hasn't been fully + * constructed and we shouldn't hand it back to the client. This typically + * happens if an error is thrown during object construction. This is safe + * to do as long as vk_object_base_init() has already been called. + */ + if (object && !object->client_visible) + object = NULL; + + const char *error_str = vk_Result_to_str(error); + + if (format) { + char *message = ralloc_vasprintf(NULL, format, va); + + if (object) { + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, + VK_LOG_OBJS(object), file, line, + "%s (%s)", message, error_str); + } else { + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, + VK_LOG_NO_OBJS(instance), file, line, + "%s (%s)", message, error_str); + } + + ralloc_free(message); + } else { + if (object) { + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, + VK_LOG_OBJS(object), file, line, + "%s", error_str); + } else { + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, + VK_LOG_NO_OBJS(instance), file, line, + "%s", error_str); + } + } + + return error; +} + +VkResult +__vk_errorf(const void *_obj, VkResult error, + const char *file, int line, + const char *format, ...) +{ + va_list va; + + va_start(va, format); + VkResult result = __vk_errorv(_obj, error, file, line, format, va); + va_end(va); + + return result; +} diff --git a/src/vulkan/runtime/vk_log.h b/src/vulkan/runtime/vk_log.h new file mode 100644 index 00000000000..5d0c230f2a8 --- /dev/null +++ b/src/vulkan/runtime/vk_log.h @@ -0,0 +1,97 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_instance.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* __VK_ARG_N(...) returns the number of arguments provided to it */ +#define __VK_ARG_SEQ(_1,_2,_3,_4,_5,_6,_7,_8,N,...) N +#define __VK_ARG_N(...) __VK_ARG_SEQ(__VA_ARGS__,8,7,6,5,4,3,2,1,0) + +#define VK_LOG_OBJS(...) \ + __VK_ARG_N(__VA_ARGS__), (const void*[]){__VA_ARGS__} + +#define VK_LOG_NO_OBJS(instance) 0, (const void**)instance + +#define vk_logd(objects_macro, format, ...) \ + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, \ + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, \ + objects_macro, __FILE__, __LINE__, format, ## __VA_ARGS__) + +#define vk_logi(objects_macro, format, ...) \ + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT, \ + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, \ + objects_macro, __FILE__, __LINE__, format, ## __VA_ARGS__) + +#define vk_logw(objects_macro, format, ...) \ + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \ + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, \ + objects_macro, __FILE__, __LINE__, format, ## __VA_ARGS__) + +#define vk_loge(objects_macro, format, ...) \ + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, \ + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT, \ + objects_macro, __FILE__, __LINE__, format, ## __VA_ARGS__) + +#define vk_perf(objects_macro, format, ...) \ + __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \ + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \ + objects_macro, __FILE__, __LINE__, format, ## __VA_ARGS__) + +#define __vk_log(severity, type, object_count, \ + objects_or_instance, file, line, format, ...) \ + __vk_log_impl(severity, type, object_count, objects_or_instance, \ + file, line, format, ## __VA_ARGS__) + +void PRINTFLIKE(7, 8) +__vk_log_impl(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT types, + int object_count, + const void **objects_or_instance, + const char *file, + int line, + const char *format, + ...); + +#define vk_error(obj, error) \ + __vk_errorf(obj, error, __FILE__, __LINE__, NULL) + +#define vk_errorf(obj, error, ...) \ + __vk_errorf(obj, error, __FILE__, __LINE__, __VA_ARGS__) + +VkResult +__vk_errorv(const void *_obj, VkResult error, + const char *file, int line, + const char *format, va_list va); + +VkResult PRINTFLIKE(5, 6) +__vk_errorf(const void *_obj, VkResult error, + const char *file, int line, + const char *format, ...); + +#ifdef __cplusplus +} +#endif
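A minimal usage sketch (editorial illustration, not part of the upstream patch): how driver code built on these helpers might emit a performance warning tied to a Vulkan object and return an error through vk_errorf(). The function and variable names are hypothetical, and it assumes the vk_device/vk_alloc helpers declared in vk_device.h and vk_alloc.h.

#include "vk_alloc.h"
#include "vk_device.h"
#include "vk_log.h"

static VkResult
demo_create_scratch(struct vk_device *device, size_t size, void **out)
{
   /* Performance warnings are delivered to any VK_EXT_debug_utils /
    * VK_EXT_debug_report callbacks registered on the instance (and to the
    * mesa logger in debug builds).
    */
   if (size > 64 * 1024 * 1024)
      vk_perf(VK_LOG_OBJS(&device->base),
              "scratch allocation of %zu bytes is unusually large", size);

   *out = vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (*out == NULL) {
      /* vk_errorf() logs the failure with file/line context and returns the
       * VkResult, so it can be used directly in a return statement.
       */
      return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "failed to allocate %zu bytes of scratch", size);
   }

   return VK_SUCCESS;
}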
\ No newline at end of file diff --git a/src/vulkan/runtime/vk_meta.c b/src/vulkan/runtime/vk_meta.c new file mode 100644 index 00000000000..cb7aee3ca3a --- /dev/null +++ b/src/vulkan/runtime/vk_meta.c @@ -0,0 +1,592 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_meta_private.h" + +#include "vk_command_buffer.h" +#include "vk_device.h" +#include "vk_pipeline.h" +#include "vk_util.h" + +#include "util/hash_table.h" + +#include <string.h> + +struct cache_key { + VkObjectType obj_type; + uint32_t key_size; + const void *key_data; +}; + +static struct cache_key * +cache_key_create(VkObjectType obj_type, const void *key_data, size_t key_size) +{ + assert(key_size <= UINT32_MAX); + + struct cache_key *key = malloc(sizeof(*key) + key_size); + *key = (struct cache_key) { + .obj_type = obj_type, + .key_size = key_size, + .key_data = key + 1, + }; + memcpy(key + 1, key_data, key_size); + + return key; +} + +static uint32_t +cache_key_hash(const void *_key) +{ + const struct cache_key *key = _key; + + assert(sizeof(key->obj_type) == 4); + uint32_t hash = _mesa_hash_u32(&key->obj_type); + return _mesa_hash_data_with_seed(key->key_data, key->key_size, hash); +} + +static bool +cache_key_equal(const void *_a, const void *_b) +{ + const struct cache_key *a = _a, *b = _b; + if (a->obj_type != b->obj_type || a->key_size != b->key_size) + return false; + + return memcmp(a->key_data, b->key_data, a->key_size) == 0; +} + +static void +destroy_object(struct vk_device *device, struct vk_object_base *obj) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + switch (obj->type) { + case VK_OBJECT_TYPE_BUFFER: + disp->DestroyBuffer(_device, (VkBuffer)(uintptr_t)obj, NULL); + break; + case VK_OBJECT_TYPE_IMAGE_VIEW: + disp->DestroyImageView(_device, (VkImageView)(uintptr_t)obj, NULL); + break; + case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: + disp->DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout)(uintptr_t)obj, NULL); + break; + case VK_OBJECT_TYPE_PIPELINE_LAYOUT: + disp->DestroyPipelineLayout(_device, (VkPipelineLayout)(uintptr_t)obj, NULL); + break; + case VK_OBJECT_TYPE_PIPELINE: + disp->DestroyPipeline(_device, (VkPipeline)(uintptr_t)obj, NULL); + break; + case VK_OBJECT_TYPE_SAMPLER: + disp->DestroySampler(_device, (VkSampler)(uintptr_t)obj, NULL); + break; + default: + unreachable("Unsupported object 
type"); + } +} + +VkResult +vk_meta_device_init(struct vk_device *device, + struct vk_meta_device *meta) +{ + memset(meta, 0, sizeof(*meta)); + + meta->cache = _mesa_hash_table_create(NULL, cache_key_hash, + cache_key_equal); + simple_mtx_init(&meta->cache_mtx, mtx_plain); + + meta->cmd_draw_rects = vk_meta_draw_rects; + meta->cmd_draw_volume = vk_meta_draw_volume; + + return VK_SUCCESS; +} + +void +vk_meta_device_finish(struct vk_device *device, + struct vk_meta_device *meta) +{ + hash_table_foreach(meta->cache, entry) { + free((void *)entry->key); + destroy_object(device, entry->data); + } + _mesa_hash_table_destroy(meta->cache, NULL); + simple_mtx_destroy(&meta->cache_mtx); +} + +uint64_t +vk_meta_lookup_object(struct vk_meta_device *meta, + VkObjectType obj_type, + const void *key_data, size_t key_size) +{ + assert(key_size >= sizeof(enum vk_meta_object_key_type)); + assert(*(enum vk_meta_object_key_type *)key_data != + VK_META_OBJECT_KEY_TYPE_INVALID); + + struct cache_key key = { + .obj_type = obj_type, + .key_size = key_size, + .key_data = key_data, + }; + + uint32_t hash = cache_key_hash(&key); + + simple_mtx_lock(&meta->cache_mtx); + struct hash_entry *entry = + _mesa_hash_table_search_pre_hashed(meta->cache, hash, &key); + simple_mtx_unlock(&meta->cache_mtx); + + if (entry == NULL) + return 0; + + struct vk_object_base *obj = entry->data; + assert(obj->type == obj_type); + + return (uint64_t)(uintptr_t)obj; +} + +uint64_t +vk_meta_cache_object(struct vk_device *device, + struct vk_meta_device *meta, + const void *key_data, size_t key_size, + VkObjectType obj_type, + uint64_t handle) +{ + assert(key_size >= sizeof(enum vk_meta_object_key_type)); + assert(*(enum vk_meta_object_key_type *)key_data != + VK_META_OBJECT_KEY_TYPE_INVALID); + + struct cache_key *key = cache_key_create(obj_type, key_data, key_size); + struct vk_object_base *obj = + vk_object_base_from_u64_handle(handle, obj_type); + + uint32_t hash = cache_key_hash(key); + + simple_mtx_lock(&meta->cache_mtx); + struct hash_entry *entry = + _mesa_hash_table_search_pre_hashed(meta->cache, hash, key); + if (entry == NULL) + _mesa_hash_table_insert_pre_hashed(meta->cache, hash, key, obj); + simple_mtx_unlock(&meta->cache_mtx); + + if (entry != NULL) { + /* We raced and found that object already in the cache */ + free(key); + destroy_object(device, obj); + return (uint64_t)(uintptr_t)entry->data; + } else { + /* Return the newly inserted object */ + return (uint64_t)(uintptr_t)obj; + } +} + +VkResult +vk_meta_create_sampler(struct vk_device *device, + struct vk_meta_device *meta, + const VkSamplerCreateInfo *info, + const void *key_data, size_t key_size, + VkSampler *sampler_out) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkSampler sampler; + VkResult result = disp->CreateSampler(_device, info, NULL, &sampler); + if (result != VK_SUCCESS) + return result; + + *sampler_out = (VkSampler) + vk_meta_cache_object(device, meta, key_data, key_size, + VK_OBJECT_TYPE_SAMPLER, + (uint64_t)sampler); + return VK_SUCCESS; +} + +VkResult +vk_meta_create_descriptor_set_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkDescriptorSetLayoutCreateInfo *info, + const void *key_data, size_t key_size, + VkDescriptorSetLayout *layout_out) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkDescriptorSetLayout layout; + VkResult result = 
disp->CreateDescriptorSetLayout(_device, info, + NULL, &layout); + if (result != VK_SUCCESS) + return result; + + *layout_out = (VkDescriptorSetLayout) + vk_meta_cache_object(device, meta, key_data, key_size, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, + (uint64_t)layout); + return VK_SUCCESS; +} + +static VkResult +vk_meta_get_descriptor_set_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkDescriptorSetLayoutCreateInfo *info, + const void *key_data, size_t key_size, + VkDescriptorSetLayout *layout_out) +{ + VkDescriptorSetLayout cached = + vk_meta_lookup_descriptor_set_layout(meta, key_data, key_size); + if (cached != VK_NULL_HANDLE) { + *layout_out = cached; + return VK_SUCCESS; + } + + return vk_meta_create_descriptor_set_layout(device, meta, info, + key_data, key_size, + layout_out); +} + +VkResult +vk_meta_create_pipeline_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkPipelineLayoutCreateInfo *info, + const void *key_data, size_t key_size, + VkPipelineLayout *layout_out) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkPipelineLayout layout; + VkResult result = disp->CreatePipelineLayout(_device, info, NULL, &layout); + if (result != VK_SUCCESS) + return result; + + *layout_out = (VkPipelineLayout) + vk_meta_cache_object(device, meta, key_data, key_size, + VK_OBJECT_TYPE_PIPELINE_LAYOUT, + (uint64_t)layout); + return VK_SUCCESS; +} + +VkResult +vk_meta_get_pipeline_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkDescriptorSetLayoutCreateInfo *desc_info, + const VkPushConstantRange *push_range, + const void *key_data, size_t key_size, + VkPipelineLayout *layout_out) +{ + VkPipelineLayout cached = + vk_meta_lookup_pipeline_layout(meta, key_data, key_size); + if (cached != VK_NULL_HANDLE) { + *layout_out = cached; + return VK_SUCCESS; + } + + VkDescriptorSetLayout set_layout = VK_NULL_HANDLE; + if (desc_info != NULL) { + VkResult result = + vk_meta_get_descriptor_set_layout(device, meta, desc_info, + key_data, key_size, &set_layout); + if (result != VK_SUCCESS) + return result; + } + + const VkPipelineLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = set_layout != VK_NULL_HANDLE ? 1 : 0, + .pSetLayouts = &set_layout, + .pushConstantRangeCount = push_range != NULL ? 
1 : 0, + .pPushConstantRanges = push_range, + }; + + return vk_meta_create_pipeline_layout(device, meta, &layout_info, + key_data, key_size, layout_out); +} + +static VkResult +create_rect_list_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const VkGraphicsPipelineCreateInfo *info, + VkPipeline *pipeline_out) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkGraphicsPipelineCreateInfo info_local = *info; + + /* We always configure for layered rendering for now */ + bool use_gs = meta->use_gs_for_layer; + + STACK_ARRAY(VkPipelineShaderStageCreateInfo, stages, + info->stageCount + 1 + use_gs); + uint32_t stage_count = 0; + + VkPipelineShaderStageNirCreateInfoMESA vs_nir_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, + .nir = vk_meta_draw_rects_vs_nir(meta, use_gs), + }; + stages[stage_count++] = (VkPipelineShaderStageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &vs_nir_info, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .pName = "main", + }; + + VkPipelineShaderStageNirCreateInfoMESA gs_nir_info; + if (use_gs) { + gs_nir_info = (VkPipelineShaderStageNirCreateInfoMESA) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, + .nir = vk_meta_draw_rects_gs_nir(meta), + }; + stages[stage_count++] = (VkPipelineShaderStageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &gs_nir_info, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .pName = "main", + }; + } + + for (uint32_t i = 0; i < info->stageCount; i++) { + assert(info->pStages[i].stage != VK_SHADER_STAGE_VERTEX_BIT); + if (use_gs) + assert(info->pStages[i].stage != VK_SHADER_STAGE_GEOMETRY_BIT); + stages[stage_count++] = info->pStages[i]; + } + + info_local.stageCount = stage_count; + info_local.pStages = stages; + info_local.pVertexInputState = &vk_meta_draw_rects_vi_state; + info_local.pViewportState = &vk_meta_draw_rects_vs_state; + + uint32_t dyn_count = info->pDynamicState != NULL ? 
+      info->pDynamicState->dynamicStateCount : 0;
+
+   STACK_ARRAY(VkDynamicState, dyn_state, dyn_count + 2);
+   for (uint32_t i = 0; i < dyn_count; i++)
+      dyn_state[i] = info->pDynamicState->pDynamicStates[i];
+
+   dyn_state[dyn_count + 0] = VK_DYNAMIC_STATE_VIEWPORT;
+   dyn_state[dyn_count + 1] = VK_DYNAMIC_STATE_SCISSOR;
+
+   const VkPipelineDynamicStateCreateInfo dyn_info = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+      .dynamicStateCount = dyn_count + 2,
+      .pDynamicStates = dyn_state,
+   };
+
+   info_local.pDynamicState = &dyn_info;
+
+   VkResult result = disp->CreateGraphicsPipelines(_device, VK_NULL_HANDLE,
+                                                   1, &info_local, NULL,
+                                                   pipeline_out);
+
+   STACK_ARRAY_FINISH(dyn_state);
+   STACK_ARRAY_FINISH(stages);
+
+   return result;
+}
+
+static const VkPipelineRasterizationStateCreateInfo default_rs_info = {
+   .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+   .depthClampEnable = false,
+   .depthBiasEnable = false,
+   .polygonMode = VK_POLYGON_MODE_FILL,
+   .cullMode = VK_CULL_MODE_NONE,
+   .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+};
+
+static const VkPipelineDepthStencilStateCreateInfo default_ds_info = {
+   .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+   .depthTestEnable = false,
+   .depthBoundsTestEnable = false,
+   .stencilTestEnable = false,
+};
+
+VkResult
+vk_meta_create_graphics_pipeline(struct vk_device *device,
+                                 struct vk_meta_device *meta,
+                                 const VkGraphicsPipelineCreateInfo *info,
+                                 const struct vk_meta_rendering_info *render,
+                                 const void *key_data, size_t key_size,
+                                 VkPipeline *pipeline_out)
+{
+   const struct vk_device_dispatch_table *disp = &device->dispatch_table;
+   VkDevice _device = vk_device_to_handle(device);
+   VkResult result;
+
+   VkGraphicsPipelineCreateInfo info_local = *info;
+
+   /* Add in the rendering info */
+   VkPipelineRenderingCreateInfo r_info = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
+      .viewMask = render->view_mask,
+      .colorAttachmentCount = render->color_attachment_count,
+      .pColorAttachmentFormats = render->color_attachment_formats,
+      .depthAttachmentFormat = render->depth_attachment_format,
+      .stencilAttachmentFormat = render->stencil_attachment_format,
+   };
+   __vk_append_struct(&info_local, &r_info);
+
+   /* Assume rectangle pipelines */
+   if (info_local.pInputAssemblyState == NULL)
+      info_local.pInputAssemblyState = &vk_meta_draw_rects_ia_state;
+
+   if (info_local.pRasterizationState == NULL)
+      info_local.pRasterizationState = &default_rs_info;
+
+   VkPipelineMultisampleStateCreateInfo ms_info;
+   if (info_local.pMultisampleState == NULL) {
+      ms_info = (VkPipelineMultisampleStateCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+         .rasterizationSamples = render->samples,
+      };
+      info_local.pMultisampleState = &ms_info;
+   }
+
+   if (info_local.pDepthStencilState == NULL)
+      info_local.pDepthStencilState = &default_ds_info;
+
+   VkPipelineColorBlendStateCreateInfo cb_info;
+   VkPipelineColorBlendAttachmentState cb_att[MESA_VK_MAX_COLOR_ATTACHMENTS];
+   if (info_local.pColorBlendState == NULL) {
+      for (uint32_t i = 0; i < render->color_attachment_count; i++) {
+         cb_att[i] = (VkPipelineColorBlendAttachmentState) {
+            .blendEnable = false,
+            .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+                              VK_COLOR_COMPONENT_G_BIT |
+                              VK_COLOR_COMPONENT_B_BIT |
+                              VK_COLOR_COMPONENT_A_BIT,
+         };
+      }
+      cb_info = (VkPipelineColorBlendStateCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+         .attachmentCount = render->color_attachment_count,
+
.pAttachments = cb_att, + }; + info_local.pColorBlendState = &cb_info; + } + + VkPipeline pipeline; + if (info_local.pInputAssemblyState->topology == + VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA) { + result = create_rect_list_pipeline(device, meta, + &info_local, + &pipeline); + } else { + result = disp->CreateGraphicsPipelines(_device, VK_NULL_HANDLE, + 1, &info_local, + NULL, &pipeline); + } + if (unlikely(result != VK_SUCCESS)) + return result; + + *pipeline_out = (VkPipeline)vk_meta_cache_object(device, meta, + key_data, key_size, + VK_OBJECT_TYPE_PIPELINE, + (uint64_t)pipeline); + return VK_SUCCESS; +} + +VkResult +vk_meta_create_compute_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const VkComputePipelineCreateInfo *info, + const void *key_data, size_t key_size, + VkPipeline *pipeline_out) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkPipeline pipeline; + VkResult result = disp->CreateComputePipelines(_device, VK_NULL_HANDLE, + 1, info, NULL, &pipeline); + if (result != VK_SUCCESS) + return result; + + *pipeline_out = (VkPipeline)vk_meta_cache_object(device, meta, + key_data, key_size, + VK_OBJECT_TYPE_PIPELINE, + (uint64_t)pipeline); + return VK_SUCCESS; +} + +void +vk_meta_object_list_init(struct vk_meta_object_list *mol) +{ + util_dynarray_init(&mol->arr, NULL); +} + +void +vk_meta_object_list_reset(struct vk_device *device, + struct vk_meta_object_list *mol) +{ + util_dynarray_foreach(&mol->arr, struct vk_object_base *, obj) + destroy_object(device, *obj); + + util_dynarray_clear(&mol->arr); +} + +void +vk_meta_object_list_finish(struct vk_device *device, + struct vk_meta_object_list *mol) +{ + vk_meta_object_list_reset(device, mol); + util_dynarray_fini(&mol->arr); +} + +VkResult +vk_meta_create_buffer(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkBufferCreateInfo *info, + VkBuffer *buffer_out) +{ + struct vk_device *device = cmd->base.device; + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkResult result = disp->CreateBuffer(_device, info, NULL, buffer_out); + if (unlikely(result != VK_SUCCESS)) + return result; + + vk_meta_object_list_add_handle(&cmd->meta_objects, + VK_OBJECT_TYPE_BUFFER, + (uint64_t)*buffer_out); + return VK_SUCCESS; +} + +VkResult +vk_meta_create_image_view(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkImageViewCreateInfo *info, + VkImageView *image_view_out) +{ + struct vk_device *device = cmd->base.device; + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + VkResult result = disp->CreateImageView(_device, info, NULL, image_view_out); + if (unlikely(result != VK_SUCCESS)) + return result; + + vk_meta_object_list_add_handle(&cmd->meta_objects, + VK_OBJECT_TYPE_IMAGE_VIEW, + (uint64_t)*image_view_out); + return VK_SUCCESS; +} diff --git a/src/vulkan/runtime/vk_meta.h b/src/vulkan/runtime/vk_meta.h new file mode 100644 index 00000000000..dd113b0ea13 --- /dev/null +++ b/src/vulkan/runtime/vk_meta.h @@ -0,0 +1,299 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_META_H +#define VK_META_H + +#include "vk_limits.h" +#include "vk_object.h" + +#include "util/simple_mtx.h" +#include "util/u_dynarray.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct hash_table; +struct vk_command_buffer; +struct vk_device; +struct vk_image; + +struct vk_meta_rect { + uint32_t x0, y0, x1, y1; + float z; + uint32_t layer; +}; + +#define VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA (VkPrimitiveTopology)11 + +struct vk_meta_device { + struct hash_table *cache; + simple_mtx_t cache_mtx; + + uint32_t max_bind_map_buffer_size_B; + bool use_layered_rendering; + bool use_gs_for_layer; + bool use_stencil_export; + + VkResult (*cmd_bind_map_buffer)(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + VkBuffer buffer, + void **map_out); + + void (*cmd_draw_rects)(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + uint32_t rect_count, + const struct vk_meta_rect *rects); + + void (*cmd_draw_volume)(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const struct vk_meta_rect *rect, + uint32_t layer_count); +}; + +VkResult vk_meta_device_init(struct vk_device *device, + struct vk_meta_device *meta); +void vk_meta_device_finish(struct vk_device *device, + struct vk_meta_device *meta); + +/** Keys should start with one of these to ensure uniqueness */ +enum vk_meta_object_key_type { + VK_META_OBJECT_KEY_TYPE_INVALID = 0, + VK_META_OBJECT_KEY_CLEAR_PIPELINE, + VK_META_OBJECT_KEY_BLIT_PIPELINE, + VK_META_OBJECT_KEY_BLIT_SAMPLER, +}; + +uint64_t vk_meta_lookup_object(struct vk_meta_device *meta, + VkObjectType obj_type, + const void *key_data, size_t key_size); + +uint64_t vk_meta_cache_object(struct vk_device *device, + struct vk_meta_device *meta, + const void *key_data, size_t key_size, + VkObjectType obj_type, + uint64_t handle); + +static inline VkDescriptorSetLayout +vk_meta_lookup_descriptor_set_layout(struct vk_meta_device *meta, + const void *key_data, size_t key_size) +{ + return (VkDescriptorSetLayout) + vk_meta_lookup_object(meta, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, + key_data, key_size); +} + +static inline VkPipelineLayout +vk_meta_lookup_pipeline_layout(struct vk_meta_device *meta, + const void *key_data, size_t key_size) +{ + return (VkPipelineLayout) + vk_meta_lookup_object(meta, VK_OBJECT_TYPE_PIPELINE_LAYOUT, + key_data, key_size); +} + +static inline VkPipeline +vk_meta_lookup_pipeline(struct vk_meta_device *meta, + const void *key_data, size_t key_size) +{ + return (VkPipeline)vk_meta_lookup_object(meta, VK_OBJECT_TYPE_PIPELINE, + key_data, key_size); +} + +static inline VkSampler +vk_meta_lookup_sampler(struct vk_meta_device *meta, + const void *key_data, size_t key_size) +{ + return 
(VkSampler)vk_meta_lookup_object(meta, VK_OBJECT_TYPE_SAMPLER, + key_data, key_size); +} + +struct vk_meta_rendering_info { + uint32_t view_mask; + uint32_t samples; + uint32_t color_attachment_count; + VkFormat color_attachment_formats[MESA_VK_MAX_COLOR_ATTACHMENTS]; + VkFormat depth_attachment_format; + VkFormat stencil_attachment_format; +}; + +VkResult +vk_meta_create_descriptor_set_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkDescriptorSetLayoutCreateInfo *info, + const void *key_data, size_t key_size, + VkDescriptorSetLayout *layout_out); + +VkResult +vk_meta_create_pipeline_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkPipelineLayoutCreateInfo *info, + const void *key_data, size_t key_size, + VkPipelineLayout *layout_out); + +VkResult +vk_meta_get_pipeline_layout(struct vk_device *device, + struct vk_meta_device *meta, + const VkDescriptorSetLayoutCreateInfo *desc_info, + const VkPushConstantRange *push_range, + const void *key_data, size_t key_size, + VkPipelineLayout *layout_out); + +VkResult +vk_meta_create_graphics_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const VkGraphicsPipelineCreateInfo *info, + const struct vk_meta_rendering_info *render, + const void *key_data, size_t key_size, + VkPipeline *pipeline_out); + +VkResult +vk_meta_create_compute_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const VkComputePipelineCreateInfo *info, + const void *key_data, size_t key_size, + VkPipeline *pipeline_out); + +VkResult +vk_meta_create_sampler(struct vk_device *device, + struct vk_meta_device *meta, + const VkSamplerCreateInfo *info, + const void *key_data, size_t key_size, + VkSampler *sampler_out); + +struct vk_meta_object_list { + struct util_dynarray arr; +}; + +void vk_meta_object_list_init(struct vk_meta_object_list *mol); +void vk_meta_object_list_reset(struct vk_device *device, + struct vk_meta_object_list *mol); +void vk_meta_object_list_finish(struct vk_device *device, + struct vk_meta_object_list *mol); + +static inline void +vk_meta_object_list_add_obj(struct vk_meta_object_list *mol, + struct vk_object_base *obj) +{ + util_dynarray_append(&mol->arr, struct vk_object_base *, obj); +} + +static inline void +vk_meta_object_list_add_handle(struct vk_meta_object_list *mol, + VkObjectType obj_type, + uint64_t handle) +{ + vk_meta_object_list_add_obj(mol, + vk_object_base_from_u64_handle(handle, obj_type)); +} + +VkResult vk_meta_create_buffer(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkBufferCreateInfo *info, + VkBuffer *buffer_out); +VkResult vk_meta_create_image_view(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkImageViewCreateInfo *info, + VkImageView *image_view_out); + +void vk_meta_draw_rects(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + uint32_t rect_count, + const struct vk_meta_rect *rects); + +void vk_meta_draw_volume(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const struct vk_meta_rect *rect, + uint32_t layer_count); + +void vk_meta_clear_attachments(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const struct vk_meta_rendering_info *render, + uint32_t attachment_count, + const VkClearAttachment *attachments, + uint32_t rect_count, + const VkClearRect *rects); + +void vk_meta_clear_rendering(struct vk_meta_device *meta, + struct vk_command_buffer *cmd, + const VkRenderingInfo *pRenderingInfo); + +void vk_meta_clear_color_image(struct vk_command_buffer *cmd, + 
struct vk_meta_device *meta, + struct vk_image *image, + VkImageLayout image_layout, + VkFormat format, + const VkClearColorValue *color, + uint32_t range_count, + const VkImageSubresourceRange *ranges); + +void vk_meta_clear_depth_stencil_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *image, + VkImageLayout image_layout, + const VkClearDepthStencilValue *depth_stencil, + uint32_t range_count, + const VkImageSubresourceRange *ranges); + +void vk_meta_blit_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *src_image, + VkFormat src_format, + VkImageLayout src_image_layout, + struct vk_image *dst_image, + VkFormat dst_format, + VkImageLayout dst_image_layout, + uint32_t region_count, + const VkImageBlit2 *regions, + VkFilter filter); + +void vk_meta_blit_image2(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkBlitImageInfo2 *blit); + +void vk_meta_resolve_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *src_image, + VkFormat src_format, + VkImageLayout src_image_layout, + struct vk_image *dst_image, + VkFormat dst_format, + VkImageLayout dst_image_layout, + uint32_t region_count, + const VkImageResolve2 *regions, + VkResolveModeFlagBits resolve_mode, + VkResolveModeFlagBits stencil_resolve_mode); + +void vk_meta_resolve_image2(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkResolveImageInfo2 *resolve); + +void vk_meta_resolve_rendering(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkRenderingInfo *pRenderingInfo); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_META_H */ diff --git a/src/vulkan/runtime/vk_meta_blit_resolve.c b/src/vulkan/runtime/vk_meta_blit_resolve.c new file mode 100644 index 00000000000..955f1544df0 --- /dev/null +++ b/src/vulkan/runtime/vk_meta_blit_resolve.c @@ -0,0 +1,1013 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_meta_private.h" + +#include "vk_command_buffer.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_pipeline.h" + +#include "nir_builder.h" + +struct vk_meta_blit_key { + enum vk_meta_object_key_type key_type; + enum glsl_sampler_dim dim; + VkSampleCountFlagBits src_samples; + VkResolveModeFlagBits resolve_mode; + VkResolveModeFlagBits stencil_resolve_mode; + bool stencil_as_discard; + VkFormat dst_format; + VkImageAspectFlags aspects; +}; + +static enum glsl_sampler_dim +vk_image_sampler_dim(const struct vk_image *image) +{ + switch (image->image_type) { + case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D; + case VK_IMAGE_TYPE_2D: + if (image->samples > 1) + return GLSL_SAMPLER_DIM_MS; + else + return GLSL_SAMPLER_DIM_2D; + case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D; + default: unreachable("Invalid image type"); + } +} + +enum blit_desc_binding { + BLIT_DESC_BINDING_SAMPLER, + BLIT_DESC_BINDING_COLOR, + BLIT_DESC_BINDING_DEPTH, + BLIT_DESC_BINDING_STENCIL, +}; + +static enum blit_desc_binding +aspect_to_tex_binding(VkImageAspectFlagBits aspect) +{ + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: return BLIT_DESC_BINDING_COLOR; + case VK_IMAGE_ASPECT_DEPTH_BIT: return BLIT_DESC_BINDING_DEPTH; + case VK_IMAGE_ASPECT_STENCIL_BIT: return BLIT_DESC_BINDING_STENCIL; + default: unreachable("Unsupported aspect"); + } +} + +struct vk_meta_blit_push_data { + float x_off, y_off, x_scale, y_scale; + float z_off, z_scale; + int32_t arr_delta; + uint32_t stencil_bit; +}; + +static inline void +compute_off_scale(uint32_t src_level_size, + uint32_t src0, uint32_t src1, + uint32_t dst0, uint32_t dst1, + uint32_t *dst0_out, uint32_t *dst1_out, + float *off_out, float *scale_out) +{ + assert(src0 <= src_level_size && src1 <= src_level_size); + + if (dst0 < dst1) { + *dst0_out = dst0; + *dst1_out = dst1; + } else { + *dst0_out = dst1; + *dst1_out = dst0; + + /* Flip the source region */ + uint32_t tmp = src0; + src0 = src1; + src1 = tmp; + } + + double src_region_size = (double)src1 - (double)src0; + assert(src_region_size != 0); + + double dst_region_size = (double)*dst1_out - (double)*dst0_out; + assert(dst_region_size > 0); + + double src_offset = src0 / (double)src_level_size; + double dst_scale = src_region_size / (src_level_size * dst_region_size); + double dst_offset = (double)*dst0_out * dst_scale; + + *off_out = src_offset - dst_offset; + *scale_out = dst_scale; +} + +static inline nir_def * +load_struct_var(nir_builder *b, nir_variable *var, uint32_t field) +{ + nir_deref_instr *deref = + nir_build_deref_struct(b, nir_build_deref_var(b, var), field); + return nir_load_deref(b, deref); +} + +static nir_def * +build_tex_resolve(nir_builder *b, nir_deref_instr *t, + nir_def *coord, + VkSampleCountFlagBits samples, + VkResolveModeFlagBits resolve_mode) +{ + nir_def *accum = nir_txf_ms_deref(b, t, coord, nir_imm_int(b, 0)); + if (resolve_mode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) + return accum; + + const enum glsl_base_type base_type = + glsl_get_sampler_result_type(t->type); + + for (unsigned i = 1; i < samples; i++) { + nir_def *val = nir_txf_ms_deref(b, t, coord, nir_imm_int(b, i)); + switch (resolve_mode) { + case VK_RESOLVE_MODE_AVERAGE_BIT: + assert(base_type == GLSL_TYPE_FLOAT); + accum = nir_fadd(b, accum, val); + break; + + case VK_RESOLVE_MODE_MIN_BIT: + switch (base_type) { + case GLSL_TYPE_UINT: + accum = nir_umin(b, accum, val); + break; + case GLSL_TYPE_INT: + accum = nir_imin(b, accum, val); + break; + case GLSL_TYPE_FLOAT: + accum = nir_fmin(b, 
accum, val); + break; + default: + unreachable("Invalid sample result type"); + } + break; + + case VK_RESOLVE_MODE_MAX_BIT: + switch (base_type) { + case GLSL_TYPE_UINT: + accum = nir_umax(b, accum, val); + break; + case GLSL_TYPE_INT: + accum = nir_imax(b, accum, val); + break; + case GLSL_TYPE_FLOAT: + accum = nir_fmax(b, accum, val); + break; + default: + unreachable("Invalid sample result type"); + } + break; + + default: + unreachable("Unsupported resolve mode"); + } + } + + if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT) + accum = nir_fmul_imm(b, accum, 1.0 / samples); + + return accum; +} + +static nir_shader * +build_blit_shader(const struct vk_meta_blit_key *key) +{ + nir_builder build; + if (key->resolve_mode || key->stencil_resolve_mode) { + build = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, + "vk-meta-resolve"); + } else { + build = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, + NULL, "vk-meta-blit"); + } + nir_builder *b = &build; + + struct glsl_struct_field push_fields[] = { + { .type = glsl_vec4_type(), .name = "xy_xform", .offset = 0 }, + { .type = glsl_vec4_type(), .name = "z_xform", .offset = 16 }, + }; + const struct glsl_type *push_iface_type = + glsl_interface_type(push_fields, ARRAY_SIZE(push_fields), + GLSL_INTERFACE_PACKING_STD140, + false /* row_major */, "push"); + nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, + push_iface_type, "push"); + + nir_def *xy_xform = load_struct_var(b, push, 0); + nir_def *xy_off = nir_channels(b, xy_xform, 3 << 0); + nir_def *xy_scale = nir_channels(b, xy_xform, 3 << 2); + + nir_def *out_coord_xy = nir_load_frag_coord(b); + out_coord_xy = nir_trim_vector(b, out_coord_xy, 2); + nir_def *src_coord_xy = nir_ffma(b, out_coord_xy, xy_scale, xy_off); + + nir_def *z_xform = load_struct_var(b, push, 1); + nir_def *out_layer = nir_load_layer_id(b); + nir_def *src_coord; + if (key->dim == GLSL_SAMPLER_DIM_3D) { + nir_def *z_off = nir_channel(b, z_xform, 0); + nir_def *z_scale = nir_channel(b, z_xform, 1); + nir_def *out_coord_z = nir_fadd_imm(b, nir_u2f32(b, out_layer), 0.5); + nir_def *src_coord_z = nir_ffma(b, out_coord_z, z_scale, z_off); + src_coord = nir_vec3(b, nir_channel(b, src_coord_xy, 0), + nir_channel(b, src_coord_xy, 1), + src_coord_z); + } else { + nir_def *arr_delta = nir_channel(b, z_xform, 2); + nir_def *in_layer = nir_iadd(b, out_layer, arr_delta); + if (key->dim == GLSL_SAMPLER_DIM_1D) { + src_coord = nir_vec2(b, nir_channel(b, src_coord_xy, 0), + nir_u2f32(b, in_layer)); + } else { + assert(key->dim == GLSL_SAMPLER_DIM_2D || + key->dim == GLSL_SAMPLER_DIM_MS); + src_coord = nir_vec3(b, nir_channel(b, src_coord_xy, 0), + nir_channel(b, src_coord_xy, 1), + nir_u2f32(b, in_layer)); + } + } + + nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, + glsl_bare_sampler_type(), NULL); + sampler->data.descriptor_set = 0; + sampler->data.binding = BLIT_DESC_BINDING_SAMPLER; + nir_deref_instr *s = nir_build_deref_var(b, sampler); + + u_foreach_bit(a, key->aspects) { + VkImageAspectFlagBits aspect = (1 << a); + + enum glsl_base_type base_type; + unsigned out_location, out_comps; + const char *tex_name, *out_name; + VkResolveModeFlagBits resolve_mode; + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + tex_name = "color_tex"; + if (vk_format_is_sint(key->dst_format)) + base_type = GLSL_TYPE_INT; + else if (vk_format_is_uint(key->dst_format)) + base_type = GLSL_TYPE_UINT; + else + base_type = GLSL_TYPE_FLOAT; + resolve_mode = key->resolve_mode; + out_name = 
"gl_FragData[0]"; + out_location = FRAG_RESULT_DATA0; + out_comps = 4; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + tex_name = "depth_tex"; + base_type = GLSL_TYPE_FLOAT; + resolve_mode = key->resolve_mode; + out_name = "gl_FragDepth"; + out_location = FRAG_RESULT_DEPTH; + out_comps = 1; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + tex_name = "stencil_tex"; + base_type = GLSL_TYPE_UINT; + resolve_mode = key->stencil_resolve_mode; + out_name = "gl_FragStencilRef"; + out_location = FRAG_RESULT_STENCIL; + out_comps = 1; + break; + default: + unreachable("Unsupported aspect"); + } + + const bool is_array = key->dim != GLSL_SAMPLER_DIM_3D; + const struct glsl_type *texture_type = + glsl_sampler_type(key->dim, false, is_array, base_type); + nir_variable *texture = nir_variable_create(b->shader, nir_var_uniform, + texture_type, tex_name); + texture->data.descriptor_set = 0; + texture->data.binding = aspect_to_tex_binding(aspect); + nir_deref_instr *t = nir_build_deref_var(b, texture); + + nir_def *val; + if (resolve_mode == VK_RESOLVE_MODE_NONE) { + val = nir_txl_deref(b, t, s, src_coord, nir_imm_float(b, 0)); + } else { + val = build_tex_resolve(b, t, nir_f2u32(b, src_coord), + key->src_samples, resolve_mode); + } + val = nir_trim_vector(b, val, out_comps); + + if (key->stencil_as_discard) { + assert(key->aspects == VK_IMAGE_ASPECT_STENCIL_BIT); + nir_def *stencil_bit = nir_channel(b, z_xform, 3); + nir_discard_if(b, nir_ieq(b, nir_iand(b, val, stencil_bit), + nir_imm_int(b, 0))); + } else { + const struct glsl_type *out_type = + glsl_vector_type(base_type, out_comps); + nir_variable *out = nir_variable_create(b->shader, nir_var_shader_out, + out_type, out_name); + out->data.location = out_location; + + nir_store_var(b, out, val, BITFIELD_MASK(out_comps)); + } + } + + return b->shader; +} + +static VkResult +get_blit_pipeline_layout(struct vk_device *device, + struct vk_meta_device *meta, + VkPipelineLayout *layout_out) +{ + const char key[] = "vk-meta-blit-pipeline-layout"; + + const VkDescriptorSetLayoutBinding bindings[] = {{ + .binding = BLIT_DESC_BINDING_SAMPLER, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, { + .binding = BLIT_DESC_BINDING_COLOR, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, { + .binding = BLIT_DESC_BINDING_DEPTH, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, { + .binding = BLIT_DESC_BINDING_STENCIL, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }}; + + const VkDescriptorSetLayoutCreateInfo desc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + + const VkPushConstantRange push_range = { + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .offset = 0, + .size = sizeof(struct vk_meta_blit_push_data), + }; + + return vk_meta_get_pipeline_layout(device, meta, &desc_info, &push_range, + key, sizeof(key), layout_out); +} + +static VkResult +get_blit_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const struct vk_meta_blit_key *key, + VkPipelineLayout layout, + VkPipeline *pipeline_out) +{ + VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key)); + if (from_cache != 
VK_NULL_HANDLE) { + *pipeline_out = from_cache; + return VK_SUCCESS; + } + + const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, + .nir = build_blit_shader(key), + }; + const VkPipelineShaderStageCreateInfo fs_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &fs_nir_info, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .pName = "main", + }; + + VkPipelineDepthStencilStateCreateInfo ds_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + }; + VkDynamicState dyn_tmp; + VkPipelineDynamicStateCreateInfo dyn_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + }; + struct vk_meta_rendering_info render = { + .samples = 1, + }; + if (key->aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + render.color_attachment_count = 1; + render.color_attachment_formats[0] = key->dst_format; + } + if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + ds_info.depthTestEnable = VK_TRUE; + ds_info.depthWriteEnable = VK_TRUE; + ds_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + render.depth_attachment_format = key->dst_format; + } + if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ds_info.stencilTestEnable = VK_TRUE; + ds_info.front.compareOp = VK_COMPARE_OP_ALWAYS; + ds_info.front.passOp = VK_STENCIL_OP_REPLACE; + ds_info.front.compareMask = ~0u; + ds_info.front.writeMask = ~0u; + ds_info.front.reference = ~0; + ds_info.back = ds_info.front; + if (key->stencil_as_discard) { + dyn_tmp = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + dyn_info.dynamicStateCount = 1; + dyn_info.pDynamicStates = &dyn_tmp; + } + render.stencil_attachment_format = key->dst_format; + } + + const VkGraphicsPipelineCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 1, + .pStages = &fs_info, + .pDepthStencilState = &ds_info, + .pDynamicState = &dyn_info, + .layout = layout, + }; + + VkResult result = vk_meta_create_graphics_pipeline(device, meta, &info, + &render, + key, sizeof(*key), + pipeline_out); + ralloc_free(fs_nir_info.nir); + + return result; +} + +static VkResult +get_blit_sampler(struct vk_device *device, + struct vk_meta_device *meta, + VkFilter filter, + VkSampler *sampler_out) +{ + struct { + enum vk_meta_object_key_type key_type; + VkFilter filter; + } key; + + memset(&key, 0, sizeof(key)); + key.key_type = VK_META_OBJECT_KEY_BLIT_SAMPLER; + key.filter = filter; + + VkSampler from_cache = vk_meta_lookup_sampler(meta, &key, sizeof(key)); + if (from_cache != VK_NULL_HANDLE) { + *sampler_out = from_cache; + return VK_SUCCESS; + } + + const VkSamplerCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = filter, + .minFilter = filter, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .unnormalizedCoordinates = VK_FALSE, + }; + + return vk_meta_create_sampler(device, meta, &info, + &key, sizeof(key), sampler_out); +} + +static void +do_blit(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *src_image, + VkFormat src_format, + VkImageLayout src_image_layout, + VkImageSubresourceLayers src_subres, + struct vk_image *dst_image, + VkFormat dst_format, + VkImageLayout dst_image_layout, + VkImageSubresourceLayers dst_subres, + VkSampler sampler, + struct vk_meta_blit_key *key, + struct vk_meta_blit_push_data *push, + const 
struct vk_meta_rect *dst_rect, + uint32_t dst_layer_count) +{ + struct vk_device *device = cmd->base.device; + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkResult result; + + VkPipelineLayout pipeline_layout; + result = get_blit_pipeline_layout(device, meta, &pipeline_layout); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + uint32_t desc_count = 0; + VkDescriptorImageInfo image_infos[3]; + VkWriteDescriptorSet desc_writes[3]; + + if (sampler != VK_NULL_HANDLE) { + image_infos[desc_count] = (VkDescriptorImageInfo) { + .sampler = sampler, + }; + desc_writes[desc_count] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = BLIT_DESC_BINDING_SAMPLER, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[desc_count], + }; + desc_count++; + } + + u_foreach_bit(a, src_subres.aspectMask) { + VkImageAspectFlagBits aspect = (1 << a); + + VkImageView src_view; + const VkImageViewUsageCreateInfo src_view_usage = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + }; + const VkImageViewCreateInfo src_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &src_view_usage, + .image = vk_image_to_handle(src_image), + .viewType = vk_image_sampled_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = src_subres.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_subres.baseArrayLayer, + .layerCount = src_subres.layerCount, + }, + }; + result = vk_meta_create_image_view(cmd, meta, &src_view_info, + &src_view); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + assert(desc_count < ARRAY_SIZE(image_infos)); + assert(desc_count < ARRAY_SIZE(desc_writes)); + image_infos[desc_count] = (VkDescriptorImageInfo) { + .imageView = src_view, + }; + desc_writes[desc_count] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = aspect_to_tex_binding(aspect), + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .pImageInfo = &image_infos[desc_count], + }; + desc_count++; + } + + disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, + desc_count, desc_writes); + + assert(dst_subres.aspectMask == src_subres.aspectMask); + VkImageAspectFlags aspects_left = dst_subres.aspectMask; + + while (aspects_left) { + key->aspects = aspects_left; + + /* If we need to write stencil via iterative discard, it has to be + * written by itself because otherwise the discards would also throw + * away color or depth data. + */ + if ((key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + key->aspects != VK_IMAGE_ASPECT_STENCIL_BIT && + !meta->use_stencil_export) + key->aspects &= ~VK_IMAGE_ASPECT_STENCIL_BIT; + + key->stencil_as_discard = key->aspects == VK_IMAGE_ASPECT_STENCIL_BIT && + !meta->use_stencil_export; + + VkImageView dst_view; + const VkImageViewUsageCreateInfo dst_view_usage = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = (key->aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? 
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + }; + const VkImageViewCreateInfo dst_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &dst_view_usage, + .image = vk_image_to_handle(dst_image), + .viewType = vk_image_sampled_view_type(dst_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = dst_subres.aspectMask, + .baseMipLevel = dst_subres.mipLevel, + .levelCount = 1, + .baseArrayLayer = dst_subres.baseArrayLayer, + .layerCount = dst_subres.layerCount, + }, + }; + result = vk_meta_create_image_view(cmd, meta, &dst_view_info, + &dst_view); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + const VkRenderingAttachmentInfo vk_att = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = dst_view, + .imageLayout = dst_image_layout, + .loadOp = key->stencil_as_discard ? VK_ATTACHMENT_LOAD_OP_CLEAR : + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + }; + VkRenderingInfo vk_render = { + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .renderArea = { + .offset = { + dst_rect->x0, + dst_rect->y0 + }, + .extent = { + dst_rect->x1 - dst_rect->x0, + dst_rect->y1 - dst_rect->y0 + }, + }, + .layerCount = dst_rect->layer + dst_layer_count, + }; + + if (key->aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + vk_render.colorAttachmentCount = 1; + vk_render.pColorAttachments = &vk_att; + } + if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + vk_render.pDepthAttachment = &vk_att; + if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + vk_render.pStencilAttachment = &vk_att; + + disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd), &vk_render); + + VkPipeline pipeline; + result = get_blit_pipeline(device, meta, key, + pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + if (key->stencil_as_discard) { + for (uint32_t i = 0; i < 8; i++) { + push->stencil_bit = BITFIELD_BIT(i); + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), + pipeline_layout, + VK_SHADER_STAGE_FRAGMENT_BIT, + 0, sizeof(*push), push); + + disp->CmdSetStencilWriteMask(vk_command_buffer_to_handle(cmd), + VK_STENCIL_FACE_FRONT_AND_BACK, + push->stencil_bit); + + meta->cmd_draw_volume(cmd, meta, dst_rect, dst_layer_count); + } + } else { + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), + pipeline_layout, + VK_SHADER_STAGE_FRAGMENT_BIT, + 0, sizeof(*push), push); + + meta->cmd_draw_volume(cmd, meta, dst_rect, dst_layer_count); + } + + disp->CmdEndRendering(vk_command_buffer_to_handle(cmd)); + + aspects_left &= ~key->aspects; + } +} + +void +vk_meta_blit_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *src_image, + VkFormat src_format, + VkImageLayout src_image_layout, + struct vk_image *dst_image, + VkFormat dst_format, + VkImageLayout dst_image_layout, + uint32_t region_count, + const VkImageBlit2 *regions, + VkFilter filter) +{ + struct vk_device *device = cmd->base.device; + VkResult result; + + VkSampler sampler; + result = get_blit_sampler(device, meta, filter, &sampler); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + struct vk_meta_blit_key key; + memset(&key, 0, sizeof(key)); + key.key_type = VK_META_OBJECT_KEY_BLIT_PIPELINE; + key.src_samples = src_image->samples; + key.dim = 
vk_image_sampler_dim(src_image); + key.dst_format = dst_format; + + for (uint32_t r = 0; r < region_count; r++) { + struct vk_meta_blit_push_data push = {0}; + struct vk_meta_rect dst_rect = {0}; + + uint32_t src_level = regions[r].srcSubresource.mipLevel; + VkExtent3D src_extent = vk_image_mip_level_extent(src_image, src_level); + + compute_off_scale(src_extent.width, + regions[r].srcOffsets[0].x, + regions[r].srcOffsets[1].x, + regions[r].dstOffsets[0].x, + regions[r].dstOffsets[1].x, + &dst_rect.x0, &dst_rect.x1, + &push.x_off, &push.x_scale); + compute_off_scale(src_extent.height, + regions[r].srcOffsets[0].y, + regions[r].srcOffsets[1].y, + regions[r].dstOffsets[0].y, + regions[r].dstOffsets[1].y, + &dst_rect.y0, &dst_rect.y1, + &push.y_off, &push.y_scale); + + VkImageSubresourceLayers src_subres = regions[r].srcSubresource; + src_subres.layerCount = + vk_image_subresource_layer_count(src_image, &src_subres); + + VkImageSubresourceLayers dst_subres = regions[r].dstSubresource; + dst_subres.layerCount = + vk_image_subresource_layer_count(dst_image, &dst_subres); + + uint32_t dst_layer_count; + if (src_image->image_type == VK_IMAGE_TYPE_3D) { + uint32_t layer0, layer1; + compute_off_scale(src_extent.depth, + regions[r].srcOffsets[0].z, + regions[r].srcOffsets[1].z, + regions[r].dstOffsets[0].z, + regions[r].dstOffsets[1].z, + &layer0, &layer1, + &push.z_off, &push.z_scale); + dst_rect.layer = layer0; + dst_layer_count = layer1 - layer0; + } else { + assert(src_subres.layerCount == dst_subres.layerCount); + dst_layer_count = dst_subres.layerCount; + push.arr_delta = dst_subres.baseArrayLayer - + src_subres.baseArrayLayer; + } + + do_blit(cmd, meta, + src_image, src_format, src_image_layout, src_subres, + dst_image, dst_format, dst_image_layout, dst_subres, + sampler, &key, &push, &dst_rect, dst_layer_count); + } +} + +void +vk_meta_blit_image2(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkBlitImageInfo2 *blit) +{ + VK_FROM_HANDLE(vk_image, src_image, blit->srcImage); + VK_FROM_HANDLE(vk_image, dst_image, blit->dstImage); + + vk_meta_blit_image(cmd, meta, + src_image, src_image->format, blit->srcImageLayout, + dst_image, dst_image->format, blit->dstImageLayout, + blit->regionCount, blit->pRegions, blit->filter); +} + +void +vk_meta_resolve_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *src_image, + VkFormat src_format, + VkImageLayout src_image_layout, + struct vk_image *dst_image, + VkFormat dst_format, + VkImageLayout dst_image_layout, + uint32_t region_count, + const VkImageResolve2 *regions, + VkResolveModeFlagBits resolve_mode, + VkResolveModeFlagBits stencil_resolve_mode) +{ + struct vk_meta_blit_key key; + memset(&key, 0, sizeof(key)); + key.key_type = VK_META_OBJECT_KEY_BLIT_PIPELINE; + key.dim = vk_image_sampler_dim(src_image); + key.src_samples = src_image->samples; + key.resolve_mode = resolve_mode; + key.stencil_resolve_mode = stencil_resolve_mode; + key.dst_format = dst_format; + + for (uint32_t r = 0; r < region_count; r++) { + struct vk_meta_blit_push_data push = { + .x_off = regions[r].srcOffset.x - regions[r].dstOffset.x, + .y_off = regions[r].srcOffset.y - regions[r].dstOffset.y, + .x_scale = 1, + .y_scale = 1, + }; + struct vk_meta_rect dst_rect = { + .x0 = regions[r].dstOffset.x, + .y0 = regions[r].dstOffset.y, + .x1 = regions[r].dstOffset.x + regions[r].extent.width, + .y1 = regions[r].dstOffset.y + regions[r].extent.height, + }; + + VkImageSubresourceLayers src_subres = regions[r].srcSubresource; + 
src_subres.layerCount = + vk_image_subresource_layer_count(src_image, &src_subres); + + VkImageSubresourceLayers dst_subres = regions[r].dstSubresource; + dst_subres.layerCount = + vk_image_subresource_layer_count(dst_image, &dst_subres); + + do_blit(cmd, meta, + src_image, src_format, src_image_layout, src_subres, + dst_image, dst_format, dst_image_layout, dst_subres, + VK_NULL_HANDLE, &key, &push, &dst_rect, + dst_subres.layerCount); + } +} + +void +vk_meta_resolve_image2(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkResolveImageInfo2 *resolve) +{ + VK_FROM_HANDLE(vk_image, src_image, resolve->srcImage); + VK_FROM_HANDLE(vk_image, dst_image, resolve->dstImage); + + VkResolveModeFlagBits resolve_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + if (vk_format_is_color(src_image->format) && + !vk_format_is_int(src_image->format)) + resolve_mode = VK_RESOLVE_MODE_AVERAGE_BIT; + + vk_meta_resolve_image(cmd, meta, + src_image, src_image->format, resolve->srcImageLayout, + dst_image, dst_image->format, resolve->dstImageLayout, + resolve->regionCount, resolve->pRegions, + resolve_mode, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); +} + +static void +vk_meta_resolve_attachment(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image_view *src_view, + VkImageLayout src_image_layout, + struct vk_image_view *dst_view, + VkImageLayout dst_image_layout, + VkImageAspectFlags resolve_aspects, + VkResolveModeFlagBits resolve_mode, + VkResolveModeFlagBits stencil_resolve_mode, + VkRect2D area, uint32_t layer_count, + uint32_t view_mask) +{ + VkImageResolve2 region = { + .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2, + .srcSubresource = { + .aspectMask = resolve_aspects, + .mipLevel = src_view->base_mip_level, + }, + .srcOffset = { area.offset.x, area.offset.y, 0}, + .dstSubresource = { + .aspectMask = resolve_aspects, + .mipLevel = dst_view->base_mip_level, + }, + .dstOffset = { area.offset.x, area.offset.y, 0}, + .extent = { area.extent.width, area.extent.height, 1}, + }; + + if (view_mask) { + u_foreach_bit(v, view_mask) { + region.srcSubresource.baseArrayLayer = src_view->base_array_layer + v; + region.srcSubresource.layerCount = 1; + region.dstSubresource.baseArrayLayer = dst_view->base_array_layer + v; + region.dstSubresource.layerCount = 1; + + vk_meta_resolve_image(cmd, meta, + src_view->image, src_view->format, + src_image_layout, + dst_view->image, dst_view->format, + dst_image_layout, + 1, ®ion, resolve_mode, stencil_resolve_mode); + } + } else { + region.srcSubresource.baseArrayLayer = src_view->base_array_layer; + region.srcSubresource.layerCount = layer_count; + region.dstSubresource.baseArrayLayer = dst_view->base_array_layer; + region.dstSubresource.layerCount = layer_count; + + vk_meta_resolve_image(cmd, meta, + src_view->image, src_view->format, + src_image_layout, + dst_view->image, dst_view->format, + dst_image_layout, + 1, ®ion, resolve_mode, stencil_resolve_mode); + } +} + +void +vk_meta_resolve_rendering(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const VkRenderingInfo *pRenderingInfo) +{ + for (uint32_t c = 0; c < pRenderingInfo->colorAttachmentCount; c++) { + const VkRenderingAttachmentInfo *att = + &pRenderingInfo->pColorAttachments[c]; + if (att->resolveMode == VK_RESOLVE_MODE_NONE) + continue; + + VK_FROM_HANDLE(vk_image_view, view, att->imageView); + VK_FROM_HANDLE(vk_image_view, res_view, att->resolveImageView); + + vk_meta_resolve_attachment(cmd, meta, view, att->imageLayout, + res_view, att->resolveImageLayout, + 
VK_IMAGE_ASPECT_COLOR_BIT, + att->resolveMode, VK_RESOLVE_MODE_NONE, + pRenderingInfo->renderArea, + pRenderingInfo->layerCount, + pRenderingInfo->viewMask); + } + + const VkRenderingAttachmentInfo *d_att = pRenderingInfo->pDepthAttachment; + if (d_att && d_att->resolveMode == VK_RESOLVE_MODE_NONE) + d_att = NULL; + + const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment; + if (s_att && s_att->resolveMode == VK_RESOLVE_MODE_NONE) + s_att = NULL; + + if (s_att != NULL || d_att != NULL) { + if (s_att != NULL && d_att != NULL && + s_att->imageView == d_att->imageView && + s_att->resolveImageView == d_att->resolveImageView) { + VK_FROM_HANDLE(vk_image_view, view, d_att->imageView); + VK_FROM_HANDLE(vk_image_view, res_view, d_att->resolveImageView); + + vk_meta_resolve_attachment(cmd, meta, view, d_att->imageLayout, + res_view, d_att->resolveImageLayout, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, + d_att->resolveMode, s_att->resolveMode, + pRenderingInfo->renderArea, + pRenderingInfo->layerCount, + pRenderingInfo->viewMask); + } else { + if (d_att != NULL) { + VK_FROM_HANDLE(vk_image_view, view, d_att->imageView); + VK_FROM_HANDLE(vk_image_view, res_view, d_att->resolveImageView); + + vk_meta_resolve_attachment(cmd, meta, view, d_att->imageLayout, + res_view, d_att->resolveImageLayout, + VK_IMAGE_ASPECT_DEPTH_BIT, + d_att->resolveMode, VK_RESOLVE_MODE_NONE, + pRenderingInfo->renderArea, + pRenderingInfo->layerCount, + pRenderingInfo->viewMask); + } + + if (s_att != NULL) { + VK_FROM_HANDLE(vk_image_view, view, s_att->imageView); + VK_FROM_HANDLE(vk_image_view, res_view, s_att->resolveImageView); + + vk_meta_resolve_attachment(cmd, meta, view, s_att->imageLayout, + res_view, s_att->resolveImageLayout, + VK_IMAGE_ASPECT_STENCIL_BIT, + VK_RESOLVE_MODE_NONE, s_att->resolveMode, + pRenderingInfo->renderArea, + pRenderingInfo->layerCount, + pRenderingInfo->viewMask); + } + } + } +} diff --git a/src/vulkan/runtime/vk_meta_clear.c b/src/vulkan/runtime/vk_meta_clear.c new file mode 100644 index 00000000000..638db130403 --- /dev/null +++ b/src/vulkan/runtime/vk_meta_clear.c @@ -0,0 +1,609 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_meta_private.h" + +#include "vk_command_buffer.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_image.h" +#include "vk_pipeline.h" +#include "vk_util.h" + +#include "nir_builder.h" + +struct vk_meta_clear_key { + enum vk_meta_object_key_type key_type; + struct vk_meta_rendering_info render; + uint8_t color_attachments_cleared; + bool clear_depth; + bool clear_stencil; +}; + +struct vk_meta_clear_push_data { + VkClearColorValue color_values[MESA_VK_MAX_COLOR_ATTACHMENTS]; +}; + +static nir_shader * +build_clear_shader(const struct vk_meta_clear_key *key) +{ + nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, + NULL, "vk-meta-clear"); + nir_builder *b = &build; + + struct glsl_struct_field push_field = { + .type = glsl_array_type(glsl_vec4_type(), + MESA_VK_MAX_COLOR_ATTACHMENTS, + 16 /* explicit_stride */), + .name = "color_values", + }; + const struct glsl_type *push_iface_type = + glsl_interface_type(&push_field, 1, GLSL_INTERFACE_PACKING_STD140, + false /* row_major */, "push"); + + nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, + push_iface_type, "push"); + nir_deref_instr *push_arr = + nir_build_deref_struct(b, nir_build_deref_var(b, push), 0); + + u_foreach_bit(a, key->color_attachments_cleared) { + nir_def *color_value = + nir_load_deref(b, nir_build_deref_array_imm(b, push_arr, a)); + + const struct glsl_type *out_type; + if (vk_format_is_sint(key->render.color_attachment_formats[a])) + out_type = glsl_ivec4_type(); + else if (vk_format_is_uint(key->render.color_attachment_formats[a])) + out_type = glsl_uvec4_type(); + else + out_type = glsl_vec4_type(); + + char out_name[8]; + snprintf(out_name, sizeof(out_name), "color%u", a); + + nir_variable *out = nir_variable_create(b->shader, nir_var_shader_out, + out_type, out_name); + out->data.location = FRAG_RESULT_DATA0 + a; + + nir_store_var(b, out, color_value, 0xf); + } + + return b->shader; +} + +static VkResult +get_clear_pipeline_layout(struct vk_device *device, + struct vk_meta_device *meta, + VkPipelineLayout *layout_out) +{ + const char key[] = "vk-meta-clear-pipeline-layout"; + + VkPipelineLayout from_cache = + vk_meta_lookup_pipeline_layout(meta, key, sizeof(key)); + if (from_cache != VK_NULL_HANDLE) { + *layout_out = from_cache; + return VK_SUCCESS; + } + + const VkPushConstantRange push_range = { + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .offset = 0, + .size = sizeof(struct vk_meta_clear_push_data), + }; + + const VkPipelineLayoutCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range, + }; + + return vk_meta_create_pipeline_layout(device, meta, &info, + key, sizeof(key), layout_out); +} + +static VkResult +get_clear_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const struct vk_meta_clear_key *key, + VkPipelineLayout layout, + VkPipeline *pipeline_out) +{ + VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key)); + if (from_cache != VK_NULL_HANDLE) { + *pipeline_out = from_cache; + return VK_SUCCESS; + } + + const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, + .nir = build_clear_shader(key), + }; + const VkPipelineShaderStageCreateInfo fs_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &fs_nir_info, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .pName = "main", + }; + + 
VkPipelineDepthStencilStateCreateInfo ds_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + }; + const VkDynamicState dyn_stencil_ref = VK_DYNAMIC_STATE_STENCIL_REFERENCE; + VkPipelineDynamicStateCreateInfo dyn_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + }; + if (key->clear_depth) { + ds_info.depthTestEnable = VK_TRUE; + ds_info.depthWriteEnable = VK_TRUE; + ds_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + } + if (key->clear_stencil) { + ds_info.stencilTestEnable = VK_TRUE; + ds_info.front.compareOp = VK_COMPARE_OP_ALWAYS; + ds_info.front.passOp = VK_STENCIL_OP_REPLACE; + ds_info.front.compareMask = ~0u; + ds_info.front.writeMask = ~0u; + ds_info.back = ds_info.front; + dyn_info.dynamicStateCount = 1; + dyn_info.pDynamicStates = &dyn_stencil_ref; + } + + const VkGraphicsPipelineCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 1, + .pStages = &fs_info, + .pDepthStencilState = &ds_info, + .pDynamicState = &dyn_info, + .layout = layout, + }; + + VkResult result = vk_meta_create_graphics_pipeline(device, meta, &info, + &key->render, + key, sizeof(*key), + pipeline_out); + ralloc_free(fs_nir_info.nir); + + return result; +} + +static int +vk_meta_rect_cmp_layer(const void *_a, const void *_b) +{ + const struct vk_meta_rect *a = _a, *b = _b; + assert(a->layer <= INT_MAX && b->layer <= INT_MAX); + return a->layer - b->layer; +} + +void +vk_meta_clear_attachments(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const struct vk_meta_rendering_info *render, + uint32_t attachment_count, + const VkClearAttachment *attachments, + uint32_t clear_rect_count, + const VkClearRect *clear_rects) +{ + struct vk_device *device = cmd->base.device; + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkResult result; + + struct vk_meta_clear_key key; + memset(&key, 0, sizeof(key)); + key.key_type = VK_META_OBJECT_KEY_CLEAR_PIPELINE; + vk_meta_rendering_info_copy(&key.render, render); + + struct vk_meta_clear_push_data push = {0}; + float depth_value = 1.0f; + uint32_t stencil_value = 0; + + for (uint32_t i = 0; i < attachment_count; i++) { + if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + const uint32_t a = attachments[i].colorAttachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + + assert(a < MESA_VK_MAX_COLOR_ATTACHMENTS); + if (render->color_attachment_formats[a] == VK_FORMAT_UNDEFINED) + continue; + + key.color_attachments_cleared |= BITFIELD_BIT(a); + push.color_values[a] = attachments[i].clearValue.color; + } + if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + key.clear_depth = true; + depth_value = attachments[i].clearValue.depthStencil.depth; + } + if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + key.clear_stencil = true; + stencil_value = attachments[i].clearValue.depthStencil.stencil; + } + } + + VkPipelineLayout layout; + result = get_clear_pipeline_layout(device, meta, &layout); + if (unlikely(result != VK_SUCCESS)) { + /* TODO: Report error */ + return; + } + + VkPipeline pipeline; + result = get_clear_pipeline(device, meta, &key, layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + /* TODO: Report error */ + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + if (key.clear_stencil) { + disp->CmdSetStencilReference(vk_command_buffer_to_handle(cmd), + VK_STENCIL_FACE_FRONT_AND_BACK, + stencil_value); + } + + 
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), + layout, VK_SHADER_STAGE_FRAGMENT_BIT, + 0, sizeof(push), &push); + + if (render->view_mask == 0) { + if (clear_rect_count == 1 && clear_rects[0].layerCount > 1) { + struct vk_meta_rect rect = { + .x0 = clear_rects[0].rect.offset.x, + .x1 = clear_rects[0].rect.offset.x + + clear_rects[0].rect.extent.width, + .y0 = clear_rects[0].rect.offset.y, + .y1 = clear_rects[0].rect.offset.y + + clear_rects[0].rect.extent.height, + .z = depth_value, + .layer = clear_rects[0].baseArrayLayer, + }; + + meta->cmd_draw_volume(cmd, meta, &rect, clear_rects[0].layerCount); + } else { + uint32_t max_rect_count = 0; + for (uint32_t r = 0; r < clear_rect_count; r++) + max_rect_count += clear_rects[r].layerCount; + + STACK_ARRAY(struct vk_meta_rect, rects, max_rect_count); + + uint32_t rect_count = 0; + for (uint32_t r = 0; r < clear_rect_count; r++) { + struct vk_meta_rect rect = { + .x0 = clear_rects[r].rect.offset.x, + .x1 = clear_rects[r].rect.offset.x + + clear_rects[r].rect.extent.width, + .y0 = clear_rects[r].rect.offset.y, + .y1 = clear_rects[r].rect.offset.y + + clear_rects[r].rect.extent.height, + .z = depth_value, + }; + for (uint32_t a = 0; a < clear_rects[r].layerCount; a++) { + rect.layer = clear_rects[r].baseArrayLayer + a; + rects[rect_count++] = rect; + } + } + assert(rect_count <= max_rect_count); + + /* If we have more than one clear rect, sort by layer in the hopes + * the hardware more or less does all the clears for one layer before + * moving on to the next, thus reducing cache thrashing. + */ + qsort(rects, rect_count, sizeof(*rects), vk_meta_rect_cmp_layer); + + meta->cmd_draw_rects(cmd, meta, rect_count, rects); + + STACK_ARRAY_FINISH(rects); + } + } else { + const uint32_t rect_count = clear_rect_count * + util_bitcount(render->view_mask); + STACK_ARRAY(struct vk_meta_rect, rects, rect_count); + + uint32_t rect_idx = 0; + u_foreach_bit(v, render->view_mask) { + for (uint32_t r = 0; r < clear_rect_count; r++) { + assert(clear_rects[r].baseArrayLayer == 0); + assert(clear_rects[r].layerCount == 1); + rects[rect_idx++] = (struct vk_meta_rect) { + .x0 = clear_rects[r].rect.offset.x, + .x1 = clear_rects[r].rect.offset.x + + clear_rects[r].rect.extent.width, + .y0 = clear_rects[r].rect.offset.y, + .y1 = clear_rects[r].rect.offset.y + + clear_rects[r].rect.extent.height, + .z = depth_value, + .layer = v, + }; + } + } + assert(rect_idx == rect_count); + + meta->cmd_draw_rects(cmd, meta, rect_count, rects); + + STACK_ARRAY_FINISH(rects); + } +} + +void +vk_meta_clear_rendering(struct vk_meta_device *meta, + struct vk_command_buffer *cmd, + const VkRenderingInfo *pRenderingInfo) +{ + assert(!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)); + + struct vk_meta_rendering_info render = { + .view_mask = pRenderingInfo->viewMask, + .color_attachment_count = pRenderingInfo->colorAttachmentCount, + }; + + uint32_t clear_count = 0; + VkClearAttachment clear_att[MESA_VK_MAX_COLOR_ATTACHMENTS + 1]; + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { + const VkRenderingAttachmentInfo *att_info = + &pRenderingInfo->pColorAttachments[i]; + if (att_info->imageView == VK_NULL_HANDLE || + att_info->loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR) + continue; + + VK_FROM_HANDLE(vk_image_view, iview, att_info->imageView); + render.color_attachment_formats[i] = iview->format; + assert(render.samples == 0 || render.samples == iview->image->samples); + render.samples = MAX2(render.samples, iview->image->samples); + + clear_att[clear_count++] = 
(VkClearAttachment) {
+         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+         .colorAttachment = i,
+         .clearValue = att_info->clearValue,
+      };
+   }
+
+   /* One more for depth/stencil, if needed */
+   clear_att[clear_count] = (VkClearAttachment) { .aspectMask = 0, };
+
+   const VkRenderingAttachmentInfo *d_att_info =
+      pRenderingInfo->pDepthAttachment;
+   if (d_att_info != NULL && d_att_info->imageView != VK_NULL_HANDLE &&
+       d_att_info->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+      VK_FROM_HANDLE(vk_image_view, iview, d_att_info->imageView);
+      render.depth_attachment_format = iview->format;
+      render.samples = MAX2(render.samples, iview->image->samples);
+
+      clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
+      clear_att[clear_count].clearValue.depthStencil.depth =
+         d_att_info->clearValue.depthStencil.depth;
+   }
+
+   const VkRenderingAttachmentInfo *s_att_info =
+      pRenderingInfo->pStencilAttachment;
+   if (s_att_info != NULL && s_att_info->imageView != VK_NULL_HANDLE &&
+       s_att_info->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+      VK_FROM_HANDLE(vk_image_view, iview, s_att_info->imageView);
+      render.stencil_attachment_format = iview->format;
+      render.samples = MAX2(render.samples, iview->image->samples);
+
+      clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+      clear_att[clear_count].clearValue.depthStencil.stencil =
+         s_att_info->clearValue.depthStencil.stencil;
+   }
+   if (clear_att[clear_count].aspectMask != 0)
+      clear_count++;
+
+   if (clear_count > 0) {
+      const VkClearRect clear_rect = {
+         .rect = pRenderingInfo->renderArea,
+         .baseArrayLayer = 0,
+         .layerCount = pRenderingInfo->viewMask ?
+                       1 : pRenderingInfo->layerCount,
+      };
+      vk_meta_clear_attachments(cmd, meta, &render,
+                                clear_count, clear_att,
+                                1, &clear_rect);
+   }
+}
+
+static void
+clear_image_level_layers(struct vk_command_buffer *cmd,
+                         struct vk_meta_device *meta,
+                         struct vk_image *image,
+                         VkImageLayout image_layout,
+                         VkFormat format,
+                         const VkClearValue *clear_value,
+                         VkImageAspectFlags aspects,
+                         uint32_t level,
+                         uint32_t base_array_layer,
+                         uint32_t layer_count)
+{
+   struct vk_device *device = cmd->base.device;
+   const struct vk_device_dispatch_table *disp = &device->dispatch_table;
+   VkCommandBuffer _cmd = vk_command_buffer_to_handle(cmd);
+   VkResult result;
+
+   const VkImageViewCreateInfo view_info = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+      .image = vk_image_to_handle(image),
+      .viewType = vk_image_render_view_type(image, layer_count),
+      .format = format,
+      .subresourceRange = {
+         .aspectMask = aspects,
+         .baseMipLevel = level,
+         .levelCount = 1,
+         .baseArrayLayer = base_array_layer,
+         .layerCount = layer_count,
+      }
+   };
+
+   VkImageView image_view;
+   result = vk_meta_create_image_view(cmd, meta, &view_info, &image_view);
+   if (unlikely(result != VK_SUCCESS)) {
+      /* TODO: Report error */
+      return;
+   }
+
+   const VkExtent3D level_extent = vk_image_mip_level_extent(image, level);
+
+   VkRenderingAttachmentInfo vk_att = {
+      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+      .imageView = image_view,
+      .imageLayout = image_layout,
+      .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+   };
+   VkRenderingInfo vk_render = {
+      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
+      .renderArea = {
+         .offset = { 0, 0 },
+         .extent = { level_extent.width, level_extent.height },
+      },
+      .layerCount = layer_count,
+   };
+   struct vk_meta_rendering_info meta_render = {
+      .samples = image->samples,
+   };
+
+   if (image->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+      vk_render.colorAttachmentCount = 1;
vk_render.pColorAttachments = &vk_att; + meta_render.color_attachment_count = 1; + meta_render.color_attachment_formats[0] = format; + } + + if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + vk_render.pDepthAttachment = &vk_att; + meta_render.depth_attachment_format = format; + } + + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + vk_render.pStencilAttachment = &vk_att; + meta_render.stencil_attachment_format = format; + } + + const VkClearAttachment clear_att = { + .aspectMask = aspects, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + const VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { level_extent.width, level_extent.height }, + }, + .baseArrayLayer = 0, + .layerCount = layer_count, + }; + + disp->CmdBeginRendering(_cmd, &vk_render); + + vk_meta_clear_attachments(cmd, meta, &meta_render, + 1, &clear_att, 1, &clear_rect); + + disp->CmdEndRendering(_cmd); +} + +static void +clear_image_level(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *image, + VkImageLayout image_layout, + VkFormat format, + const VkClearValue *clear_value, + uint32_t level, + const VkImageSubresourceRange *range) +{ + const VkExtent3D level_extent = vk_image_mip_level_extent(image, level); + + uint32_t base_array_layer, layer_count; + if (image->image_type == VK_IMAGE_TYPE_3D) { + base_array_layer = 0; + layer_count = level_extent.depth; + } else { + base_array_layer = range->baseArrayLayer; + layer_count = vk_image_subresource_layer_count(image, range); + } + + if (layer_count > 1 && !meta->use_layered_rendering) { + for (uint32_t a = 0; a < layer_count; a++) { + clear_image_level_layers(cmd, meta, image, image_layout, + format, clear_value, + range->aspectMask, level, + base_array_layer + a, 1); + } + } else { + clear_image_level_layers(cmd, meta, image, image_layout, + format, clear_value, + range->aspectMask, level, + base_array_layer, layer_count); + } +} + +void +vk_meta_clear_color_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *image, + VkImageLayout image_layout, + VkFormat format, + const VkClearColorValue *color, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + const VkClearValue clear_value = { + .color = *color, + }; + for (uint32_t r = 0; r < range_count; r++) { + const uint32_t level_count = + vk_image_subresource_level_count(image, &ranges[r]); + + for (uint32_t l = 0; l < level_count; l++) { + clear_image_level(cmd, meta, image, image_layout, + format, &clear_value, + ranges[r].baseMipLevel + l, + &ranges[r]); + } + } +} + +void +vk_meta_clear_depth_stencil_image(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + struct vk_image *image, + VkImageLayout image_layout, + const VkClearDepthStencilValue *depth_stencil, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + const VkClearValue clear_value = { + .depthStencil = *depth_stencil, + }; + for (uint32_t r = 0; r < range_count; r++) { + const uint32_t level_count = + vk_image_subresource_level_count(image, &ranges[r]); + + for (uint32_t l = 0; l < level_count; l++) { + clear_image_level(cmd, meta, image, image_layout, + image->format, &clear_value, + ranges[r].baseMipLevel + l, + &ranges[r]); + } + } +} diff --git a/src/vulkan/runtime/vk_meta_draw_rects.c b/src/vulkan/runtime/vk_meta_draw_rects.c new file mode 100644 index 00000000000..fd76e582b97 --- /dev/null +++ b/src/vulkan/runtime/vk_meta_draw_rects.c @@ -0,0 +1,337 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_meta_private.h" + +#include "vk_command_buffer.h" +#include "vk_command_pool.h" +#include "vk_device.h" + +#include "nir_builder.h" + +const VkPipelineVertexInputStateCreateInfo vk_meta_draw_rects_vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &(const VkVertexInputBindingDescription) { + .binding = 0, + .stride = 4 * sizeof(uint32_t), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }, + .vertexAttributeDescriptionCount = 1, + .pVertexAttributeDescriptions = &(const VkVertexInputAttributeDescription) { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0, + }, +}; + +const VkPipelineInputAssemblyStateCreateInfo vk_meta_draw_rects_ia_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA, + .primitiveRestartEnable = VK_FALSE, +}; + +const VkPipelineViewportStateCreateInfo vk_meta_draw_rects_vs_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, +}; + +nir_shader * +vk_meta_draw_rects_vs_nir(struct vk_meta_device *device, bool use_gs) +{ + nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, + "vk-meta-draw-rects-vs"); + nir_builder *b = &build; + + nir_variable *in = nir_variable_create(b->shader, nir_var_shader_in, + glsl_uvec4_type(), "vtx_in"); + in->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *pos = + nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), + use_gs ? "pos_out" : "gl_Position"); + pos->data.location = use_gs ? VARYING_SLOT_VAR0 : VARYING_SLOT_POS; + + nir_variable *layer = + nir_variable_create(b->shader, nir_var_shader_out, glsl_int_type(), + use_gs ? "layer_out" : "gl_Layer"); + layer->data.location = use_gs ? 
VARYING_SLOT_VAR1 : VARYING_SLOT_LAYER;
+
+   nir_def *vtx = nir_load_var(b, in);
+   nir_store_var(b, pos, nir_vec4(b, nir_channel(b, vtx, 0),
+                                     nir_channel(b, vtx, 1),
+                                     nir_channel(b, vtx, 2),
+                                     nir_imm_float(b, 1)),
+                 0xf);
+
+   nir_store_var(b, layer, nir_iadd(b, nir_load_instance_id(b),
+                                       nir_channel(b, vtx, 3)),
+                 0x1);
+
+   return b->shader;
+}
+
+nir_shader *
+vk_meta_draw_rects_gs_nir(struct vk_meta_device *device)
+{
+   nir_builder build =
+      nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, NULL,
+                                     "vk-meta-draw-rects-gs");
+   nir_builder *b = &build;
+
+   nir_variable *pos_in =
+      nir_variable_create(b->shader, nir_var_shader_in,
+                          glsl_array_type(glsl_vec4_type(), 3, 0), "pos_in");
+   pos_in->data.location = VARYING_SLOT_VAR0;
+
+   nir_variable *layer_in =
+      nir_variable_create(b->shader, nir_var_shader_in,
+                          glsl_array_type(glsl_int_type(), 3, 0), "layer_in");
+   layer_in->data.location = VARYING_SLOT_VAR1;
+
+   nir_variable *pos_out =
+      nir_variable_create(b->shader, nir_var_shader_out,
+                          glsl_vec4_type(), "gl_Position");
+   pos_out->data.location = VARYING_SLOT_POS;
+
+   nir_variable *layer_out =
+      nir_variable_create(b->shader, nir_var_shader_out,
+                          glsl_int_type(), "gl_Layer");
+   layer_out->data.location = VARYING_SLOT_LAYER;
+
+   for (unsigned i = 0; i < 3; i++) {
+      nir_deref_instr *pos_in_deref =
+         nir_build_deref_array_imm(b, nir_build_deref_var(b, pos_in), i);
+      nir_deref_instr *layer_in_deref =
+         nir_build_deref_array_imm(b, nir_build_deref_var(b, layer_in), i);
+
+      nir_store_var(b, pos_out, nir_load_deref(b, pos_in_deref), 0xf);
+      nir_store_var(b, layer_out, nir_load_deref(b, layer_in_deref), 1);
+      nir_emit_vertex(b);
+   }
+
+   nir_end_primitive(b);
+
+   struct shader_info *info = &build.shader->info;
+   info->gs.input_primitive = MESA_PRIM_TRIANGLES;
+   info->gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
+   info->gs.vertices_in = 3;
+   info->gs.vertices_out = 3;
+   info->gs.invocations = 1;
+   info->gs.active_stream_mask = 1;
+
+   return b->shader;
+}
+
+struct vertex {
+   float x, y, z;
+   uint32_t layer;
+};
+
+static void
+setup_viewport_scissor(struct vk_command_buffer *cmd,
+                       uint32_t rect_count,
+                       const struct vk_meta_rect *rects,
+                       float *x_scale, float *y_scale)
+{
+   const struct vk_device_dispatch_table *disp =
+      &cmd->base.device->dispatch_table;
+   VkCommandBuffer _cmd = vk_command_buffer_to_handle(cmd);
+
+   assert(rects[0].x0 < rects[0].x1 && rects[0].y0 < rects[0].y1);
+   uint32_t xbits = rects[0].x1 - 1, ybits = rects[0].y1 - 1;
+   float zmin = rects[0].z, zmax = rects[0].z;
+   for (uint32_t r = 1; r < rect_count; r++) {
+      assert(rects[r].x0 < rects[r].x1 && rects[r].y0 < rects[r].y1);
+      xbits |= rects[r].x1 - 1;
+      ybits |= rects[r].y1 - 1;
+      zmin = fminf(zmin, rects[r].z);
+      zmax = fmaxf(zmax, rects[r].z);
+   }
+
+   /* Annoyingly, we don't actually know the render area. We assume that all
+    * our rects are inside the render area. We further assume the maximum
+    * image and/or viewport size is a power of two. This means we can round
+    * up to a power of two without going outside any maximums. Using a power
+    * of two will ensure we don't lose precision when scaling coordinates.
+    */
+   int xmax_log2 = 1 + util_logbase2(xbits);
+   int ymax_log2 = 1 + util_logbase2(ybits);
+
+   assert(xmax_log2 >= 0 && xmax_log2 <= 31);
+   assert(ymax_log2 >= 0 && ymax_log2 <= 31);
+
+   /* We don't care about precise bounds on Z, only that it's inside [0, 1] if
+    * the implementation only supports [0, 1].
+    */
+   if (zmin >= 0.0f && zmax <= 1.0f) {
+      zmin = 0.0f;
+      zmax = 1.0f;
+   }
+
+   VkViewport viewport = {
+      .x = 0,
+      .y = 0,
+      .width = ldexpf(1.0, xmax_log2),
+      .height = ldexpf(1.0, ymax_log2),
+      .minDepth = zmin,
+      .maxDepth = zmax,
+   };
+   disp->CmdSetViewport(_cmd, 0, 1, &viewport);
+
+   VkRect2D scissor = {
+      .offset = { 0, 0 },
+      .extent = { 1u << xmax_log2, 1u << ymax_log2 },
+   };
+   disp->CmdSetScissor(_cmd, 0, 1, &scissor);
+
+   /* Scaling factors */
+   *x_scale = ldexpf(2.0, -xmax_log2);
+   *y_scale = ldexpf(2.0, -ymax_log2);
+}
+
+static const uint32_t rect_vb_size_B = 6 * 4 * sizeof(float);
+
+static VkResult
+create_vertex_buffer(struct vk_command_buffer *cmd,
+                     struct vk_meta_device *meta,
+                     float x_scale, float y_scale,
+                     uint32_t rect_count,
+                     const struct vk_meta_rect *rects,
+                     VkBuffer *buffer_out)
+{
+   VkResult result;
+
+   const VkBufferCreateInfo vtx_buffer_info = {
+      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+      .size = rect_count * rect_vb_size_B,
+      .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+      .queueFamilyIndexCount = 1,
+      .pQueueFamilyIndices = &cmd->pool->queue_family_index,
+   };
+
+   result = vk_meta_create_buffer(cmd, meta, &vtx_buffer_info, buffer_out);
+   if (unlikely(result != VK_SUCCESS))
+      return result;
+
+   void *map;
+   result = meta->cmd_bind_map_buffer(cmd, meta, *buffer_out, &map);
+   if (unlikely(result != VK_SUCCESS))
+      return result;
+
+   for (uint32_t r = 0; r < rect_count; r++) {
+      float x0 = rects[r].x0 * x_scale - 1.0f;
+      float y0 = rects[r].y0 * y_scale - 1.0f;
+      float x1 = rects[r].x1 * x_scale - 1.0f;
+      float y1 = rects[r].y1 * y_scale - 1.0f;
+      float z = rects[r].z;
+      uint32_t w = rects[r].layer;
+
+      struct vertex rect_vb_data[6] = {
+         { x0, y1, z, w },
+         { x0, y0, z, w },
+         { x1, y1, z, w },
+
+         { x1, y0, z, w },
+         { x1, y1, z, w },
+         { x0, y0, z, w },
+      };
+      assert(sizeof(rect_vb_data) == rect_vb_size_B);
+      memcpy((char *)map + r * rect_vb_size_B, rect_vb_data, rect_vb_size_B);
+   }
+
+   return VK_SUCCESS;
+}
+
+void
+vk_meta_draw_volume(struct vk_command_buffer *cmd,
+                    struct vk_meta_device *meta,
+                    const struct vk_meta_rect *rect,
+                    uint32_t layer_count)
+{
+   const struct vk_device_dispatch_table *disp =
+      &cmd->base.device->dispatch_table;
+   VkCommandBuffer _cmd = vk_command_buffer_to_handle(cmd);
+
+   float x_scale, y_scale;
+   setup_viewport_scissor(cmd, 1, rect, &x_scale, &y_scale);
+
+   VkBuffer vtx_buffer;
+   VkResult result = create_vertex_buffer(cmd, meta, x_scale, y_scale,
+                                          1, rect, &vtx_buffer);
+   if (unlikely(result != VK_SUCCESS)) {
+      /* TODO: Report error */
+      return;
+   }
+
+   const VkDeviceSize zero = 0;
+   disp->CmdBindVertexBuffers(_cmd, 0, 1, &vtx_buffer, &zero);
+
+   disp->CmdDraw(_cmd, 6, layer_count, 0, 0);
+}
+
+void
+vk_meta_draw_rects(struct vk_command_buffer *cmd,
+                   struct vk_meta_device *meta,
+                   uint32_t rect_count,
+                   const struct vk_meta_rect *rects)
+{
+   const struct vk_device_dispatch_table *disp =
+      &cmd->base.device->dispatch_table;
+   VkCommandBuffer _cmd = vk_command_buffer_to_handle(cmd);
+
+   /* Two triangles with VK_FORMAT_R32G32B32A32_UINT */
+   const uint32_t rect_vb_size_B = 6 * 4 * sizeof(float);
+   const uint32_t rects_per_draw =
+      meta->max_bind_map_buffer_size_B / rect_vb_size_B;
+
+   if (rect_count == 0)
+      return;
+
+   float x_scale, y_scale;
+   setup_viewport_scissor(cmd, rect_count, rects, &x_scale, &y_scale);
+
+   uint32_t next_rect = 0;
+   while (next_rect < rect_count) {
+      const uint32_t count = MIN2(rects_per_draw, rect_count - next_rect);
+
+      VkBuffer vtx_buffer;
+      VkResult result = create_vertex_buffer(cmd, meta,
x_scale, y_scale, + count, &rects[next_rect], + &vtx_buffer); + if (unlikely(result != VK_SUCCESS)) { + /* TODO: Report error */ + return; + } + + const VkDeviceSize zero = 0; + disp->CmdBindVertexBuffers(_cmd, 0, 1, &vtx_buffer, &zero); + + disp->CmdDraw(_cmd, 6 * count, 1, 0, 0); + + next_rect += count; + } + assert(next_rect == rect_count); +} diff --git a/src/vulkan/runtime/vk_meta_private.h b/src/vulkan/runtime/vk_meta_private.h new file mode 100644 index 00000000000..a8b2a97d91b --- /dev/null +++ b/src/vulkan/runtime/vk_meta_private.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_META_PRIVATE_H +#define VK_META_PRIVATE_H + +#include "vk_image.h" +#include "vk_meta.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const VkPipelineVertexInputStateCreateInfo vk_meta_draw_rects_vi_state; +extern const VkPipelineInputAssemblyStateCreateInfo vk_meta_draw_rects_ia_state; +extern const VkPipelineViewportStateCreateInfo vk_meta_draw_rects_vs_state; + +struct nir_shader * +vk_meta_draw_rects_vs_nir(struct vk_meta_device *device, bool use_gs); + +struct nir_shader * +vk_meta_draw_rects_gs_nir(struct vk_meta_device *device); + +static inline void +vk_meta_rendering_info_copy(struct vk_meta_rendering_info *dst, + const struct vk_meta_rendering_info *src) +{ + dst->view_mask = src->view_mask; + dst->samples = src->samples; + dst->color_attachment_count = src->color_attachment_count; + for (uint32_t a = 0; a < src->color_attachment_count; a++) + dst->color_attachment_formats[a] = src->color_attachment_formats[a]; + dst->depth_attachment_format = src->depth_attachment_format; + dst->stencil_attachment_format = src->stencil_attachment_format; +} + +static inline VkImageViewType +vk_image_sampled_view_type(const struct vk_image *image) +{ + switch (image->image_type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; + default: unreachable("Invalid image type"); + } +} + +static inline VkImageViewType +vk_image_render_view_type(const struct vk_image *image, uint32_t layer_count) +{ + switch (image->image_type) { + case VK_IMAGE_TYPE_1D: + return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_1D : + VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VK_IMAGE_TYPE_2D: + case VK_IMAGE_TYPE_3D: + return layer_count == 1 ? 
VK_IMAGE_VIEW_TYPE_2D : + VK_IMAGE_VIEW_TYPE_2D_ARRAY; + default: + unreachable("Invalid image type"); + } +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_META_PRIVATE_H */ diff --git a/src/vulkan/runtime/vk_nir.c b/src/vulkan/runtime/vk_nir.c new file mode 100644 index 00000000000..c36d38b9634 --- /dev/null +++ b/src/vulkan/runtime/vk_nir.c @@ -0,0 +1,203 @@ +/* + * Copyright © 2015 Intel Corporation + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_nir.h" + +#include "compiler/nir/nir_xfb_info.h" +#include "compiler/spirv/nir_spirv.h" +#include "vk_log.h" +#include "vk_util.h" + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +uint32_t +vk_spirv_version(const uint32_t *spirv_data, size_t spirv_size_B) +{ + assert(spirv_size_B >= 8); + assert(spirv_data[0] == SPIR_V_MAGIC_NUMBER); + return spirv_data[1]; +} + +static void +spirv_nir_debug(void *private_data, + enum nir_spirv_debug_level level, + size_t spirv_offset, + const char *message) +{ + const struct vk_object_base *log_obj = private_data; + + switch (level) { + case NIR_SPIRV_DEBUG_LEVEL_INFO: + //vk_logi(VK_LOG_OBJS(log_obj), "SPIR-V offset %lu: %s", + // (unsigned long) spirv_offset, message); + break; + case NIR_SPIRV_DEBUG_LEVEL_WARNING: + vk_logw(VK_LOG_OBJS(log_obj), "SPIR-V offset %lu: %s", + (unsigned long) spirv_offset, message); + break; + case NIR_SPIRV_DEBUG_LEVEL_ERROR: + vk_loge(VK_LOG_OBJS(log_obj), "SPIR-V offset %lu: %s", + (unsigned long) spirv_offset, message); + break; + default: + break; + } +} + +bool +nir_vk_is_not_xfb_output(nir_variable *var, void *data) +{ + if (var->data.mode != nir_var_shader_out) + return true; + + /* From the Vulkan 1.3.259 spec: + * + * VUID-StandaloneSpirv-Offset-04716 + * + * "Only variables or block members in the output interface decorated + * with Offset can be captured for transform feedback, and those + * variables or block members must also be decorated with XfbBuffer + * and XfbStride, or inherit XfbBuffer and XfbStride decorations from + * a block containing them" + * + * glslang generates gl_PerVertex builtins when they are not declared, + * enabled XFB should not prevent them from being DCE'd. 
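+    * Such builtins are not decorated with Offset, so the checks below treat
+    * them as not captured and allow them to be removed.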
+ * + * The logic should match nir_gather_xfb_info_with_varyings + */ + + if (!var->data.explicit_xfb_buffer) + return true; + + bool is_array_block = var->interface_type != NULL && + glsl_type_is_array(var->type) && + glsl_without_array(var->type) == var->interface_type; + + if (!is_array_block) { + return !var->data.explicit_offset; + } else { + /* For array of blocks we have to check each element */ + unsigned aoa_size = glsl_get_aoa_size(var->type); + const struct glsl_type *itype = var->interface_type; + unsigned nfields = glsl_get_length(itype); + for (unsigned b = 0; b < aoa_size; b++) { + for (unsigned f = 0; f < nfields; f++) { + if (glsl_get_struct_field_offset(itype, f) >= 0) + return false; + } + } + + return true; + } +} + +nir_shader * +vk_spirv_to_nir(struct vk_device *device, + const uint32_t *spirv_data, size_t spirv_size_B, + gl_shader_stage stage, const char *entrypoint_name, + enum gl_subgroup_size subgroup_size, + const VkSpecializationInfo *spec_info, + const struct spirv_to_nir_options *spirv_options, + const struct nir_shader_compiler_options *nir_options, + bool internal, + void *mem_ctx) +{ + assert(spirv_size_B >= 4 && spirv_size_B % 4 == 0); + assert(spirv_data[0] == SPIR_V_MAGIC_NUMBER); + + struct spirv_to_nir_options spirv_options_local = *spirv_options; + spirv_options_local.debug.func = spirv_nir_debug; + spirv_options_local.debug.private_data = (void *)device; + spirv_options_local.subgroup_size = subgroup_size; + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = + vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries); + + nir_shader *nir = spirv_to_nir(spirv_data, spirv_size_B / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, + &spirv_options_local, nir_options); + free(spec_entries); + + if (nir == NULL) + return NULL; + + assert(nir->info.stage == stage); + nir_validate_shader(nir, "after spirv_to_nir"); + nir_validate_ssa_dominance(nir, "after spirv_to_nir"); + if (mem_ctx != NULL) + ralloc_steal(mem_ctx, nir); + + nir->info.internal = internal; + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); + + /* Pick off the single entrypoint that we want */ + nir_remove_non_entrypoints(nir); + + /* Now that we've deleted all but the main function, we can go ahead and + * lower the rest of the constant initializers. We do this here so that + * nir_remove_dead_variables and split_per_member_structs below see the + * corresponding stores. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); + + /* Split member structs. We do this before lower_io_to_temporaries so that + * it doesn't lower system values to temporaries by accident. 
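+    * nir_split_var_copies runs first so that whole-struct copies are already
+    * broken up by the time the per-member pass rewrites the variables.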
+ */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + nir_remove_dead_variables_options dead_vars_opts = { + .can_remove_var = nir_vk_is_not_xfb_output, + }; + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value | + nir_var_shader_call_data | nir_var_ray_hit_attrib, + &dead_vars_opts); + + /* This needs to happen after remove_dead_vars because GLSLang likes to + * insert dead clip/cull vars and we don't want to clip/cull based on + * uninitialized garbage. + */ + NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_GEOMETRY) + NIR_PASS_V(nir, nir_shader_gather_xfb_info); + + NIR_PASS_V(nir, nir_propagate_invariant, false); + + return nir; +} diff --git a/src/vulkan/runtime/vk_nir.h b/src/vulkan/runtime/vk_nir.h new file mode 100644 index 00000000000..48b1ba8915e --- /dev/null +++ b/src/vulkan/runtime/vk_nir.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VK_NIR_H +#define VK_NIR_H + +#include "nir.h" +#include "vulkan/vulkan_core.h" + +struct spirv_to_nir_options; +struct vk_device; + +#ifdef __cplusplus +extern "C" { +#endif + +uint32_t vk_spirv_version(const uint32_t *spirv_data, size_t spirv_size_B); + +bool +nir_vk_is_not_xfb_output(nir_variable *var, void *data); + +nir_shader * +vk_spirv_to_nir(struct vk_device *device, + const uint32_t *spirv_data, size_t spirv_size_B, + gl_shader_stage stage, const char *entrypoint_name, + enum gl_subgroup_size subgroup_size, + const VkSpecializationInfo *spec_info, + const struct spirv_to_nir_options *spirv_options, + const struct nir_shader_compiler_options *nir_options, + bool internal, + void *mem_ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_NIR_H */ diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.c b/src/vulkan/runtime/vk_nir_convert_ycbcr.c new file mode 100644 index 00000000000..8a660954284 --- /dev/null +++ b/src/vulkan/runtime/vk_nir_convert_ycbcr.c @@ -0,0 +1,459 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_nir_convert_ycbcr.h" + +#include "vk_format.h" +#include "vk_ycbcr_conversion.h" + +#include <math.h> + +static nir_def * +y_range(nir_builder *b, + nir_def *y_channel, + int bpc, + VkSamplerYcbcrRange range) +{ + switch (range) { + case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: + return y_channel; + case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW: + return nir_fmul_imm(b, + nir_fadd_imm(b, + nir_fmul_imm(b, y_channel, + pow(2, bpc) - 1), + -16.0f * pow(2, bpc - 8)), + 1.0f / (219.0f * pow(2, bpc - 8))); + + default: + unreachable("missing Ycbcr range"); + return NULL; + } +} + +static nir_def * +chroma_range(nir_builder *b, + nir_def *chroma_channel, + int bpc, + VkSamplerYcbcrRange range) +{ + switch (range) { + case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: + return nir_fadd(b, chroma_channel, + nir_imm_float(b, -pow(2, bpc - 1) / (pow(2, bpc) - 1.0f))); + case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW: + return nir_fmul_imm(b, + nir_fadd_imm(b, + nir_fmul_imm(b, chroma_channel, + pow(2, bpc) - 1), + -128.0f * pow(2, bpc - 8)), + 1.0f / (224.0f * pow(2, bpc - 8))); + default: + unreachable("missing Ycbcr range"); + return NULL; + } +} + +typedef struct nir_const_value_3_4 { + nir_const_value v[3][4]; +} nir_const_value_3_4; + +static const nir_const_value_3_4 * +ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model) +{ + switch (model) { + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: { + static const nir_const_value_3_4 bt601 = { { + { { .f32 = 1.402f }, { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 0.0f } }, + { { .f32 = -0.714136286201022f }, { .f32 = 1.0f }, { .f32 = -0.344136286201022f }, { .f32 = 0.0f } }, + { { .f32 = 0.0f }, { .f32 = 1.0f }, { .f32 = 1.772f }, { .f32 = 0.0f } }, + } }; + + return &bt601; + } + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: { + static const nir_const_value_3_4 bt709 = { { + { { .f32 = 1.5748031496063f }, { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 0.0f } }, + { { .f32 = -0.468125209181067f }, { .f32 = 1.0f }, { .f32 = -0.187327487470334f }, { .f32 = 0.0f } }, + { { .f32 = 0.0f }, { .f32 = 1.0f }, { .f32 = 1.85563184264242f }, { .f32 = 0.0f } }, + } }; + + return &bt709; + } + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: { + static const nir_const_value_3_4 bt2020 = { { + { { .f32 = 1.4746f }, { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 0.0f } }, + { { .f32 = -0.571353126843658f }, { .f32 = 1.0f }, { .f32 = -0.164553126843658f }, { .f32 = 0.0f } }, + { { .f32 = 0.0f }, { .f32 = 1.0f }, { .f32 = 1.8814f }, { .f32 = 0.0f } }, + } }; + + return &bt2020; + } + default: + unreachable("missing Ycbcr model"); + return NULL; + } +} + +nir_def * +nir_convert_ycbcr_to_rgb(nir_builder *b, + VkSamplerYcbcrModelConversion model, + VkSamplerYcbcrRange range, + nir_def *raw_channels, + uint32_t *bpcs) +{ + nir_def *expanded_channels = + nir_vec4(b, + chroma_range(b, nir_channel(b, raw_channels, 0), bpcs[0], range), + y_range(b, nir_channel(b, raw_channels, 1), bpcs[1], range), + chroma_range(b, nir_channel(b, raw_channels, 2), bpcs[2], range), + nir_channel(b, raw_channels, 3)); + + if (model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + return expanded_channels; + + const nir_const_value_3_4 *conversion_matrix = + ycbcr_model_to_rgb_matrix(model); + + nir_def *converted_channels[] = { + nir_fdot(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[0])), + nir_fdot(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[1])), + nir_fdot(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[2])) + }; + + return 
nir_vec4(b, + converted_channels[0], converted_channels[1], + converted_channels[2], nir_channel(b, raw_channels, 3)); +} + +struct ycbcr_state { + nir_builder *builder; + nir_def *image_size; + nir_tex_instr *origin_tex; + nir_deref_instr *tex_deref; + const struct vk_ycbcr_conversion_state *conversion; + const struct vk_format_ycbcr_info *format_ycbcr_info; +}; + +/* TODO: we should probably replace this with a push constant/uniform. */ +static nir_def * +get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture) +{ + if (state->image_size) + return state->image_size; + + nir_builder *b = state->builder; + const struct glsl_type *type = texture->type; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + + tex->op = nir_texop_txs; + tex->sampler_dim = glsl_get_sampler_dim(type); + tex->is_array = glsl_sampler_type_is_array(type); + tex->is_shadow = glsl_sampler_type_is_shadow(type); + tex->dest_type = nir_type_int32; + + tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, + &texture->def); + + nir_def_init(&tex->instr, &tex->def, nir_tex_instr_dest_size(tex), 32); + nir_builder_instr_insert(b, &tex->instr); + + state->image_size = nir_i2f32(b, &tex->def); + + return state->image_size; +} + +static nir_def * +implicit_downsampled_coord(nir_builder *b, + nir_def *value, + nir_def *max_value, + int div_scale) +{ + return nir_fadd(b, + value, + nir_frcp(b, + nir_fmul(b, + nir_imm_float(b, div_scale), + max_value))); +} + +static nir_def * +implicit_downsampled_coords(struct ycbcr_state *state, + nir_def *old_coords, + const struct vk_format_ycbcr_plane *format_plane) +{ + nir_builder *b = state->builder; + const struct vk_ycbcr_conversion_state *conversion = state->conversion; + nir_def *image_size = get_texture_size(state, state->tex_deref); + nir_def *comp[4] = { NULL, }; + int c; + + for (c = 0; c < ARRAY_SIZE(conversion->chroma_offsets); c++) { + if (format_plane->denominator_scales[c] > 1 && + conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) { + comp[c] = implicit_downsampled_coord(b, + nir_channel(b, old_coords, c), + nir_channel(b, image_size, c), + format_plane->denominator_scales[c]); + } else { + comp[c] = nir_channel(b, old_coords, c); + } + } + + /* Leave other coordinates untouched */ + for (; c < old_coords->num_components; c++) + comp[c] = nir_channel(b, old_coords, c); + + return nir_vec(b, comp, old_coords->num_components); +} + +static nir_def * +create_plane_tex_instr_implicit(struct ycbcr_state *state, + uint32_t plane) +{ + nir_builder *b = state->builder; + const struct vk_ycbcr_conversion_state *conversion = state->conversion; + const struct vk_format_ycbcr_plane *format_plane = + &state->format_ycbcr_info->planes[plane]; + nir_tex_instr *old_tex = state->origin_tex; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1); + + for (uint32_t i = 0; i < old_tex->num_srcs; i++) { + tex->src[i].src_type = old_tex->src[i].src_type; + + switch (old_tex->src[i].src_type) { + case nir_tex_src_coord: + if (format_plane->has_chroma && conversion->chroma_reconstruction) { + tex->src[i].src = + nir_src_for_ssa(implicit_downsampled_coords(state, + old_tex->src[i].src.ssa, + format_plane)); + break; + } + FALLTHROUGH; + default: + tex->src[i].src = nir_src_for_ssa(old_tex->src[i].src.ssa); + break; + } + } + tex->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_plane, + nir_imm_int(b, plane)); + tex->sampler_dim = old_tex->sampler_dim; + tex->dest_type = old_tex->dest_type; + + tex->op = old_tex->op; + 
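+   /* The remaining parameters are copied from the original instruction so
+    * the per-plane fetch differs only in the plane index added above.
+    */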
tex->coord_components = old_tex->coord_components; + tex->is_new_style_shadow = old_tex->is_new_style_shadow; + tex->component = old_tex->component; + + tex->texture_index = old_tex->texture_index; + tex->sampler_index = old_tex->sampler_index; + tex->is_array = old_tex->is_array; + + nir_def_init(&tex->instr, &tex->def, old_tex->def.num_components, + old_tex->def.bit_size); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->def; +} + +static unsigned +swizzle_to_component(VkComponentSwizzle swizzle) +{ + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_R: + return 0; + case VK_COMPONENT_SWIZZLE_G: + return 1; + case VK_COMPONENT_SWIZZLE_B: + return 2; + case VK_COMPONENT_SWIZZLE_A: + return 3; + default: + unreachable("invalid channel"); + return 0; + } +} + +struct lower_ycbcr_tex_state { + nir_vk_ycbcr_conversion_lookup_cb cb; + const void *cb_data; +}; + +static bool +lower_ycbcr_tex_instr(nir_builder *b, nir_instr *instr, void *_state) +{ + const struct lower_ycbcr_tex_state *state = _state; + + if (instr->type != nir_instr_type_tex) + return false; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + /* For the following instructions, we don't apply any change and let the + * instruction apply to the first plane. + */ + if (tex->op == nir_texop_txs || + tex->op == nir_texop_query_levels || + tex->op == nir_texop_lod) + return false; + + int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + assert(deref_src_idx >= 0); + nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + + nir_variable *var = nir_deref_instr_get_variable(deref); + uint32_t set = var->data.descriptor_set; + uint32_t binding = var->data.binding; + + assert(tex->texture_index == 0); + unsigned array_index = 0; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + if (!nir_src_is_const(deref->arr.index)) + return false; + array_index = nir_src_as_uint(deref->arr.index); + } + + const struct vk_ycbcr_conversion_state *conversion = + state->cb(state->cb_data, set, binding, array_index); + if (conversion == NULL) + return false; + + const struct vk_format_ycbcr_info *format_ycbcr_info = + vk_format_get_ycbcr_info(conversion->format); + + /* This can happen if the driver hasn't done a good job of filtering on + * sampler creation and lets through a VkYcbcrConversion object which isn't + * actually YCbCr. We're supposed to ignore those. 
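+    * vk_format_get_ycbcr_info() returns NULL for such formats, so we bail
+    * here and leave the original texture instruction untouched.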
+ */ + if (format_ycbcr_info == NULL) + return false; + + b->cursor = nir_before_instr(&tex->instr); + + VkFormat y_format = VK_FORMAT_UNDEFINED; + for (uint32_t p = 0; p < format_ycbcr_info->n_planes; p++) { + if (!format_ycbcr_info->planes[p].has_chroma) + y_format = format_ycbcr_info->planes[p].format; + } + assert(y_format != VK_FORMAT_UNDEFINED); + const struct util_format_description *y_format_desc = + util_format_description(vk_format_to_pipe_format(y_format)); + uint8_t y_bpc = y_format_desc->channel[0].size; + + /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */ + nir_def *zero = nir_imm_float(b, 0.0f); + nir_def *one = nir_imm_float(b, 1.0f); + /* Use extra 2 channels for following swizzle */ + nir_def *ycbcr_comp[5] = { zero, zero, zero, one, zero }; + + uint8_t ycbcr_bpcs[5]; + memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs)); + + /* Go through all the planes and gather the samples into a |ycbcr_comp| + * while applying a swizzle required by the spec: + * + * R, G, B should respectively map to Cr, Y, Cb + */ + for (uint32_t p = 0; p < format_ycbcr_info->n_planes; p++) { + const struct vk_format_ycbcr_plane *format_plane = + &format_ycbcr_info->planes[p]; + + struct ycbcr_state tex_state = { + .builder = b, + .origin_tex = tex, + .tex_deref = deref, + .conversion = conversion, + .format_ycbcr_info = format_ycbcr_info, + }; + nir_def *plane_sample = create_plane_tex_instr_implicit(&tex_state, p); + + for (uint32_t pc = 0; pc < 4; pc++) { + VkComponentSwizzle ycbcr_swizzle = format_plane->ycbcr_swizzle[pc]; + if (ycbcr_swizzle == VK_COMPONENT_SWIZZLE_ZERO) + continue; + + unsigned ycbcr_component = swizzle_to_component(ycbcr_swizzle); + ycbcr_comp[ycbcr_component] = nir_channel(b, plane_sample, pc); + + /* Also compute the number of bits for each component. */ + const struct util_format_description *plane_format_desc = + util_format_description(vk_format_to_pipe_format(format_plane->format)); + ycbcr_bpcs[ycbcr_component] = plane_format_desc->channel[pc].size; + } + } + + /* Now remaps components to the order specified by the conversion. 
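+    * VK_COMPONENT_SWIZZLE_ZERO and _ONE select the constant channels
+    * appended to |ycbcr_comp| above, while IDENTITY keeps a component in
+    * place.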
*/ + nir_def *swizzled_comp[4] = { NULL, }; + uint32_t swizzled_bpcs[4] = { 0, }; + + for (uint32_t i = 0; i < ARRAY_SIZE(conversion->mapping); i++) { + /* Maps to components in |ycbcr_comp| */ + static const uint32_t swizzle_mapping[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = 4, + [VK_COMPONENT_SWIZZLE_ONE] = 3, + [VK_COMPONENT_SWIZZLE_R] = 0, + [VK_COMPONENT_SWIZZLE_G] = 1, + [VK_COMPONENT_SWIZZLE_B] = 2, + [VK_COMPONENT_SWIZZLE_A] = 3, + }; + const VkComponentSwizzle m = conversion->mapping[i]; + + if (m == VK_COMPONENT_SWIZZLE_IDENTITY) { + swizzled_comp[i] = ycbcr_comp[i]; + swizzled_bpcs[i] = ycbcr_bpcs[i]; + } else { + swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]]; + swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]]; + } + } + + nir_def *result = nir_vec(b, swizzled_comp, 4); + if (conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) { + result = nir_convert_ycbcr_to_rgb(b, conversion->ycbcr_model, + conversion->ycbcr_range, + result, + swizzled_bpcs); + } + + nir_def_rewrite_uses(&tex->def, result); + nir_instr_remove(&tex->instr); + + return true; +} + +bool nir_vk_lower_ycbcr_tex(nir_shader *nir, + nir_vk_ycbcr_conversion_lookup_cb cb, + const void *cb_data) +{ + struct lower_ycbcr_tex_state state = { + .cb = cb, + .cb_data = cb_data, + }; + + return nir_shader_instructions_pass(nir, lower_ycbcr_tex_instr, + nir_metadata_block_index | + nir_metadata_dominance, + &state); +} diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.h b/src/vulkan/runtime/vk_nir_convert_ycbcr.h new file mode 100644 index 00000000000..b17a8cb83c7 --- /dev/null +++ b/src/vulkan/runtime/vk_nir_convert_ycbcr.h @@ -0,0 +1,56 @@ +/* + * Copyright © 2020 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VK_NIR_CONVERT_YCBCR_H +#define VK_NIR_CONVERT_YCBCR_H + +#include "nir.h" +#include "nir_builder.h" +#include "vulkan/vulkan_core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +nir_def * +nir_convert_ycbcr_to_rgb(nir_builder *b, + VkSamplerYcbcrModelConversion model, + VkSamplerYcbcrRange range, + nir_def *raw_channels, + uint32_t *bpcs); + +struct vk_ycbcr_conversion; + +typedef const struct vk_ycbcr_conversion_state * + (*nir_vk_ycbcr_conversion_lookup_cb)(const void *data, uint32_t set, + uint32_t binding, uint32_t array_index); + +bool nir_vk_lower_ycbcr_tex(nir_shader *nir, + nir_vk_ycbcr_conversion_lookup_cb cb, + const void *cb_data); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* VK_NIR_CONVERT_YCBCR_H */ diff --git a/src/vulkan/runtime/vk_object.c b/src/vulkan/runtime/vk_object.c new file mode 100644 index 00000000000..7015342924e --- /dev/null +++ b/src/vulkan/runtime/vk_object.c @@ -0,0 +1,363 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_object.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_instance.h" +#include "vk_device.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "vk_enum_to_str.h" + +void +vk_object_base_init(struct vk_device *device, + struct vk_object_base *base, + VkObjectType obj_type) +{ + base->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + base->type = obj_type; + base->client_visible = false; + base->device = device; + base->instance = NULL; + base->object_name = NULL; + util_sparse_array_init(&base->private_data, sizeof(uint64_t), 8); +} + +void vk_object_base_instance_init(struct vk_instance *instance, + struct vk_object_base *base, + VkObjectType obj_type) +{ + base->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + base->type = obj_type; + base->client_visible = false; + base->device = NULL; + base->instance = instance; + base->object_name = NULL; + util_sparse_array_init(&base->private_data, sizeof(uint64_t), 8); +} + +void +vk_object_base_finish(struct vk_object_base *base) +{ + util_sparse_array_finish(&base->private_data); + + if (base->object_name == NULL) + return; + + assert(base->device != NULL || base->instance != NULL); + if (base->device) + vk_free(&base->device->alloc, base->object_name); + else + vk_free(&base->instance->alloc, base->object_name); +} + +void +vk_object_base_recycle(struct vk_object_base *base) +{ + struct vk_device *device = base->device; + VkObjectType obj_type = base->type; + vk_object_base_finish(base); + vk_object_base_init(device, base, obj_type); +} + +void * +vk_object_alloc(struct vk_device *device, + const VkAllocationCallbacks *alloc, + size_t size, + VkObjectType obj_type) +{ + void *ptr = vk_alloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (ptr == NULL) + return NULL; + + vk_object_base_init(device, (struct vk_object_base *)ptr, obj_type); + + return ptr; +} + +void * +vk_object_zalloc(struct vk_device *device, + const VkAllocationCallbacks *alloc, + size_t size, + VkObjectType obj_type) +{ + void *ptr = vk_zalloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (ptr == NULL) + return NULL; + + vk_object_base_init(device, (struct vk_object_base *)ptr, obj_type); + + return ptr; +} + +void * +vk_object_multialloc(struct vk_device *device, + struct vk_multialloc *ma, + const VkAllocationCallbacks *alloc, + VkObjectType obj_type) +{ + void *ptr = vk_multialloc_alloc2(ma, &device->alloc, alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (ptr == NULL) + return NULL; + + vk_object_base_init(device, (struct vk_object_base *)ptr, obj_type); + + return ptr; +} + +void * +vk_object_multizalloc(struct vk_device *device, + struct vk_multialloc *ma, + const VkAllocationCallbacks *alloc, + VkObjectType obj_type) +{ + void *ptr = vk_multialloc_zalloc2(ma, &device->alloc, alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (ptr == NULL) + return NULL; + + vk_object_base_init(device, (struct vk_object_base *)ptr, obj_type); + + return ptr; +} + +void +vk_object_free(struct vk_device *device, + const VkAllocationCallbacks *alloc, + void *data) +{ + vk_object_base_finish((struct vk_object_base *)data); + vk_free2(&device->alloc, alloc, data); +} + +VkResult +vk_private_data_slot_create(struct vk_device *device, + const VkPrivateDataSlotCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlot* pPrivateDataSlot) +{ + struct vk_private_data_slot *slot = + vk_alloc2(&device->alloc, pAllocator, sizeof(*slot), 8, + 
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (slot == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + vk_object_base_init(device, &slot->base, + VK_OBJECT_TYPE_PRIVATE_DATA_SLOT); + slot->index = p_atomic_inc_return(&device->private_data_next_index); + + *pPrivateDataSlot = vk_private_data_slot_to_handle(slot); + + return VK_SUCCESS; +} + +void +vk_private_data_slot_destroy(struct vk_device *device, + VkPrivateDataSlot privateDataSlot, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_private_data_slot, slot, privateDataSlot); + if (slot == NULL) + return; + + vk_object_base_finish(&slot->base); + vk_free2(&device->alloc, pAllocator, slot); +} + +static VkResult +get_swapchain_private_data_locked(struct vk_device *device, + uint64_t objectHandle, + struct vk_private_data_slot *slot, + uint64_t **private_data) +{ + if (unlikely(device->swapchain_private == NULL)) { + /* Even though VkSwapchain/Surface are non-dispatchable objects, we know + * a priori that these are actually pointers so we can use + * the pointer hash table for them. + */ + device->swapchain_private = _mesa_pointer_hash_table_create(NULL); + if (device->swapchain_private == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + struct hash_entry *entry = + _mesa_hash_table_search(device->swapchain_private, + (void *)(uintptr_t)objectHandle); + if (unlikely(entry == NULL)) { + struct util_sparse_array *swapchain_private = + ralloc(device->swapchain_private, struct util_sparse_array); + util_sparse_array_init(swapchain_private, sizeof(uint64_t), 8); + + entry = _mesa_hash_table_insert(device->swapchain_private, + (void *)(uintptr_t)objectHandle, + swapchain_private); + if (entry == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + struct util_sparse_array *swapchain_private = entry->data; + *private_data = util_sparse_array_get(swapchain_private, slot->index); + + return VK_SUCCESS; +} + +static VkResult +vk_object_base_private_data(struct vk_device *device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t **private_data) +{ + VK_FROM_HANDLE(vk_private_data_slot, slot, privateDataSlot); + + /* There is an annoying spec corner here on Android. Because WSI is + * implemented in the Vulkan loader which doesn't know about the + * VK_EXT_private_data extension, we have to handle VkSwapchainKHR in the + * driver as a special case. On future versions of Android where the + * loader does understand VK_EXT_private_data, we'll never see a + * vkGet/SetPrivateData call on a swapchain because the loader will + * handle it. 
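+    * Surfaces take the same hash-table path on every platform because they
+    * do not embed a vk_object_base we could stash the data in.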
+ */ +#if DETECT_OS_ANDROID + if (objectType == VK_OBJECT_TYPE_SWAPCHAIN_KHR || + objectType == VK_OBJECT_TYPE_SURFACE_KHR) { +#else + if (objectType == VK_OBJECT_TYPE_SURFACE_KHR) { +#endif + mtx_lock(&device->swapchain_private_mtx); + VkResult result = get_swapchain_private_data_locked(device, objectHandle, + slot, private_data); + mtx_unlock(&device->swapchain_private_mtx); + return result; + } + + struct vk_object_base *obj = + vk_object_base_from_u64_handle(objectHandle, objectType); + *private_data = util_sparse_array_get(&obj->private_data, slot->index); + + return VK_SUCCESS; +} + +VkResult +vk_object_base_set_private_data(struct vk_device *device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t data) +{ + uint64_t *private_data; + VkResult result = vk_object_base_private_data(device, + objectType, objectHandle, + privateDataSlot, + &private_data); + if (unlikely(result != VK_SUCCESS)) + return result; + + *private_data = data; + return VK_SUCCESS; +} + +void +vk_object_base_get_private_data(struct vk_device *device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t *pData) +{ + uint64_t *private_data; + VkResult result = vk_object_base_private_data(device, + objectType, objectHandle, + privateDataSlot, + &private_data); + if (likely(result == VK_SUCCESS)) { + *pData = *private_data; + } else { + *pData = 0; + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreatePrivateDataSlot(VkDevice _device, + const VkPrivateDataSlotCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPrivateDataSlot *pPrivateDataSlot) +{ + VK_FROM_HANDLE(vk_device, device, _device); + return vk_private_data_slot_create(device, pCreateInfo, pAllocator, + pPrivateDataSlot); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyPrivateDataSlot(VkDevice _device, + VkPrivateDataSlot privateDataSlot, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + vk_private_data_slot_destroy(device, privateDataSlot, pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_SetPrivateData(VkDevice _device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t data) +{ + VK_FROM_HANDLE(vk_device, device, _device); + return vk_object_base_set_private_data(device, + objectType, objectHandle, + privateDataSlot, data); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPrivateData(VkDevice _device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t *pData) +{ + VK_FROM_HANDLE(vk_device, device, _device); + vk_object_base_get_private_data(device, + objectType, objectHandle, + privateDataSlot, pData); +} + +const char * +vk_object_base_name(struct vk_object_base *obj) +{ + if (obj->object_name) + return obj->object_name; + + obj->object_name = vk_asprintf(&obj->device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + "%s(0x%"PRIx64")", + vk_ObjectType_to_ObjectName(obj->type), + (uint64_t)(uintptr_t)obj); + + return obj->object_name; +} diff --git a/src/vulkan/runtime/vk_object.h b/src/vulkan/runtime/vk_object.h new file mode 100644 index 00000000000..c94c7050215 --- /dev/null +++ b/src/vulkan/runtime/vk_object.h @@ -0,0 +1,294 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef VK_OBJECT_H
+#define VK_OBJECT_H
+
+#include <vulkan/vulkan_core.h>
+#include <vulkan/vk_icd.h>
+
+#include "c11/threads.h"
+#include "util/detect_os.h"
+#include "util/macros.h"
+#include "util/sparse_array.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hash_table;
+
+struct vk_device;
+
+/** Base struct for all Vulkan objects */
+struct vk_object_base {
+   VK_LOADER_DATA _loader_data;
+
+   /** Type of this object
+    *
+    * This is used for runtime type checking when casting to and from Vulkan
+    * handle types since compile-time type checking doesn't always work.
+    */
+   VkObjectType type;
+
+   /* True if this object is fully constructed and visible to the client */
+   bool client_visible;
+
+   /** Pointer to the device in which this object exists, if any
+    *
+    * This is NULL for instances and physical devices but should point to a
+    * valid vk_device for almost everything else. (There are a few WSI
+    * objects that don't inherit from a device.)
+    */
+   struct vk_device *device;
+
+   /** Pointer to the instance in which this object exists
+    *
+    * This is NULL for device level objects as its main purpose is to make
+    * the instance allocator reachable for freeing data owned by instance
+    * level objects.
+    */
+   struct vk_instance *instance;
+
+   /* For VK_EXT_private_data */
+   struct util_sparse_array private_data;
+
+   /* VK_EXT_debug_utils */
+   char *object_name;
+};
+
+/** Initialize a vk_object_base
+ *
+ * :param device: |in| The vk_device this object was created from or NULL
+ * :param base: |out| The vk_object_base to initialize
+ * :param obj_type: |in| The VkObjectType of the object being initialized
+ */
+void vk_object_base_init(struct vk_device *device,
+                         struct vk_object_base *base,
+                         VkObjectType obj_type);
+
+/** Initialize a vk_object_base for an instance level object
+ *
+ * :param instance: |in| The vk_instance this object was created from
+ * :param base: |out| The vk_object_base to initialize
+ * :param obj_type: |in| The VkObjectType of the object being initialized
+ */
+void vk_object_base_instance_init(struct vk_instance *instance,
+                                  struct vk_object_base *base,
+                                  VkObjectType obj_type);
+
+/** Tear down a vk_object_base
+ *
+ * :param base: |out| The vk_object_base being torn down
+ */
+void vk_object_base_finish(struct vk_object_base *base);
+
+/** Recycles a vk_object_base
+ *
+ * This should be called when an object is recycled and handed back to the
+ * client as if it were a new object.
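+ * A typical example is a command buffer that is returned to its command
+ * pool's free list and handed back out again by a later allocation.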
When it's called is not important as + * long as it's called between when the client thinks the object was destroyed + * and when the client sees it again as a supposedly new object. + * + * :param base: |inout| The vk_object_base being recycled + */ +void vk_object_base_recycle(struct vk_object_base *base); + +static inline void +vk_object_base_assert_valid(ASSERTED struct vk_object_base *base, + ASSERTED VkObjectType obj_type) +{ + assert(base == NULL || base->type == obj_type); +} + +static inline struct vk_object_base * +vk_object_base_from_u64_handle(uint64_t handle, VkObjectType obj_type) +{ + struct vk_object_base *base = (struct vk_object_base *)(uintptr_t)handle; + vk_object_base_assert_valid(base, obj_type); + return base; +} + +/** Define handle cast macros for the given dispatchable handle type + * + * For a given `driver_struct`, this defines `driver_struct_to_handle()` and + * `driver_struct_from_handle()` helpers which provide type-safe (as much as + * possible with Vulkan handle types) casts to and from the `driver_struct` + * type. As an added layer of protection, these casts use the provided + * `VkObjectType` to assert that the object is of the correct type when + * running with a debug build. + * + * :param __driver_type: The name of the driver struct; it is assumed this is + * the name of a struct type and ``struct`` will be + * prepended automatically + * + * :param __base: The name of the vk_base_object member + * + * :param __VkType: The Vulkan object type such as VkImage + * + * :param __VK_TYPE: The VkObjectType corresponding to __VkType, such as + * VK_OBJECT_TYPE_IMAGE + */ +#define VK_DEFINE_HANDLE_CASTS(__driver_type, __base, __VkType, __VK_TYPE) \ + static inline struct __driver_type * \ + __driver_type ## _from_handle(__VkType _handle) \ + { \ + struct vk_object_base *base = (struct vk_object_base *)_handle; \ + vk_object_base_assert_valid(base, __VK_TYPE); \ + STATIC_ASSERT(offsetof(struct __driver_type, __base) == 0); \ + return (struct __driver_type *) base; \ + } \ + \ + static inline __VkType \ + __driver_type ## _to_handle(struct __driver_type *_obj) \ + { \ + vk_object_base_assert_valid(&_obj->__base, __VK_TYPE); \ + if (_obj != NULL) \ + _obj->__base.client_visible = true; \ + return (__VkType) _obj; \ + } + +/** Define handle cast macros for the given non-dispatchable handle type + * + * For a given `driver_struct`, this defines `driver_struct_to_handle()` and + * `driver_struct_from_handle()` helpers which provide type-safe (as much as + * possible with Vulkan handle types) casts to and from the `driver_struct` + * type. As an added layer of protection, these casts use the provided + * `VkObjectType` to assert that the object is of the correct type when + * running with a debug build. 
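+ *
+ * As a purely illustrative sketch (``drv_sampler`` is a hypothetical driver
+ * struct whose first member is a vk_object_base named ``base``), a driver
+ * would instantiate the casts as:
+ *
+ *    VK_DEFINE_NONDISP_HANDLE_CASTS(drv_sampler, base, VkSampler,
+ *                                   VK_OBJECT_TYPE_SAMPLER);
+ *
+ * after which entrypoints can use ``VK_FROM_HANDLE(drv_sampler, sampler,
+ * _sampler)`` to recover the driver pointer from the client handle and
+ * ``drv_sampler_to_handle(sampler)`` to go the other way.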
+ * + * :param __driver_type: The name of the driver struct; it is assumed this is + * the name of a struct type and ``struct`` will be + * prepended automatically + * + * :param __base: The name of the vk_base_object member + * + * :param __VkType: The Vulkan object type such as VkImage + * + * :param __VK_TYPE: The VkObjectType corresponding to __VkType, such as + * VK_OBJECT_TYPE_IMAGE + */ +#define VK_DEFINE_NONDISP_HANDLE_CASTS(__driver_type, __base, __VkType, __VK_TYPE) \ + UNUSED static inline struct __driver_type * \ + __driver_type ## _from_handle(__VkType _handle) \ + { \ + struct vk_object_base *base = \ + (struct vk_object_base *)(uintptr_t)_handle; \ + vk_object_base_assert_valid(base, __VK_TYPE); \ + STATIC_ASSERT(offsetof(struct __driver_type, __base) == 0); \ + return (struct __driver_type *)base; \ + } \ + \ + UNUSED static inline __VkType \ + __driver_type ## _to_handle(struct __driver_type *_obj) \ + { \ + vk_object_base_assert_valid(&_obj->__base, __VK_TYPE); \ + if (_obj != NULL) \ + _obj->__base.client_visible = true; \ + return (__VkType)(uintptr_t) _obj; \ + } + +/** Declares a __driver_type pointer which represents __handle + * + * :param __driver_type: The name of the driver struct; it is assumed this is + * the name of a struct type and ``struct`` will be + * prepended automatically + * + * :param __name: The name of the declared pointer + * + * :param __handle: The Vulkan object handle with which to initialize + * `__name` + */ +#define VK_FROM_HANDLE(__driver_type, __name, __handle) \ + struct __driver_type *__name = __driver_type ## _from_handle(__handle) + +/* Helpers for vk object (de)allocation and (de)initialization */ +void * +vk_object_alloc(struct vk_device *device, + const VkAllocationCallbacks *alloc, + size_t size, + VkObjectType vk_obj_type); + +void * +vk_object_zalloc(struct vk_device *device, + const VkAllocationCallbacks *alloc, + size_t size, + VkObjectType vk_obj_type); + +struct vk_multialloc; + +void * +vk_object_multialloc(struct vk_device *device, + struct vk_multialloc *ma, + const VkAllocationCallbacks *alloc, + VkObjectType vk_obj_type); + +void * +vk_object_multizalloc(struct vk_device *device, + struct vk_multialloc *ma, + const VkAllocationCallbacks *alloc, + VkObjectType vk_obj_type); + +void +vk_object_free(struct vk_device *device, + const VkAllocationCallbacks *alloc, + void *data); + + +struct vk_private_data_slot { + struct vk_object_base base; + uint32_t index; +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_private_data_slot, base, + VkPrivateDataSlot, + VK_OBJECT_TYPE_PRIVATE_DATA_SLOT); + +VkResult +vk_private_data_slot_create(struct vk_device *device, + const VkPrivateDataSlotCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlot* pPrivateDataSlot); +void +vk_private_data_slot_destroy(struct vk_device *device, + VkPrivateDataSlot privateDataSlot, + const VkAllocationCallbacks *pAllocator); +VkResult +vk_object_base_set_private_data(struct vk_device *device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t data); +void +vk_object_base_get_private_data(struct vk_device *device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlot privateDataSlot, + uint64_t *pData); + +const char * +vk_object_base_name(struct vk_object_base *obj); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_OBJECT_H */ diff --git a/src/vulkan/runtime/vk_physical_device.c b/src/vulkan/runtime/vk_physical_device.c new file mode 100644 index 
00000000000..c524ee313da --- /dev/null +++ b/src/vulkan/runtime/vk_physical_device.c @@ -0,0 +1,293 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_physical_device.h" + +#include "vk_common_entrypoints.h" +#include "vk_util.h" + +VkResult +vk_physical_device_init(struct vk_physical_device *pdevice, + struct vk_instance *instance, + const struct vk_device_extension_table *supported_extensions, + const struct vk_features *supported_features, + const struct vk_properties *properties, + const struct vk_physical_device_dispatch_table *dispatch_table) +{ + memset(pdevice, 0, sizeof(*pdevice)); + vk_object_base_instance_init(instance, &pdevice->base, VK_OBJECT_TYPE_PHYSICAL_DEVICE); + pdevice->instance = instance; + + if (supported_extensions != NULL) + pdevice->supported_extensions = *supported_extensions; + + if (supported_features != NULL) + pdevice->supported_features = *supported_features; + + if (properties != NULL) + pdevice->properties = *properties; + + pdevice->dispatch_table = *dispatch_table; + + /* Add common entrypoints without overwriting driver-provided ones. 
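+ * (The 'false' overwrite argument below means any entrypoint the driver has
+ * already installed in its dispatch table is left untouched.)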
*/ + vk_physical_device_dispatch_table_from_entrypoints( + &pdevice->dispatch_table, &vk_common_physical_device_entrypoints, false); + + /* TODO */ + pdevice->disk_cache = NULL; + + return VK_SUCCESS; +} + +void +vk_physical_device_finish(struct vk_physical_device *physical_device) +{ + vk_object_base_finish(&physical_device->base); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return VK_ERROR_LAYER_NOT_PRESENT; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, + const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties, pPropertyCount); + + for (int i = 0; i < VK_DEVICE_EXTENSION_COUNT; i++) { + if (!pdevice->supported_extensions.extensions[i]) + continue; + +#ifdef ANDROID_STRICT + if (!vk_android_allowed_device_extensions.extensions[i]) + continue; +#endif + + vk_outarray_append_typed(VkExtensionProperties, &out, prop) { + *prop = vk_device_extensions[i]; + } + } + + return vk_outarray_status(&out); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures *pFeatures) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + /* Don't zero-init this struct since the driver fills it out entirely */ + VkPhysicalDeviceFeatures2 features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + features2.pNext = NULL; + + pdevice->dispatch_table.GetPhysicalDeviceFeatures2(physicalDevice, + &features2); + *pFeatures = features2.features; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties *pProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + /* Don't zero-init this struct since the driver fills it out entirely */ + VkPhysicalDeviceProperties2 props2; + props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + props2.pNext = NULL; + + pdevice->dispatch_table.GetPhysicalDeviceProperties2(physicalDevice, + &props2); + *pProperties = props2.properties; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties *pQueueFamilyProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + if (!pQueueFamilyProperties) { + pdevice->dispatch_table.GetPhysicalDeviceQueueFamilyProperties2(physicalDevice, + pQueueFamilyPropertyCount, + NULL); + return; + } + + STACK_ARRAY(VkQueueFamilyProperties2, props2, *pQueueFamilyPropertyCount); + + for (unsigned i = 0; i < *pQueueFamilyPropertyCount; ++i) { + props2[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2; + props2[i].pNext = NULL; + } + + pdevice->dispatch_table.GetPhysicalDeviceQueueFamilyProperties2(physicalDevice, + pQueueFamilyPropertyCount, + props2); + + for (unsigned i = 0; i < *pQueueFamilyPropertyCount; ++i) + pQueueFamilyProperties[i] = props2[i].queueFamilyProperties; + + STACK_ARRAY_FINISH(props2); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, + 
VkPhysicalDeviceMemoryProperties *pMemoryProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + /* Don't zero-init this struct since the driver fills it out entirely */ + VkPhysicalDeviceMemoryProperties2 props2; + props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; + props2.pNext = NULL; + + pdevice->dispatch_table.GetPhysicalDeviceMemoryProperties2(physicalDevice, + &props2); + *pMemoryProperties = props2.memoryProperties; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties *pFormatProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + /* Don't zero-init this struct since the driver fills it out entirely */ + VkFormatProperties2 props2; + props2.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2; + props2.pNext = NULL; + + pdevice->dispatch_table.GetPhysicalDeviceFormatProperties2(physicalDevice, + format, &props2); + *pFormatProperties = props2.formatProperties; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags flags, + VkImageFormatProperties *pImageFormatProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = type, + .tiling = tiling, + .usage = usage, + .flags = flags + }; + + /* Don't zero-init this struct since the driver fills it out entirely */ + VkImageFormatProperties2 props2; + props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2; + props2.pNext = NULL; + + VkResult result = + pdevice->dispatch_table.GetPhysicalDeviceImageFormatProperties2(physicalDevice, + &info, &props2); + *pImageFormatProperties = props2.imageFormatProperties; + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkSampleCountFlagBits samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pNumProperties, + VkSparseImageFormatProperties *pProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + VkPhysicalDeviceSparseImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = type, + .samples = samples, + .usage = usage, + .tiling = tiling + }; + + if (!pProperties) { + pdevice->dispatch_table.GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, + &info, + pNumProperties, + NULL); + return; + } + + STACK_ARRAY(VkSparseImageFormatProperties2, props2, *pNumProperties); + + for (unsigned i = 0; i < *pNumProperties; ++i) { + props2[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2; + props2[i].pNext = NULL; + } + + pdevice->dispatch_table.GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, + &info, + pNumProperties, + props2); + + for (unsigned i = 0; i < *pNumProperties; ++i) + pProperties[i] = props2[i].properties; + + STACK_ARRAY_FINISH(props2); +} + +/* VK_EXT_tooling_info */ +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, + uint32_t *pToolCount, + VkPhysicalDeviceToolProperties *pToolProperties) +{ + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceToolProperties, out, pToolProperties, 
pToolCount); + + return vk_outarray_status(&out); +} diff --git a/src/vulkan/runtime/vk_physical_device.h b/src/vulkan/runtime/vk_physical_device.h new file mode 100644 index 00000000000..e7da1ec34da --- /dev/null +++ b/src/vulkan/runtime/vk_physical_device.h @@ -0,0 +1,152 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_PHYSICAL_DEVICE_H +#define VK_PHYSICAL_DEVICE_H + +#include "vk_dispatch_table.h" +#include "vk_extensions.h" +#include "vk_object.h" +#include "vk_physical_device_features.h" +#include "vk_physical_device_properties.h" + +#include "util/list.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct disk_cache; +struct wsi_device; +struct vk_sync_type; +struct vk_pipeline_cache_object_ops; + +/** Base struct for all VkPhysicalDevice implementations + */ +struct vk_physical_device { + struct vk_object_base base; + + /* See vk_instance::pdevices::list */ + struct list_head link; + + /** Instance which is the parent of this physical device */ + struct vk_instance *instance; + + /** Table of all supported device extensions + * + * This table is initialized from the `supported_extensions` parameter + * passed to `vk_physical_device_init()` if not `NULL`. If a `NULL` + * extension table is passed, all extensions are initialized to false and + * it's the responsibility of the driver to populate the table. This may + * be useful if the driver's physical device initialization order is such + * that extension support cannot be determined until significant physical + * device setup work has already been done. + */ + struct vk_device_extension_table supported_extensions; + + /** Table of all supported features + * + * This table is initialized from the `supported_features` parameter + * passed to `vk_physical_device_init()` if not `NULL`. If a `NULL` + * features table is passed, all features are initialized to false and + * it's the responsibility of the driver to populate the table. This may + * be useful if the driver's physical device initialization order is such + * that feature support cannot be determined until significant physical + * device setup work has already been done. 
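+ *
+ * Note that vk_physical_device_init() copies this table by value, so a
+ * driver may also fill it in (or adjust it) after initialization if needed.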
+ */ + struct vk_features supported_features; + + /** Table of all physical device properties which is initialized similarly + * to supported_features + */ + struct vk_properties properties; + + /** Physical-device-level dispatch table */ + struct vk_physical_device_dispatch_table dispatch_table; + + /** Disk cache, or NULL */ + struct disk_cache *disk_cache; + + /** WSI device, or NULL */ + struct wsi_device *wsi_device; + + /** A null-terminated array of supported sync types, in priority order + * + * The common implementations of VkFence and VkSemaphore use this list to + * determine what vk_sync_type to use for each scenario. The list is + * walked and the first vk_sync_type matching their criterion is taken. + * For instance, VkFence requires that it not be a timeline and support + * reset and CPU wait. If an external handle type is requested, that is + * considered just one more criterion. + */ + const struct vk_sync_type *const *supported_sync_types; + + /** A null-terminated array of supported pipeline cache object types + * + * The common implementation of VkPipelineCache uses this to remember the + * type of objects stored in the cache and deserialize them immediately + * when importing the cache. If an object type isn't in this list, then it + * will be loaded as a raw data object and then deserialized when we first + * look it up. Deserializing immediately avoids a copy but may be more + * expensive for objects that aren't hit. + */ + const struct vk_pipeline_cache_object_ops *const *pipeline_cache_import_ops; +}; + +VK_DEFINE_HANDLE_CASTS(vk_physical_device, base, VkPhysicalDevice, + VK_OBJECT_TYPE_PHYSICAL_DEVICE); + +/** Initialize a vk_physical_device + * + * :param physical_device: |out| The physical device to initialize + * :param instance: |in| The instance which is the parent of this + * physical device + * :param supported_extensions: |in| Table of all device extensions supported + * by this physical device + * :param supported_features: |in| Table of all features supported by this + * physical device + * :param dispatch_table: |in| Physical-device-level dispatch table + */ +VkResult MUST_CHECK +vk_physical_device_init(struct vk_physical_device *physical_device, + struct vk_instance *instance, + const struct vk_device_extension_table *supported_extensions, + const struct vk_features *supported_features, + const struct vk_properties *properties, + const struct vk_physical_device_dispatch_table *dispatch_table); + +/** Tears down a vk_physical_device + * + * :param physical_device: |out| The physical device to tear down + */ +void +vk_physical_device_finish(struct vk_physical_device *physical_device); + +VkResult +vk_physical_device_check_device_features(struct vk_physical_device *physical_device, + const VkDeviceCreateInfo *pCreateInfo); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_PHYSICAL_DEVICE_H */ diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c new file mode 100644 index 00000000000..a22f682ecb9 --- /dev/null +++ b/src/vulkan/runtime/vk_pipeline.c @@ -0,0 +1,2186 @@ +/* + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_pipeline.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_command_buffer.h" +#include "vk_descriptor_set_layout.h" +#include "vk_device.h" +#include "vk_graphics_state.h" +#include "vk_log.h" +#include "vk_nir.h" +#include "vk_physical_device.h" +#include "vk_pipeline_layout.h" +#include "vk_shader.h" +#include "vk_shader_module.h" +#include "vk_util.h" + +#include "nir_serialize.h" + +#include "util/mesa-sha1.h" + +bool +vk_pipeline_shader_stage_is_null(const VkPipelineShaderStageCreateInfo *info) +{ + if (info->module != VK_NULL_HANDLE) + return false; + + vk_foreach_struct_const(ext, info->pNext) { + if (ext->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO || + ext->sType == VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT) + return false; + } + + return true; +} + +static nir_shader * +get_builtin_nir(const VkPipelineShaderStageCreateInfo *info) +{ + VK_FROM_HANDLE(vk_shader_module, module, info->module); + + nir_shader *nir = NULL; + if (module != NULL) { + nir = module->nir; + } else { + const VkPipelineShaderStageNirCreateInfoMESA *nir_info = + vk_find_struct_const(info->pNext, PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA); + if (nir_info != NULL) + nir = nir_info->nir; + } + + if (nir == NULL) + return NULL; + + assert(nir->info.stage == vk_to_mesa_shader_stage(info->stage)); + ASSERTED nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); + assert(strcmp(entrypoint->function->name, info->pName) == 0); + assert(info->pSpecializationInfo == NULL); + + return nir; +} + +static uint32_t +get_required_subgroup_size(const void *info_pNext) +{ + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *rss_info = + vk_find_struct_const(info_pNext, + PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO); + return rss_info != NULL ? 
rss_info->requiredSubgroupSize : 0; +} + +enum gl_subgroup_size +vk_get_subgroup_size(uint32_t spirv_version, + gl_shader_stage stage, + const void *info_pNext, + bool allow_varying, + bool require_full) +{ + uint32_t req_subgroup_size = get_required_subgroup_size(info_pNext); + if (req_subgroup_size > 0) { + assert(util_is_power_of_two_nonzero(req_subgroup_size)); + assert(req_subgroup_size >= 8 && req_subgroup_size <= 128); + return req_subgroup_size; + } else if (allow_varying || spirv_version >= 0x10600) { + /* Starting with SPIR-V 1.6, varying subgroup size the default */ + return SUBGROUP_SIZE_VARYING; + } else if (require_full) { + assert(stage == MESA_SHADER_COMPUTE); + return SUBGROUP_SIZE_FULL_SUBGROUPS; + } else { + return SUBGROUP_SIZE_API_CONSTANT; + } +} + +VkResult +vk_pipeline_shader_stage_to_nir(struct vk_device *device, + const VkPipelineShaderStageCreateInfo *info, + const struct spirv_to_nir_options *spirv_options, + const struct nir_shader_compiler_options *nir_options, + void *mem_ctx, nir_shader **nir_out) +{ + VK_FROM_HANDLE(vk_shader_module, module, info->module); + const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage); + + assert(info->sType == VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO); + + nir_shader *builtin_nir = get_builtin_nir(info); + if (builtin_nir != NULL) { + nir_validate_shader(builtin_nir, "internal shader"); + + nir_shader *clone = nir_shader_clone(mem_ctx, builtin_nir); + if (clone == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + assert(clone->options == NULL || clone->options == nir_options); + clone->options = nir_options; + + *nir_out = clone; + return VK_SUCCESS; + } + + const uint32_t *spirv_data; + uint32_t spirv_size; + if (module != NULL) { + spirv_data = (uint32_t *)module->data; + spirv_size = module->size; + } else { + const VkShaderModuleCreateInfo *minfo = + vk_find_struct_const(info->pNext, SHADER_MODULE_CREATE_INFO); + if (unlikely(minfo == NULL)) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "No shader module provided"); + } + spirv_data = minfo->pCode; + spirv_size = minfo->codeSize; + } + + enum gl_subgroup_size subgroup_size = vk_get_subgroup_size( + vk_spirv_version(spirv_data, spirv_size), + stage, info->pNext, + info->flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT, + info->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT); + + nir_shader *nir = vk_spirv_to_nir(device, spirv_data, spirv_size, stage, + info->pName, subgroup_size, + info->pSpecializationInfo, + spirv_options, nir_options, + false /* internal */, + mem_ctx); + if (nir == NULL) + return vk_errorf(device, VK_ERROR_UNKNOWN, "spirv_to_nir failed"); + + *nir_out = nir; + + return VK_SUCCESS; +} + +void +vk_pipeline_hash_shader_stage(const VkPipelineShaderStageCreateInfo *info, + const struct vk_pipeline_robustness_state *rstate, + unsigned char *stage_sha1) +{ + VK_FROM_HANDLE(vk_shader_module, module, info->module); + + const nir_shader *builtin_nir = get_builtin_nir(info); + if (builtin_nir != NULL) { + /* Internal NIR module: serialize and hash the NIR shader. + * We don't need to hash other info fields since they should match the + * NIR data. 
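+ * (get_builtin_nir() already asserts that pName matches the NIR entrypoint
+ * and that no specialization info is provided, so the serialized NIR alone
+ * is a sufficient key.)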
+ */ + struct blob blob; + + blob_init(&blob); + nir_serialize(&blob, builtin_nir, false); + assert(!blob.out_of_memory); + _mesa_sha1_compute(blob.data, blob.size, stage_sha1); + blob_finish(&blob); + return; + } + + const VkShaderModuleCreateInfo *minfo = + vk_find_struct_const(info->pNext, SHADER_MODULE_CREATE_INFO); + const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo = + vk_find_struct_const(info->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT); + + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + + _mesa_sha1_update(&ctx, &info->flags, sizeof(info->flags)); + + assert(util_bitcount(info->stage) == 1); + _mesa_sha1_update(&ctx, &info->stage, sizeof(info->stage)); + + if (module) { + _mesa_sha1_update(&ctx, module->hash, sizeof(module->hash)); + } else if (minfo) { + blake3_hash spirv_hash; + + _mesa_blake3_compute(minfo->pCode, minfo->codeSize, spirv_hash); + _mesa_sha1_update(&ctx, spirv_hash, sizeof(spirv_hash)); + } else { + /* It is legal to pass in arbitrary identifiers as long as they don't exceed + * the limit. Shaders with bogus identifiers are more or less guaranteed to fail. */ + assert(iinfo); + assert(iinfo->identifierSize <= VK_MAX_SHADER_MODULE_IDENTIFIER_SIZE_EXT); + _mesa_sha1_update(&ctx, iinfo->pIdentifier, iinfo->identifierSize); + } + + if (rstate) { + _mesa_sha1_update(&ctx, &rstate->storage_buffers, sizeof(rstate->storage_buffers)); + _mesa_sha1_update(&ctx, &rstate->uniform_buffers, sizeof(rstate->uniform_buffers)); + _mesa_sha1_update(&ctx, &rstate->vertex_inputs, sizeof(rstate->vertex_inputs)); + _mesa_sha1_update(&ctx, &rstate->images, sizeof(rstate->images)); + } + + _mesa_sha1_update(&ctx, info->pName, strlen(info->pName)); + + if (info->pSpecializationInfo) { + _mesa_sha1_update(&ctx, info->pSpecializationInfo->pMapEntries, + info->pSpecializationInfo->mapEntryCount * + sizeof(*info->pSpecializationInfo->pMapEntries)); + _mesa_sha1_update(&ctx, info->pSpecializationInfo->pData, + info->pSpecializationInfo->dataSize); + } + + uint32_t req_subgroup_size = get_required_subgroup_size(info); + _mesa_sha1_update(&ctx, &req_subgroup_size, sizeof(req_subgroup_size)); + + _mesa_sha1_final(&ctx, stage_sha1); +} + +static VkPipelineRobustnessBufferBehaviorEXT +vk_device_default_robust_buffer_behavior(const struct vk_device *device) +{ + if (device->enabled_features.robustBufferAccess2) { + return VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT; + } else if (device->enabled_features.robustBufferAccess) { + return VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; + } else { + return VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; + } +} + +static VkPipelineRobustnessImageBehaviorEXT +vk_device_default_robust_image_behavior(const struct vk_device *device) +{ + if (device->enabled_features.robustImageAccess2) { + return VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT; + } else if (device->enabled_features.robustImageAccess) { + return VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT; + } else { + return VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT; + } +} + +void +vk_pipeline_robustness_state_fill(const struct vk_device *device, + struct vk_pipeline_robustness_state *rs, + const void *pipeline_pNext, + const void *shader_stage_pNext) +{ + rs->uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + rs->storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + rs->vertex_inputs = 
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + rs->images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT; + + const VkPipelineRobustnessCreateInfoEXT *shader_info = + vk_find_struct_const(shader_stage_pNext, + PIPELINE_ROBUSTNESS_CREATE_INFO_EXT); + if (shader_info) { + rs->storage_buffers = shader_info->storageBuffers; + rs->uniform_buffers = shader_info->uniformBuffers; + rs->vertex_inputs = shader_info->vertexInputs; + rs->images = shader_info->images; + } else { + const VkPipelineRobustnessCreateInfoEXT *pipeline_info = + vk_find_struct_const(pipeline_pNext, + PIPELINE_ROBUSTNESS_CREATE_INFO_EXT); + if (pipeline_info) { + rs->storage_buffers = pipeline_info->storageBuffers; + rs->uniform_buffers = pipeline_info->uniformBuffers; + rs->vertex_inputs = pipeline_info->vertexInputs; + rs->images = pipeline_info->images; + } + } + + if (rs->storage_buffers == + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT) + rs->storage_buffers = vk_device_default_robust_buffer_behavior(device); + + if (rs->uniform_buffers == + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT) + rs->uniform_buffers = vk_device_default_robust_buffer_behavior(device); + + if (rs->vertex_inputs == + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT) + rs->vertex_inputs = vk_device_default_robust_buffer_behavior(device); + + if (rs->images == VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT) + rs->images = vk_device_default_robust_image_behavior(device); +} + +void * +vk_pipeline_zalloc(struct vk_device *device, + const struct vk_pipeline_ops *ops, + VkPipelineBindPoint bind_point, + VkPipelineCreateFlags2KHR flags, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_pipeline *pipeline; + + pipeline = vk_object_zalloc(device, alloc, size, VK_OBJECT_TYPE_PIPELINE); + if (pipeline == NULL) + return NULL; + + pipeline->ops = ops; + pipeline->bind_point = bind_point; + pipeline->flags = flags; + + return pipeline; +} + +void +vk_pipeline_free(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_pipeline *pipeline) +{ + vk_object_free(device, alloc, &pipeline->base); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyPipeline(VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline); + + if (pipeline == NULL) + return; + + pipeline->ops->destroy(device, pipeline, pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetPipelineExecutablePropertiesKHR( + VkDevice _device, + const VkPipelineInfoKHR *pPipelineInfo, + uint32_t *pExecutableCount, + VkPipelineExecutablePropertiesKHR *pProperties) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline, pipeline, pPipelineInfo->pipeline); + + return pipeline->ops->get_executable_properties(device, pipeline, + pExecutableCount, + pProperties); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetPipelineExecutableStatisticsKHR( + VkDevice _device, + const VkPipelineExecutableInfoKHR *pExecutableInfo, + uint32_t *pStatisticCount, + VkPipelineExecutableStatisticKHR *pStatistics) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutableInfo->pipeline); + + return pipeline->ops->get_executable_statistics(device, pipeline, + pExecutableInfo->executableIndex, + pStatisticCount, pStatistics); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetPipelineExecutableInternalRepresentationsKHR( + VkDevice 
_device, + const VkPipelineExecutableInfoKHR *pExecutableInfo, + uint32_t *pInternalRepresentationCount, + VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutableInfo->pipeline); + + return pipeline->ops->get_internal_representations(device, pipeline, + pExecutableInfo->executableIndex, + pInternalRepresentationCount, + pInternalRepresentations); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBindPipeline(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline); + + assert(pipeline->bind_point == pipelineBindPoint); + + pipeline->ops->cmd_bind(cmd_buffer, pipeline); +} + +static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops; + +static struct vk_shader * +vk_shader_from_cache_obj(struct vk_pipeline_cache_object *object) +{ + assert(object->ops == &pipeline_shader_cache_ops); + return container_of(object, struct vk_shader, pipeline.cache_obj); +} + +static bool +vk_pipeline_shader_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) +{ + struct vk_shader *shader = vk_shader_from_cache_obj(object); + struct vk_device *device = shader->base.device; + + return shader->ops->serialize(device, shader, blob); +} + +static void +vk_shader_init_cache_obj(struct vk_device *device, struct vk_shader *shader, + const void *key_data, size_t key_size) +{ + assert(key_size == sizeof(shader->pipeline.cache_key)); + memcpy(&shader->pipeline.cache_key, key_data, + sizeof(shader->pipeline.cache_key)); + + vk_pipeline_cache_object_init(device, &shader->pipeline.cache_obj, + &pipeline_shader_cache_ops, + &shader->pipeline.cache_key, + sizeof(shader->pipeline.cache_key)); +} + +static struct vk_pipeline_cache_object * +vk_pipeline_shader_deserialize(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct vk_device *device = cache->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + /* TODO: Do we really want to always use the latest version? 
*/ + const uint32_t version = device->physical->properties.shaderBinaryVersion; + + struct vk_shader *shader; + VkResult result = ops->deserialize(device, blob, version, + &device->alloc, &shader); + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + + vk_shader_init_cache_obj(device, shader, key_data, key_size); + + return &shader->pipeline.cache_obj; +} + +static void +vk_pipeline_shader_destroy(struct vk_device *device, + struct vk_pipeline_cache_object *object) +{ + struct vk_shader *shader = vk_shader_from_cache_obj(object); + assert(shader->base.device == device); + + vk_shader_destroy(device, shader, &device->alloc); +} + +static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops = { + .serialize = vk_pipeline_shader_serialize, + .deserialize = vk_pipeline_shader_deserialize, + .destroy = vk_pipeline_shader_destroy, +}; + +static struct vk_shader * +vk_shader_ref(struct vk_shader *shader) +{ + vk_pipeline_cache_object_ref(&shader->pipeline.cache_obj); + return shader; +} + +static void +vk_shader_unref(struct vk_device *device, struct vk_shader *shader) +{ + vk_pipeline_cache_object_unref(device, &shader->pipeline.cache_obj); +} + +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) +struct vk_pipeline_tess_info { + unsigned tcs_vertices_out : 8; + unsigned primitive_mode : 2; /* tess_primitive_mode */ + unsigned spacing : 2; /* gl_tess_spacing */ + unsigned ccw : 1; + unsigned point_mode : 1; + unsigned _pad : 18; +}; +PRAGMA_DIAGNOSTIC_POP +static_assert(sizeof(struct vk_pipeline_tess_info) == 4, + "This struct has no holes"); + +static void +vk_pipeline_gather_nir_tess_info(const nir_shader *nir, + struct vk_pipeline_tess_info *info) +{ + info->tcs_vertices_out = nir->info.tess.tcs_vertices_out; + info->primitive_mode = nir->info.tess._primitive_mode; + info->spacing = nir->info.tess.spacing; + info->ccw = nir->info.tess.ccw; + info->point_mode = nir->info.tess.point_mode; +} + +static void +vk_pipeline_replace_nir_tess_info(nir_shader *nir, + const struct vk_pipeline_tess_info *info) +{ + nir->info.tess.tcs_vertices_out = info->tcs_vertices_out; + nir->info.tess._primitive_mode = info->primitive_mode; + nir->info.tess.spacing = info->spacing; + nir->info.tess.ccw = info->ccw; + nir->info.tess.point_mode = info->point_mode; +} + +static void +vk_pipeline_tess_info_merge(struct vk_pipeline_tess_info *dst, + const struct vk_pipeline_tess_info *src) +{ + /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: + * + * "PointMode. Controls generation of points rather than triangles + * or lines. This functionality defaults to disabled, and is + * enabled if either shader stage includes the execution mode. + * + * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw, + * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd, + * and OutputVertices, it says: + * + * "One mode must be set in at least one of the tessellation + * shader stages." + * + * So, the fields can be set in either the TCS or TES, but they must + * agree if set in both. 
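+ *
+ * Because every "unspecified" encoding used below is zero, the merge reduces
+ * to a bitwise OR of the two stages' values, with asserts checking that any
+ * field set in both stages actually agrees.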
+ */ + assert(dst->tcs_vertices_out == 0 || + src->tcs_vertices_out == 0 || + dst->tcs_vertices_out == src->tcs_vertices_out); + dst->tcs_vertices_out |= src->tcs_vertices_out; + + static_assert(TESS_SPACING_UNSPECIFIED == 0, ""); + assert(dst->spacing == TESS_SPACING_UNSPECIFIED || + src->spacing == TESS_SPACING_UNSPECIFIED || + dst->spacing == src->spacing); + dst->spacing |= src->spacing; + + static_assert(TESS_PRIMITIVE_UNSPECIFIED == 0, ""); + assert(dst->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED || + src->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED || + dst->primitive_mode == src->primitive_mode); + dst->primitive_mode |= src->primitive_mode; + dst->ccw |= src->ccw; + dst->point_mode |= src->point_mode; +} + +struct vk_pipeline_precomp_shader { + struct vk_pipeline_cache_object cache_obj; + + /* Key for this cache_obj in the pipeline cache. + * + * This is always the output of vk_pipeline_hash_shader_stage() so it must + * be a SHA1 hash. + */ + uint8_t cache_key[SHA1_DIGEST_LENGTH]; + + gl_shader_stage stage; + + struct vk_pipeline_robustness_state rs; + + /* Tessellation info if the shader is a tessellation shader */ + struct vk_pipeline_tess_info tess; + + /* Hash of the vk_pipeline_precomp_shader + * + * This is the hash of the final compiled NIR together with tess info and + * robustness state. It's used as a key for final binary lookups. By + * having this as a separate key, we can de-duplicate cases where you have + * different SPIR-V or specialization constants but end up compiling the + * same NIR shader in the end anyway. + */ + blake3_hash blake3; + + struct blob nir_blob; +}; + +static struct vk_pipeline_precomp_shader * +vk_pipeline_precomp_shader_ref(struct vk_pipeline_precomp_shader *shader) +{ + vk_pipeline_cache_object_ref(&shader->cache_obj); + return shader; +} + +static void +vk_pipeline_precomp_shader_unref(struct vk_device *device, + struct vk_pipeline_precomp_shader *shader) +{ + vk_pipeline_cache_object_unref(device, &shader->cache_obj); +} + +static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops; + +static struct vk_pipeline_precomp_shader * +vk_pipeline_precomp_shader_from_cache_obj(struct vk_pipeline_cache_object *obj) +{ + assert(obj->ops == & pipeline_precomp_shader_cache_ops); + return container_of(obj, struct vk_pipeline_precomp_shader, cache_obj); +} + +static struct vk_pipeline_precomp_shader * +vk_pipeline_precomp_shader_create(struct vk_device *device, + const void *key_data, size_t key_size, + const struct vk_pipeline_robustness_state *rs, + nir_shader *nir) +{ + struct blob blob; + blob_init(&blob); + + nir_serialize(&blob, nir, false); + + if (blob.out_of_memory) + goto fail_blob; + + struct vk_pipeline_precomp_shader *shader = + vk_zalloc(&device->alloc, sizeof(*shader), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (shader == NULL) + goto fail_blob; + + assert(sizeof(shader->cache_key) == key_size); + memcpy(shader->cache_key, key_data, sizeof(shader->cache_key)); + + vk_pipeline_cache_object_init(device, &shader->cache_obj, + &pipeline_precomp_shader_cache_ops, + shader->cache_key, + sizeof(shader->cache_key)); + + shader->stage = nir->info.stage; + shader->rs = *rs; + + vk_pipeline_gather_nir_tess_info(nir, &shader->tess); + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + _mesa_blake3_update(&blake3_ctx, rs, sizeof(*rs)); + _mesa_blake3_update(&blake3_ctx, blob.data, blob.size); + _mesa_blake3_final(&blake3_ctx, shader->blake3); + + shader->nir_blob = blob; + + return shader; + +fail_blob: 
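+   /* NIR serialization or the shader allocation failed; release the blob and
+    * return NULL so the caller can report VK_ERROR_OUT_OF_HOST_MEMORY.
+    */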
+ blob_finish(&blob); + + return NULL; +} + +static bool +vk_pipeline_precomp_shader_serialize(struct vk_pipeline_cache_object *obj, + struct blob *blob) +{ + struct vk_pipeline_precomp_shader *shader = + vk_pipeline_precomp_shader_from_cache_obj(obj); + + blob_write_uint32(blob, shader->stage); + blob_write_bytes(blob, &shader->rs, sizeof(shader->rs)); + blob_write_bytes(blob, &shader->tess, sizeof(shader->tess)); + blob_write_bytes(blob, shader->blake3, sizeof(shader->blake3)); + blob_write_uint64(blob, shader->nir_blob.size); + blob_write_bytes(blob, shader->nir_blob.data, shader->nir_blob.size); + + return !blob->out_of_memory; +} + +static struct vk_pipeline_cache_object * +vk_pipeline_precomp_shader_deserialize(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct vk_device *device = cache->base.device; + + struct vk_pipeline_precomp_shader *shader = + vk_zalloc(&device->alloc, sizeof(*shader), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (shader == NULL) + return NULL; + + assert(sizeof(shader->cache_key) == key_size); + memcpy(shader->cache_key, key_data, sizeof(shader->cache_key)); + + vk_pipeline_cache_object_init(device, &shader->cache_obj, + &pipeline_precomp_shader_cache_ops, + shader->cache_key, + sizeof(shader->cache_key)); + + shader->stage = blob_read_uint32(blob); + blob_copy_bytes(blob, &shader->rs, sizeof(shader->rs)); + blob_copy_bytes(blob, &shader->tess, sizeof(shader->tess)); + blob_copy_bytes(blob, shader->blake3, sizeof(shader->blake3)); + + uint64_t nir_size = blob_read_uint64(blob); + if (blob->overrun || nir_size > SIZE_MAX) + goto fail_shader; + + const void *nir_data = blob_read_bytes(blob, nir_size); + if (blob->overrun) + goto fail_shader; + + blob_init(&shader->nir_blob); + blob_write_bytes(&shader->nir_blob, nir_data, nir_size); + if (shader->nir_blob.out_of_memory) + goto fail_nir_blob; + + return &shader->cache_obj; + +fail_nir_blob: + blob_finish(&shader->nir_blob); +fail_shader: + vk_pipeline_cache_object_finish(&shader->cache_obj); + vk_free(&device->alloc, shader); + + return NULL; +} + +static void +vk_pipeline_precomp_shader_destroy(struct vk_device *device, + struct vk_pipeline_cache_object *obj) +{ + struct vk_pipeline_precomp_shader *shader = + vk_pipeline_precomp_shader_from_cache_obj(obj); + + blob_finish(&shader->nir_blob); + vk_pipeline_cache_object_finish(&shader->cache_obj); + vk_free(&device->alloc, shader); +} + +static nir_shader * +vk_pipeline_precomp_shader_get_nir(const struct vk_pipeline_precomp_shader *shader, + const struct nir_shader_compiler_options *nir_options) +{ + struct blob_reader blob; + blob_reader_init(&blob, shader->nir_blob.data, shader->nir_blob.size); + + nir_shader *nir = nir_deserialize(NULL, nir_options, &blob); + if (blob.overrun) { + ralloc_free(nir); + return NULL; + } + + return nir; +} + +static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops = { + .serialize = vk_pipeline_precomp_shader_serialize, + .deserialize = vk_pipeline_precomp_shader_deserialize, + .destroy = vk_pipeline_precomp_shader_destroy, +}; + +static VkResult +vk_pipeline_precompile_shader(struct vk_device *device, + struct vk_pipeline_cache *cache, + VkPipelineCreateFlags2KHR pipeline_flags, + const void *pipeline_info_pNext, + const VkPipelineShaderStageCreateInfo *info, + struct vk_pipeline_precomp_shader **ps_out) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult result; + + struct vk_pipeline_robustness_state rs; + 
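+   /* Resolve the effective robustness behavior for this stage: the shader
+    * stage's pNext chain takes precedence over the pipeline's pNext chain,
+    * and any remaining DEVICE_DEFAULT entries fall back to the device's
+    * enabled robustness features.
+    */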
vk_pipeline_robustness_state_fill(device, &rs, + pipeline_info_pNext, + info->pNext); + + uint8_t stage_sha1[SHA1_DIGEST_LENGTH]; + vk_pipeline_hash_shader_stage(info, &rs, stage_sha1); + + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, stage_sha1, sizeof(stage_sha1), + &pipeline_precomp_shader_cache_ops, + NULL /* cache_hit */); + if (cache_obj != NULL) { + *ps_out = vk_pipeline_precomp_shader_from_cache_obj(cache_obj); + return VK_SUCCESS; + } + } + + if (pipeline_flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage); + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage, &rs); + const struct spirv_to_nir_options spirv_options = + ops->get_spirv_options(device->physical, stage, &rs); + + nir_shader *nir; + result = vk_pipeline_shader_stage_to_nir(device, info, &spirv_options, + nir_options, NULL, &nir); + if (result != VK_SUCCESS) + return result; + + if (ops->preprocess_nir != NULL) + ops->preprocess_nir(device->physical, nir); + + struct vk_pipeline_precomp_shader *shader = + vk_pipeline_precomp_shader_create(device, stage_sha1, + sizeof(stage_sha1), + &rs, nir); + ralloc_free(nir); + if (shader == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = &shader->cache_obj; + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + shader = vk_pipeline_precomp_shader_from_cache_obj(cache_obj); + } + + *ps_out = shader; + + return VK_SUCCESS; +} + +struct vk_pipeline_stage { + gl_shader_stage stage; + + struct vk_pipeline_precomp_shader *precomp; + struct vk_shader *shader; +}; + +static int +cmp_vk_pipeline_stages(const void *_a, const void *_b) +{ + const struct vk_pipeline_stage *a = _a, *b = _b; + return vk_shader_cmp_graphics_stages(a->stage, b->stage); +} + +static bool +vk_pipeline_stage_is_null(const struct vk_pipeline_stage *stage) +{ + return stage->precomp == NULL && stage->shader == NULL; +} + +static void +vk_pipeline_stage_finish(struct vk_device *device, + struct vk_pipeline_stage *stage) +{ + if (stage->precomp != NULL) + vk_pipeline_precomp_shader_unref(device, stage->precomp); + + if (stage->shader) + vk_shader_unref(device, stage->shader); +} + +static struct vk_pipeline_stage +vk_pipeline_stage_clone(const struct vk_pipeline_stage *in) +{ + struct vk_pipeline_stage out = { + .stage = in->stage, + }; + + if (in->precomp) + out.precomp = vk_pipeline_precomp_shader_ref(in->precomp); + + if (in->shader) + out.shader = vk_shader_ref(in->shader); + + return out; +} + +struct vk_graphics_pipeline { + struct vk_pipeline base; + + union { + struct { + struct vk_graphics_pipeline_all_state all_state; + struct vk_graphics_pipeline_state state; + } lib; + + struct { + struct vk_vertex_input_state _dynamic_vi; + struct vk_sample_locations_state _dynamic_sl; + struct vk_dynamic_graphics_state dynamic; + } linked; + }; + + uint32_t set_layout_count; + struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS]; + + uint32_t stage_count; + struct vk_pipeline_stage stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; +}; + +static void +vk_graphics_pipeline_destroy(struct vk_device *device, + struct vk_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct 
vk_graphics_pipeline, base); + + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) + vk_pipeline_stage_finish(device, &gfx_pipeline->stages[i]); + + for (uint32_t i = 0; i < gfx_pipeline->set_layout_count; i++) { + if (gfx_pipeline->set_layouts[i] != NULL) + vk_descriptor_set_layout_unref(device, gfx_pipeline->set_layouts[i]); + } + + vk_pipeline_free(device, pAllocator, pipeline); +} + +static bool +vk_device_supports_stage(struct vk_device *device, + gl_shader_stage stage) +{ + const struct vk_features *features = &device->physical->supported_features; + + switch (stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + return true; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + return features->tessellationShader; + case MESA_SHADER_GEOMETRY: + return features->geometryShader; + case MESA_SHADER_TASK: + return features->taskShader; + case MESA_SHADER_MESH: + return features->meshShader; + default: + return false; + } +} + +static const gl_shader_stage all_gfx_stages[] = { + MESA_SHADER_VERTEX, + MESA_SHADER_TESS_CTRL, + MESA_SHADER_TESS_EVAL, + MESA_SHADER_GEOMETRY, + MESA_SHADER_TASK, + MESA_SHADER_MESH, + MESA_SHADER_FRAGMENT, +}; + +static void +vk_graphics_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer, + struct vk_pipeline *pipeline) +{ + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + struct vk_graphics_pipeline *gfx_pipeline = NULL; + struct vk_shader *stage_shader[PIPE_SHADER_MESH_TYPES] = { NULL, }; + if (pipeline != NULL) { + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + assert(!(pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)); + gfx_pipeline = container_of(pipeline, struct vk_graphics_pipeline, base); + + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + stage_shader[shader->stage] = shader; + } + } + + uint32_t stage_count = 0; + gl_shader_stage stages[ARRAY_SIZE(all_gfx_stages)]; + struct vk_shader *shaders[ARRAY_SIZE(all_gfx_stages)]; + + VkShaderStageFlags vk_stages = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(all_gfx_stages); i++) { + gl_shader_stage stage = all_gfx_stages[i]; + if (!vk_device_supports_stage(device, stage)) { + assert(stage_shader[stage] == NULL); + continue; + } + + vk_stages |= mesa_to_vk_shader_stage(stage); + + stages[stage_count] = stage; + shaders[stage_count] = stage_shader[stage]; + stage_count++; + } + ops->cmd_bind_shaders(cmd_buffer, stage_count, stages, shaders); + + if (gfx_pipeline != NULL) { + cmd_buffer->pipeline_shader_stages |= vk_stages; + ops->cmd_set_dynamic_graphics_state(cmd_buffer, + &gfx_pipeline->linked.dynamic); + } else { + cmd_buffer->pipeline_shader_stages &= ~vk_stages; + } +} + +static VkShaderCreateFlagsEXT +vk_pipeline_to_shader_flags(VkPipelineCreateFlags2KHR pipeline_flags, + gl_shader_stage stage) +{ + VkShaderCreateFlagsEXT shader_flags = 0; + + if (pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) + shader_flags |= VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA; + + if (stage == MESA_SHADER_FRAGMENT) { + if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) + shader_flags |= VK_SHADER_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_EXT; + + if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT) + shader_flags |= 
VK_SHADER_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT; + } + + if (stage == MESA_SHADER_COMPUTE) { + if (pipeline_flags & VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR) + shader_flags |= VK_SHADER_CREATE_DISPATCH_BASE_BIT_EXT; + } + + return shader_flags; +} + +static VkResult +vk_graphics_pipeline_compile_shaders(struct vk_device *device, + struct vk_pipeline_cache *cache, + struct vk_graphics_pipeline *pipeline, + struct vk_pipeline_layout *pipeline_layout, + const struct vk_graphics_pipeline_state *state, + uint32_t stage_count, + struct vk_pipeline_stage *stages, + VkPipelineCreationFeedback *stage_feedbacks) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult result; + + if (stage_count == 0) + return VK_SUCCESS; + + /* If we're linking, throw away any previously compiled shaders as they + * likely haven't been properly linked. We keep the precompiled shaders + * and we still look it up in the cache so it may still be fast. + */ + if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) { + for (uint32_t i = 0; i < stage_count; i++) { + if (stages[i].shader != NULL) { + vk_shader_unref(device, stages[i].shader); + stages[i].shader = NULL; + } + } + } + + bool have_all_shaders = true; + VkShaderStageFlags all_stages = 0; + struct vk_pipeline_precomp_shader *tcs_precomp = NULL, *tes_precomp = NULL; + for (uint32_t i = 0; i < stage_count; i++) { + all_stages |= mesa_to_vk_shader_stage(stages[i].stage); + + if (stages[i].shader == NULL) + have_all_shaders = false; + + if (stages[i].stage == MESA_SHADER_TESS_CTRL) + tcs_precomp = stages[i].precomp; + + if (stages[i].stage == MESA_SHADER_TESS_EVAL) + tes_precomp = stages[i].precomp; + } + + /* If we already have a shader for each stage, there's nothing to do. 
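    * Otherwise we fall through, build a cache key for each partition of
    * stages, and only compile the partitions that miss in the cache.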
*/ + if (have_all_shaders) + return VK_SUCCESS; + + struct vk_pipeline_tess_info tess_info = { ._pad = 0 }; + if (tcs_precomp != NULL && tes_precomp != NULL) { + tess_info = tcs_precomp->tess; + vk_pipeline_tess_info_merge(&tess_info, &tes_precomp->tess); + } + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + for (uint32_t i = 0; i < pipeline->set_layout_count; i++) { + if (pipeline->set_layouts[i] != NULL) { + _mesa_blake3_update(&blake3_ctx, pipeline->set_layouts[i]->blake3, + sizeof(pipeline->set_layouts[i]->blake3)); + } + } + if (pipeline_layout != NULL) { + _mesa_blake3_update(&blake3_ctx, &pipeline_layout->push_ranges, + sizeof(pipeline_layout->push_ranges[0]) * + pipeline_layout->push_range_count); + } + blake3_hash layout_blake3; + _mesa_blake3_final(&blake3_ctx, layout_blake3); + + /* Partition the shaders */ + uint32_t part_count; + uint32_t partition[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + 1] = { 0 }; + if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) { + partition[1] = stage_count; + part_count = 1; + } else if (ops->link_geom_stages) { + if (stages[0].stage == MESA_SHADER_FRAGMENT) { + assert(stage_count == 1); + partition[1] = stage_count; + part_count = 1; + } else if (stages[stage_count - 1].stage == MESA_SHADER_FRAGMENT) { + /* In this case we have both */ + assert(stage_count > 1); + partition[1] = stage_count - 1; + partition[2] = stage_count; + part_count = 2; + } else { + /* In this case we only have geometry */ + partition[1] = stage_count; + part_count = 1; + } + } else { + /* Otherwise, we don't want to link anything */ + part_count = stage_count; + for (uint32_t i = 0; i < stage_count; i++) + partition[i + 1] = i + 1; + } + + for (uint32_t p = 0; p < part_count; p++) { + const int64_t part_start = os_time_get_nano(); + + /* Don't try to re-compile any fast-link shaders */ + if (!(pipeline->base.flags & + VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT)) { + assert(partition[p + 1] == partition[p] + 1); + if (stages[partition[p]].shader != NULL) + continue; + } + + struct vk_shader_pipeline_cache_key shader_key = { 0 }; + + _mesa_blake3_init(&blake3_ctx); + + VkShaderStageFlags part_stages = 0; + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + const struct vk_pipeline_stage *stage = &stages[i]; + + part_stages |= mesa_to_vk_shader_stage(stage->stage); + _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3, + sizeof(stage->precomp->blake3)); + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage); + _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags)); + } + + blake3_hash state_blake3; + ops->hash_graphics_state(device->physical, state, + part_stages, state_blake3); + + _mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3)); + _mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3)); + + if (part_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + _mesa_blake3_update(&blake3_ctx, &tess_info, sizeof(tess_info)); + + /* The set of geometry stages used together is used to generate the + * nextStage mask as well as VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT. 
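    * For example, a vertex shader that is followed by a geometry shader
    * gets nextStage = GEOMETRY rather than FRAGMENT, so the same
    * precompiled VS has to key differently in a VS+GS+FS pipeline than in
    * a plain VS+FS one.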
+ */ + const VkShaderStageFlags geom_stages = + all_stages & ~VK_SHADER_STAGE_FRAGMENT_BIT; + _mesa_blake3_update(&blake3_ctx, &geom_stages, sizeof(geom_stages)); + + _mesa_blake3_final(&blake3_ctx, shader_key.blake3); + + if (cache != NULL) { + /* From the Vulkan 1.3.278 spec: + * + * "VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT + * indicates that a readily usable pipeline or pipeline stage was + * found in the pipelineCache specified by the application in the + * pipeline creation command. + * + * [...] + * + * Note + * + * Implementations are encouraged to provide a meaningful signal + * to applications using this bit. The intention is to communicate + * to the application that the pipeline or pipeline stage was + * created “as fast as it gets” using the pipeline cache provided + * by the application. If an implementation uses an internal + * cache, it is discouraged from setting this bit as the feedback + * would be unactionable." + * + * The cache_hit value returned by vk_pipeline_cache_lookup_object() + * is only set to true when the shader is found in the provided + * pipeline cache. It is left false if we fail to find it in the + * memory cache but find it in the disk cache even though that's + * still a cache hit from the perspective of the compile pipeline. + */ + bool all_shaders_found = true; + bool all_cache_hits = true; + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + shader_key.stage = stage->stage; + + if (stage->shader) { + /* If we have a shader from some library pipeline and the key + * matches, just use that. + */ + if (memcmp(&stage->shader->pipeline.cache_key, + &shader_key, sizeof(shader_key)) == 0) + continue; + + /* Otherwise, throw it away */ + vk_shader_unref(device, stage->shader); + stage->shader = NULL; + } + + bool cache_hit = false; + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, &shader_key, + sizeof(shader_key), + &pipeline_shader_cache_ops, + &cache_hit); + if (cache_obj != NULL) { + assert(stage->shader == NULL); + stage->shader = vk_shader_from_cache_obj(cache_obj); + } else { + all_shaders_found = false; + } + + if (cache_obj == NULL && !cache_hit) + all_cache_hits = false; + } + + if (all_cache_hits && cache != device->mem_cache) { + /* The pipeline cache only really helps if we hit for everything + * in the partition. Otherwise, we have to go re-compile it all + * anyway. 
+ */ + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + stage_feedbacks[stage->stage].flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + } + + if (all_shaders_found) { + /* Update duration to take cache lookups into account */ + const int64_t part_end = os_time_get_nano(); + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + stage_feedbacks[stage->stage].duration += part_end - part_start; + } + continue; + } + } + + if (pipeline->base.flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + struct vk_shader_compile_info infos[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage); + + if (partition[p + 1] - partition[p] > 1) + shader_flags |= VK_SHADER_CREATE_LINK_STAGE_BIT_EXT; + + if ((part_stages & VK_SHADER_STAGE_MESH_BIT_EXT) && + !(geom_stages & VK_SHADER_STAGE_TASK_BIT_EXT)) + shader_flags |= VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT; + + VkShaderStageFlags next_stage; + if (stage->stage == MESA_SHADER_FRAGMENT) { + next_stage = 0; + } else if (i + 1 < stage_count) { + /* We hash geom_stages above so this is safe */ + next_stage = mesa_to_vk_shader_stage(stages[i + 1].stage); + } else { + /* We're the last geometry stage */ + next_stage = VK_SHADER_STAGE_FRAGMENT_BIT; + } + + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage->stage, + &stage->precomp->rs); + + nir_shader *nir = + vk_pipeline_precomp_shader_get_nir(stage->precomp, nir_options); + if (nir == NULL) { + for (uint32_t j = partition[p]; j < i; j++) + ralloc_free(infos[j].nir); + + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (stage->stage == MESA_SHADER_TESS_CTRL || + stage->stage == MESA_SHADER_TESS_EVAL) + vk_pipeline_replace_nir_tess_info(nir, &tess_info); + + const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + mesa_to_vk_shader_stage(stage->stage)) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + infos[i] = (struct vk_shader_compile_info) { + .stage = stage->stage, + .flags = shader_flags, + .next_stage_mask = next_stage, + .nir = nir, + .robustness = &stage->precomp->rs, + .set_layout_count = pipeline->set_layout_count, + .set_layouts = pipeline->set_layouts, + .push_constant_range_count = push_range != NULL, + .push_constant_ranges = push_range != NULL ? push_range : NULL, + }; + } + + /* vk_shader_ops::compile() consumes the NIR regardless of whether or + * not it succeeds and only generates shaders on success. Once this + * returns, we own the shaders but not the NIR in infos. 
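    * In particular, there is no NIR to clean up if compile() fails; the
    * ralloc_free() loop above only covers failures that happen before
    * compile() is reached.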
+ */ + struct vk_shader *shaders[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; + result = ops->compile(device, partition[p + 1] - partition[p], + &infos[partition[p]], + state, + &device->alloc, + &shaders[partition[p]]); + if (result != VK_SUCCESS) + return result; + + const int64_t part_end = os_time_get_nano(); + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + shader_key.stage = stage->stage; + vk_shader_init_cache_obj(device, shaders[i], &shader_key, + sizeof(shader_key)); + + if (stage->shader == NULL) { + struct vk_pipeline_cache_object *cache_obj = + &shaders[i]->pipeline.cache_obj; + if (cache != NULL) + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + + stage->shader = vk_shader_from_cache_obj(cache_obj); + } else { + /* This can happen if only some of the shaders were found + * in the pipeline cache. In this case, we just throw away the + * shader as vk_pipeline_cache_add_object() would throw it away + * for us anyway. + */ + assert(memcmp(&stage->shader->pipeline.cache_key, + &shaders[i]->pipeline.cache_key, + sizeof(shaders[i]->pipeline.cache_key)) == 0); + + vk_shader_unref(device, shaders[i]); + } + + stage_feedbacks[stage->stage].duration += part_end - part_start; + } + } + + return VK_SUCCESS; +} + +static VkResult +vk_graphics_pipeline_get_executable_properties( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct vk_graphics_pipeline, base); + VkResult result; + + if (properties == NULL) { + *executable_count = 0; + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + + uint32_t shader_exec_count = 0; + result = shader->ops->get_executable_properties(device, shader, + &shader_exec_count, + NULL); + assert(result == VK_SUCCESS); + *executable_count += shader_exec_count; + } + } else { + uint32_t arr_len = *executable_count; + *executable_count = 0; + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + + uint32_t shader_exec_count = arr_len - *executable_count; + result = shader->ops->get_executable_properties(device, shader, + &shader_exec_count, + &properties[*executable_count]); + if (result != VK_SUCCESS) + return result; + + *executable_count += shader_exec_count; + } + } + + return VK_SUCCESS; +} + +static inline struct vk_shader * +vk_graphics_pipeline_executable_shader(struct vk_device *device, + struct vk_graphics_pipeline *gfx_pipeline, + uint32_t *executable_index) +{ + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + + uint32_t shader_exec_count = 0; + shader->ops->get_executable_properties(device, shader, + &shader_exec_count, NULL); + + if (*executable_index < shader_exec_count) + return shader; + else + *executable_index -= shader_exec_count; + } + + return NULL; +} + +static VkResult +vk_graphics_pipeline_get_executable_statistics( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct vk_graphics_pipeline, base); + + struct vk_shader *shader = + vk_graphics_pipeline_executable_shader(device, gfx_pipeline, + 
&executable_index); + if (shader == NULL) { + *statistic_count = 0; + return VK_SUCCESS; + } + + return shader->ops->get_executable_statistics(device, shader, + executable_index, + statistic_count, + statistics); +} + +static VkResult +vk_graphics_pipeline_get_internal_representations( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR* internal_representations) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct vk_graphics_pipeline, base); + + struct vk_shader *shader = + vk_graphics_pipeline_executable_shader(device, gfx_pipeline, + &executable_index); + if (shader == NULL) { + *internal_representation_count = 0; + return VK_SUCCESS; + } + + return shader->ops->get_executable_internal_representations( + device, shader, executable_index, + internal_representation_count, internal_representations); +} + +static const struct vk_pipeline_ops vk_graphics_pipeline_ops = { + .destroy = vk_graphics_pipeline_destroy, + .get_executable_statistics = vk_graphics_pipeline_get_executable_statistics, + .get_executable_properties = vk_graphics_pipeline_get_executable_properties, + .get_internal_representations = vk_graphics_pipeline_get_internal_representations, + .cmd_bind = vk_graphics_pipeline_cmd_bind, +}; + +static VkResult +vk_create_graphics_pipeline(struct vk_device *device, + struct vk_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout); + const int64_t pipeline_start = os_time_get_nano(); + VkResult result; + + const VkPipelineCreateFlags2KHR pipeline_flags = + vk_graphics_pipeline_create_flags(pCreateInfo); + + const VkPipelineCreationFeedbackCreateInfo *feedback_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + + const VkPipelineLibraryCreateInfoKHR *libs_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_LIBRARY_CREATE_INFO_KHR); + + struct vk_graphics_pipeline *pipeline = + vk_pipeline_zalloc(device, &vk_graphics_pipeline_ops, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_flags, pAllocator, sizeof(*pipeline)); + if (pipeline == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_pipeline_stage stages[PIPE_SHADER_MESH_TYPES]; + memset(stages, 0, sizeof(stages)); + + VkPipelineCreationFeedback stage_feedbacks[PIPE_SHADER_MESH_TYPES]; + memset(stage_feedbacks, 0, sizeof(stage_feedbacks)); + + struct vk_graphics_pipeline_state state_tmp, *state; + struct vk_graphics_pipeline_all_state all_state_tmp, *all_state; + if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) { + /* For pipeline libraries, the state is stored in the pipeline */ + state = &pipeline->lib.state; + all_state = &pipeline->lib.all_state; + } else { + /* For linked pipelines, we throw the state away at the end of pipeline + * creation and only keep the dynamic state. + */ + memset(&state_tmp, 0, sizeof(state_tmp)); + state = &state_tmp; + all_state = &all_state_tmp; + } + + /* If we have libraries, import them first. 
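    * Each imported library contributes its graphics state, its descriptor
    * set layouts, and its per-stage shaders and precompiled shaders before
    * the stages supplied directly in pCreateInfo are processed.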
*/ + if (libs_info) { + for (uint32_t i = 0; i < libs_info->libraryCount; i++) { + VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]); + assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR); + struct vk_graphics_pipeline *lib_gfx_pipeline = + container_of(lib_pipeline, struct vk_graphics_pipeline, base); + + vk_graphics_pipeline_state_merge(state, &lib_gfx_pipeline->lib.state); + + pipeline->set_layout_count = MAX2(pipeline->set_layout_count, + lib_gfx_pipeline->set_layout_count); + for (uint32_t i = 0; i < lib_gfx_pipeline->set_layout_count; i++) { + if (lib_gfx_pipeline->set_layouts[i] == NULL) + continue; + + if (pipeline->set_layouts[i] == NULL) { + pipeline->set_layouts[i] = + vk_descriptor_set_layout_ref(lib_gfx_pipeline->set_layouts[i]); + } + } + + for (uint32_t i = 0; i < lib_gfx_pipeline->stage_count; i++) { + const struct vk_pipeline_stage *lib_stage = + &lib_gfx_pipeline->stages[i]; + + /* We shouldn't have duplicated stages in the imported pipeline + * but it's cheap enough to protect against it so we may as well. + */ + assert(lib_stage->stage < ARRAY_SIZE(stages)); + assert(vk_pipeline_stage_is_null(&stages[lib_stage->stage])); + if (!vk_pipeline_stage_is_null(&stages[lib_stage->stage])) + continue; + + stages[lib_stage->stage] = vk_pipeline_stage_clone(lib_stage); + } + } + } + + result = vk_graphics_pipeline_state_fill(device, state, + pCreateInfo, + NULL /* driver_rp */, + 0 /* driver_rp_flags */, + all_state, + NULL, 0, NULL); + if (result != VK_SUCCESS) + goto fail_stages; + + if (!(pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) { + pipeline->linked.dynamic.vi = &pipeline->linked._dynamic_vi; + pipeline->linked.dynamic.ms.sample_locations = + &pipeline->linked._dynamic_sl; + vk_dynamic_graphics_state_fill(&pipeline->linked.dynamic, &state_tmp); + } + + if (pipeline_layout != NULL) { + pipeline->set_layout_count = MAX2(pipeline->set_layout_count, + pipeline_layout->set_count); + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] == NULL) + continue; + + if (pipeline->set_layouts[i] == NULL) { + pipeline->set_layouts[i] = + vk_descriptor_set_layout_ref(pipeline_layout->set_layouts[i]); + } + } + } + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + const VkPipelineShaderStageCreateInfo *stage_info = + &pCreateInfo->pStages[i]; + + const int64_t stage_start = os_time_get_nano(); + + assert(util_bitcount(stage_info->stage) == 1); + if (!(state->shader_stages & stage_info->stage)) + continue; + + gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage); + assert(vk_device_supports_stage(device, stage)); + + stage_feedbacks[stage].flags |= + VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; + + if (!vk_pipeline_stage_is_null(&stages[stage])) + continue; + + struct vk_pipeline_precomp_shader *precomp; + result = vk_pipeline_precompile_shader(device, cache, pipeline_flags, + pCreateInfo->pNext, + stage_info, + &precomp); + if (result != VK_SUCCESS) + goto fail_stages; + + stages[stage] = (struct vk_pipeline_stage) { + .stage = stage, + .precomp = precomp, + }; + + const int64_t stage_end = os_time_get_nano(); + stage_feedbacks[stage].duration += stage_end - stage_start; + } + + /* Compact the array of stages */ + uint32_t stage_count = 0; + for (uint32_t s = 0; s < ARRAY_SIZE(stages); s++) { + assert(s >= stage_count); + if (!vk_pipeline_stage_is_null(&stages[s])) + stages[stage_count++] = stages[s]; + } 
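   /* Clear the tail of the array so the unused slots do not keep stale
    * copies of the entries that were just compacted downward.
    */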
+ for (uint32_t s = stage_count; s < ARRAY_SIZE(stages); s++) + memset(&stages[s], 0, sizeof(stages[s])); + + /* Sort so we always give the driver shaders in order. + * + * This makes everything easier for everyone. This also helps stabilize + * shader keys so that we get a cache hit even if the client gives us + * the stages in a different order. + */ + qsort(stages, stage_count, sizeof(*stages), cmp_vk_pipeline_stages); + + result = vk_graphics_pipeline_compile_shaders(device, cache, pipeline, + pipeline_layout, state, + stage_count, stages, + stage_feedbacks); + if (result != VK_SUCCESS) + goto fail_stages; + + /* Throw away precompiled shaders unless the client explicitly asks us to + * keep them. + */ + if (!(pipeline_flags & + VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT)) { + for (uint32_t i = 0; i < stage_count; i++) { + if (stages[i].precomp != NULL) { + vk_pipeline_precomp_shader_unref(device, stages[i].precomp); + stages[i].precomp = NULL; + } + } + } + + pipeline->stage_count = stage_count; + for (uint32_t i = 0; i < stage_count; i++) + pipeline->stages[i] = stages[i]; + + const int64_t pipeline_end = os_time_get_nano(); + if (feedback_info != NULL) { + VkPipelineCreationFeedback pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, + .duration = pipeline_end - pipeline_start, + }; + + /* From the Vulkan 1.3.275 spec: + * + * "An implementation should set the + * VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT + * bit if it was able to avoid the large majority of pipeline or + * pipeline stage creation work by using the pipelineCache parameter" + * + * We really shouldn't set this bit unless all the shaders hit the + * cache. + */ + uint32_t cache_hit_count = 0; + for (uint32_t i = 0; i < stage_count; i++) { + const gl_shader_stage stage = stages[i].stage; + if (stage_feedbacks[stage].flags & + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT) + cache_hit_count++; + } + if (cache_hit_count > 0 && cache_hit_count == stage_count) { + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + + *feedback_info->pPipelineCreationFeedback = pipeline_feedback; + + /* VUID-VkGraphicsPipelineCreateInfo-pipelineStageCreationFeedbackCount-06594 */ + assert(feedback_info->pipelineStageCreationFeedbackCount == 0 || + feedback_info->pipelineStageCreationFeedbackCount == + pCreateInfo->stageCount); + for (uint32_t i = 0; + i < feedback_info->pipelineStageCreationFeedbackCount; i++) { + const gl_shader_stage stage = + vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage); + + feedback_info->pPipelineStageCreationFeedbacks[i] = + stage_feedbacks[stage]; + } + } + + *pPipeline = vk_pipeline_to_handle(&pipeline->base); + + return VK_SUCCESS; + +fail_stages: + for (uint32_t i = 0; i < ARRAY_SIZE(stages); i++) + vk_pipeline_stage_finish(device, &stages[i]); + + vk_graphics_pipeline_destroy(device, &pipeline->base, pAllocator); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateGraphicsPipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + VkResult first_error_or_success = VK_SUCCESS; + + /* Use implicit pipeline cache if there's no cache set */ + if (!cache && device->mem_cache) + cache = 
device->mem_cache; + + /* From the Vulkan 1.3.274 spec: + * + * "When attempting to create many pipelines in a single command, it is + * possible that creation may fail for a subset of them. In this case, + * the corresponding elements of pPipelines will be set to + * VK_NULL_HANDLE." + */ + memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines)); + + unsigned i = 0; + for (; i < createInfoCount; i++) { + VkResult result = vk_create_graphics_pipeline(device, cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result == VK_SUCCESS) + continue; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + + /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it + * is not obvious what error should be reported upon 2 different failures. + */ + if (result != VK_PIPELINE_COMPILE_REQUIRED) + return result; + + const VkPipelineCreateFlags2KHR flags = + vk_graphics_pipeline_create_flags(&pCreateInfos[i]); + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + return result; + } + + return first_error_or_success; +} + +struct vk_compute_pipeline { + struct vk_pipeline base; + struct vk_shader *shader; +}; + +static void +vk_compute_pipeline_destroy(struct vk_device *device, + struct vk_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + + vk_shader_unref(device, comp_pipeline->shader); + vk_pipeline_free(device, pAllocator, pipeline); +} + +static void +vk_compute_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer, + struct vk_pipeline *pipeline) +{ + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + struct vk_shader *shader = NULL; + if (pipeline != NULL) { + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE); + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + + shader = comp_pipeline->shader; + + cmd_buffer->pipeline_shader_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + } else { + cmd_buffer->pipeline_shader_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + gl_shader_stage stage = MESA_SHADER_COMPUTE; + ops->cmd_bind_shaders(cmd_buffer, 1, &stage, &shader); +} + +static VkResult +vk_pipeline_compile_compute_stage(struct vk_device *device, + struct vk_pipeline_cache *cache, + struct vk_compute_pipeline *pipeline, + struct vk_pipeline_layout *pipeline_layout, + struct vk_pipeline_stage *stage, + bool *cache_hit) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult result; + + const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + VK_SHADER_STAGE_COMPUTE_BIT) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline->base.flags, MESA_SHADER_COMPUTE); + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + + _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3, + sizeof(stage->precomp->blake3)); + + _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags)); + + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] != NULL) { + _mesa_blake3_update(&blake3_ctx, + pipeline_layout->set_layouts[i]->blake3, + 
sizeof(pipeline_layout->set_layouts[i]->blake3)); + } + } + if (push_range != NULL) + _mesa_blake3_update(&blake3_ctx, push_range, sizeof(*push_range)); + + struct vk_shader_pipeline_cache_key shader_key = { + .stage = MESA_SHADER_COMPUTE, + }; + _mesa_blake3_final(&blake3_ctx, shader_key.blake3); + + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, &shader_key, + sizeof(shader_key), + &pipeline_shader_cache_ops, + cache_hit); + if (cache_obj != NULL) { + stage->shader = vk_shader_from_cache_obj(cache_obj); + return VK_SUCCESS; + } + } + + if (pipeline->base.flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage->stage, + &stage->precomp->rs); + + nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp, + nir_options); + if (nir == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* vk_device_shader_ops::compile() consumes the NIR regardless of whether + * or not it succeeds and only generates shaders on success. Once compile() + * returns, we own the shaders but not the NIR in infos. + */ + struct vk_shader_compile_info compile_info = { + .stage = stage->stage, + .flags = shader_flags, + .next_stage_mask = 0, + .nir = nir, + .robustness = &stage->precomp->rs, + .set_layout_count = pipeline_layout->set_count, + .set_layouts = pipeline_layout->set_layouts, + .push_constant_range_count = push_range != NULL, + .push_constant_ranges = push_range != NULL ? push_range : NULL, + }; + + struct vk_shader *shader; + result = ops->compile(device, 1, &compile_info, NULL, + &device->alloc, &shader); + if (result != VK_SUCCESS) + return result; + + vk_shader_init_cache_obj(device, shader, &shader_key, sizeof(shader_key)); + + struct vk_pipeline_cache_object *cache_obj = &shader->pipeline.cache_obj; + if (cache != NULL) + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + + stage->shader = vk_shader_from_cache_obj(cache_obj); + + return VK_SUCCESS; +} + +static VkResult +vk_compute_pipeline_get_executable_properties( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + struct vk_shader *shader = comp_pipeline->shader; + + return shader->ops->get_executable_properties(device, shader, + executable_count, + properties); +} + +static VkResult +vk_compute_pipeline_get_executable_statistics( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + struct vk_shader *shader = comp_pipeline->shader; + + return shader->ops->get_executable_statistics(device, shader, + executable_index, + statistic_count, + statistics); +} + +static VkResult +vk_compute_pipeline_get_internal_representations( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR* internal_representations) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + struct vk_shader *shader = 
comp_pipeline->shader; + + return shader->ops->get_executable_internal_representations( + device, shader, executable_index, + internal_representation_count, internal_representations); +} + +static const struct vk_pipeline_ops vk_compute_pipeline_ops = { + .destroy = vk_compute_pipeline_destroy, + .get_executable_statistics = vk_compute_pipeline_get_executable_statistics, + .get_executable_properties = vk_compute_pipeline_get_executable_properties, + .get_internal_representations = vk_compute_pipeline_get_internal_representations, + .cmd_bind = vk_compute_pipeline_cmd_bind, +}; + +static VkResult +vk_create_compute_pipeline(struct vk_device *device, + struct vk_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout); + int64_t pipeline_start = os_time_get_nano(); + VkResult result; + + const VkPipelineCreateFlags2KHR pipeline_flags = + vk_compute_pipeline_create_flags(pCreateInfo); + + const VkPipelineCreationFeedbackCreateInfo *feedback_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + + struct vk_compute_pipeline *pipeline = + vk_pipeline_zalloc(device, &vk_compute_pipeline_ops, + VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_flags, pAllocator, sizeof(*pipeline)); + if (pipeline == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_pipeline_stage stage = { + .stage = MESA_SHADER_COMPUTE, + }; + result = vk_pipeline_precompile_shader(device, cache, pipeline_flags, + pCreateInfo->pNext, + &pCreateInfo->stage, + &stage.precomp); + if (result != VK_SUCCESS) + goto fail_pipeline; + + bool cache_hit; + result = vk_pipeline_compile_compute_stage(device, cache, pipeline, + pipeline_layout, &stage, + &cache_hit); + if (result != VK_SUCCESS) + goto fail_stage; + + if (stage.precomp != NULL) + vk_pipeline_precomp_shader_unref(device, stage.precomp); + pipeline->shader = stage.shader; + + const int64_t pipeline_end = os_time_get_nano(); + if (feedback_info != NULL) { + VkPipelineCreationFeedback pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, + .duration = pipeline_end - pipeline_start, + }; + if (cache_hit && cache != device->mem_cache) { + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + + *feedback_info->pPipelineCreationFeedback = pipeline_feedback; + if (feedback_info->pipelineStageCreationFeedbackCount > 0) { + feedback_info->pPipelineStageCreationFeedbacks[0] = + pipeline_feedback; + } + } + + *pPipeline = vk_pipeline_to_handle(&pipeline->base); + + return VK_SUCCESS; + +fail_stage: + vk_pipeline_stage_finish(device, &stage); +fail_pipeline: + vk_pipeline_free(device, pAllocator, &pipeline->base); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateComputePipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + VkResult first_error_or_success = VK_SUCCESS; + + /* Use implicit pipeline cache if there's no cache set */ + if (!cache && device->mem_cache) + cache = device->mem_cache; + + /* From the Vulkan 1.3.274 spec: + * + * "When attempting to create many pipelines in a single command, it is + * possible that 
creation may fail for a subset of them. In this case, + the corresponding elements of pPipelines will be set to + VK_NULL_HANDLE." + */ + memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines)); + + unsigned i = 0; + for (; i < createInfoCount; i++) { + VkResult result = vk_create_compute_pipeline(device, cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result == VK_SUCCESS) + continue; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + + /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it + * is not obvious what error should be reported upon 2 different failures. + */ + if (result != VK_PIPELINE_COMPILE_REQUIRED) + return result; + + const VkPipelineCreateFlags2KHR flags = + vk_compute_pipeline_create_flags(&pCreateInfos[i]); + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + return result; + } + + return first_error_or_success; +} + +void +vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer, + VkShaderStageFlags stages) +{ + stages &= cmd_buffer->pipeline_shader_stages; + + if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) + vk_graphics_pipeline_cmd_bind(cmd_buffer, NULL); + + if (stages & VK_SHADER_STAGE_COMPUTE_BIT) + vk_compute_pipeline_cmd_bind(cmd_buffer, NULL); +} diff --git a/src/vulkan/runtime/vk_pipeline.h b/src/vulkan/runtime/vk_pipeline.h new file mode 100644 index 00000000000..ed05d567a8f --- /dev/null +++ b/src/vulkan/runtime/vk_pipeline.h @@ -0,0 +1,210 @@ +/* + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VK_PIPELINE_H +#define VK_PIPELINE_H + +#include "vk_object.h" +#include "vk_util.h" + +#include <stdbool.h> + +struct nir_shader; +struct nir_shader_compiler_options; +struct spirv_to_nir_options; +struct vk_command_buffer; +struct vk_device; + +#ifdef __cplusplus +extern "C" { +#endif + +#define VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA \ + (VkStructureType)1000290001 + +#define VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA_cast \ + VkPipelineShaderStageNirCreateInfoMESA + +typedef struct VkPipelineShaderStageNirCreateInfoMESA { + VkStructureType sType; + const void *pNext; + struct nir_shader *nir; +} VkPipelineShaderStageNirCreateInfoMESA; + +bool +vk_pipeline_shader_stage_is_null(const VkPipelineShaderStageCreateInfo *info); + +VkResult +vk_pipeline_shader_stage_to_nir(struct vk_device *device, + const VkPipelineShaderStageCreateInfo *info, + const struct spirv_to_nir_options *spirv_options, + const struct nir_shader_compiler_options *nir_options, + void *mem_ctx, struct nir_shader **nir_out); + +enum gl_subgroup_size +vk_get_subgroup_size(uint32_t spirv_version, + gl_shader_stage stage, + const void *info_pNext, + bool allow_varying, + bool require_full); + +struct vk_pipeline_robustness_state { + VkPipelineRobustnessBufferBehaviorEXT storage_buffers; + VkPipelineRobustnessBufferBehaviorEXT uniform_buffers; + VkPipelineRobustnessBufferBehaviorEXT vertex_inputs; + VkPipelineRobustnessImageBehaviorEXT images; +}; + +/** Hash VkPipelineShaderStageCreateInfo info + * + * Returns the hash of a VkPipelineShaderStageCreateInfo: + * SHA1(info->module->sha1, + * info->pName, + * vk_stage_to_mesa_stage(info->stage), + * info->pSpecializationInfo) + * + * Can only be used if VkPipelineShaderStageCreateInfo::module is a + * vk_shader_module object. 
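 * The robustness state passed via rstate is also folded into the hash, so
 * the same module keys differently under different robustness settings.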
+ */ +void +vk_pipeline_hash_shader_stage(const VkPipelineShaderStageCreateInfo *info, + const struct vk_pipeline_robustness_state *rstate, + unsigned char *stage_sha1); + +void +vk_pipeline_robustness_state_fill(const struct vk_device *device, + struct vk_pipeline_robustness_state *rs, + const void *pipeline_pNext, + const void *shader_stage_pNext); + +static inline VkPipelineCreateFlags2KHR +vk_compute_pipeline_create_flags(const VkComputePipelineCreateInfo *info) +{ + const VkPipelineCreateFlags2CreateInfoKHR *flags2 = + vk_find_struct_const(info->pNext, + PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR); + if (flags2) + return flags2->flags; + else + return info->flags; +} + +static inline VkPipelineCreateFlags2KHR +vk_graphics_pipeline_create_flags(const VkGraphicsPipelineCreateInfo *info) +{ + const VkPipelineCreateFlags2CreateInfoKHR *flags2 = + vk_find_struct_const(info->pNext, + PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR); + if (flags2) + return flags2->flags; + else + return info->flags; +} + +static inline VkPipelineCreateFlags2KHR +vk_rt_pipeline_create_flags(const VkRayTracingPipelineCreateInfoKHR *info) +{ + const VkPipelineCreateFlags2CreateInfoKHR *flags2 = + vk_find_struct_const(info->pNext, + PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR); + if (flags2) + return flags2->flags; + else + return info->flags; +} + +#ifdef VK_ENABLE_BETA_EXTENSIONS +static inline VkPipelineCreateFlags2KHR +vk_graph_pipeline_create_flags(const VkExecutionGraphPipelineCreateInfoAMDX *info) +{ + const VkPipelineCreateFlags2CreateInfoKHR *flags2 = + vk_find_struct_const(info->pNext, + PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR); + if (flags2) + return flags2->flags; + else + return info->flags; +} +#endif + +struct vk_pipeline_ops; + +struct vk_pipeline { + struct vk_object_base base; + + const struct vk_pipeline_ops *ops; + + VkPipelineBindPoint bind_point; + VkPipelineCreateFlags2KHR flags; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_pipeline, base, VkPipeline, + VK_OBJECT_TYPE_PIPELINE); + +struct vk_pipeline_ops { + void (*destroy)(struct vk_device *device, + struct vk_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator); + + VkResult (*get_executable_properties)(struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties); + + VkResult (*get_executable_statistics)(struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics); + + VkResult (*get_internal_representations)( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR* internal_representations); + + void (*cmd_bind)(struct vk_command_buffer *cmd_buffer, + struct vk_pipeline *pipeline); +}; + +void *vk_pipeline_zalloc(struct vk_device *device, + const struct vk_pipeline_ops *ops, + VkPipelineBindPoint bind_point, + VkPipelineCreateFlags2KHR flags, + const VkAllocationCallbacks *alloc, + size_t size); + +void vk_pipeline_free(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_pipeline *pipeline); + +void +vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer, + VkShaderStageFlags stages); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_PIPELINE_H */ diff --git a/src/vulkan/runtime/vk_pipeline_cache.c b/src/vulkan/runtime/vk_pipeline_cache.c new file mode 100644 index 00000000000..71471dd0239 --- /dev/null +++ 
b/src/vulkan/runtime/vk_pipeline_cache.c @@ -0,0 +1,852 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_pipeline_cache.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" +#include "vk_physical_device.h" + +#include "compiler/nir/nir_serialize.h" + +#include "util/blob.h" +#include "util/u_debug.h" +#include "util/disk_cache.h" +#include "util/hash_table.h" +#include "util/set.h" + +#define vk_pipeline_cache_log(cache, ...) \ + if (cache->base.client_visible) \ + vk_logw(VK_LOG_OBJS(cache), __VA_ARGS__) + +static bool +vk_raw_data_cache_object_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) +{ + struct vk_raw_data_cache_object *data_obj = + container_of(object, struct vk_raw_data_cache_object, base); + + blob_write_bytes(blob, data_obj->data, data_obj->data_size); + + return true; +} + +static struct vk_pipeline_cache_object * +vk_raw_data_cache_object_deserialize(struct vk_pipeline_cache *cache, + const void *key_data, + size_t key_size, + struct blob_reader *blob) +{ + /* We consume the entire blob_reader. Each call to ops->deserialize() + * happens with a brand new blob reader for error checking anyway so we + * can assume the blob consumes the entire reader and we don't need to + * serialize the data size separately. + */ + assert(blob->current < blob->end); + size_t data_size = blob->end - blob->current; + const void *data = blob_read_bytes(blob, data_size); + + struct vk_raw_data_cache_object *data_obj = + vk_raw_data_cache_object_create(cache->base.device, key_data, key_size, + data, data_size); + + return data_obj ? 
&data_obj->base : NULL; +} + +static void +vk_raw_data_cache_object_destroy(struct vk_device *device, + struct vk_pipeline_cache_object *object) +{ + struct vk_raw_data_cache_object *data_obj = + container_of(object, struct vk_raw_data_cache_object, base); + + vk_free(&device->alloc, data_obj); +} + +const struct vk_pipeline_cache_object_ops vk_raw_data_cache_object_ops = { + .serialize = vk_raw_data_cache_object_serialize, + .deserialize = vk_raw_data_cache_object_deserialize, + .destroy = vk_raw_data_cache_object_destroy, +}; + +struct vk_raw_data_cache_object * +vk_raw_data_cache_object_create(struct vk_device *device, + const void *key_data, size_t key_size, + const void *data, size_t data_size) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_raw_data_cache_object, data_obj, 1); + VK_MULTIALLOC_DECL_SIZE(&ma, char, obj_key_data, key_size); + VK_MULTIALLOC_DECL_SIZE(&ma, char, obj_data, data_size); + + if (!vk_multialloc_alloc(&ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + vk_pipeline_cache_object_init(device, &data_obj->base, + &vk_raw_data_cache_object_ops, + obj_key_data, key_size); + data_obj->data = obj_data; + data_obj->data_size = data_size; + + memcpy(obj_key_data, key_data, key_size); + memcpy(obj_data, data, data_size); + + return data_obj; +} + +static bool +object_keys_equal(const void *void_a, const void *void_b) +{ + const struct vk_pipeline_cache_object *a = void_a, *b = void_b; + if (a->key_size != b->key_size) + return false; + + return memcmp(a->key_data, b->key_data, a->key_size) == 0; +} + +static uint32_t +object_key_hash(const void *void_object) +{ + const struct vk_pipeline_cache_object *object = void_object; + return _mesa_hash_data(object->key_data, object->key_size); +} + +static void +vk_pipeline_cache_lock(struct vk_pipeline_cache *cache) +{ + + if (!(cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT)) + simple_mtx_lock(&cache->lock); +} + +static void +vk_pipeline_cache_unlock(struct vk_pipeline_cache *cache) +{ + if (!(cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT)) + simple_mtx_unlock(&cache->lock); +} + +/* cache->lock must be held when calling */ +static void +vk_pipeline_cache_remove_object(struct vk_pipeline_cache *cache, + uint32_t hash, + struct vk_pipeline_cache_object *object) +{ + struct set_entry *entry = + _mesa_set_search_pre_hashed(cache->object_cache, hash, object); + if (entry && entry->key == (const void *)object) { + /* Drop the reference owned by the cache */ + if (!cache->weak_ref) + vk_pipeline_cache_object_unref(cache->base.device, object); + + _mesa_set_remove(cache->object_cache, entry); + } +} + +static inline struct vk_pipeline_cache_object * +vk_pipeline_cache_object_weak_ref(struct vk_pipeline_cache *cache, + struct vk_pipeline_cache_object *object) +{ + assert(!object->weak_owner); + p_atomic_set(&object->weak_owner, cache); + return object; +} + +void +vk_pipeline_cache_object_unref(struct vk_device *device, struct vk_pipeline_cache_object *object) +{ + assert(object && p_atomic_read(&object->ref_cnt) >= 1); + + struct vk_pipeline_cache *weak_owner = p_atomic_read(&object->weak_owner); + if (!weak_owner) { + if (p_atomic_dec_zero(&object->ref_cnt)) + object->ops->destroy(device, object); + } else { + vk_pipeline_cache_lock(weak_owner); + bool destroy = p_atomic_dec_zero(&object->ref_cnt); + if (destroy) { + uint32_t hash = object_key_hash(object); + vk_pipeline_cache_remove_object(weak_owner, hash, object); + } + 
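      /* The cache's entry was removed above while its lock was held; the
       * actual destroy is deferred until after unlocking so that
       * ops->destroy() never runs with the cache lock held.
       */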
vk_pipeline_cache_unlock(weak_owner); + if (destroy) + object->ops->destroy(device, object); + } +} + +static bool +vk_pipeline_cache_object_serialize(struct vk_pipeline_cache *cache, + struct vk_pipeline_cache_object *object, + struct blob *blob, uint32_t *data_size) +{ + if (object->ops->serialize == NULL) + return false; + + assert(blob->size == align64(blob->size, VK_PIPELINE_CACHE_BLOB_ALIGN)); + size_t start = blob->size; + + /* Special case for if we're writing to a NULL blob (just to get the size) + * and we already know the data size of the allocation. This should make + * the first GetPipelineCacheData() call to get the data size faster in the + * common case where a bunch of our objects were loaded from a previous + * cache or where we've already serialized the cache once. + */ + if (blob->data == NULL && blob->fixed_allocation) { + *data_size = p_atomic_read(&object->data_size); + if (*data_size > 0) { + blob_write_bytes(blob, NULL, *data_size); + return true; + } + } + + if (!object->ops->serialize(object, blob)) { + vk_pipeline_cache_log(cache, "Failed to serialize pipeline cache object"); + return false; + } + + size_t size = blob->size - start; + if (size > UINT32_MAX) { + vk_pipeline_cache_log(cache, "Skipping giant (4 GiB or larger) object"); + return false; + } + + if (blob->out_of_memory) { + vk_pipeline_cache_log(cache, + "Insufficient memory for pipeline cache data"); + return false; + } + + *data_size = (uint32_t)size; + p_atomic_set(&object->data_size, *data_size); + + return true; +} + +static struct vk_pipeline_cache_object * +vk_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *data, size_t data_size, + const struct vk_pipeline_cache_object_ops *ops) +{ + if (ops == NULL) + ops = &vk_raw_data_cache_object_ops; + + if (unlikely(ops->deserialize == NULL)) { + vk_pipeline_cache_log(cache, + "Pipeline cache object cannot be deserialized"); + return NULL; + } + + struct blob_reader reader; + blob_reader_init(&reader, data, data_size); + + struct vk_pipeline_cache_object *object = + ops->deserialize(cache, key_data, key_size, &reader); + + if (object == NULL) + return NULL; + + assert(reader.current == reader.end && !reader.overrun); + assert(object->ops == ops); + assert(object->ref_cnt == 1); + assert(object->key_size == key_size); + assert(memcmp(object->key_data, key_data, key_size) == 0); + + return object; +} + +static struct vk_pipeline_cache_object * +vk_pipeline_cache_insert_object(struct vk_pipeline_cache *cache, + struct vk_pipeline_cache_object *object) +{ + assert(object->ops != NULL); + + if (cache->object_cache == NULL) + return object; + + uint32_t hash = object_key_hash(object); + + vk_pipeline_cache_lock(cache); + bool found = false; + struct set_entry *entry = _mesa_set_search_or_add_pre_hashed( + cache->object_cache, hash, object, &found); + + struct vk_pipeline_cache_object *result = NULL; + /* add reference to either the found or inserted object */ + if (found) { + struct vk_pipeline_cache_object *found_object = (void *)entry->key; + if (found_object->ops != object->ops) { + /* The found object in the cache isn't fully formed. Replace it. 
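         * The existing entry is a raw-data object; swap the fully formed
         * object into the set and release the raw one below.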
*/ + assert(!cache->weak_ref); + assert(found_object->ops == &vk_raw_data_cache_object_ops); + assert(object->ref_cnt == 1); + entry->key = object; + object = found_object; + } + + result = vk_pipeline_cache_object_ref((void *)entry->key); + } else { + result = object; + if (!cache->weak_ref) + vk_pipeline_cache_object_ref(result); + else + vk_pipeline_cache_object_weak_ref(cache, result); + } + vk_pipeline_cache_unlock(cache); + + if (found) { + vk_pipeline_cache_object_unref(cache->base.device, object); + } + return result; +} + +struct vk_pipeline_cache_object * +vk_pipeline_cache_lookup_object(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + const struct vk_pipeline_cache_object_ops *ops, + bool *cache_hit) +{ + assert(key_size <= UINT32_MAX); + assert(ops != NULL); + + if (cache_hit != NULL) + *cache_hit = false; + + struct vk_pipeline_cache_object key = { + .key_data = key_data, + .key_size = key_size, + }; + uint32_t hash = object_key_hash(&key); + + struct vk_pipeline_cache_object *object = NULL; + + if (cache != NULL && cache->object_cache != NULL) { + vk_pipeline_cache_lock(cache); + struct set_entry *entry = + _mesa_set_search_pre_hashed(cache->object_cache, hash, &key); + if (entry) { + object = vk_pipeline_cache_object_ref((void *)entry->key); + if (cache_hit != NULL) + *cache_hit = true; + } + vk_pipeline_cache_unlock(cache); + } + + if (object == NULL) { + struct disk_cache *disk_cache = cache->base.device->physical->disk_cache; + if (!cache->skip_disk_cache && disk_cache && cache->object_cache) { + cache_key cache_key; + disk_cache_compute_key(disk_cache, key_data, key_size, cache_key); + + size_t data_size; + uint8_t *data = disk_cache_get(disk_cache, cache_key, &data_size); + if (data) { + object = vk_pipeline_cache_object_deserialize(cache, + key_data, key_size, + data, data_size, + ops); + free(data); + if (object != NULL) { + return vk_pipeline_cache_insert_object(cache, object); + } + } + } + + /* No disk cache or not found in the disk cache */ + return NULL; + } + + if (object->ops == &vk_raw_data_cache_object_ops && + ops != &vk_raw_data_cache_object_ops) { + /* The object isn't fully formed yet and we need to deserialize it into + * a real object before it can be used. + */ + struct vk_raw_data_cache_object *data_obj = + container_of(object, struct vk_raw_data_cache_object, base); + + struct vk_pipeline_cache_object *real_object = + vk_pipeline_cache_object_deserialize(cache, + data_obj->base.key_data, + data_obj->base.key_size, + data_obj->data, + data_obj->data_size, ops); + if (real_object == NULL) { + vk_pipeline_cache_log(cache, + "Deserializing pipeline cache object failed"); + + vk_pipeline_cache_lock(cache); + vk_pipeline_cache_remove_object(cache, hash, object); + vk_pipeline_cache_unlock(cache); + vk_pipeline_cache_object_unref(cache->base.device, object); + return NULL; + } + + vk_pipeline_cache_object_unref(cache->base.device, object); + object = vk_pipeline_cache_insert_object(cache, real_object); + } + + assert(object->ops == ops); + + return object; +} + +struct vk_pipeline_cache_object * +vk_pipeline_cache_add_object(struct vk_pipeline_cache *cache, + struct vk_pipeline_cache_object *object) +{ + struct vk_pipeline_cache_object *inserted = + vk_pipeline_cache_insert_object(cache, object); + + if (object == inserted) { + /* If it wasn't in the object cache, it might not be in the disk cache + * either. Better try and add it. 
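    * A failure to serialize here is not fatal; the object just never makes
    * it into the disk cache and stays available through the in-memory
    * cache only.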
+ */ + + struct disk_cache *disk_cache = cache->base.device->physical->disk_cache; + if (!cache->skip_disk_cache && object->ops->serialize && disk_cache) { + struct blob blob; + blob_init(&blob); + + if (object->ops->serialize(object, &blob) && !blob.out_of_memory) { + cache_key cache_key; + disk_cache_compute_key(disk_cache, object->key_data, + object->key_size, cache_key); + + disk_cache_put(disk_cache, cache_key, blob.data, blob.size, NULL); + } + + blob_finish(&blob); + } + } + + return inserted; +} + +struct vk_pipeline_cache_object * +vk_pipeline_cache_create_and_insert_object(struct vk_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *data, size_t data_size, + const struct vk_pipeline_cache_object_ops *ops) +{ + struct disk_cache *disk_cache = cache->base.device->physical->disk_cache; + if (!cache->skip_disk_cache && disk_cache) { + cache_key cache_key; + disk_cache_compute_key(disk_cache, key_data, key_size, cache_key); + disk_cache_put(disk_cache, cache_key, data, data_size, NULL); + } + + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_object_deserialize(cache, key_data, key_size, data, + data_size, ops); + + if (object) + object = vk_pipeline_cache_insert_object(cache, object); + + return object; +} + +nir_shader * +vk_pipeline_cache_lookup_nir(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + const struct nir_shader_compiler_options *nir_options, + bool *cache_hit, void *mem_ctx) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, key_data, key_size, + &vk_raw_data_cache_object_ops, + cache_hit); + if (object == NULL) + return NULL; + + struct vk_raw_data_cache_object *data_obj = + container_of(object, struct vk_raw_data_cache_object, base); + + struct blob_reader blob; + blob_reader_init(&blob, data_obj->data, data_obj->data_size); + + nir_shader *nir = nir_deserialize(mem_ctx, nir_options, &blob); + vk_pipeline_cache_object_unref(cache->base.device, object); + + if (blob.overrun) { + ralloc_free(nir); + return NULL; + } + + return nir; +} + +void +vk_pipeline_cache_add_nir(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + const nir_shader *nir) +{ + struct blob blob; + blob_init(&blob); + + nir_serialize(&blob, nir, false); + if (blob.out_of_memory) { + vk_pipeline_cache_log(cache, "Ran out of memory serializing NIR shader"); + blob_finish(&blob); + return; + } + + struct vk_raw_data_cache_object *data_obj = + vk_raw_data_cache_object_create(cache->base.device, + key_data, key_size, + blob.data, blob.size); + blob_finish(&blob); + + struct vk_pipeline_cache_object *cached = + vk_pipeline_cache_add_object(cache, &data_obj->base); + vk_pipeline_cache_object_unref(cache->base.device, cached); +} + +static int32_t +find_type_for_ops(const struct vk_physical_device *pdevice, + const struct vk_pipeline_cache_object_ops *ops) +{ + const struct vk_pipeline_cache_object_ops *const *import_ops = + pdevice->pipeline_cache_import_ops; + + if (import_ops == NULL) + return -1; + + for (int32_t i = 0; import_ops[i]; i++) { + if (import_ops[i] == ops) + return i; + } + + return -1; +} + +static const struct vk_pipeline_cache_object_ops * +find_ops_for_type(const struct vk_physical_device *pdevice, + int32_t type) +{ + const struct vk_pipeline_cache_object_ops *const *import_ops = + pdevice->pipeline_cache_import_ops; + + if (import_ops == NULL || type < 0) + return NULL; + + return import_ops[type]; +} + +static void +vk_pipeline_cache_load(struct 
vk_pipeline_cache *cache, + const void *data, size_t size) +{ + struct blob_reader blob; + blob_reader_init(&blob, data, size); + + struct vk_pipeline_cache_header header; + blob_copy_bytes(&blob, &header, sizeof(header)); + uint32_t count = blob_read_uint32(&blob); + if (blob.overrun) + return; + + if (memcmp(&header, &cache->header, sizeof(header)) != 0) + return; + + for (uint32_t i = 0; i < count; i++) { + int32_t type = blob_read_uint32(&blob); + uint32_t key_size = blob_read_uint32(&blob); + uint32_t data_size = blob_read_uint32(&blob); + const void *key_data = blob_read_bytes(&blob, key_size); + blob_reader_align(&blob, VK_PIPELINE_CACHE_BLOB_ALIGN); + const void *data = blob_read_bytes(&blob, data_size); + if (blob.overrun) + break; + + const struct vk_pipeline_cache_object_ops *ops = + find_ops_for_type(cache->base.device->physical, type); + + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_create_and_insert_object(cache, key_data, key_size, + data, data_size, ops); + + if (object == NULL) { + vk_pipeline_cache_log(cache, "Failed to load pipeline cache object"); + continue; + } + + vk_pipeline_cache_object_unref(cache->base.device, object); + } +} + +struct vk_pipeline_cache * +vk_pipeline_cache_create(struct vk_device *device, + const struct vk_pipeline_cache_create_info *info, + const VkAllocationCallbacks *pAllocator) +{ + static const struct VkPipelineCacheCreateInfo default_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + }; + struct vk_pipeline_cache *cache; + + const struct VkPipelineCacheCreateInfo *pCreateInfo = + info->pCreateInfo != NULL ? info->pCreateInfo : &default_create_info; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + + cache = vk_object_zalloc(device, pAllocator, sizeof(*cache), + VK_OBJECT_TYPE_PIPELINE_CACHE); + if (cache == NULL) + return NULL; + + cache->flags = pCreateInfo->flags; + cache->weak_ref = info->weak_ref; +#ifndef ENABLE_SHADER_CACHE + cache->skip_disk_cache = true; +#else + cache->skip_disk_cache = info->skip_disk_cache; +#endif + + struct VkPhysicalDeviceProperties pdevice_props; + device->physical->dispatch_table.GetPhysicalDeviceProperties( + vk_physical_device_to_handle(device->physical), &pdevice_props); + + cache->header = (struct vk_pipeline_cache_header) { + .header_size = sizeof(struct vk_pipeline_cache_header), + .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + .vendor_id = pdevice_props.vendorID, + .device_id = pdevice_props.deviceID, + }; + memcpy(cache->header.uuid, pdevice_props.pipelineCacheUUID, VK_UUID_SIZE); + + simple_mtx_init(&cache->lock, mtx_plain); + + if (info->force_enable || + debug_get_bool_option("VK_ENABLE_PIPELINE_CACHE", true)) { + cache->object_cache = _mesa_set_create(NULL, object_key_hash, + object_keys_equal); + } + + if (cache->object_cache && pCreateInfo->initialDataSize > 0) { + vk_pipeline_cache_load(cache, pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + } + + return cache; +} + +void +vk_pipeline_cache_destroy(struct vk_pipeline_cache *cache, + const VkAllocationCallbacks *pAllocator) +{ + if (cache->object_cache) { + if (!cache->weak_ref) { + set_foreach(cache->object_cache, entry) { + vk_pipeline_cache_object_unref(cache->base.device, (void *)entry->key); + } + } else { + assert(cache->object_cache->entries == 0); + } + _mesa_set_destroy(cache->object_cache, NULL); + } + simple_mtx_destroy(&cache->lock); + vk_object_free(cache->base.device, pAllocator, cache); +} + +VKAPI_ATTR VkResult VKAPI_CALL 
+vk_common_CreatePipelineCache(VkDevice _device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_pipeline_cache *cache; + + struct vk_pipeline_cache_create_info info = { + .pCreateInfo = pCreateInfo, + }; + cache = vk_pipeline_cache_create(device, &info, pAllocator); + if (cache == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + *pPipelineCache = vk_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyPipelineCache(VkDevice device, + VkPipelineCache pipelineCache, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + + if (cache == NULL) + return; + + assert(cache->base.device == vk_device_from_handle(device)); + vk_pipeline_cache_destroy(cache, pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetPipelineCacheData(VkDevice _device, + VkPipelineCache pipelineCache, + size_t *pDataSize, + void *pData) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + + struct blob blob; + if (pData) { + blob_init_fixed(&blob, pData, *pDataSize); + } else { + blob_init_fixed(&blob, NULL, SIZE_MAX); + } + + blob_write_bytes(&blob, &cache->header, sizeof(cache->header)); + + uint32_t count = 0; + intptr_t count_offset = blob_reserve_uint32(&blob); + if (count_offset < 0) { + *pDataSize = 0; + blob_finish(&blob); + return VK_INCOMPLETE; + } + + vk_pipeline_cache_lock(cache); + + VkResult result = VK_SUCCESS; + if (cache->object_cache != NULL) { + set_foreach(cache->object_cache, entry) { + struct vk_pipeline_cache_object *object = (void *)entry->key; + + if (object->ops->serialize == NULL) + continue; + + size_t blob_size_save = blob.size; + + int32_t type = find_type_for_ops(device->physical, object->ops); + blob_write_uint32(&blob, type); + blob_write_uint32(&blob, object->key_size); + intptr_t data_size_resv = blob_reserve_uint32(&blob); + blob_write_bytes(&blob, object->key_data, object->key_size); + + if (!blob_align(&blob, VK_PIPELINE_CACHE_BLOB_ALIGN)) { + result = VK_INCOMPLETE; + break; + } + + uint32_t data_size; + if (!vk_pipeline_cache_object_serialize(cache, object, + &blob, &data_size)) { + blob.size = blob_size_save; + if (blob.out_of_memory) { + result = VK_INCOMPLETE; + break; + } + + /* Failed for some other reason; keep going */ + continue; + } + + /* vk_pipeline_cache_object_serialize should have failed */ + assert(!blob.out_of_memory); + + assert(data_size_resv >= 0); + blob_overwrite_uint32(&blob, data_size_resv, data_size); + + count++; + } + } + + vk_pipeline_cache_unlock(cache); + + blob_overwrite_uint32(&blob, count_offset, count); + + *pDataSize = blob.size; + + blob_finish(&blob); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_MergePipelineCaches(VkDevice _device, + VkPipelineCache dstCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) +{ + VK_FROM_HANDLE(vk_pipeline_cache, dst, dstCache); + VK_FROM_HANDLE(vk_device, device, _device); + assert(dst->base.device == device); + assert(!dst->weak_ref); + + if (!dst->object_cache) + return VK_SUCCESS; + + vk_pipeline_cache_lock(dst); + + for (uint32_t i = 0; i < srcCacheCount; i++) { + VK_FROM_HANDLE(vk_pipeline_cache, src, pSrcCaches[i]); + assert(src->base.device == device); + + if (!src->object_cache) + continue; + + assert(src != dst); + if (src == dst) + continue; + + 
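+      /* Walk src's object table under its lock; dst's lock is already held above. */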
vk_pipeline_cache_lock(src); + + set_foreach(src->object_cache, src_entry) { + struct vk_pipeline_cache_object *src_object = (void *)src_entry->key; + + bool found_in_dst = false; + struct set_entry *dst_entry = + _mesa_set_search_or_add_pre_hashed(dst->object_cache, + src_entry->hash, + src_object, &found_in_dst); + if (found_in_dst) { + struct vk_pipeline_cache_object *dst_object = (void *)dst_entry->key; + if (dst_object->ops == &vk_raw_data_cache_object_ops && + src_object->ops != &vk_raw_data_cache_object_ops) { + /* Even though dst has the object, it only has the blob version + * which isn't as useful. Replace it with the real object. + */ + vk_pipeline_cache_object_unref(device, dst_object); + dst_entry->key = vk_pipeline_cache_object_ref(src_object); + } + } else { + /* We inserted src_object in dst so it needs a reference */ + assert(dst_entry->key == (const void *)src_object); + vk_pipeline_cache_object_ref(src_object); + } + } + + vk_pipeline_cache_unlock(src); + } + + vk_pipeline_cache_unlock(dst); + + return VK_SUCCESS; +} diff --git a/src/vulkan/runtime/vk_pipeline_cache.h b/src/vulkan/runtime/vk_pipeline_cache.h new file mode 100644 index 00000000000..993bbabb0be --- /dev/null +++ b/src/vulkan/runtime/vk_pipeline_cache.h @@ -0,0 +1,314 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_PIPELINE_CACHE_H +#define VK_PIPELINE_CACHE_H + +#include "vk_object.h" +#include "vk_util.h" + +#include "util/simple_mtx.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* #include "util/blob.h" */ +struct blob; +struct blob_reader; + +/* #include "util/set.h" */ +struct set; + +/* #include "compiler/nir/nir.h" */ +struct nir_shader; +struct nir_shader_compiler_options; + +struct vk_pipeline_cache; +struct vk_pipeline_cache_object; + +#define VK_PIPELINE_CACHE_BLOB_ALIGN 8 + +struct vk_pipeline_cache_object_ops { + /** Writes this cache object to the given blob + * + * Because the cache works with both raw blob data and driver object data + * and can't always tell the difference between the two, we have to be very + * careful about alignments when [de]serializing. When serialize() is + * called, the blob will be aligned to VK_PIPELINE_CACHE_BLOB_ALIGN. The + * driver must be careful to not [de]serialize any data types which require + * a higher alignment. When deserialize() is called, the blob_reader is + * also guaranteed to be aligned to VK_PIPELINE_CACHE_BLOB_ALIGN. 
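+ *
+ * Purely as an illustration (my_shader and its fields are hypothetical and
+ * not part of this runtime), an object that caches a flat binary could
+ * implement this roughly as:
+ *
+ *    static bool
+ *    my_shader_serialize(struct vk_pipeline_cache_object *object,
+ *                        struct blob *blob)
+ *    {
+ *       struct my_shader *shader =
+ *          container_of(object, struct my_shader, base);
+ *
+ *       blob_write_bytes(blob, shader->code, shader->code_size);
+ *       return !blob->out_of_memory;
+ *    }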
+ * + * Returns true on success + * + * This function is optional. Objects without [de]serialization support + * will still be cached in memory but will not be placed in the disk cache + * and will not be exported to the client when vkGetPipelineCacheData() is + * called. + */ + bool (*serialize)(struct vk_pipeline_cache_object *object, + struct blob *blob); + + /** Constructs an object from cached data + * + * See serialize() for details about data alignment. + * + * Returns the created object + * + * This function is optional. + */ + struct vk_pipeline_cache_object *(*deserialize)(struct vk_pipeline_cache *cache, + const void *key_data, + size_t key_size, + struct blob_reader *blob); + + /** Destroys the object + * + * Called when vk_pipeline_cache_object.ref_cnt hits 0. + */ + void (*destroy)(struct vk_device *device, + struct vk_pipeline_cache_object *object); +}; + +/** Base struct for cached objects + * + * A vk_pipeline_cache stores any number of vk_pipeline_cache_object's, each + * of which has an associated key of arbitrary size. Cached objects are + * reference counted so that they can exist in multiple caches (for example, + * when vkMergePipelineCaches() is called) and so that they can persist after + * the pipeline cache is destroyed. Each object also has a pointer to a + * vk_pipeline_cache_object_ops table which the pipeline cache uses to + * [de]serialize the object and clean it up when the reference count hits 0. + * + * The rest of the details of any given object are entirely up to the driver. + * The driver may even have multiple types of objects (distinguished by their + * vk_pipeline_cache_object_ops table) in the cache so long as it guarantees + * it never has two objects of different types with the same key. + */ +struct vk_pipeline_cache_object { + const struct vk_pipeline_cache_object_ops *ops; + struct vk_pipeline_cache *weak_owner; + uint32_t ref_cnt; + + uint32_t data_size; + const void *key_data; + uint32_t key_size; +}; + +static inline void +vk_pipeline_cache_object_init(struct vk_device *device, + struct vk_pipeline_cache_object *object, + const struct vk_pipeline_cache_object_ops *ops, + const void *key_data, uint32_t key_size) +{ + memset(object, 0, sizeof(*object)); + object->ops = ops; + p_atomic_set(&object->ref_cnt, 1); + object->data_size = 0; /* Unknown */ + object->key_data = key_data; + object->key_size = key_size; +} + +static inline void +vk_pipeline_cache_object_finish(struct vk_pipeline_cache_object *object) +{ + assert(p_atomic_read(&object->ref_cnt) <= 1); +} + +static inline struct vk_pipeline_cache_object * +vk_pipeline_cache_object_ref(struct vk_pipeline_cache_object *object) +{ + assert(object && p_atomic_read(&object->ref_cnt) >= 1); + p_atomic_inc(&object->ref_cnt); + return object; +} + +void +vk_pipeline_cache_object_unref(struct vk_device *device, + struct vk_pipeline_cache_object *object); + +/** A generic implementation of VkPipelineCache */ +struct vk_pipeline_cache { + struct vk_object_base base; + + /* pCreateInfo::flags */ + VkPipelineCacheCreateFlags flags; + bool weak_ref; + bool skip_disk_cache; + + struct vk_pipeline_cache_header header; + + /** Protects object_cache */ + simple_mtx_t lock; + + struct set *object_cache; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_pipeline_cache, base, VkPipelineCache, + VK_OBJECT_TYPE_PIPELINE_CACHE) + +struct vk_pipeline_cache_create_info { + /* The pCreateInfo for this pipeline cache, if any. + * + * For driver-internal caches, this is allowed to be NULL.
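+ *
+ * For illustration only, a driver-internal cache with no client-provided
+ * create info might be set up as follows (a sketch, not a required
+ * pattern):
+ *
+ *    struct vk_pipeline_cache_create_info info = {
+ *       .force_enable = true,
+ *    };
+ *    struct vk_pipeline_cache *internal_cache =
+ *       vk_pipeline_cache_create(device, &info, NULL);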
+ */ + const VkPipelineCacheCreateInfo *pCreateInfo; + + /** If true, ignore VK_ENABLE_PIPELINE_CACHE and enable anyway */ + bool force_enable; + + /** If true, the cache operates in weak reference mode. + * + * The weak reference mode is designed for device-global caches for the + * purpose of de-duplicating identical shaders and pipelines. In the weak + * reference mode, an object's reference count is not incremented when it is + * added to the cache. Therefore the object will be destroyed as soon as + * there are no external references to it, and the runtime will perform the + * necessary bookkeeping to remove the dead reference from this cache's table. + * + * As the weak reference mode is designed for driver-internal use, it has + * several limitations: + * - Merging against a weak reference mode cache is not supported. + * - Lazy deserialization from vk_raw_data_cache_object_ops is not supported. + * - An object can only belong to up to one weak reference mode cache. + * - The cache must outlive the object, as the object will try to access its + * owner when it's destroyed. + */ + bool weak_ref; + + /** If true, do not attempt to use the disk cache */ + bool skip_disk_cache; +}; + +struct vk_pipeline_cache * +vk_pipeline_cache_create(struct vk_device *device, + const struct vk_pipeline_cache_create_info *info, + const VkAllocationCallbacks *pAllocator); +void +vk_pipeline_cache_destroy(struct vk_pipeline_cache *cache, + const VkAllocationCallbacks *pAllocator); + +/** Attempts to look up an object in the cache by key + * + * If an object is found in the cache matching the given key, *cache_hit is + * set to true and a reference to that object is returned. + * + * If the driver sets vk_physical_device.disk_cache, we attempt to look up any missing + * objects in the disk cache before declaring failure. If an object is found + * in the disk cache but not the in-memory cache, *cache_hit is set to false. + * + * The deserialization of pipeline cache objects found in the cache data + * provided via VkPipelineCacheCreateInfo::pInitialData happens during + * vk_pipeline_cache_lookup() rather than during vkCreatePipelineCache(). + * Prior to the first vk_pipeline_cache_lookup() of a given object, it is + * stored as an internal raw data object with the same hash. This allows us + * to avoid any complex object type tagging in the serialized cache. It does, + * however, mean that drivers need to be careful to ensure that objects with + * different types (ops) have different keys. + * + * Returns a reference to the object, if found + */ +struct vk_pipeline_cache_object * MUST_CHECK +vk_pipeline_cache_lookup_object(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + const struct vk_pipeline_cache_object_ops *ops, + bool *cache_hit); + +/** Adds an object to the pipeline cache + * + * This function adds the given object to the pipeline cache. We do not + * specify a key here because the key is part of the object. See also + * vk_pipeline_cache_object_init(). + * + * This function consumes a reference to the object and returns a reference to + * the (possibly different) object in the cache.
The intended usage pattern + * is as follows: + * + * key = compute_key(); + * struct vk_pipeline_cache_object *object = + * vk_pipeline_cache_lookup_object(cache, &key, sizeof(key), + * &driver_type_ops, &cache_hit); + * if (object != NULL) + * return container_of(object, driver_type, base); + * + * object = do_compile(); + * assert(object != NULL); + * + * object = vk_pipeline_cache_add_object(cache, object); + * return container_of(object, driver_type, base); + */ +struct vk_pipeline_cache_object * MUST_CHECK +vk_pipeline_cache_add_object(struct vk_pipeline_cache *cache, + struct vk_pipeline_cache_object *object); + +/** Creates and inserts an object into the pipeline cache + * + * This function takes serialized data and emplaces the deserialized object + * into the pipeline cache. It is the responsibility of the caller to + * specify a deserialize() function that properly initializes the object. + * + * This function can be used to avoid an extra serialize() step for + * disk-cache insertion. For the intended usage pattern, see + * vk_pipeline_cache_add_object(). + * + */ +struct vk_pipeline_cache_object * +vk_pipeline_cache_create_and_insert_object(struct vk_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *data, size_t data_size, + const struct vk_pipeline_cache_object_ops *ops); + +struct nir_shader * +vk_pipeline_cache_lookup_nir(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + const struct nir_shader_compiler_options *nir_options, + bool *cache_hit, void *mem_ctx); +void +vk_pipeline_cache_add_nir(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + const struct nir_shader *nir); + +/** Specialized type of vk_pipeline_cache_object for raw data objects. + * + * This cache object implementation, together with vk_raw_data_cache_object_ops, + * can be used to cache plain objects as well as already serialized data. + */ +struct vk_raw_data_cache_object { + struct vk_pipeline_cache_object base; + + const void *data; + size_t data_size; +}; + +struct vk_raw_data_cache_object * +vk_raw_data_cache_object_create(struct vk_device *device, + const void *key_data, size_t key_size, + const void *data, size_t data_size); + +extern const struct vk_pipeline_cache_object_ops vk_raw_data_cache_object_ops; + +#ifdef __cplusplus +} +#endif + +#endif /* VK_PIPELINE_CACHE_H */ diff --git a/src/vulkan/runtime/vk_pipeline_layout.c b/src/vulkan/runtime/vk_pipeline_layout.c new file mode 100644 index 00000000000..77653464835 --- /dev/null +++ b/src/vulkan/runtime/vk_pipeline_layout.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_pipeline_layout.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_descriptor_set_layout.h" +#include "vk_device.h" +#include "vk_log.h" + +#include "util/mesa-sha1.h" + +static void +vk_pipeline_layout_init(struct vk_device *device, + struct vk_pipeline_layout *layout, + const VkPipelineLayoutCreateInfo *pCreateInfo) +{ + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + assert(pCreateInfo->setLayoutCount <= MESA_VK_MAX_DESCRIPTOR_SETS); + + vk_object_base_init(device, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT); + + layout->ref_cnt = 1; + layout->create_flags = pCreateInfo->flags; + layout->set_count = pCreateInfo->setLayoutCount; + layout->destroy = vk_pipeline_layout_destroy; + + for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { + VK_FROM_HANDLE(vk_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[s]); + + if (set_layout != NULL) + layout->set_layouts[s] = vk_descriptor_set_layout_ref(set_layout); + else + layout->set_layouts[s] = NULL; + } + + assert(pCreateInfo->pushConstantRangeCount < + MESA_VK_MAX_PUSH_CONSTANT_RANGES); + layout->push_range_count = pCreateInfo->pushConstantRangeCount; + for (uint32_t r = 0; r < pCreateInfo->pushConstantRangeCount; r++) + layout->push_ranges[r] = pCreateInfo->pPushConstantRanges[r]; +} + +void * +vk_pipeline_layout_zalloc(struct vk_device *device, size_t size, + const VkPipelineLayoutCreateInfo *pCreateInfo) +{ + /* Because we're reference counting and lifetimes may not be what the + * client expects, these have to be allocated off the device and not as + * their own object. 
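+    * The size parameter is the full size of the (possibly larger) structure
+    * the caller wants allocated, so a driver can embed vk_pipeline_layout in
+    * its own layout type. For illustration only (drv_pipeline_layout is a
+    * hypothetical driver struct with vk_pipeline_layout as its first member):
+    *
+    *    struct drv_pipeline_layout *layout =
+    *       vk_pipeline_layout_zalloc(device, sizeof(struct drv_pipeline_layout),
+    *                                 pCreateInfo);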
+ */ + struct vk_pipeline_layout *layout = + vk_zalloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (layout == NULL) + return NULL; + + vk_pipeline_layout_init(device, layout, pCreateInfo); + return layout; +} + +void * +vk_pipeline_layout_multizalloc(struct vk_device *device, + struct vk_multialloc *ma, + const VkPipelineLayoutCreateInfo *pCreateInfo) +{ + struct vk_pipeline_layout *layout = + vk_multialloc_zalloc(ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (layout == NULL) + return NULL; + + vk_pipeline_layout_init(device, layout, pCreateInfo); + return layout; +} + + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreatePipelineLayout(VkDevice _device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + UNUSED const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *pPipelineLayout) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + struct vk_pipeline_layout *layout = + vk_pipeline_layout_zalloc(device, sizeof(struct vk_pipeline_layout), + pCreateInfo); + if (layout == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + *pPipelineLayout = vk_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void +vk_pipeline_layout_destroy(struct vk_device *device, + struct vk_pipeline_layout *layout) +{ + assert(layout && layout->ref_cnt == 0); + + for (uint32_t s = 0; s < layout->set_count; s++) { + if (layout->set_layouts[s] != NULL) + vk_descriptor_set_layout_unref(device, layout->set_layouts[s]); + } + + vk_object_free(device, NULL, layout); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyPipelineLayout(VkDevice _device, + VkPipelineLayout pipelineLayout, + UNUSED const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_layout, layout, pipelineLayout); + + if (layout == NULL) + return; + + vk_pipeline_layout_unref(device, layout); +} diff --git a/src/vulkan/runtime/vk_pipeline_layout.h b/src/vulkan/runtime/vk_pipeline_layout.h new file mode 100644 index 00000000000..f71110c20a5 --- /dev/null +++ b/src/vulkan/runtime/vk_pipeline_layout.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2022 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_PIPELINE_LAYOUT_H +#define VK_PIPELINE_LAYOUT_H + +#include "vk_limits.h" +#include "vk_object.h" + +#include "util/u_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_descriptor_set_layout; + +struct vk_pipeline_layout { + struct vk_object_base base; + + /** Reference count + * + * It's often necessary to store a pointer to the descriptor set layout in + * the descriptor so that any entrypoint which has access to a descriptor + * set also has the layout. While layouts are often passed into various + * entrypoints, they're notably missing from vkUpdateDescriptorSets(). In + * order to implement descriptor writes, you either need to stash a pointer + * to the descriptor set layout in the descriptor set or you need to copy + * all of the relevant information. Storing a pointer is a lot cheaper. + * + * Because descriptor set layout lifetimes and descriptor set lifetimes are + * not guaranteed to coincide, we have to reference count if we're going to + * do this. + */ + uint32_t ref_cnt; + + /** VkPipelineLayoutCreateInfo::flags */ + VkPipelineLayoutCreateFlagBits create_flags; + + /** Number of descriptor set layouts in this pipeline layout */ + uint32_t set_count; + + /** Array of pointers to descriptor set layouts, indexed by set index */ + struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS]; + + /** Number of push constant ranges in this pipeline layout */ + uint32_t push_range_count; + + /** Array of push constant ranges */ + VkPushConstantRange push_ranges[MESA_VK_MAX_PUSH_CONSTANT_RANGES]; + + /** Destroy callback + * + * Will be initially set to vk_pipeline_layout_destroy() but may be set to + * a driver-specific callback which does driver-specific clean-up and then + * calls vk_pipeline_layout_destroy(). + */ + void (*destroy)(struct vk_device *device, + struct vk_pipeline_layout *layout); +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_pipeline_layout, base, VkPipelineLayout, + VK_OBJECT_TYPE_PIPELINE_LAYOUT); + +void * +vk_pipeline_layout_zalloc(struct vk_device *device, size_t size, + const VkPipelineLayoutCreateInfo *pCreateInfo); + +void * +vk_pipeline_layout_multizalloc(struct vk_device *device, + struct vk_multialloc *ma, + const VkPipelineLayoutCreateInfo *pCreateInfo); + +void vk_pipeline_layout_destroy(struct vk_device *device, + struct vk_pipeline_layout *layout); + +static inline struct vk_pipeline_layout * +vk_pipeline_layout_ref(struct vk_pipeline_layout *layout) +{ + assert(layout && layout->ref_cnt >= 1); + p_atomic_inc(&layout->ref_cnt); + return layout; +} + +static inline void +vk_pipeline_layout_unref(struct vk_device *device, + struct vk_pipeline_layout *layout) +{ + assert(layout && layout->ref_cnt >= 1); + if (p_atomic_dec_zero(&layout->ref_cnt)) + layout->destroy(device, layout); +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_PIPELINE_LAYOUT_H */ + diff --git a/src/vulkan/runtime/vk_query_pool.c b/src/vulkan/runtime/vk_query_pool.c new file mode 100644 index 00000000000..59294f414f3 --- /dev/null +++ b/src/vulkan/runtime/vk_query_pool.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2022 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_query_pool.h" + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" + +void +vk_query_pool_init(struct vk_device *device, + struct vk_query_pool *query_pool, + const VkQueryPoolCreateInfo *pCreateInfo) +{ + vk_object_base_init(device, &query_pool->base, VK_OBJECT_TYPE_QUERY_POOL); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + query_pool->query_type = pCreateInfo->queryType; + query_pool->query_count = pCreateInfo->queryCount; + query_pool->pipeline_statistics = + pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS ? + pCreateInfo->pipelineStatistics : 0; +} + +void * +vk_query_pool_create(struct vk_device *device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_query_pool *query_pool = + vk_zalloc2(&device->alloc, alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (query_pool == NULL) + return NULL; + + vk_query_pool_init(device, query_pool, pCreateInfo); + + return query_pool; +} + +void +vk_query_pool_finish(struct vk_query_pool *query_pool) +{ + vk_object_base_finish(&query_pool->base); +} + +void +vk_query_pool_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_query_pool *query_pool) +{ + vk_object_free(device, alloc, query_pool); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBeginQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + const struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + disp->CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdEndQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + const struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + disp->CmdEndQueryIndexedEXT(commandBuffer, queryPool, query, 0); +} diff --git a/src/vulkan/runtime/vk_query_pool.h b/src/vulkan/runtime/vk_query_pool.h new file mode 100644 index 00000000000..ee04eee9ff6 --- /dev/null +++ b/src/vulkan/runtime/vk_query_pool.h @@ -0,0 +1,64 @@ +/* + * Copyright © 2022 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_QUERY_POOL_H +#define VK_QUERY_POOL_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_query_pool { + struct vk_object_base base; + + /** VkQueryPoolCreateInfo::queryType */ + VkQueryType query_type; + + /** VkQueryPoolCreateInfo::queryCount */ + uint32_t query_count; + + /** VkQueryPoolCreateInfo::pipelineStatistics + * + * If query_type != VK_QUERY_TYPE_PIPELINE_STATISTICS, this will be zero. + */ + VkQueryPipelineStatisticFlags pipeline_statistics; +}; + +void vk_query_pool_init(struct vk_device *device, + struct vk_query_pool *query_pool, + const VkQueryPoolCreateInfo *pCreateInfo); +void vk_query_pool_finish(struct vk_query_pool *query_pool); +void *vk_query_pool_create(struct vk_device *device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_query_pool_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_query_pool *query_pool); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_QUERY_POOL_H */ diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c new file mode 100644 index 00000000000..c8b55b58b0a --- /dev/null +++ b/src/vulkan/runtime/vk_queue.c @@ -0,0 +1,1339 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_queue.h" + +#include "util/perf/cpu_trace.h" +#include "util/u_debug.h" +#include <inttypes.h> + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_command_pool.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_fence.h" +#include "vk_log.h" +#include "vk_physical_device.h" +#include "vk_semaphore.h" +#include "vk_sync.h" +#include "vk_sync_binary.h" +#include "vk_sync_dummy.h" +#include "vk_sync_timeline.h" +#include "vk_util.h" + +#include "vulkan/wsi/wsi_common.h" + +static VkResult +vk_queue_start_submit_thread(struct vk_queue *queue); + +VkResult +vk_queue_init(struct vk_queue *queue, struct vk_device *device, + const VkDeviceQueueCreateInfo *pCreateInfo, + uint32_t index_in_family) +{ + VkResult result = VK_SUCCESS; + int ret; + + memset(queue, 0, sizeof(*queue)); + vk_object_base_init(device, &queue->base, VK_OBJECT_TYPE_QUEUE); + + list_addtail(&queue->link, &device->queues); + + queue->flags = pCreateInfo->flags; + queue->queue_family_index = pCreateInfo->queueFamilyIndex; + + assert(index_in_family < pCreateInfo->queueCount); + queue->index_in_family = index_in_family; + + queue->submit.mode = device->submit_mode; + if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND) + queue->submit.mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE; + + list_inithead(&queue->submit.submits); + + ret = mtx_init(&queue->submit.mutex, mtx_plain); + if (ret == thrd_error) { + result = vk_errorf(queue, VK_ERROR_UNKNOWN, "mtx_init failed"); + goto fail_mutex; + } + + ret = cnd_init(&queue->submit.push); + if (ret == thrd_error) { + result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed"); + goto fail_push; + } + + ret = cnd_init(&queue->submit.pop); + if (ret == thrd_error) { + result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed"); + goto fail_pop; + } + + if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { + result = vk_queue_start_submit_thread(queue); + if (result != VK_SUCCESS) + goto fail_thread; + } + + util_dynarray_init(&queue->labels, NULL); + queue->region_begin = true; + + return VK_SUCCESS; + +fail_thread: + cnd_destroy(&queue->submit.pop); +fail_pop: + cnd_destroy(&queue->submit.push); +fail_push: + mtx_destroy(&queue->submit.mutex); +fail_mutex: + return result; +} + +VkResult +_vk_queue_set_lost(struct vk_queue *queue, + const char *file, int line, + const char *msg, ...) 
+{ + if (queue->_lost.lost) + return VK_ERROR_DEVICE_LOST; + + queue->_lost.lost = true; + queue->_lost.error_file = file; + queue->_lost.error_line = line; + + va_list ap; + va_start(ap, msg); + vsnprintf(queue->_lost.error_msg, sizeof(queue->_lost.error_msg), msg, ap); + va_end(ap); + + p_atomic_inc(&queue->base.device->_lost.lost); + + if (debug_get_bool_option("MESA_VK_ABORT_ON_DEVICE_LOSS", false)) { + _vk_device_report_lost(queue->base.device); + abort(); + } + + return VK_ERROR_DEVICE_LOST; +} + +static struct vk_queue_submit * +vk_queue_submit_alloc(struct vk_queue *queue, + uint32_t wait_count, + uint32_t command_buffer_count, + uint32_t buffer_bind_count, + uint32_t image_opaque_bind_count, + uint32_t image_bind_count, + uint32_t bind_entry_count, + uint32_t image_bind_entry_count, + uint32_t signal_count, + VkSparseMemoryBind **bind_entries, + VkSparseImageMemoryBind **image_bind_entries) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_queue_submit, submit, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_sync_wait, waits, wait_count); + VK_MULTIALLOC_DECL(&ma, struct vk_command_buffer *, command_buffers, + command_buffer_count); + VK_MULTIALLOC_DECL(&ma, VkSparseBufferMemoryBindInfo, buffer_binds, + buffer_bind_count); + VK_MULTIALLOC_DECL(&ma, VkSparseImageOpaqueMemoryBindInfo, + image_opaque_binds, image_opaque_bind_count); + VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBindInfo, image_binds, + image_bind_count); + VK_MULTIALLOC_DECL(&ma, VkSparseMemoryBind, + bind_entries_local, bind_entry_count); + VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBind, image_bind_entries_local, + image_bind_entry_count); + VK_MULTIALLOC_DECL(&ma, struct vk_sync_signal, signals, signal_count); + VK_MULTIALLOC_DECL(&ma, struct vk_sync *, wait_temps, wait_count); + + struct vk_sync_timeline_point **wait_points = NULL, **signal_points = NULL; + if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) { + vk_multialloc_add(&ma, &wait_points, + struct vk_sync_timeline_point *, wait_count); + vk_multialloc_add(&ma, &signal_points, + struct vk_sync_timeline_point *, signal_count); + } + + if (!vk_multialloc_zalloc(&ma, &queue->base.device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + submit->wait_count = wait_count; + submit->command_buffer_count = command_buffer_count; + submit->signal_count = signal_count; + submit->buffer_bind_count = buffer_bind_count; + submit->image_opaque_bind_count = image_opaque_bind_count; + submit->image_bind_count = image_bind_count; + + submit->waits = waits; + submit->command_buffers = command_buffers; + submit->signals = signals; + submit->buffer_binds = buffer_binds; + submit->image_opaque_binds = image_opaque_binds; + submit->image_binds = image_binds; + submit->_wait_temps = wait_temps; + submit->_wait_points = wait_points; + submit->_signal_points = signal_points; + + if (bind_entries) + *bind_entries = bind_entries_local; + + if (image_bind_entries) + *image_bind_entries = image_bind_entries_local; + + return submit; +} + +static void +vk_queue_submit_cleanup(struct vk_queue *queue, + struct vk_queue_submit *submit) +{ + for (uint32_t i = 0; i < submit->wait_count; i++) { + if (submit->_wait_temps[i] != NULL) + vk_sync_destroy(queue->base.device, submit->_wait_temps[i]); + } + + if (submit->_mem_signal_temp != NULL) + vk_sync_destroy(queue->base.device, submit->_mem_signal_temp); + + if (submit->_wait_points != NULL) { + for (uint32_t i = 0; i < submit->wait_count; i++) { + if (unlikely(submit->_wait_points[i] != NULL)) { + 
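+            /* Release the wait point obtained from vk_sync_timeline_get_point() in vk_queue_submit_final(). */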
vk_sync_timeline_point_release(queue->base.device, + submit->_wait_points[i]); + } + } + } + + if (submit->_signal_points != NULL) { + for (uint32_t i = 0; i < submit->signal_count; i++) { + if (unlikely(submit->_signal_points[i] != NULL)) { + vk_sync_timeline_point_free(queue->base.device, + submit->_signal_points[i]); + } + } + } +} + +static void +vk_queue_submit_free(struct vk_queue *queue, + struct vk_queue_submit *submit) +{ + vk_free(&queue->base.device->alloc, submit); +} + +static void +vk_queue_submit_destroy(struct vk_queue *queue, + struct vk_queue_submit *submit) +{ + vk_queue_submit_cleanup(queue, submit); + vk_queue_submit_free(queue, submit); +} + +static void +vk_queue_push_submit(struct vk_queue *queue, + struct vk_queue_submit *submit) +{ + mtx_lock(&queue->submit.mutex); + list_addtail(&submit->link, &queue->submit.submits); + cnd_signal(&queue->submit.push); + mtx_unlock(&queue->submit.mutex); +} + +static VkResult +vk_queue_drain(struct vk_queue *queue) +{ + VkResult result = VK_SUCCESS; + + mtx_lock(&queue->submit.mutex); + while (!list_is_empty(&queue->submit.submits)) { + if (vk_device_is_lost(queue->base.device)) { + result = VK_ERROR_DEVICE_LOST; + break; + } + + int ret = cnd_wait(&queue->submit.pop, &queue->submit.mutex); + if (ret == thrd_error) { + result = vk_queue_set_lost(queue, "cnd_wait failed"); + break; + } + } + mtx_unlock(&queue->submit.mutex); + + return result; +} + +static VkResult +vk_queue_submit_final(struct vk_queue *queue, + struct vk_queue_submit *submit) +{ + VkResult result; + + /* Now that we know all our time points exist, fetch the time point syncs + * from any vk_sync_timelines. While we're here, also compact down the + * list of waits to get rid of any trivial timeline waits. + */ + uint32_t wait_count = 0; + for (uint32_t i = 0; i < submit->wait_count; i++) { + /* A timeline wait on 0 is always a no-op */ + if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) && + submit->waits[i].wait_value == 0) + continue; + + /* Waits on dummy vk_syncs are no-ops */ + if (vk_sync_type_is_dummy(submit->waits[i].sync->type)) { + /* We are about to lose track of this wait, if it has a temporary + * we need to destroy it now, as vk_queue_submit_cleanup will not + * know about it */ + if (submit->_wait_temps[i] != NULL) { + vk_sync_destroy(queue->base.device, submit->_wait_temps[i]); + submit->waits[i].sync = NULL; + } + continue; + } + + /* For emulated timelines, we have a binary vk_sync associated with + * each time point and pass the binary vk_sync to the driver. 
+ */ + struct vk_sync_timeline *timeline = + vk_sync_as_timeline(submit->waits[i].sync); + if (timeline) { + assert(queue->base.device->timeline_mode == + VK_DEVICE_TIMELINE_MODE_EMULATED); + result = vk_sync_timeline_get_point(queue->base.device, timeline, + submit->waits[i].wait_value, + &submit->_wait_points[i]); + if (unlikely(result != VK_SUCCESS)) { + result = vk_queue_set_lost(queue, + "Time point >= %"PRIu64" not found", + submit->waits[i].wait_value); + } + + /* This can happen if the point is long past */ + if (submit->_wait_points[i] == NULL) + continue; + + submit->waits[i].sync = &submit->_wait_points[i]->sync; + submit->waits[i].wait_value = 0; + } + + struct vk_sync_binary *binary = + vk_sync_as_binary(submit->waits[i].sync); + if (binary) { + submit->waits[i].sync = &binary->timeline; + submit->waits[i].wait_value = binary->next_point; + } + + assert((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) || + submit->waits[i].wait_value == 0); + + assert(wait_count <= i); + if (wait_count < i) { + submit->waits[wait_count] = submit->waits[i]; + submit->_wait_temps[wait_count] = submit->_wait_temps[i]; + if (submit->_wait_points) + submit->_wait_points[wait_count] = submit->_wait_points[i]; + } + wait_count++; + } + + assert(wait_count <= submit->wait_count); + submit->wait_count = wait_count; + + for (uint32_t i = 0; i < submit->signal_count; i++) { + assert((submit->signals[i].sync->flags & VK_SYNC_IS_TIMELINE) || + submit->signals[i].signal_value == 0); + + struct vk_sync_binary *binary = + vk_sync_as_binary(submit->signals[i].sync); + if (binary) { + submit->signals[i].sync = &binary->timeline; + submit->signals[i].signal_value = ++binary->next_point; + } + } + + result = queue->driver_submit(queue, submit); + if (unlikely(result != VK_SUCCESS)) + return result; + + if (submit->_signal_points) { + for (uint32_t i = 0; i < submit->signal_count; i++) { + if (submit->_signal_points[i] == NULL) + continue; + + vk_sync_timeline_point_install(queue->base.device, + submit->_signal_points[i]); + submit->_signal_points[i] = NULL; + } + } + + return VK_SUCCESS; +} + +VkResult +vk_queue_flush(struct vk_queue *queue, uint32_t *submit_count_out) +{ + VkResult result = VK_SUCCESS; + + assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_DEFERRED); + + mtx_lock(&queue->submit.mutex); + + uint32_t submit_count = 0; + while (!list_is_empty(&queue->submit.submits)) { + struct vk_queue_submit *submit = + list_first_entry(&queue->submit.submits, + struct vk_queue_submit, link); + + for (uint32_t i = 0; i < submit->wait_count; i++) { + /* In emulated timeline mode, only emulated timelines are allowed */ + if (!vk_sync_type_is_vk_sync_timeline(submit->waits[i].sync->type)) { + assert(!(submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE)); + continue; + } + + result = vk_sync_wait(queue->base.device, + submit->waits[i].sync, + submit->waits[i].wait_value, + VK_SYNC_WAIT_PENDING, 0); + if (result == VK_TIMEOUT) { + /* This one's not ready yet */ + result = VK_SUCCESS; + goto done; + } else if (result != VK_SUCCESS) { + result = vk_queue_set_lost(queue, "Wait for time points failed"); + goto done; + } + } + + result = vk_queue_submit_final(queue, submit); + if (unlikely(result != VK_SUCCESS)) { + result = vk_queue_set_lost(queue, "queue::driver_submit failed"); + goto done; + } + + submit_count++; + + list_del(&submit->link); + + vk_queue_submit_destroy(queue, submit); + } + +done: + if (submit_count) + cnd_broadcast(&queue->submit.pop); + + mtx_unlock(&queue->submit.mutex); + + if (submit_count_out) 
+ *submit_count_out = submit_count; + + return result; +} + +static int +vk_queue_submit_thread_func(void *_data) +{ + struct vk_queue *queue = _data; + VkResult result; + + mtx_lock(&queue->submit.mutex); + + while (queue->submit.thread_run) { + if (list_is_empty(&queue->submit.submits)) { + int ret = cnd_wait(&queue->submit.push, &queue->submit.mutex); + if (ret == thrd_error) { + mtx_unlock(&queue->submit.mutex); + vk_queue_set_lost(queue, "cnd_wait failed"); + return 1; + } + continue; + } + + struct vk_queue_submit *submit = + list_first_entry(&queue->submit.submits, + struct vk_queue_submit, link); + + /* Drop the lock while we wait */ + mtx_unlock(&queue->submit.mutex); + + result = vk_sync_wait_many(queue->base.device, + submit->wait_count, submit->waits, + VK_SYNC_WAIT_PENDING, UINT64_MAX); + if (unlikely(result != VK_SUCCESS)) { + vk_queue_set_lost(queue, "Wait for time points failed"); + return 1; + } + + result = vk_queue_submit_final(queue, submit); + if (unlikely(result != VK_SUCCESS)) { + vk_queue_set_lost(queue, "queue::driver_submit failed"); + return 1; + } + + /* Do all our cleanup of individual fences etc. outside the lock. + * We can't actually remove it from the list yet. We have to do + * that under the lock. + */ + vk_queue_submit_cleanup(queue, submit); + + mtx_lock(&queue->submit.mutex); + + /* Only remove the submit from the list and free it after + * queue->submit() has completed. This ensures that, when + * vk_queue_drain() completes, there are no more pending jobs. + */ + list_del(&submit->link); + vk_queue_submit_free(queue, submit); + + cnd_broadcast(&queue->submit.pop); + } + + mtx_unlock(&queue->submit.mutex); + return 0; +} + +static VkResult +vk_queue_start_submit_thread(struct vk_queue *queue) +{ + int ret; + + mtx_lock(&queue->submit.mutex); + queue->submit.thread_run = true; + mtx_unlock(&queue->submit.mutex); + + ret = thrd_create(&queue->submit.thread, + vk_queue_submit_thread_func, + queue); + if (ret == thrd_error) + return vk_errorf(queue, VK_ERROR_UNKNOWN, "thrd_create failed"); + + return VK_SUCCESS; +} + +static void +vk_queue_stop_submit_thread(struct vk_queue *queue) +{ + vk_queue_drain(queue); + + /* Kick the thread to disable it */ + mtx_lock(&queue->submit.mutex); + queue->submit.thread_run = false; + cnd_signal(&queue->submit.push); + mtx_unlock(&queue->submit.mutex); + + thrd_join(queue->submit.thread, NULL); + + assert(list_is_empty(&queue->submit.submits)); + queue->submit.mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE; +} + +VkResult +vk_queue_enable_submit_thread(struct vk_queue *queue) +{ + assert(vk_device_supports_threaded_submit(queue->base.device)); + + if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) + return VK_SUCCESS; + + VkResult result = vk_queue_start_submit_thread(queue); + if (result != VK_SUCCESS) + return result; + + queue->submit.mode = VK_QUEUE_SUBMIT_MODE_THREADED; + + return VK_SUCCESS; +} + +struct vulkan_submit_info { + const void *pNext; + + uint32_t command_buffer_count; + const VkCommandBufferSubmitInfo *command_buffers; + + uint32_t wait_count; + const VkSemaphoreSubmitInfo *waits; + + uint32_t signal_count; + const VkSemaphoreSubmitInfo *signals; + + uint32_t buffer_bind_count; + const VkSparseBufferMemoryBindInfo *buffer_binds; + + uint32_t image_opaque_bind_count; + const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds; + + uint32_t image_bind_count; + const VkSparseImageMemoryBindInfo *image_binds; + + struct vk_fence *fence; +}; + +static VkResult +vk_queue_submit(struct vk_queue *queue, +
const struct vulkan_submit_info *info) +{ + struct vk_device *device = queue->base.device; + VkResult result; + uint32_t sparse_memory_bind_entry_count = 0; + uint32_t sparse_memory_image_bind_entry_count = 0; + VkSparseMemoryBind *sparse_memory_bind_entries = NULL; + VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL; + + for (uint32_t i = 0; i < info->buffer_bind_count; ++i) + sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount; + + for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i) + sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount; + + for (uint32_t i = 0; i < info->image_bind_count; ++i) + sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount; + + const struct wsi_memory_signal_submit_info *mem_signal = + vk_find_struct_const(info->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA); + bool signal_mem_sync = mem_signal != NULL && + mem_signal->memory != VK_NULL_HANDLE && + queue->base.device->create_sync_for_memory != NULL; + + struct vk_queue_submit *submit = + vk_queue_submit_alloc(queue, info->wait_count, + info->command_buffer_count, + info->buffer_bind_count, + info->image_opaque_bind_count, + info->image_bind_count, + sparse_memory_bind_entry_count, + sparse_memory_image_bind_entry_count, + info->signal_count + + signal_mem_sync + (info->fence != NULL), + &sparse_memory_bind_entries, + &sparse_memory_image_bind_entries); + if (unlikely(submit == NULL)) + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* From the Vulkan 1.2.194 spec: + * + * "If the VkSubmitInfo::pNext chain does not include this structure, + * the batch defaults to use counter pass index 0." + */ + const VkPerformanceQuerySubmitInfoKHR *perf_info = + vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); + submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; + + bool has_binary_permanent_semaphore_wait = false; + for (uint32_t i = 0; i < info->wait_count; i++) { + VK_FROM_HANDLE(vk_semaphore, semaphore, + info->waits[i].semaphore); + + /* From the Vulkan 1.2.194 spec: + * + * "Applications can import a semaphore payload into an existing + * semaphore using an external semaphore handle. The effects of the + * import operation will be either temporary or permanent, as + * specified by the application. If the import is temporary, the + * implementation must restore the semaphore to its prior permanent + * state after submitting the next semaphore wait operation." + * + * and + * + * VUID-VkImportSemaphoreFdInfoKHR-flags-03323 + * + * "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the + * VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore + * from which handle or name was exported must not be + * VK_SEMAPHORE_TYPE_TIMELINE" + */ + struct vk_sync *sync; + if (semaphore->temporary) { + assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY); + sync = submit->_wait_temps[i] = semaphore->temporary; + semaphore->temporary = NULL; + } else { + if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) { + if (vk_device_supports_threaded_submit(device)) + assert(semaphore->permanent.type->move); + has_binary_permanent_semaphore_wait = true; + } + + sync = &semaphore->permanent; + } + + uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ? 
+ info->waits[i].value : 0; + + submit->waits[i] = (struct vk_sync_wait) { + .sync = sync, + .stage_mask = info->waits[i].stageMask, + .wait_value = wait_value, + }; + } + + for (uint32_t i = 0; i < info->command_buffer_count; i++) { + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, + info->command_buffers[i].commandBuffer); + assert(info->command_buffers[i].deviceMask == 0 || + info->command_buffers[i].deviceMask == 1); + assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index); + + /* Some drivers don't call vk_command_buffer_begin/end() yet and, for + * those, we'll see initial layout. However, this is enough to catch + * command buffers which get submitted without calling EndCommandBuffer. + */ + assert(cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_INITIAL || + cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE || + cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING); + cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING; + + submit->command_buffers[i] = cmd_buffer; + } + + sparse_memory_bind_entry_count = 0; + sparse_memory_image_bind_entry_count = 0; + + if (info->buffer_binds) + typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count); + + for (uint32_t i = 0; i < info->buffer_bind_count; ++i) { + VkSparseMemoryBind *binds = sparse_memory_bind_entries + + sparse_memory_bind_entry_count; + submit->buffer_binds[i].pBinds = binds; + typed_memcpy(binds, info->buffer_binds[i].pBinds, + info->buffer_binds[i].bindCount); + + sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount; + } + + if (info->image_opaque_binds) + typed_memcpy(submit->image_opaque_binds, info->image_opaque_binds, + info->image_opaque_bind_count); + + for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i) { + VkSparseMemoryBind *binds = sparse_memory_bind_entries + + sparse_memory_bind_entry_count; + submit->image_opaque_binds[i].pBinds = binds; + typed_memcpy(binds, info->image_opaque_binds[i].pBinds, + info->image_opaque_binds[i].bindCount); + + sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount; + } + + if (info->image_binds) + typed_memcpy(submit->image_binds, info->image_binds, info->image_bind_count); + + for (uint32_t i = 0; i < info->image_bind_count; ++i) { + VkSparseImageMemoryBind *binds = sparse_memory_image_bind_entries + + sparse_memory_image_bind_entry_count; + submit->image_binds[i].pBinds = binds; + typed_memcpy(binds, info->image_binds[i].pBinds, + info->image_binds[i].bindCount); + + sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount; + } + + for (uint32_t i = 0; i < info->signal_count; i++) { + VK_FROM_HANDLE(vk_semaphore, semaphore, + info->signals[i].semaphore); + + struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore); + uint64_t signal_value = info->signals[i].value; + if (semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) { + if (signal_value == 0) { + result = vk_queue_set_lost(queue, + "Tried to signal a timeline with value 0"); + goto fail; + } + } else { + signal_value = 0; + } + + /* For emulated timelines, we need to associate a binary vk_sync with + * each time point and pass the binary vk_sync to the driver. We could + * do this in vk_queue_submit_final but it might require doing memory + * allocation and we don't want to to add extra failure paths there. + * Instead, allocate and replace the driver-visible vk_sync now and + * we'll insert it into the timeline in vk_queue_submit_final. The + * insert step is guaranteed to not fail. 
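As a hedged illustration of the handoff described above, this is roughly what an emulated-timeline submit looks like from the driver's side of the driver_submit hook: every signal entry has already been rewritten to a plain, non-timeline vk_sync with a zero signal value, so the driver never interprets timeline points itself. The drv_ names and the kernel-submission helper are hypothetical, not part of the runtime.

   #include <assert.h>
   #include "vk_queue.h"
   #include "vk_sync.h"

   /* Hypothetical stand-in for the driver's real kernel submission path. */
   static VkResult drv_submit_to_kernel(struct vk_queue *queue,
                                        struct vk_queue_submit *submit);

   static VkResult
   drv_driver_submit(struct vk_queue *queue, struct vk_queue_submit *submit)
   {
      for (uint32_t i = 0; i < submit->signal_count; i++) {
         /* Assuming every timeline this device exposes is emulated, the sync
          * here is the per-point binary payload allocated by the runtime, so
          * signal_value is always 0 and no timeline handling is needed.
          */
         assert(!(submit->signals[i].sync->flags & VK_SYNC_IS_TIMELINE));
         assert(submit->signals[i].signal_value == 0);
      }

      return drv_submit_to_kernel(queue, submit);
   }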
+ */ + struct vk_sync_timeline *timeline = vk_sync_as_timeline(sync); + if (timeline) { + assert(queue->base.device->timeline_mode == + VK_DEVICE_TIMELINE_MODE_EMULATED); + result = vk_sync_timeline_alloc_point(queue->base.device, timeline, + signal_value, + &submit->_signal_points[i]); + if (unlikely(result != VK_SUCCESS)) + goto fail; + + sync = &submit->_signal_points[i]->sync; + signal_value = 0; + } + + submit->signals[i] = (struct vk_sync_signal) { + .sync = sync, + .stage_mask = info->signals[i].stageMask, + .signal_value = signal_value, + }; + } + + uint32_t signal_count = info->signal_count; + if (signal_mem_sync) { + struct vk_sync *mem_sync; + result = queue->base.device->create_sync_for_memory(queue->base.device, + mem_signal->memory, + true, &mem_sync); + if (unlikely(result != VK_SUCCESS)) + goto fail; + + submit->_mem_signal_temp = mem_sync; + + assert(submit->signals[signal_count].sync == NULL); + submit->signals[signal_count++] = (struct vk_sync_signal) { + .sync = mem_sync, + .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + }; + } + + if (info->fence != NULL) { + assert(submit->signals[signal_count].sync == NULL); + submit->signals[signal_count++] = (struct vk_sync_signal) { + .sync = vk_fence_get_active_sync(info->fence), + .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + }; + } + + assert(signal_count == submit->signal_count); + + /* If this device supports threaded submit, we can't rely on the client + * ordering requirements to ensure submits happen in the right order. Even + * if this queue doesn't have a submit thread, another queue (possibly in a + * different process) may and that means we our dependencies may not have + * been submitted to the kernel yet. Do a quick zero-timeout WAIT_PENDING + * on all the wait semaphores to see if we need to start up our own thread. + */ + if (device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND && + queue->submit.mode != VK_QUEUE_SUBMIT_MODE_THREADED) { + assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_IMMEDIATE); + + result = vk_sync_wait_many(queue->base.device, + submit->wait_count, submit->waits, + VK_SYNC_WAIT_PENDING, 0); + if (result == VK_TIMEOUT) + result = vk_queue_enable_submit_thread(queue); + if (unlikely(result != VK_SUCCESS)) + goto fail; + } + + switch (queue->submit.mode) { + case VK_QUEUE_SUBMIT_MODE_IMMEDIATE: + result = vk_queue_submit_final(queue, submit); + if (unlikely(result != VK_SUCCESS)) + goto fail; + + /* If threaded submit is possible on this device, we need to ensure that + * binary semaphore payloads get reset so that any other threads can + * properly wait on them for dependency checking. Because we don't + * currently have a submit thread, we can directly reset that binary + * semaphore payloads. + * + * If we the vk_sync is in our signal et, we can consider it to have + * been both reset and signaled by queue_submit_final(). A reset in + * this case would be wrong because it would throw away our signal + * operation. If we don't signal the vk_sync, then we need to reset it. 
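Both the on-demand probe above and the reset logic below hinge on the difference between a signal operation having been submitted to the kernel and having actually executed. A minimal sketch of the two wait flavours as this file uses them, with device and sync standing for any vk_device/vk_sync pair:

   #include <stdbool.h>
   #include <stdint.h>
   #include "vk_device.h"
   #include "vk_sync.h"

   /* Zero-timeout VK_SYNC_WAIT_PENDING answers "has a signal for this sync
    * reached the kernel yet?" without blocking; VK_TIMEOUT means "not yet"
    * (error handling elided for brevity).
    */
   static bool
   signal_already_submitted(struct vk_device *device, struct vk_sync *sync)
   {
      return vk_sync_wait(device, sync, 0, VK_SYNC_WAIT_PENDING, 0) == VK_SUCCESS;
   }

   /* VK_SYNC_WAIT_COMPLETE blocks until the signalling work has finished,
    * which is how vk_common_QueueWaitIdle() drains a queue further below.
    */
   static VkResult
   wait_for_signal_to_execute(struct vk_device *device, struct vk_sync *sync)
   {
      return vk_sync_wait(device, sync, 0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
   }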
+ */ + if (vk_device_supports_threaded_submit(device) && + has_binary_permanent_semaphore_wait) { + for (uint32_t i = 0; i < submit->wait_count; i++) { + if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) || + submit->_wait_temps[i] != NULL) + continue; + + bool was_signaled = false; + for (uint32_t j = 0; j < submit->signal_count; j++) { + if (submit->signals[j].sync == submit->waits[i].sync) { + was_signaled = true; + break; + } + } + + if (!was_signaled) { + result = vk_sync_reset(queue->base.device, + submit->waits[i].sync); + if (unlikely(result != VK_SUCCESS)) + goto fail; + } + } + } + + vk_queue_submit_destroy(queue, submit); + return result; + + case VK_QUEUE_SUBMIT_MODE_DEFERRED: + vk_queue_push_submit(queue, submit); + return vk_device_flush(queue->base.device); + + case VK_QUEUE_SUBMIT_MODE_THREADED: + if (has_binary_permanent_semaphore_wait) { + for (uint32_t i = 0; i < info->wait_count; i++) { + VK_FROM_HANDLE(vk_semaphore, semaphore, + info->waits[i].semaphore); + + if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY) + continue; + + /* From the Vulkan 1.2.194 spec: + * + * "When a batch is submitted to a queue via a queue + * submission, and it includes semaphores to be waited on, + * it defines a memory dependency between prior semaphore + * signal operations and the batch, and defines semaphore + * wait operations. + * + * Such semaphore wait operations set the semaphores + * created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_BINARY to the unsignaled state." + * + * For threaded submit, we depend on tracking the unsignaled + * state of binary semaphores to determine when we can safely + * submit. The VK_SYNC_WAIT_PENDING check above as well as the + * one in the sumbit thread depend on all binary semaphores + * being reset when they're not in active use from the point + * of view of the client's CPU timeline. This means we need to + * reset them inside vkQueueSubmit and cannot wait until the + * actual submit which happens later in the thread. + * + * We've already stolen temporary semaphore payloads above as + * part of basic semaphore processing. We steal permanent + * semaphore payloads here by way of vk_sync_move. For shared + * semaphores, this can be a bit expensive (sync file import + * and export) but, for non-shared semaphores, it can be made + * fairly cheap. Also, we only do this semaphore swapping in + * the case where you have real timelines AND the client is + * using timeline semaphores with wait-before-signal (that's + * the only way to get a submit thread) AND mixing those with + * waits on binary semaphores AND said binary semaphore is + * using its permanent payload. In other words, this code + * should basically only ever get executed in CTS tests. + */ + if (submit->_wait_temps[i] != NULL) + continue; + + assert(submit->waits[i].sync == &semaphore->permanent); + + /* From the Vulkan 1.2.194 spec: + * + * VUID-vkQueueSubmit-pWaitSemaphores-03238 + * + * "All elements of the pWaitSemaphores member of all + * elements of pSubmits created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_BINARY must reference a semaphore + * signal operation that has been submitted for execution + * and any semaphore signal operations on which it depends + * (if any) must have also been submitted for execution." + * + * Therefore, we can safely do a blocking wait here and it + * won't actually block for long. This ensures that the + * vk_sync_move below will succeed. 
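For context on the move itself, a sketch only: vk_sync_move() transfers the current payload of the source sync into the destination and leaves the source reading as unsignaled, which is exactly the "steal" the comment above describes and why the pending wait has to succeed first. The snippet below mirrors the runtime calls used in the code that follows; device and semaphore are assumed to be in scope.

   /* After the move, `temp` owns whatever payload the permanent sync had
    * and the client-visible permanent sync reads as unsignaled again.
    */
   struct vk_sync *temp;
   VkResult result = vk_sync_create(device, semaphore->permanent.type,
                                    0 /* flags */, 0 /* initial value */,
                                    &temp);
   if (result == VK_SUCCESS)
      result = vk_sync_move(device, temp /* dst */,
                            &semaphore->permanent /* src */);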
+ */ + result = vk_sync_wait(queue->base.device, + submit->waits[i].sync, 0, + VK_SYNC_WAIT_PENDING, UINT64_MAX); + if (unlikely(result != VK_SUCCESS)) + goto fail; + + result = vk_sync_create(queue->base.device, + semaphore->permanent.type, + 0 /* flags */, + 0 /* initial value */, + &submit->_wait_temps[i]); + if (unlikely(result != VK_SUCCESS)) + goto fail; + + result = vk_sync_move(queue->base.device, + submit->_wait_temps[i], + &semaphore->permanent); + if (unlikely(result != VK_SUCCESS)) + goto fail; + + submit->waits[i].sync = submit->_wait_temps[i]; + } + } + + vk_queue_push_submit(queue, submit); + + if (signal_mem_sync) { + /* If we're signaling a memory object, we have to ensure that + * vkQueueSubmit does not return until the kernel submission has + * happened. Otherwise, we may get a race between this process + * and whatever is going to wait on the object where the other + * process may wait before we've submitted our work. Drain the + * queue now to avoid this. It's the responsibility of the caller + * to ensure that any vkQueueSubmit which signals a memory object + * has fully resolved dependencies. + */ + result = vk_queue_drain(queue); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + return VK_SUCCESS; + + case VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND: + unreachable("Invalid vk_queue::submit.mode"); + } + unreachable("Invalid submit mode"); + +fail: + vk_queue_submit_destroy(queue, submit); + return result; +} + +VkResult +vk_queue_wait_before_present(struct vk_queue *queue, + const VkPresentInfoKHR *pPresentInfo) +{ + if (vk_device_is_lost(queue->base.device)) + return VK_ERROR_DEVICE_LOST; + + /* From the Vulkan 1.2.194 spec: + * + * VUID-vkQueuePresentKHR-pWaitSemaphores-03268 + * + * "All elements of the pWaitSemaphores member of pPresentInfo must + * reference a semaphore signal operation that has been submitted for + * execution and any semaphore signal operations on which it depends (if + * any) must have also been submitted for execution." + * + * As with vkQueueSubmit above, we need to ensure that any binary + * semaphores we use in this present actually exist. If we don't have + * timeline semaphores, this is a non-issue. If they're emulated, then + * this is ensured for us by the vk_device_flush() at the end of every + * vkQueueSubmit() and every vkSignalSemaphore(). For real timeline + * semaphores, however, we need to do a wait. Thanks to the above bit of + * spec text, that wait should never block for long. + */ + if (!vk_device_supports_threaded_submit(queue->base.device)) + return VK_SUCCESS; + + const uint32_t wait_count = pPresentInfo->waitSemaphoreCount; + + if (wait_count == 0) + return VK_SUCCESS; + + STACK_ARRAY(struct vk_sync_wait, waits, wait_count); + + for (uint32_t i = 0; i < wait_count; i++) { + VK_FROM_HANDLE(vk_semaphore, semaphore, + pPresentInfo->pWaitSemaphores[i]); + + /* From the Vulkan 1.2.194 spec: + * + * VUID-vkQueuePresentKHR-pWaitSemaphores-03267 + * + * "All elements of the pWaitSemaphores member of pPresentInfo must + * be created with a VkSemaphoreType of VK_SEMAPHORE_TYPE_BINARY." 
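A hedged sketch of the intended caller: a driver's vkQueuePresentKHR entrypoint calls this helper before handing the frame to its present path. drv_QueuePresentKHR and drv_present_to_wsi are hypothetical names (in Mesa drivers the latter is typically a wsi_common_queue_present() call).

   #include "vk_queue.h"

   /* Hypothetical stand-in for the driver's actual present/WSI path. */
   static VkResult drv_present_to_wsi(struct vk_queue *queue,
                                      const VkPresentInfoKHR *pPresentInfo);

   VKAPI_ATTR VkResult VKAPI_CALL
   drv_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
   {
      VK_FROM_HANDLE(vk_queue, queue, _queue);

      /* Make sure every binary wait semaphore has at least a pending signal
       * before anything is queued for presentation.
       */
      VkResult result = vk_queue_wait_before_present(queue, pPresentInfo);
      if (result != VK_SUCCESS)
         return result;

      return drv_present_to_wsi(queue, pPresentInfo);
   }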
+ */ + assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY); + + waits[i] = (struct vk_sync_wait) { + .sync = vk_semaphore_get_active_sync(semaphore), + .stage_mask = ~(VkPipelineStageFlags2)0, + }; + } + + VkResult result = vk_sync_wait_many(queue->base.device, wait_count, waits, + VK_SYNC_WAIT_PENDING, UINT64_MAX); + + STACK_ARRAY_FINISH(waits); + + /* Check again, just in case */ + if (vk_device_is_lost(queue->base.device)) + return VK_ERROR_DEVICE_LOST; + + return result; +} + +static VkResult +vk_queue_signal_sync(struct vk_queue *queue, + struct vk_sync *sync, + uint32_t signal_value) +{ + struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, 0, 0, 0, 0, 0, + 0, 0, 1, NULL, NULL); + if (unlikely(submit == NULL)) + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + + submit->signals[0] = (struct vk_sync_signal) { + .sync = sync, + .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .signal_value = signal_value, + }; + + VkResult result; + switch (queue->submit.mode) { + case VK_QUEUE_SUBMIT_MODE_IMMEDIATE: + result = vk_queue_submit_final(queue, submit); + vk_queue_submit_destroy(queue, submit); + return result; + + case VK_QUEUE_SUBMIT_MODE_DEFERRED: + vk_queue_push_submit(queue, submit); + return vk_device_flush(queue->base.device); + + case VK_QUEUE_SUBMIT_MODE_THREADED: + vk_queue_push_submit(queue, submit); + return VK_SUCCESS; + + case VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND: + unreachable("Invalid vk_queue::submit.mode"); + } + unreachable("Invalid timeline mode"); +} + +void +vk_queue_finish(struct vk_queue *queue) +{ + if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) + vk_queue_stop_submit_thread(queue); + + while (!list_is_empty(&queue->submit.submits)) { + assert(vk_device_is_lost_no_report(queue->base.device)); + + struct vk_queue_submit *submit = + list_first_entry(&queue->submit.submits, + struct vk_queue_submit, link); + + list_del(&submit->link); + vk_queue_submit_destroy(queue, submit); + } + +#if DETECT_OS_ANDROID + if (queue->anb_semaphore != VK_NULL_HANDLE) { + struct vk_device *device = queue->base.device; + device->dispatch_table.DestroySemaphore(vk_device_to_handle(device), + queue->anb_semaphore, NULL); + } +#endif + + cnd_destroy(&queue->submit.pop); + cnd_destroy(&queue->submit.push); + mtx_destroy(&queue->submit.mutex); + + util_dynarray_fini(&queue->labels); + list_del(&queue->link); + vk_object_base_finish(&queue->base); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_QueueSubmit2(VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo2 *pSubmits, + VkFence _fence) +{ + VK_FROM_HANDLE(vk_queue, queue, _queue); + VK_FROM_HANDLE(vk_fence, fence, _fence); + + if (vk_device_is_lost(queue->base.device)) + return VK_ERROR_DEVICE_LOST; + + if (submitCount == 0) { + if (fence == NULL) { + return VK_SUCCESS; + } else { + return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0); + } + } + + for (uint32_t i = 0; i < submitCount; i++) { + struct vulkan_submit_info info = { + .pNext = pSubmits[i].pNext, + .command_buffer_count = pSubmits[i].commandBufferInfoCount, + .command_buffers = pSubmits[i].pCommandBufferInfos, + .wait_count = pSubmits[i].waitSemaphoreInfoCount, + .waits = pSubmits[i].pWaitSemaphoreInfos, + .signal_count = pSubmits[i].signalSemaphoreInfoCount, + .signals = pSubmits[i].pSignalSemaphoreInfos, + .fence = i == submitCount - 1 ? 
fence : NULL + }; + VkResult result = vk_queue_submit(queue, &info); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_QueueBindSparse(VkQueue _queue, + uint32_t bindInfoCount, + const VkBindSparseInfo *pBindInfo, + VkFence _fence) +{ + VK_FROM_HANDLE(vk_queue, queue, _queue); + VK_FROM_HANDLE(vk_fence, fence, _fence); + + if (vk_device_is_lost(queue->base.device)) + return VK_ERROR_DEVICE_LOST; + + if (bindInfoCount == 0) { + if (fence == NULL) { + return VK_SUCCESS; + } else { + return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0); + } + } + + for (uint32_t i = 0; i < bindInfoCount; i++) { + const VkTimelineSemaphoreSubmitInfo *timeline_info = + vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO); + const uint64_t *wait_values = NULL; + const uint64_t *signal_values = NULL; + + if (timeline_info && timeline_info->waitSemaphoreValueCount) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkBindSparseInfo-pNext-03248 + * + * "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure + * and any element of pSignalSemaphores was created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_TIMELINE, then its signalSemaphoreValueCount member must equal + * signalSemaphoreCount" + */ + assert(timeline_info->waitSemaphoreValueCount == pBindInfo[i].waitSemaphoreCount); + wait_values = timeline_info->pWaitSemaphoreValues; + } + + if (timeline_info && timeline_info->signalSemaphoreValueCount) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkBindSparseInfo-pNext-03247 + * + * "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure + * and any element of pWaitSemaphores was created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_TIMELINE, then its waitSemaphoreValueCount member must equal + * waitSemaphoreCount" + */ + assert(timeline_info->signalSemaphoreValueCount == pBindInfo[i].signalSemaphoreCount); + signal_values = timeline_info->pSignalSemaphoreValues; + } + + STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphore_infos, + pBindInfo[i].waitSemaphoreCount); + STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos, + pBindInfo[i].signalSemaphoreCount); + + if (!wait_semaphore_infos || !signal_semaphore_infos) { + STACK_ARRAY_FINISH(wait_semaphore_infos); + STACK_ARRAY_FINISH(signal_semaphore_infos); + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t j = 0; j < pBindInfo[i].waitSemaphoreCount; j++) { + wait_semaphore_infos[j] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = pBindInfo[i].pWaitSemaphores[j], + .value = wait_values ? wait_values[j] : 0, + }; + } + + for (uint32_t j = 0; j < pBindInfo[i].signalSemaphoreCount; j++) { + signal_semaphore_infos[j] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = pBindInfo[i].pSignalSemaphores[j], + .value = signal_values ? 
signal_values[j] : 0, + }; + } + struct vulkan_submit_info info = { + .pNext = pBindInfo[i].pNext, + .wait_count = pBindInfo[i].waitSemaphoreCount, + .waits = wait_semaphore_infos, + .signal_count = pBindInfo[i].signalSemaphoreCount, + .signals = signal_semaphore_infos, + .buffer_bind_count = pBindInfo[i].bufferBindCount, + .buffer_binds = pBindInfo[i].pBufferBinds, + .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount, + .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds, + .image_bind_count = pBindInfo[i].imageBindCount, + .image_binds = pBindInfo[i].pImageBinds, + .fence = i == bindInfoCount - 1 ? fence : NULL + }; + VkResult result = vk_queue_submit(queue, &info); + + STACK_ARRAY_FINISH(wait_semaphore_infos); + STACK_ARRAY_FINISH(signal_semaphore_infos); + + if (unlikely(result != VK_SUCCESS)) + return result; + } + + return VK_SUCCESS; +} + +static const struct vk_sync_type * +get_cpu_wait_type(struct vk_physical_device *pdevice) +{ + for (const struct vk_sync_type *const *t = + pdevice->supported_sync_types; *t; t++) { + if (((*t)->features & VK_SYNC_FEATURE_BINARY) && + ((*t)->features & VK_SYNC_FEATURE_CPU_WAIT)) + return *t; + } + + unreachable("You must have a non-timeline CPU wait sync type"); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_QueueWaitIdle(VkQueue _queue) +{ + MESA_TRACE_FUNC(); + + VK_FROM_HANDLE(vk_queue, queue, _queue); + VkResult result; + + if (vk_device_is_lost(queue->base.device)) + return VK_ERROR_DEVICE_LOST; + + const struct vk_sync_type *sync_type = + get_cpu_wait_type(queue->base.device->physical); + + struct vk_sync *sync; + result = vk_sync_create(queue->base.device, sync_type, 0, 0, &sync); + if (unlikely(result != VK_SUCCESS)) + return result; + + result = vk_queue_signal_sync(queue, sync, 0); + if (unlikely(result != VK_SUCCESS)) + return result; + + result = vk_sync_wait(queue->base.device, sync, 0, + VK_SYNC_WAIT_COMPLETE, UINT64_MAX); + + vk_sync_destroy(queue->base.device, sync); + + VkResult device_status = vk_device_check_status(queue->base.device); + if (device_status != VK_SUCCESS) + return device_status; + + return result; +} diff --git a/src/vulkan/runtime/vk_queue.h b/src/vulkan/runtime/vk_queue.h new file mode 100644 index 00000000000..814f9fefcdd --- /dev/null +++ b/src/vulkan/runtime/vk_queue.h @@ -0,0 +1,250 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VK_QUEUE_H +#define VK_QUEUE_H + +#include "vk_device.h" + +#include "c11/threads.h" + +#include "util/list.h" +#include "util/u_dynarray.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_command_buffer; +struct vk_queue_submit; +struct vk_sync; +struct vk_sync_wait; +struct vk_sync_signal; +struct vk_sync_timeline_point; + +struct vk_queue { + struct vk_object_base base; + + /* Link in vk_device::queues */ + struct list_head link; + + /* VkDeviceQueueCreateInfo::flags */ + VkDeviceQueueCreateFlags flags; + + /* VkDeviceQueueCreateInfo::queueFamilyIndex */ + uint32_t queue_family_index; + + /* Which queue this is within the queue family */ + uint32_t index_in_family; + + /** Driver queue submit hook + * + * When using the common implementation of vkQueueSubmit(), this function + * is called to do the final submit to the kernel driver after all + * semaphore dependencies have been resolved. Depending on the timeline + * mode and application usage, this function may be called directly from + * the client thread on which vkQueueSubmit was called or from a runtime- + * managed submit thread. We do, however, guarantee that as long as the + * client follows the Vulkan threading rules, this function will never be + * called by the runtime concurrently on the same queue. + */ + VkResult (*driver_submit)(struct vk_queue *queue, + struct vk_queue_submit *submit); + + struct { + /** Current submit mode + * + * This represents the exact current submit mode for this specific queue + * which may be different from `vk_device::submit_mode`. In particular, + * this will never be `VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND`. + * Instead, when the device submit mode is + * `VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND`, the queue submit mode + * will be one of `VK_QUEUE_SUBMIT_MODE_THREADED` or + * `VK_QUEUE_SUBMIT_MODE_IMMEDIATE` depending on whether or not a submit + * thread is currently running for this queue. If the device submit + * mode is `VK_QUEUE_SUBMIT_MODE_DEFERRED`, every queue in the device + * will use `VK_QUEUE_SUBMIT_MODE_DEFERRED` because the deferred submit + * model depends on regular flushing instead of independent threads. + */ + enum vk_queue_submit_mode mode; + + mtx_t mutex; + cnd_t push; + cnd_t pop; + + struct list_head submits; + + bool thread_run; + thrd_t thread; + } submit; + + struct { + /* Only set once atomically by the queue */ + int lost; + int error_line; + const char *error_file; + char error_msg[80]; + } _lost; + + /** + * VK_EXT_debug_utils + * + * The next two fields represent debug labels storage. + * + * VK_EXT_debug_utils spec requires that upon triggering a debug message + * with a queue attached to it, all "active" labels will also be provided + * to the callback. The spec describes two distinct ways of attaching a + * debug label to the queue: opening a label region and inserting a single + * label. + * + * Label region is active between the corresponding `*BeginDebugUtilsLabel` + * and `*EndDebugUtilsLabel` calls. The spec doesn't mention any limits on + * nestedness of label regions. This implementation assumes that there + * aren't any. + * + * The spec, however, doesn't explain the lifetime of a label submitted by + * an `*InsertDebugUtilsLabel` call. The LunarG whitepaper [1] (pp 12-15) + * provides a more detailed explanation along with some examples. According + * to those, such label remains active until the next `*DebugUtilsLabel` + * call. This means that there can be no more than one such label at a + * time. 
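Since the driver_submit hook documented above is the main driver-facing contract in this header, here is a minimal sketch, assuming a hypothetical drv_queue wrapper and drv_queue_submit kernel path, of how a driver might wire a queue up at creation time with vk_queue_init() and the driver_submit hook.

   #include "vk_queue.h"

   struct drv_queue {
      struct vk_queue vk;   /* base object; kept first by convention */
      /* driver-private ring / context state would follow */
   };

   /* Hypothetical driver hook the runtime calls once all semaphore waits
    * for a submit have been resolved.
    */
   static VkResult drv_queue_submit(struct vk_queue *queue,
                                    struct vk_queue_submit *submit);

   static VkResult
   drv_queue_init(struct vk_device *device, struct drv_queue *queue,
                  const VkDeviceQueueCreateInfo *pCreateInfo,
                  uint32_t index_in_family)
   {
      VkResult result = vk_queue_init(&queue->vk, device, pCreateInfo,
                                      index_in_family);
      if (result != VK_SUCCESS)
         return result;

      queue->vk.driver_submit = drv_queue_submit;
      return VK_SUCCESS;
   }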
+ * + * ``labels`` contains all active labels at this point in order of + * submission ``region_begin`` denotes whether the most recent label opens + * a new region If ``labels`` is empty ``region_begin`` must be true. + * + * Anytime we modify labels, we first check for ``region_begin``. If it's + * false, it means that the most recent label was submitted by + * `*InsertDebugUtilsLabel` and we need to remove it before doing anything + * else. + * + * See the discussion here: + * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10318#note_1061317 + * + * [1] https://www.lunarg.com/wp-content/uploads/2018/05/Vulkan-Debug-Utils_05_18_v1.pdf + */ + struct util_dynarray labels; + bool region_begin; + +#if DETECT_OS_ANDROID + /** SYNC_FD signal semaphore for vkQueueSignalReleaseImageANDROID + * + * VK_ANDROID_native_buffer enforces explicit fencing on the present api + * boundary. To avoid assuming all waitSemaphores exportable to sync file + * and to capture pending cmds in the queue, we do a simple submission and + * signal a SYNC_FD handle type external sempahore for native fence export. + * + * This plays the same role as wsi_swapchain::dma_buf_semaphore for WSI. + * The VK_ANDROID_native_buffer spec hides the swapchain object from the + * icd, so we have to cache the semaphore in common vk_queue. + * + * This also makes it easier to add additional cmds to prepare the wsi + * image for implementations requiring such (e.g. for layout transition). + */ + VkSemaphore anb_semaphore; +#endif +}; + +VK_DEFINE_HANDLE_CASTS(vk_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE) + +VkResult MUST_CHECK +vk_queue_init(struct vk_queue *queue, struct vk_device *device, + const VkDeviceQueueCreateInfo *pCreateInfo, + uint32_t index_in_family); + +void +vk_queue_finish(struct vk_queue *queue); + +static inline bool +vk_queue_is_empty(struct vk_queue *queue) +{ + return list_is_empty(&queue->submit.submits); +} + +/** Enables threaded submit on this queue + * + * This should be called by the driver if it wants to be able to block inside + * `vk_queue::driver_submit`. Once this function has been called, the queue + * will always use a submit thread for all submissions. You must have called + * vk_device_enabled_threaded_submit() before calling this function. + */ +VkResult vk_queue_enable_submit_thread(struct vk_queue *queue); + +VkResult vk_queue_flush(struct vk_queue *queue, uint32_t *submit_count_out); + +VkResult vk_queue_wait_before_present(struct vk_queue *queue, + const VkPresentInfoKHR *pPresentInfo); + +VkResult PRINTFLIKE(4, 5) +_vk_queue_set_lost(struct vk_queue *queue, + const char *file, int line, + const char *msg, ...); + +#define vk_queue_set_lost(queue, ...) 
\ + _vk_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) + +static inline bool +vk_queue_is_lost(struct vk_queue *queue) +{ + return queue->_lost.lost; +} + +#define vk_foreach_queue(queue, device) \ + list_for_each_entry(struct vk_queue, queue, &(device)->queues, link) + +#define vk_foreach_queue_safe(queue, device) \ + list_for_each_entry_safe(struct vk_queue, queue, &(device)->queues, link) + +struct vk_queue_submit { + struct list_head link; + + uint32_t wait_count; + uint32_t command_buffer_count; + uint32_t signal_count; + + uint32_t buffer_bind_count; + uint32_t image_opaque_bind_count; + uint32_t image_bind_count; + + struct vk_sync_wait *waits; + struct vk_command_buffer **command_buffers; + struct vk_sync_signal *signals; + + VkSparseBufferMemoryBindInfo *buffer_binds; + VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds; + VkSparseImageMemoryBindInfo *image_binds; + + uint32_t perf_pass_index; + + /* Used internally; should be ignored by drivers */ + struct vk_sync **_wait_temps; + struct vk_sync *_mem_signal_temp; + struct vk_sync_timeline_point **_wait_points; + struct vk_sync_timeline_point **_signal_points; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* VK_QUEUE_H */ diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c new file mode 100644 index 00000000000..9eb69987383 --- /dev/null +++ b/src/vulkan/runtime/vk_render_pass.c @@ -0,0 +1,2500 @@ +/* + * Copyright © 2020 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_render_pass.h" + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_framebuffer.h" +#include "vk_image.h" +#include "vk_util.h" + +#include "util/log.h" + +static void +translate_references(VkAttachmentReference2 **reference_ptr, + uint32_t reference_count, + const VkAttachmentReference *reference, + const VkRenderPassCreateInfo *pass_info, + bool is_input_attachment) +{ + VkAttachmentReference2 *reference2 = *reference_ptr; + *reference_ptr += reference_count; + for (uint32_t i = 0; i < reference_count; i++) { + reference2[i] = (VkAttachmentReference2) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, + .pNext = NULL, + .attachment = reference[i].attachment, + .layout = reference[i].layout, + }; + + if (is_input_attachment && + reference2[i].attachment != VK_ATTACHMENT_UNUSED) { + assert(reference2[i].attachment < pass_info->attachmentCount); + const VkAttachmentDescription *att = + &pass_info->pAttachments[reference2[i].attachment]; + reference2[i].aspectMask = vk_format_aspects(att->format); + } + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateRenderPass(VkDevice _device, + const VkRenderPassCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + uint32_t reference_count = 0; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + reference_count += pCreateInfo->pSubpasses[i].inputAttachmentCount; + reference_count += pCreateInfo->pSubpasses[i].colorAttachmentCount; + if (pCreateInfo->pSubpasses[i].pResolveAttachments) + reference_count += pCreateInfo->pSubpasses[i].colorAttachmentCount; + if (pCreateInfo->pSubpasses[i].pDepthStencilAttachment) + reference_count += 1; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, VkRenderPassCreateInfo2, create_info, 1); + VK_MULTIALLOC_DECL(&ma, VkSubpassDescription2, subpasses, + pCreateInfo->subpassCount); + VK_MULTIALLOC_DECL(&ma, VkAttachmentDescription2, attachments, + pCreateInfo->attachmentCount); + VK_MULTIALLOC_DECL(&ma, VkSubpassDependency2, dependencies, + pCreateInfo->dependencyCount); + VK_MULTIALLOC_DECL(&ma, VkAttachmentReference2, references, + reference_count); + if (!vk_multialloc_alloc2(&ma, &device->alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkAttachmentReference2 *reference_ptr = references; + + const VkRenderPassMultiviewCreateInfo *multiview_info = NULL; + const VkRenderPassInputAttachmentAspectCreateInfo *aspect_info = NULL; + vk_foreach_struct_const(ext, pCreateInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO: + aspect_info = (const VkRenderPassInputAttachmentAspectCreateInfo *)ext; + /* We don't care about this information */ + break; + + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: + multiview_info = (const VkRenderPassMultiviewCreateInfo*) ext; + break; + + case VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT: + /* pass this through to CreateRenderPass2 */ + break; + + default: + mesa_logd("%s: ignored VkStructureType %u\n", __func__, ext->sType); + break; + } + } + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + attachments[i] = (VkAttachmentDescription2) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2, + .pNext = NULL, + .flags = pCreateInfo->pAttachments[i].flags, + .format = 
pCreateInfo->pAttachments[i].format, + .samples = pCreateInfo->pAttachments[i].samples, + .loadOp = pCreateInfo->pAttachments[i].loadOp, + .storeOp = pCreateInfo->pAttachments[i].storeOp, + .stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp, + .stencilStoreOp = pCreateInfo->pAttachments[i].stencilStoreOp, + .initialLayout = pCreateInfo->pAttachments[i].initialLayout, + .finalLayout = pCreateInfo->pAttachments[i].finalLayout, + }; + } + + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + subpasses[i] = (VkSubpassDescription2) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, + .pNext = NULL, + .flags = pCreateInfo->pSubpasses[i].flags, + .pipelineBindPoint = pCreateInfo->pSubpasses[i].pipelineBindPoint, + .viewMask = 0, + .inputAttachmentCount = pCreateInfo->pSubpasses[i].inputAttachmentCount, + .colorAttachmentCount = pCreateInfo->pSubpasses[i].colorAttachmentCount, + .preserveAttachmentCount = pCreateInfo->pSubpasses[i].preserveAttachmentCount, + .pPreserveAttachments = pCreateInfo->pSubpasses[i].pPreserveAttachments, + }; + + if (multiview_info && multiview_info->subpassCount) { + assert(multiview_info->subpassCount == pCreateInfo->subpassCount); + subpasses[i].viewMask = multiview_info->pViewMasks[i]; + } + + subpasses[i].pInputAttachments = reference_ptr; + translate_references(&reference_ptr, + subpasses[i].inputAttachmentCount, + pCreateInfo->pSubpasses[i].pInputAttachments, + pCreateInfo, true); + subpasses[i].pColorAttachments = reference_ptr; + translate_references(&reference_ptr, + subpasses[i].colorAttachmentCount, + pCreateInfo->pSubpasses[i].pColorAttachments, + pCreateInfo, false); + subpasses[i].pResolveAttachments = NULL; + if (pCreateInfo->pSubpasses[i].pResolveAttachments) { + subpasses[i].pResolveAttachments = reference_ptr; + translate_references(&reference_ptr, + subpasses[i].colorAttachmentCount, + pCreateInfo->pSubpasses[i].pResolveAttachments, + pCreateInfo, false); + } + subpasses[i].pDepthStencilAttachment = NULL; + if (pCreateInfo->pSubpasses[i].pDepthStencilAttachment) { + subpasses[i].pDepthStencilAttachment = reference_ptr; + translate_references(&reference_ptr, 1, + pCreateInfo->pSubpasses[i].pDepthStencilAttachment, + pCreateInfo, false); + } + } + + assert(reference_ptr == references + reference_count); + + if (aspect_info != NULL) { + for (uint32_t i = 0; i < aspect_info->aspectReferenceCount; i++) { + const VkInputAttachmentAspectReference *ref = + &aspect_info->pAspectReferences[i]; + + assert(ref->subpass < pCreateInfo->subpassCount); + VkSubpassDescription2 *subpass = &subpasses[ref->subpass]; + + assert(ref->inputAttachmentIndex < subpass->inputAttachmentCount); + VkAttachmentReference2 *att = (VkAttachmentReference2 *) + &subpass->pInputAttachments[ref->inputAttachmentIndex]; + + att->aspectMask = ref->aspectMask; + } + } + + for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { + dependencies[i] = (VkSubpassDependency2) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, + .pNext = NULL, + .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, + .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, + .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, + .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, + .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, + .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, + .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, + .viewOffset = 0, + }; + + if (multiview_info && multiview_info->dependencyCount) { + 
assert(multiview_info->dependencyCount == pCreateInfo->dependencyCount); + dependencies[i].viewOffset = multiview_info->pViewOffsets[i]; + } + } + + *create_info = (VkRenderPassCreateInfo2) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, + .pNext = pCreateInfo->pNext, + .flags = pCreateInfo->flags, + .attachmentCount = pCreateInfo->attachmentCount, + .pAttachments = attachments, + .subpassCount = pCreateInfo->subpassCount, + .pSubpasses = subpasses, + .dependencyCount = pCreateInfo->dependencyCount, + .pDependencies = dependencies, + }; + + if (multiview_info && multiview_info->correlationMaskCount > 0) { + create_info->correlatedViewMaskCount = multiview_info->correlationMaskCount; + create_info->pCorrelatedViewMasks = multiview_info->pCorrelationMasks; + } + + VkResult result = + device->dispatch_table.CreateRenderPass2(_device, create_info, + pAllocator, pRenderPass); + + vk_free2(&device->alloc, pAllocator, create_info); + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBeginRenderPass(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. + */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + VkSubpassBeginInfo info = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, + .contents = contents, + }; + + disp->device->dispatch_table.CmdBeginRenderPass2(commandBuffer, + pRenderPassBegin, &info); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdEndRenderPass(VkCommandBuffer commandBuffer) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. + */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + VkSubpassEndInfo info = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, + }; + + disp->device->dispatch_table.CmdEndRenderPass2(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdNextSubpass(VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + /* We don't have a vk_command_buffer object but we can assume, since we're + * using common dispatch, that it's a vk_object of some sort. 
+ */ + struct vk_object_base *disp = (struct vk_object_base *)commandBuffer; + + VkSubpassBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, + .contents = contents, + }; + + VkSubpassEndInfo end_info = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, + }; + + disp->device->dispatch_table.CmdNextSubpass2(commandBuffer, &begin_info, + &end_info); +} + +static unsigned +num_subpass_attachments2(const VkSubpassDescription2 *desc) +{ + bool has_depth_stencil_attachment = + desc->pDepthStencilAttachment != NULL && + desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED; + + const VkSubpassDescriptionDepthStencilResolve *ds_resolve = + vk_find_struct_const(desc->pNext, + SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); + + bool has_depth_stencil_resolve_attachment = + ds_resolve != NULL && ds_resolve->pDepthStencilResolveAttachment && + ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED; + + const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info = + vk_find_struct_const(desc->pNext, + FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + + bool has_fragment_shading_rate_attachment = + fsr_att_info && fsr_att_info->pFragmentShadingRateAttachment && + fsr_att_info->pFragmentShadingRateAttachment->attachment != VK_ATTACHMENT_UNUSED; + + return desc->inputAttachmentCount + + desc->colorAttachmentCount + + (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + + has_depth_stencil_attachment + + has_depth_stencil_resolve_attachment + + has_fragment_shading_rate_attachment; +} + +static void +vk_render_pass_attachment_init(struct vk_render_pass_attachment *att, + const VkAttachmentDescription2 *desc) +{ + *att = (struct vk_render_pass_attachment) { + .format = desc->format, + .aspects = vk_format_aspects(desc->format), + .samples = desc->samples, + .view_mask = 0, + .load_op = desc->loadOp, + .store_op = desc->storeOp, + .stencil_load_op = desc->stencilLoadOp, + .stencil_store_op = desc->stencilStoreOp, + .initial_layout = desc->initialLayout, + .final_layout = desc->finalLayout, + .initial_stencil_layout = vk_att_desc_stencil_layout(desc, false), + .final_stencil_layout = vk_att_desc_stencil_layout(desc, true), + }; +} + +static void +vk_subpass_attachment_init(struct vk_subpass_attachment *att, + struct vk_render_pass *pass, + uint32_t subpass_idx, + const VkAttachmentReference2 *ref, + const VkAttachmentDescription2 *attachments, + VkImageUsageFlagBits usage) +{ + if (ref->attachment >= pass->attachment_count) { + assert(ref->attachment == VK_ATTACHMENT_UNUSED); + *att = (struct vk_subpass_attachment) { + .attachment = VK_ATTACHMENT_UNUSED, + }; + return; + } + + struct vk_render_pass_attachment *pass_att = + &pass->attachments[ref->attachment]; + + *att = (struct vk_subpass_attachment) { + .attachment = ref->attachment, + .aspects = vk_format_aspects(pass_att->format), + .usage = usage, + .layout = ref->layout, + .stencil_layout = vk_att_ref_stencil_layout(ref, attachments), + }; + + switch (usage) { + case VK_IMAGE_USAGE_TRANSFER_DST_BIT: + break; /* No special aspect requirements */ + + case VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT: + /* From the Vulkan 1.2.184 spec: + * + * "aspectMask is ignored when this structure is used to describe + * anything other than an input attachment reference." 
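For reference, the application-side structure this case handles; a depth-only read of a combined depth/stencil attachment is the typical reason aspectMask matters here. The attachment index and layout below are illustrative only.

   const VkAttachmentReference2 depth_input_ref = {
      .sType      = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
      .attachment = 1,
      .layout     = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
      /* Only honored for input attachments, as the spec text above says. */
      .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
   };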
+ */ + assert(!(ref->aspectMask & ~att->aspects)); + att->aspects = ref->aspectMask; + break; + + case VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT: + case VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR: + assert(att->aspects == VK_IMAGE_ASPECT_COLOR_BIT); + break; + + case VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT: + assert(!(att->aspects & ~(VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT))); + break; + + default: + unreachable("Invalid subpass attachment usage"); + } +} + +static void +vk_subpass_attachment_link_resolve(struct vk_subpass_attachment *att, + struct vk_subpass_attachment *resolve, + const VkRenderPassCreateInfo2 *info) +{ + if (resolve->attachment == VK_ATTACHMENT_UNUSED) + return; + + assert(att->attachment != VK_ATTACHMENT_UNUSED); + att->resolve = resolve; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateRenderPass2(VkDevice _device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2); + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_render_pass, pass, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_render_pass_attachment, attachments, + pCreateInfo->attachmentCount); + VK_MULTIALLOC_DECL(&ma, struct vk_subpass, subpasses, + pCreateInfo->subpassCount); + VK_MULTIALLOC_DECL(&ma, struct vk_subpass_dependency, dependencies, + pCreateInfo->dependencyCount); + + uint32_t subpass_attachment_count = 0; + uint32_t subpass_color_attachment_count = 0; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + subpass_attachment_count += + num_subpass_attachments2(&pCreateInfo->pSubpasses[i]); + subpass_color_attachment_count += + pCreateInfo->pSubpasses[i].colorAttachmentCount; + } + VK_MULTIALLOC_DECL(&ma, struct vk_subpass_attachment, subpass_attachments, + subpass_attachment_count); + VK_MULTIALLOC_DECL(&ma, VkFormat, subpass_color_formats, + subpass_color_attachment_count); + VK_MULTIALLOC_DECL(&ma, VkSampleCountFlagBits, subpass_color_samples, + subpass_color_attachment_count); + + if (!vk_object_multizalloc(device, &ma, pAllocator, + VK_OBJECT_TYPE_RENDER_PASS)) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + pass->attachment_count = pCreateInfo->attachmentCount; + pass->attachments = attachments; + pass->subpass_count = pCreateInfo->subpassCount; + pass->subpasses = subpasses; + pass->dependency_count = pCreateInfo->dependencyCount; + pass->dependencies = dependencies; + + for (uint32_t a = 0; a < pCreateInfo->attachmentCount; a++) { + vk_render_pass_attachment_init(&pass->attachments[a], + &pCreateInfo->pAttachments[a]); + } + + struct vk_subpass_attachment *next_subpass_attachment = subpass_attachments; + VkFormat *next_subpass_color_format = subpass_color_formats; + VkSampleCountFlagBits *next_subpass_color_samples = subpass_color_samples; + for (uint32_t s = 0; s < pCreateInfo->subpassCount; s++) { + const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[s]; + struct vk_subpass *subpass = &pass->subpasses[s]; + const VkMultisampledRenderToSingleSampledInfoEXT *mrtss = + vk_find_struct_const(desc->pNext, MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); + if (mrtss && !mrtss->multisampledRenderToSingleSampledEnable) + mrtss = NULL; + + subpass->attachment_count = num_subpass_attachments2(desc); + subpass->attachments = next_subpass_attachment; + + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkRenderPassCreateInfo2-viewMask-03058 + * + * "The 
VkSubpassDescription2::viewMask member of all elements of + * pSubpasses must either all be 0, or all not be 0" + */ + if (desc->viewMask) + pass->is_multiview = true; + assert(pass->is_multiview == (desc->viewMask != 0)); + + /* For all view masks in the vk_render_pass data structure, we use a + * mask of 1 for non-multiview instead of a mask of 0. + */ + subpass->view_mask = desc->viewMask ? desc->viewMask : 1; + pass->view_mask |= subpass->view_mask; + + subpass->input_count = desc->inputAttachmentCount; + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = next_subpass_attachment; + next_subpass_attachment += desc->inputAttachmentCount; + + for (uint32_t a = 0; a < desc->inputAttachmentCount; a++) { + vk_subpass_attachment_init(&subpass->input_attachments[a], + pass, s, + &desc->pInputAttachments[a], + pCreateInfo->pAttachments, + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); + } + } + + subpass->color_count = desc->colorAttachmentCount; + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = next_subpass_attachment; + next_subpass_attachment += desc->colorAttachmentCount; + + for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { + vk_subpass_attachment_init(&subpass->color_attachments[a], + pass, s, + &desc->pColorAttachments[a], + pCreateInfo->pAttachments, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } + } + + if (desc->pResolveAttachments) { + subpass->color_resolve_count = desc->colorAttachmentCount; + subpass->color_resolve_attachments = next_subpass_attachment; + next_subpass_attachment += desc->colorAttachmentCount; + + for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { + vk_subpass_attachment_init(&subpass->color_resolve_attachments[a], + pass, s, + &desc->pResolveAttachments[a], + pCreateInfo->pAttachments, + VK_IMAGE_USAGE_TRANSFER_DST_BIT); + vk_subpass_attachment_link_resolve(&subpass->color_attachments[a], + &subpass->color_resolve_attachments[a], + pCreateInfo); + } + } + + if (desc->pDepthStencilAttachment && + desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { + subpass->depth_stencil_attachment = next_subpass_attachment++; + + vk_subpass_attachment_init(subpass->depth_stencil_attachment, + pass, s, + desc->pDepthStencilAttachment, + pCreateInfo->pAttachments, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + } + + const VkSubpassDescriptionDepthStencilResolve *ds_resolve = + vk_find_struct_const(desc->pNext, + SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); + + if (ds_resolve) { + if (ds_resolve->pDepthStencilResolveAttachment && + ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { + subpass->depth_stencil_resolve_attachment = next_subpass_attachment++; + + vk_subpass_attachment_init(subpass->depth_stencil_resolve_attachment, + pass, s, + ds_resolve->pDepthStencilResolveAttachment, + pCreateInfo->pAttachments, + VK_IMAGE_USAGE_TRANSFER_DST_BIT); + vk_subpass_attachment_link_resolve(subpass->depth_stencil_attachment, + subpass->depth_stencil_resolve_attachment, + pCreateInfo); + } + if (subpass->depth_stencil_resolve_attachment || mrtss) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03178 + * + * "If pDepthStencilResolveAttachment is not NULL and does not + * have the value VK_ATTACHMENT_UNUSED, depthResolveMode and + * stencilResolveMode must not both be VK_RESOLVE_MODE_NONE" + */ + assert(ds_resolve->depthResolveMode != VK_RESOLVE_MODE_NONE || + ds_resolve->stencilResolveMode != VK_RESOLVE_MODE_NONE); + + 
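For reference, the application-side chain this block consumes; the attachment index, layout, and resolve modes are illustrative only.

   const VkAttachmentReference2 ds_resolve_ref = {
      .sType      = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
      .attachment = 2,
      .layout     = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   };

   const VkSubpassDescriptionDepthStencilResolve ds_resolve_info = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE,
      .depthResolveMode   = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
      .stencilResolveMode = VK_RESOLVE_MODE_NONE,
      /* Chained into VkSubpassDescription2::pNext by the application. */
      .pDepthStencilResolveAttachment = &ds_resolve_ref,
   };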
subpass->depth_resolve_mode = ds_resolve->depthResolveMode; + subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; + } + } + + const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info = + vk_find_struct_const(desc->pNext, + FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + + if (fsr_att_info && fsr_att_info->pFragmentShadingRateAttachment && + fsr_att_info->pFragmentShadingRateAttachment->attachment != VK_ATTACHMENT_UNUSED) { + subpass->fragment_shading_rate_attachment = next_subpass_attachment++; + vk_subpass_attachment_init(subpass->fragment_shading_rate_attachment, + pass, s, + fsr_att_info->pFragmentShadingRateAttachment, + pCreateInfo->pAttachments, + VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR); + subpass->fragment_shading_rate_attachment_texel_size = + fsr_att_info->shadingRateAttachmentTexelSize; + subpass->pipeline_flags |= + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + } + + /* Figure out any self-dependencies */ + assert(desc->colorAttachmentCount <= 32); + for (uint32_t a = 0; a < desc->inputAttachmentCount; a++) { + if (desc->pInputAttachments[a].attachment == VK_ATTACHMENT_UNUSED) + continue; + + for (uint32_t c = 0; c < desc->colorAttachmentCount; c++) { + if (desc->pColorAttachments[c].attachment == + desc->pInputAttachments[a].attachment) { + subpass->input_attachments[a].layout = + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + subpass->color_attachments[c].layout = + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + subpass->pipeline_flags |= + VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } + } + + if (desc->pDepthStencilAttachment != NULL && + desc->pDepthStencilAttachment->attachment == + desc->pInputAttachments[a].attachment) { + VkImageAspectFlags aspects = + subpass->input_attachments[a].aspects; + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + subpass->input_attachments[a].layout = + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + subpass->depth_stencil_attachment->layout = + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + subpass->pipeline_flags |= + VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + subpass->input_attachments[a].stencil_layout = + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + subpass->depth_stencil_attachment->stencil_layout = + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + subpass->pipeline_flags |= + VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } + } + } + + VkFormat *color_formats = NULL; + VkSampleCountFlagBits *color_samples = NULL; + VkSampleCountFlagBits samples = 0; + if (desc->colorAttachmentCount > 0) { + color_formats = next_subpass_color_format; + color_samples = next_subpass_color_samples; + for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { + const VkAttachmentReference2 *ref = &desc->pColorAttachments[a]; + if (ref->attachment >= pCreateInfo->attachmentCount) { + color_formats[a] = VK_FORMAT_UNDEFINED; + color_samples[a] = VK_SAMPLE_COUNT_1_BIT; + } else { + const VkAttachmentDescription2 *att = + &pCreateInfo->pAttachments[ref->attachment]; + + color_formats[a] = att->format; + color_samples[a] = att->samples; + + samples |= att->samples; + } + } + next_subpass_color_format += desc->colorAttachmentCount; + next_subpass_color_samples += desc->colorAttachmentCount; + } + + VkFormat depth_format = VK_FORMAT_UNDEFINED; + VkFormat stencil_format = VK_FORMAT_UNDEFINED; + VkSampleCountFlagBits depth_stencil_samples = 
VK_SAMPLE_COUNT_1_BIT; + if (desc->pDepthStencilAttachment != NULL) { + const VkAttachmentReference2 *ref = desc->pDepthStencilAttachment; + if (ref->attachment < pCreateInfo->attachmentCount) { + const VkAttachmentDescription2 *att = + &pCreateInfo->pAttachments[ref->attachment]; + + if (vk_format_has_depth(att->format)) + depth_format = att->format; + if (vk_format_has_stencil(att->format)) + stencil_format = att->format; + + depth_stencil_samples = att->samples; + + samples |= att->samples; + } + } + + subpass->sample_count_info_amd = (VkAttachmentSampleCountInfoAMD) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, + .pNext = NULL, + .colorAttachmentCount = desc->colorAttachmentCount, + .pColorAttachmentSamples = color_samples, + .depthStencilAttachmentSamples = depth_stencil_samples, + }; + + subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, + .pNext = &subpass->sample_count_info_amd, + .viewMask = desc->viewMask, + .colorAttachmentCount = desc->colorAttachmentCount, + .pColorAttachmentFormats = color_formats, + .depthAttachmentFormat = depth_format, + .stencilAttachmentFormat = stencil_format, + }; + + subpass->inheritance_info = (VkCommandBufferInheritanceRenderingInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, + .pNext = &subpass->sample_count_info_amd, + /* If we're inheriting, the contents are clearly in secondaries */ + .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, + .viewMask = desc->viewMask, + .colorAttachmentCount = desc->colorAttachmentCount, + .pColorAttachmentFormats = color_formats, + .depthAttachmentFormat = depth_format, + .stencilAttachmentFormat = stencil_format, + .rasterizationSamples = samples, + }; + + if (mrtss) { + assert(mrtss->multisampledRenderToSingleSampledEnable); + subpass->mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { + .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + .multisampledRenderToSingleSampledEnable = VK_TRUE, + .rasterizationSamples = mrtss->rasterizationSamples, + }; + } + } + assert(next_subpass_attachment == + subpass_attachments + subpass_attachment_count); + assert(next_subpass_color_format == + subpass_color_formats + subpass_color_attachment_count); + assert(next_subpass_color_samples == + subpass_color_samples + subpass_color_attachment_count); + + /* Walk backwards over the subpasses to compute view masks and + * last_subpass masks for all attachments. + */ + for (uint32_t s = 0; s < pCreateInfo->subpassCount; s++) { + struct vk_subpass *subpass = + &pass->subpasses[(pCreateInfo->subpassCount - 1) - s]; + + /* First, compute last_subpass for all the attachments */ + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + struct vk_subpass_attachment *att = &subpass->attachments[a]; + if (att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *pass_att = + &pass->attachments[att->attachment]; + + att->last_subpass = subpass->view_mask & ~pass_att->view_mask; + } + + /* Then compute pass_att->view_mask. We do the two separately so that + * we end up with the right last_subpass even if the same attachment is + * used twice within a subpass. 
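A small worked example of this backward walk, assuming an attachment A referenced by subpass 0 with view mask 0x3 and by subpass 1 with view mask 0x1:

   /* pass_att->view_mask starts at 0 and the loop visits subpass 1 first:
    *   subpass 1: last_subpass = 0x1 & ~0x0 = 0x1;  pass_att->view_mask -> 0x1
    *   subpass 0: last_subpass = 0x3 & ~0x1 = 0x2;  pass_att->view_mask -> 0x3
    * i.e. view 0 last uses A in subpass 1 while view 1 last uses it in
    * subpass 0.
    */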
+ */ + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *att = &subpass->attachments[a]; + if (att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(att->attachment < pass->attachment_count); + struct vk_render_pass_attachment *pass_att = + &pass->attachments[att->attachment]; + + pass_att->view_mask |= subpass->view_mask; + } + } + + pass->dependency_count = pCreateInfo->dependencyCount; + for (uint32_t d = 0; d < pCreateInfo->dependencyCount; d++) { + const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[d]; + + pass->dependencies[d] = (struct vk_subpass_dependency) { + .flags = dep->dependencyFlags, + .src_subpass = dep->srcSubpass, + .dst_subpass = dep->dstSubpass, + .src_stage_mask = (VkPipelineStageFlags2)dep->srcStageMask, + .dst_stage_mask = (VkPipelineStageFlags2)dep->dstStageMask, + .src_access_mask = (VkAccessFlags2)dep->srcAccessMask, + .dst_access_mask = (VkAccessFlags2)dep->dstAccessMask, + .view_offset = dep->viewOffset, + }; + + /* From the Vulkan 1.3.204 spec: + * + * "If a VkMemoryBarrier2 is included in the pNext chain, + * srcStageMask, dstStageMask, srcAccessMask, and dstAccessMask + * parameters are ignored. The synchronization and access scopes + * instead are defined by the parameters of VkMemoryBarrier2." + */ + const VkMemoryBarrier2 *barrier = + vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2); + if (barrier != NULL) { + pass->dependencies[d].src_stage_mask = barrier->srcStageMask; + pass->dependencies[d].dst_stage_mask = barrier->dstStageMask; + pass->dependencies[d].src_access_mask = barrier->srcAccessMask; + pass->dependencies[d].dst_access_mask = barrier->dstAccessMask; + } + } + + const VkRenderPassFragmentDensityMapCreateInfoEXT *fdm_info = + vk_find_struct_const(pCreateInfo->pNext, + RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT); + if (fdm_info) { + pass->fragment_density_map = fdm_info->fragmentDensityMapAttachment; + } else { + pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED; + pass->fragment_density_map.layout = VK_IMAGE_LAYOUT_UNDEFINED; + } + + *pRenderPass = vk_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +const VkPipelineRenderingCreateInfo * +vk_get_pipeline_rendering_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); + if (render_pass != NULL) { + assert(info->subpass < render_pass->subpass_count); + return &render_pass->subpasses[info->subpass].pipeline_info; + } + + return vk_find_struct_const(info->pNext, PIPELINE_RENDERING_CREATE_INFO); +} + +VkPipelineCreateFlags2KHR +vk_get_pipeline_rendering_flags(const VkGraphicsPipelineCreateInfo *info) +{ + VkPipelineCreateFlags2KHR rendering_flags = 0; + + VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); + if (render_pass != NULL) { + rendering_flags |= render_pass->subpasses[info->subpass].pipeline_flags; + if (render_pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) + rendering_flags |= + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT; + } + + return rendering_flags; +} + +const VkAttachmentSampleCountInfoAMD * +vk_get_pipeline_sample_count_info_amd(const VkGraphicsPipelineCreateInfo *info) +{ + VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); + if (render_pass != NULL) { + assert(info->subpass < render_pass->subpass_count); + return &render_pass->subpasses[info->subpass].sample_count_info_amd; + } + + return vk_find_struct_const(info->pNext, 
ATTACHMENT_SAMPLE_COUNT_INFO_AMD); +} + +const VkCommandBufferInheritanceRenderingInfo * +vk_get_command_buffer_inheritance_rendering_info( + VkCommandBufferLevel level, + const VkCommandBufferBeginInfo *pBeginInfo) +{ + /* From the Vulkan 1.3.204 spec: + * + * "VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a + * secondary command buffer is considered to be entirely inside a render + * pass. If this is a primary command buffer, then this bit is ignored." + * + * Since we're only concerned with the continue case here, we can ignore + * any primary command buffers. + */ + if (level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) + return NULL; + + if (!(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) + return NULL; + + const VkCommandBufferInheritanceInfo *inheritance = + pBeginInfo->pInheritanceInfo; + + /* From the Vulkan 1.3.204 spec: + * + * "If VkCommandBufferInheritanceInfo::renderPass is not VK_NULL_HANDLE, + * or VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT is not specified + * in VkCommandBufferBeginInfo::flags, parameters of this structure are + * ignored." + * + * If we have a render pass that wins, even if a + * VkCommandBufferInheritanceRenderingInfo struct is included in the pNext + * chain. + */ + VK_FROM_HANDLE(vk_render_pass, render_pass, inheritance->renderPass); + if (render_pass != NULL) { + assert(inheritance->subpass < render_pass->subpass_count); + return &render_pass->subpasses[inheritance->subpass].inheritance_info; + } + + return vk_find_struct_const(inheritance->pNext, + COMMAND_BUFFER_INHERITANCE_RENDERING_INFO); +} + +const VkRenderingInfo * +vk_get_command_buffer_inheritance_as_rendering_resume( + VkCommandBufferLevel level, + const VkCommandBufferBeginInfo *pBeginInfo, + void *stack_data) +{ + struct vk_gcbiarr_data *data = stack_data; + + /* From the Vulkan 1.3.204 spec: + * + * "VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a + * secondary command buffer is considered to be entirely inside a render + * pass. If this is a primary command buffer, then this bit is ignored." + * + * Since we're only concerned with the continue case here, we can ignore + * any primary command buffers. + */ + if (level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) + return NULL; + + if (!(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) + return NULL; + + const VkCommandBufferInheritanceInfo *inheritance = + pBeginInfo->pInheritanceInfo; + + VK_FROM_HANDLE(vk_render_pass, pass, inheritance->renderPass); + if (pass == NULL) + return NULL; + + assert(inheritance->subpass < pass->subpass_count); + const struct vk_subpass *subpass = &pass->subpasses[inheritance->subpass]; + + VK_FROM_HANDLE(vk_framebuffer, fb, inheritance->framebuffer); + if (fb == NULL || (fb->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) + return NULL; + + data->rendering = (VkRenderingInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .flags = VK_RENDERING_RESUMING_BIT, + .renderArea = { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .layerCount = fb->layers, + .viewMask = pass->is_multiview ? 
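/*
 * [Editor's illustrative sketch, not part of the original file.] How a
 * driver's pipeline-creation path might consume the helpers defined above.
 * drv_init_rendering_state() and the way the results are used are invented
 * for this example; only the vk_get_pipeline_rendering_* calls come from
 * this runtime.
 */
#include "vk_render_pass.h"

static void
drv_init_rendering_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   /* Works whether the app passed a VkRenderPass or chained a
    * VkPipelineRenderingCreateInfo for dynamic rendering.
    */
   const VkPipelineRenderingCreateInfo *r =
      vk_get_pipeline_rendering_create_info(pCreateInfo);
   VkPipelineCreateFlags2KHR extra_flags =
      vk_get_pipeline_rendering_flags(pCreateInfo);

   if (r != NULL) {
      for (uint32_t i = 0; i < r->colorAttachmentCount; i++) {
         VkFormat fmt = r->pColorAttachmentFormats[i];
         (void)fmt; /* program per-render-target state from fmt */
      }
   }
   (void)extra_flags; /* e.g. test the feedback-loop or FDM bits */
}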
subpass->view_mask : 0, + }; + + VkRenderingAttachmentInfo *attachments = data->attachments; + + for (unsigned i = 0; i < subpass->color_count; i++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[i]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { + attachments[i] = (VkRenderingAttachmentInfo) { + .imageView = VK_NULL_HANDLE, + }; + continue; + } + + assert(sp_att->attachment < pass->attachment_count); + attachments[i] = (VkRenderingAttachmentInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = fb->attachments[sp_att->attachment], + .imageLayout = sp_att->layout, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + }; + } + data->rendering.colorAttachmentCount = subpass->color_count; + data->rendering.pColorAttachments = attachments; + attachments += subpass->color_count; + + if (subpass->depth_stencil_attachment) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + assert(sp_att->attachment < pass->attachment_count); + + VK_FROM_HANDLE(vk_image_view, iview, fb->attachments[sp_att->attachment]); + if (iview->image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + *attachments = (VkRenderingAttachmentInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = vk_image_view_to_handle(iview), + .imageLayout = sp_att->layout, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + }; + data->rendering.pDepthAttachment = attachments++; + } + + if (iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + *attachments = (VkRenderingAttachmentInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = vk_image_view_to_handle(iview), + .imageLayout = sp_att->stencil_layout, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + }; + data->rendering.pStencilAttachment = attachments++; + } + } + + if (subpass->fragment_shading_rate_attachment) { + const struct vk_subpass_attachment *sp_att = + subpass->fragment_shading_rate_attachment; + assert(sp_att->attachment < pass->attachment_count); + + data->fsr_att = (VkRenderingFragmentShadingRateAttachmentInfoKHR) { + .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, + .imageView = fb->attachments[sp_att->attachment], + .imageLayout = sp_att->layout, + .shadingRateAttachmentTexelSize = + subpass->fragment_shading_rate_attachment_texel_size, + }; + __vk_append_struct(&data->rendering, &data->fsr_att); + } + + /* Append this one last because it lives in the subpass and we don't want + * to be changed by appending other structures later. 
+ */ + if (subpass->mrtss.multisampledRenderToSingleSampledEnable) + __vk_append_struct(&data->rendering, (void *)&subpass->mrtss); + + return &data->rendering; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyRenderPass(VkDevice _device, + VkRenderPass renderPass, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_render_pass, pass, renderPass); + + if (!pass) + return; + + vk_object_free(device, pAllocator, pass); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetRenderAreaGranularity(VkDevice device, + VkRenderPass renderPass, + VkExtent2D *pGranularity) +{ + *pGranularity = (VkExtent2D){1, 1}; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetRenderingAreaGranularityKHR( + VkDevice _device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo, + VkExtent2D *pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; +} + +static VkRenderPassSampleLocationsBeginInfoEXT * +clone_rp_sample_locations(const VkRenderPassSampleLocationsBeginInfoEXT *loc) +{ + uint32_t sl_count = 0; + + for (uint32_t i = 0; i < loc->attachmentInitialSampleLocationsCount; i++) { + const VkAttachmentSampleLocationsEXT *att_sl_in = + &loc->pAttachmentInitialSampleLocations[i]; + sl_count += att_sl_in->sampleLocationsInfo.sampleLocationsCount; + } + for (uint32_t i = 0; i < loc->postSubpassSampleLocationsCount; i++) { + const VkSubpassSampleLocationsEXT *sp_sl_in = + &loc->pPostSubpassSampleLocations[i]; + sl_count += sp_sl_in->sampleLocationsInfo.sampleLocationsCount; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, VkRenderPassSampleLocationsBeginInfoEXT, new_loc, 1); + VK_MULTIALLOC_DECL(&ma, VkAttachmentSampleLocationsEXT, new_att_sl, + loc->attachmentInitialSampleLocationsCount); + VK_MULTIALLOC_DECL(&ma, VkSubpassSampleLocationsEXT, new_sp_sl, + loc->postSubpassSampleLocationsCount); + VK_MULTIALLOC_DECL(&ma, VkSampleLocationEXT, sl, sl_count); + if (!vk_multialloc_alloc(&ma, vk_default_allocator(), + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return NULL; + + VkSampleLocationEXT *next_sl = sl; + for (uint32_t i = 0; i < loc->attachmentInitialSampleLocationsCount; i++) { + const VkAttachmentSampleLocationsEXT *att_sl_in = + &loc->pAttachmentInitialSampleLocations[i]; + const VkSampleLocationsInfoEXT *sli_in = &att_sl_in->sampleLocationsInfo; + + typed_memcpy(next_sl, sli_in->pSampleLocations, + sli_in->sampleLocationsCount); + + new_att_sl[i] = (VkAttachmentSampleLocationsEXT) { + .attachmentIndex = att_sl_in->attachmentIndex, + .sampleLocationsInfo = { + .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT, + .sampleLocationsPerPixel = sli_in->sampleLocationsPerPixel, + .sampleLocationGridSize = sli_in->sampleLocationGridSize, + .sampleLocationsCount = sli_in->sampleLocationsCount, + .pSampleLocations = next_sl, + }, + }; + + next_sl += sli_in->sampleLocationsCount; + } + + for (uint32_t i = 0; i < loc->postSubpassSampleLocationsCount; i++) { + const VkSubpassSampleLocationsEXT *sp_sl_in = + &loc->pPostSubpassSampleLocations[i]; + const VkSampleLocationsInfoEXT *sli_in = &sp_sl_in->sampleLocationsInfo; + + typed_memcpy(next_sl, sli_in->pSampleLocations, + sli_in->sampleLocationsCount); + + new_sp_sl[i] = (VkSubpassSampleLocationsEXT) { + .subpassIndex = sp_sl_in->subpassIndex, + .sampleLocationsInfo = { + .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT, + .sampleLocationsPerPixel = sli_in->sampleLocationsPerPixel, + .sampleLocationGridSize = sli_in->sampleLocationGridSize, + .sampleLocationsCount = sli_in->sampleLocationsCount, + .pSampleLocations = 
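/*
 * [Editor's illustrative sketch, not part of the original file.] A driver's
 * vkBeginCommandBuffer path might use the ..._as_rendering_resume() helper
 * above roughly like this. The function name and the stack-buffer size are
 * placeholders; real code must reserve enough space for struct
 * vk_gcbiarr_data plus its trailing attachment array.
 */
static void
drv_begin_render_pass_continue(VkCommandBuffer commandBuffer,
                               VkCommandBufferLevel level,
                               const VkCommandBufferBeginInfo *pBeginInfo,
                               const struct vk_device_dispatch_table *disp)
{
   uint8_t storage[2048]; /* placeholder size, see note above */
   const VkRenderingInfo *resume =
      vk_get_command_buffer_inheritance_as_rendering_resume(level, pBeginInfo,
                                                            storage);
   if (resume != NULL) {
      /* Re-enter the render pass in the resuming state */
      disp->CmdBeginRendering(commandBuffer, resume);
   }
}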
next_sl, + }, + }; + + next_sl += sli_in->sampleLocationsCount; + } + + assert(next_sl == sl + sl_count); + + *new_loc = (VkRenderPassSampleLocationsBeginInfoEXT) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT, + .attachmentInitialSampleLocationsCount = loc->attachmentInitialSampleLocationsCount, + .pAttachmentInitialSampleLocations = new_att_sl, + .postSubpassSampleLocationsCount = loc->postSubpassSampleLocationsCount, + .pPostSubpassSampleLocations = new_sp_sl, + }; + + return new_loc; +} + +static const VkSampleLocationsInfoEXT * +get_subpass_sample_locations(const VkRenderPassSampleLocationsBeginInfoEXT *loc, + uint32_t subpass_idx) +{ + for (uint32_t i = 0; i < loc->postSubpassSampleLocationsCount; i++) { + if (loc->pPostSubpassSampleLocations[i].subpassIndex == subpass_idx) + return &loc->pPostSubpassSampleLocations[i].sampleLocationsInfo; + } + + return NULL; +} + +static bool +vk_image_layout_supports_input_attachment(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_GENERAL: + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: + case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT: + return true; + default: + return false; + } +} + +struct stage_access { + VkPipelineStageFlagBits2 stages; + VkAccessFlagBits2 access; +}; + +static bool +vk_image_layout_are_all_aspects_read_only(VkImageLayout layout, + VkImageAspectFlags aspects) +{ + u_foreach_bit(a, aspects) { + VkImageAspectFlagBits aspect = 1u << a; + if (!vk_image_layout_is_read_only(layout, aspect)) + return false; + } + return true; +} + +static struct stage_access +stage_access_for_layout(VkImageLayout layout, VkImageAspectFlags aspects) +{ + VkPipelineStageFlagBits2 stages = 0; + VkAccessFlagBits2 access = 0; + + if (vk_image_layout_supports_input_attachment(layout)) { + stages |= VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; + access |= VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT; + } + + if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + access |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + if (!vk_image_layout_are_all_aspects_read_only(layout, aspects)) { + access |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + + /* It might be a resolve attachment */ + stages |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT; + access |= VK_ACCESS_2_TRANSFER_WRITE_BIT; + } + } else { + /* Color */ + if (!vk_image_layout_are_all_aspects_read_only(layout, aspects)) { + /* There are no read-only color attachments */ + stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + access |= VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; + + /* It might be a resolve attachment */ + stages |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT; + access |= VK_ACCESS_2_TRANSFER_WRITE_BIT; + } + } + + return (struct stage_access) { + .stages = stages, + .access = access, + }; +} + +static void +transition_image_range(const struct vk_image_view *image_view, + VkImageSubresourceRange range, + VkImageLayout old_layout, + VkImageLayout new_layout, + VkImageLayout old_stencil_layout, + VkImageLayout new_stencil_layout, + const 
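/*
 * [Editor's illustrative note, not part of the original file.] Example of
 * what stage_access_for_layout() above produces: for
 * VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL with depth|stencil
 * aspects it returns stages EARLY_FRAGMENT_TESTS | LATE_FRAGMENT_TESTS |
 * ALL_TRANSFER and access DEPTH_STENCIL_ATTACHMENT_READ |
 * DEPTH_STENCIL_ATTACHMENT_WRITE | TRANSFER_WRITE, while for
 * VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL it adds
 * FRAGMENT_SHADER / INPUT_ATTACHMENT_READ (that layout can be used for
 * input attachments) and drops every write bit.
 */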
VkSampleLocationsInfoEXT *sample_locations, + uint32_t *barrier_count, + uint32_t max_barrier_count, + VkImageMemoryBarrier2 *barriers) +{ + VkImageAspectFlags aspects_left = range.aspectMask; + while (aspects_left) { + range.aspectMask = aspects_left; + + /* If we have a depth/stencil image and one of the layouts doesn't match + * between depth and stencil, we need two barriers. Restrict to depth + * and we'll pick up stencil on the next iteration. + */ + if (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT) && + (old_layout != old_stencil_layout || + new_layout != new_stencil_layout)) + range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + + if (range.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { + /* We're down to a single aspect bit so this is going to be the last + * iteration and it's fine to stomp the input variables here. + */ + old_layout = old_stencil_layout; + new_layout = new_stencil_layout; + } + + if (new_layout != old_layout) { + /* We could go about carefully calculating every possible way the + * attachment may have been used in the render pass or we can break + * out the big hammer and throw in any stage and access flags + * possible for the given layouts. + */ + struct stage_access src_sa, dst_sa; + src_sa = stage_access_for_layout(old_layout, range.aspectMask); + dst_sa = stage_access_for_layout(new_layout, range.aspectMask); + + assert(*barrier_count < max_barrier_count); + barriers[(*barrier_count)++] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = sample_locations, + .srcStageMask = src_sa.stages, + .srcAccessMask = src_sa.access, + .dstStageMask = dst_sa.stages, + .dstAccessMask = dst_sa.access, + .oldLayout = old_layout, + .newLayout = new_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vk_image_to_handle(image_view->image), + .subresourceRange = range, + }; + } + + aspects_left &= ~range.aspectMask; + } +} + +static bool +can_use_attachment_initial_layout(struct vk_command_buffer *cmd_buffer, + uint32_t att_idx, + uint32_t view_mask, + VkImageLayout *layout_out, + VkImageLayout *stencil_layout_out) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; + const struct vk_render_pass_attachment *rp_att = &pass->attachments[att_idx]; + struct vk_attachment_state *att_state = &cmd_buffer->attachments[att_idx]; + const struct vk_image_view *image_view = att_state->image_view; + + if ((rp_att->aspects & ~VK_IMAGE_ASPECT_STENCIL_BIT) && + rp_att->load_op != VK_ATTACHMENT_LOAD_OP_CLEAR) + return false; + + if ((rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + rp_att->stencil_load_op != VK_ATTACHMENT_LOAD_OP_CLEAR) + return false; + + if (cmd_buffer->render_area.offset.x != 0 || + cmd_buffer->render_area.offset.y != 0 || + cmd_buffer->render_area.extent.width != image_view->extent.width || + cmd_buffer->render_area.extent.height != image_view->extent.height) + return false; + + if (image_view->image->image_type == VK_IMAGE_TYPE_3D) { + /* For 3D images, the view has to be the whole thing */ + if (image_view->base_array_layer != 0) + return false; + + if (pass->is_multiview) { + if (!util_is_power_of_two_or_zero(view_mask + 1) || + util_last_bit(view_mask) != image_view->layer_count) + return false; + } else { + if (framebuffer->layers != image_view->layer_count) + return false; + } + } + + /* Finally, check if the entire thing is undefined. 
It's ok to smash the + * view_mask now as the only thing using it will be the loop below. + */ + + /* 3D is stupidly special. See transition_attachment() */ + if (image_view->image->image_type == VK_IMAGE_TYPE_3D) + view_mask = 1; + + VkImageLayout layout = VK_IMAGE_LAYOUT_MAX_ENUM; + VkImageLayout stencil_layout = VK_IMAGE_LAYOUT_MAX_ENUM; + + assert(view_mask != 0); + u_foreach_bit(view, view_mask) { + assert(view >= 0 && view < MESA_VK_MAX_MULTIVIEW_VIEW_COUNT); + struct vk_attachment_view_state *att_view_state = &att_state->views[view]; + + if (rp_att->aspects & ~VK_IMAGE_ASPECT_STENCIL_BIT) { + if (layout == VK_IMAGE_LAYOUT_MAX_ENUM) + layout = att_view_state->layout; + else if (layout != att_view_state->layout) + return false; + } + + if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + if (stencil_layout == VK_IMAGE_LAYOUT_MAX_ENUM) + stencil_layout = att_view_state->stencil_layout; + else if (stencil_layout != att_view_state->stencil_layout) + return false; + } + } + + if (layout != VK_IMAGE_LAYOUT_MAX_ENUM) + *layout_out = layout; + else if (layout_out != NULL) + *layout_out = VK_IMAGE_LAYOUT_UNDEFINED; + + if (stencil_layout != VK_IMAGE_LAYOUT_MAX_ENUM) + *stencil_layout_out = stencil_layout; + else if (stencil_layout_out != NULL) + *stencil_layout_out = VK_IMAGE_LAYOUT_UNDEFINED; + + return true; +} + +uint32_t +vk_command_buffer_get_attachment_layout(const struct vk_command_buffer *cmd_buffer, + const struct vk_image *image, + VkImageLayout *out_layout, + VkImageLayout *out_stencil_layout) +{ + const struct vk_render_pass *render_pass = cmd_buffer->render_pass; + assert(render_pass != NULL); + + const struct vk_subpass *subpass = + &render_pass->subpasses[cmd_buffer->subpass_idx]; + int first_view = ffs(subpass->view_mask) - 1; + + for (uint32_t a = 0; a < render_pass->attachment_count; a++) { + if (cmd_buffer->attachments[a].image_view->image == image) { + *out_layout = cmd_buffer->attachments[a].views[first_view].layout; + *out_stencil_layout = + cmd_buffer->attachments[a].views[first_view].stencil_layout; + return a; + } + } + unreachable("Image not found in attachments"); +} + +void +vk_command_buffer_set_attachment_layout(struct vk_command_buffer *cmd_buffer, + uint32_t att_idx, + VkImageLayout layout, + VkImageLayout stencil_layout) +{ + const struct vk_render_pass *render_pass = cmd_buffer->render_pass; + const struct vk_subpass *subpass = + &render_pass->subpasses[cmd_buffer->subpass_idx]; + uint32_t view_mask = subpass->view_mask; + struct vk_attachment_state *att_state = &cmd_buffer->attachments[att_idx]; + + u_foreach_bit(view, view_mask) { + assert(view >= 0 && view < MESA_VK_MAX_MULTIVIEW_VIEW_COUNT); + struct vk_attachment_view_state *att_view_state = &att_state->views[view]; + + att_view_state->layout = layout; + att_view_state->stencil_layout = stencil_layout; + } +} + +static void +transition_attachment(struct vk_command_buffer *cmd_buffer, + uint32_t att_idx, + uint32_t view_mask, + VkImageLayout layout, + VkImageLayout stencil_layout, + uint32_t *barrier_count, + uint32_t max_barrier_count, + VkImageMemoryBarrier2 *barriers) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; + const struct vk_render_pass_attachment *pass_att = + &pass->attachments[att_idx]; + struct vk_attachment_state *att_state = &cmd_buffer->attachments[att_idx]; + const struct vk_image_view *image_view = att_state->image_view; + + /* 3D is stupidly special. 
From the Vulkan 1.3.204 spec: + * + * "When the VkImageSubresourceRange structure is used to select a + * subset of the slices of a 3D image’s mip level in order to create + * a 2D or 2D array image view of a 3D image created with + * VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, baseArrayLayer and + * layerCount specify the first slice index and the number of slices + * to include in the created image view. Such an image view can be + * used as a framebuffer attachment that refers only to the specified + * range of slices of the selected mip level. However, any layout + * transitions performed on such an attachment view during a render + * pass instance still apply to the entire subresource referenced + * which includes all the slices of the selected mip level." + * + * To deal with this, we expand out the layer range to include the + * entire 3D image and treat them as having only a single view even when + * multiview is enabled. This later part means that we effectively only + * track one image layout for the entire attachment rather than one per + * view like we do for all the others. + */ + if (image_view->image->image_type == VK_IMAGE_TYPE_3D) + view_mask = 1; + + u_foreach_bit(view, view_mask) { + assert(view >= 0 && view < MESA_VK_MAX_MULTIVIEW_VIEW_COUNT); + struct vk_attachment_view_state *att_view_state = &att_state->views[view]; + + /* First, check to see if we even need a transition */ + if (att_view_state->layout == layout && + att_view_state->stencil_layout == stencil_layout) + continue; + + VkImageSubresourceRange range = { + .aspectMask = pass_att->aspects, + .baseMipLevel = image_view->base_mip_level, + .levelCount = 1, + }; + + /* From the Vulkan 1.3.207 spec: + * + * "Automatic layout transitions apply to the entire image + * subresource attached to the framebuffer. If multiview is not + * enabled and the attachment is a view of a 1D or 2D image, the + * automatic layout transitions apply to the number of layers + * specified by VkFramebufferCreateInfo::layers. If multiview is + * enabled and the attachment is a view of a 1D or 2D image, the + * automatic layout transitions apply to the layers corresponding to + * views which are used by some subpass in the render pass, even if + * that subpass does not reference the given attachment. If the + * attachment view is a 2D or 2D array view of a 3D image, even if + * the attachment view only refers to a subset of the slices of the + * selected mip level of the 3D image, automatic layout transitions + * apply to the entire subresource referenced which is the entire mip + * level in this case." 
+ */ + if (image_view->image->image_type == VK_IMAGE_TYPE_3D) { + assert(view == 0); + range.baseArrayLayer = 0; + range.layerCount = image_view->extent.depth; + } else if (pass->is_multiview) { + range.baseArrayLayer = image_view->base_array_layer + view; + range.layerCount = 1; + } else { + assert(view == 0); + range.baseArrayLayer = image_view->base_array_layer; + range.layerCount = framebuffer->layers; + } + + transition_image_range(image_view, range, + att_view_state->layout, layout, + att_view_state->stencil_layout, stencil_layout, + att_view_state->sample_locations, + barrier_count, max_barrier_count, barriers); + + att_view_state->layout = layout; + att_view_state->stencil_layout = stencil_layout; + } +} + +static void +load_attachment(struct vk_command_buffer *cmd_buffer, + uint32_t att_idx, uint32_t view_mask, + VkImageLayout layout, VkImageLayout stencil_layout) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; + const struct vk_render_pass_attachment *rp_att = &pass->attachments[att_idx]; + struct vk_attachment_state *att_state = &cmd_buffer->attachments[att_idx]; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + /* Don't load any views we've already loaded */ + view_mask &= ~att_state->views_loaded; + if (view_mask == 0) + return; + + /* From here on, if we return, we loaded the views */ + att_state->views_loaded |= view_mask; + + /* We only need to load/store if there's a clear */ + bool need_load_store = false; + if ((rp_att->aspects & ~VK_IMAGE_ASPECT_STENCIL_BIT) && + rp_att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + need_load_store = true; + + if ((rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + rp_att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + need_load_store = true; + + if (!need_load_store) + return; + + const VkRenderingAttachmentInfo att = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = vk_image_view_to_handle(att_state->image_view), + .imageLayout = layout, + .loadOp = rp_att->load_op, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .clearValue = att_state->clear_value, + }; + + const VkRenderingAttachmentInfo stencil_att = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = vk_image_view_to_handle(att_state->image_view), + .imageLayout = stencil_layout, + .loadOp = rp_att->stencil_load_op, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .clearValue = att_state->clear_value, + }; + + VkRenderingInfo render = { + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .renderArea = cmd_buffer->render_area, + .layerCount = pass->is_multiview ? 1 : framebuffer->layers, + .viewMask = pass->is_multiview ? 
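/*
 * [Editor's illustrative note, not part of the original file.] For a
 * two-view multiview pass (view_mask = 0x3) over a 2D depth/stencil
 * attachment whose per-view layouts all go UNDEFINED ->
 * DEPTH_STENCIL_ATTACHMENT_OPTIMAL, the loop above emits one
 * VkImageMemoryBarrier2 per view, each with a subresourceRange of
 * baseArrayLayer = base_array_layer + view and layerCount = 1. For a 3D
 * attachment the view_mask collapses to a single view and one barrier
 * covers every slice of the selected mip level.
 */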
view_mask : 0, + }; + + if (rp_att->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + render.pDepthAttachment = &att; + if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + render.pStencilAttachment = &stencil_att; + } else { + render.colorAttachmentCount = 1; + render.pColorAttachments = &att; + } + + disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd_buffer), &render); + disp->CmdEndRendering(vk_command_buffer_to_handle(cmd_buffer)); +} + +static void +begin_subpass(struct vk_command_buffer *cmd_buffer, + const VkSubpassBeginInfo *begin_info) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; + const uint32_t subpass_idx = cmd_buffer->subpass_idx; + assert(subpass_idx < pass->subpass_count); + const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + /* First, we figure out all our attachments and attempt to handle image + * layout transitions and load ops as part of vkCmdBeginRendering if we + * can. For any we can't handle this way, we'll need explicit barriers + * or quick vkCmdBegin/EndRendering to do the load op. + */ + + STACK_ARRAY(VkRenderingAttachmentInfo, color_attachments, + subpass->color_count); + STACK_ARRAY(VkRenderingAttachmentInitialLayoutInfoMESA, + color_attachment_initial_layouts, + subpass->color_count); + + for (uint32_t i = 0; i < subpass->color_count; i++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[i]; + VkRenderingAttachmentInfo *color_attachment = &color_attachments[i]; + + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { + *color_attachment = (VkRenderingAttachmentInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = VK_NULL_HANDLE, + }; + continue; + } + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + struct vk_attachment_state *att_state = + &cmd_buffer->attachments[sp_att->attachment]; + + *color_attachment = (VkRenderingAttachmentInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = vk_image_view_to_handle(att_state->image_view), + .imageLayout = sp_att->layout, + }; + + if (!(subpass->view_mask & att_state->views_loaded)) { + /* None of these views have been used before */ + color_attachment->loadOp = rp_att->load_op; + color_attachment->clearValue = att_state->clear_value; + att_state->views_loaded |= subpass->view_mask; + + VkImageLayout initial_layout; + if (can_use_attachment_initial_layout(cmd_buffer, + sp_att->attachment, + subpass->view_mask, + &initial_layout, NULL) && + sp_att->layout != initial_layout) { + assert(color_attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); + + VkRenderingAttachmentInitialLayoutInfoMESA *color_initial_layout = + &color_attachment_initial_layouts[i]; + *color_initial_layout = (VkRenderingAttachmentInitialLayoutInfoMESA) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA, + .initialLayout = initial_layout, + }; + __vk_append_struct(color_attachment, color_initial_layout); + + vk_command_buffer_set_attachment_layout(cmd_buffer, + sp_att->attachment, + sp_att->layout, + VK_IMAGE_LAYOUT_UNDEFINED); + } + } else { + /* We've seen at least one of the views of this attachment before so + * we need to LOAD_OP_LOAD. 
+ */ + color_attachment->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + } + + if (!(subpass->view_mask & ~sp_att->last_subpass)) { + /* This is the last subpass for every view */ + color_attachment->storeOp = rp_att->store_op; + } else { + /* For at least one of our views, this isn't the last subpass + * + * In the edge case where we have lots of weird overlap between view + * masks of different subpasses, this may mean that we get STORE_OP_STORE in + * some places where it may have wanted STORE_OP_NONE but that should + * be harmless. + */ + color_attachment->storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + + if (sp_att->resolve != NULL) { + assert(sp_att->resolve->attachment < pass->attachment_count); + struct vk_attachment_state *res_att_state = + &cmd_buffer->attachments[sp_att->resolve->attachment]; + + /* Resolve attachments are entirely overwritten by the resolve + * operation so the load op really doesn't matter. We can consider + * the resolve as being the load. + */ + res_att_state->views_loaded |= subpass->view_mask; + + if (vk_format_is_int(res_att_state->image_view->format)) + color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + else + color_attachment->resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; + + color_attachment->resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + color_attachment->resolveImageLayout = sp_att->resolve->layout; + } else if (subpass->mrtss.multisampledRenderToSingleSampledEnable && + rp_att->samples == VK_SAMPLE_COUNT_1_BIT) { + if (vk_format_is_int(att_state->image_view->format)) + color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + else + color_attachment->resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; + } + } + + VkRenderingAttachmentInfo depth_attachment = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + }; + VkRenderingAttachmentInfo stencil_attachment = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + }; + VkRenderingAttachmentInitialLayoutInfoMESA depth_initial_layout = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA, + }; + VkRenderingAttachmentInitialLayoutInfoMESA stencil_initial_layout = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA, + }; + + const VkSampleLocationsInfoEXT *sample_locations = NULL; + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + struct vk_attachment_state *att_state = + &cmd_buffer->attachments[sp_att->attachment]; + + assert(sp_att->aspects == rp_att->aspects); + if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + depth_attachment.imageView = + vk_image_view_to_handle(att_state->image_view); + depth_attachment.imageLayout = sp_att->layout; + } + + if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + stencil_attachment.imageView = + vk_image_view_to_handle(att_state->image_view); + stencil_attachment.imageLayout = sp_att->stencil_layout; + } + + if (!(subpass->view_mask & att_state->views_loaded)) { + /* None of these views have been used before */ + depth_attachment.loadOp = rp_att->load_op; + depth_attachment.clearValue = att_state->clear_value; + stencil_attachment.loadOp = rp_att->stencil_load_op; + stencil_attachment.clearValue = att_state->clear_value; + att_state->views_loaded |= subpass->view_mask; + + VkImageLayout initial_layout, initial_stencil_layout; + if
(can_use_attachment_initial_layout(cmd_buffer, + sp_att->attachment, + subpass->view_mask, + &initial_layout, + &initial_stencil_layout)) { + if ((rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + sp_att->layout != initial_layout) { + assert(depth_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); + depth_initial_layout.initialLayout = initial_layout; + __vk_append_struct(&depth_attachment, + &depth_initial_layout); + } + + if ((rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + sp_att->stencil_layout != initial_stencil_layout) { + assert(stencil_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); + stencil_initial_layout.initialLayout = initial_stencil_layout; + __vk_append_struct(&stencil_attachment, + &stencil_initial_layout); + } + + vk_command_buffer_set_attachment_layout(cmd_buffer, + sp_att->attachment, + sp_att->layout, + sp_att->stencil_layout); + } + } else { + /* We've seen at least one of the views of this attachment before so + * we need to LOAD_OP_LOAD. + */ + depth_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + } + + if (!(subpass->view_mask & ~sp_att->last_subpass)) { + /* This is the last subpass for every view */ + depth_attachment.storeOp = rp_att->store_op; + stencil_attachment.storeOp = rp_att->stencil_store_op; + } else { + /* For at least one of our views, this isn't the last subpass + * + * In the edge case where we have lots of weird overlap between view + * masks of different subpasses, this may mean that we get STORE_OP_STORE in + * some places where it may have wanted STORE_OP_NONE but that should + * be harmless. + */ + depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + + /* From the Vulkan 1.3.212 spec: + * + * "If the current render pass does not use the attachment as a + * depth/stencil attachment in any subpass that happens-before, the + * automatic layout transition uses the sample locations state + * specified in the sampleLocationsInfo member of the element of the + * VkRenderPassSampleLocationsBeginInfoEXT::pAttachmentInitialSampleLocations + * array for which the attachmentIndex member equals the attachment + * index of the attachment, if one is specified. Otherwise, the + * automatic layout transition uses the sample locations state + * specified in the sampleLocationsInfo member of the element of the + * VkRenderPassSampleLocationsBeginInfoEXT::pPostSubpassSampleLocations + * array for which the subpassIndex member equals the index of the + * subpass that last used the attachment as a depth/stencil + * attachment, if one is specified." + * + * Unfortunately, this says nothing whatsoever about multiview. + * However, since multiview render passes are described as a single-view + * render pass repeated per-view, we assume this is per-view. + */ + if (cmd_buffer->pass_sample_locations != NULL && + (att_state->image_view->image->create_flags & + VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)) { + sample_locations = + get_subpass_sample_locations(cmd_buffer->pass_sample_locations, + subpass_idx); + + u_foreach_bit(view, subpass->view_mask) + att_state->views[view].sample_locations = sample_locations; + } + + if (sp_att->resolve != NULL || + (subpass->mrtss.multisampledRenderToSingleSampledEnable && + rp_att->samples == VK_SAMPLE_COUNT_1_BIT)) { + const struct vk_subpass_attachment *res_sp_att = sp_att->resolve ?
sp_att->resolve : sp_att; + assert(res_sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *res_rp_att = + &pass->attachments[res_sp_att->attachment]; + struct vk_attachment_state *res_att_state = + &cmd_buffer->attachments[res_sp_att->attachment]; + + /* From the Vulkan 1.3.204 spec: + * + * "VkSubpassDescriptionDepthStencilResolve::depthResolveMode is + * ignored if the VkFormat of the pDepthStencilResolveAttachment + * does not have a depth component. Similarly, + * VkSubpassDescriptionDepthStencilResolve::stencilResolveMode is + * ignored if the VkFormat of the pDepthStencilResolveAttachment + * does not have a stencil component." + * + * TODO: Should we handle this here or when we create the render + * pass? Handling it here makes load ops "correct" in the sense + * that, if we resolve to the wrong aspect, we will still consider + * it bound and clear it if requested. + */ + VkResolveModeFlagBits depth_resolve_mode = VK_RESOLVE_MODE_NONE; + if (res_rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + depth_resolve_mode = subpass->depth_resolve_mode; + + VkResolveModeFlagBits stencil_resolve_mode = VK_RESOLVE_MODE_NONE; + if (res_rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + stencil_resolve_mode = subpass->stencil_resolve_mode; + + VkImageAspectFlags resolved_aspects = 0; + + if (depth_resolve_mode != VK_RESOLVE_MODE_NONE) { + depth_attachment.resolveMode = depth_resolve_mode; + if (sp_att->resolve) { + depth_attachment.resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + depth_attachment.resolveImageLayout = + sp_att->resolve->layout; + } + + resolved_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } + + if (stencil_resolve_mode != VK_RESOLVE_MODE_NONE) { + stencil_attachment.resolveMode = stencil_resolve_mode; + if (sp_att->resolve) { + stencil_attachment.resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + stencil_attachment.resolveImageLayout = + sp_att->resolve->stencil_layout; + } + + resolved_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + + if (sp_att->resolve && resolved_aspects == rp_att->aspects) { + /* The resolve attachment is entirely overwritten by the + * resolve operation so the load op really doesn't matter. + * We can consider the resolve as being the load. + */ + res_att_state->views_loaded |= subpass->view_mask; + } + } + } + + /* Next, handle any barriers we need. This may include a general + * VkMemoryBarrier for subpass dependencies and it may include some + * number of VkImageMemoryBarriers for layout transitions. + */ + + bool needs_mem_barrier = false; + VkMemoryBarrier2 mem_barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + }; + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->dst_subpass != subpass_idx) + continue; + + if (dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkSubpassDependency2-dependencyFlags-03091 + * + * "If dependencyFlags includes VK_DEPENDENCY_VIEW_LOCAL_BIT, + * dstSubpass must not be equal to VK_SUBPASS_EXTERNAL" + */ + assert(dep->src_subpass != VK_SUBPASS_EXTERNAL); + + assert(dep->src_subpass < pass->subpass_count); + const struct vk_subpass *src_subpass = + &pass->subpasses[dep->src_subpass]; + + /* Figure out the set of views in the source subpass affected by this + * dependency. 
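/*
 * [Editor's illustrative note, not part of the original file.] Example of
 * the view-offset math below: with viewOffset = -1 and a destination
 * view_mask of 0x2 (dstView 1), the source mask becomes 0x2 >> 1 = 0x1,
 * i.e. dstView 1 depends on view 0 of the source subpass. If the source
 * subpass's view_mask does not contain that view, the dependency is
 * skipped entirely.
 */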
+ */ + uint32_t src_dep_view_mask = subpass->view_mask; + if (dep->view_offset >= 0) + src_dep_view_mask <<= dep->view_offset; + else + src_dep_view_mask >>= -dep->view_offset; + + /* From the Vulkan 1.3.204 spec: + * + * "If the dependency is view-local, then each view (dstView) in + * the destination subpass depends on the view dstView + + * pViewOffsets[dependency] in the source subpass. If there is not + * such a view in the source subpass, then this dependency does + * not affect that view in the destination subpass." + */ + if (!(src_subpass->view_mask & src_dep_view_mask)) + continue; + } + + needs_mem_barrier = true; + mem_barrier.srcStageMask |= dep->src_stage_mask; + mem_barrier.srcAccessMask |= dep->src_access_mask; + mem_barrier.dstStageMask |= dep->dst_stage_mask; + mem_barrier.dstAccessMask |= dep->dst_access_mask; + } + + if (subpass_idx == 0) { + /* From the Vulkan 1.3.232 spec: + * + * "If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the + * first subpass that uses an attachment, then an implicit subpass + * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it + * is used in. The implicit subpass dependency only exists if there + * exists an automatic layout transition away from initialLayout. The + * subpass dependency operates as if defined with the following + * parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = VK_SUBPASS_EXTERNAL; + * .dstSubpass = firstSubpass; // First subpass attachment is used in + * .srcStageMask = VK_PIPELINE_STAGE_NONE; + * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .srcAccessMask = 0; + * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dependencyFlags = 0; + * };" + * + * We could track individual subpasses and attachments and views to make + * sure we only insert this barrier when it's absolutely necessary. + * However, this is only going to happen for the first subpass and + * you're probably going to take a stall in BeginRenderPass() anyway. + * If this is ever a perf problem, we can re-evaluate and do something + * more intellegent at that time. 
+ */ + needs_mem_barrier = true; + mem_barrier.dstStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + mem_barrier.dstAccessMask |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + uint32_t max_image_barrier_count = 0; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + max_image_barrier_count += util_bitcount(subpass->view_mask) * + util_bitcount(rp_att->aspects); + } + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) + max_image_barrier_count += util_bitcount(subpass->view_mask); + STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, max_image_barrier_count); + uint32_t image_barrier_count = 0; + + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + /* If we're using an initial layout, the attachment will already be + * marked as transitioned and this will be a no-op. + */ + transition_attachment(cmd_buffer, sp_att->attachment, + subpass->view_mask, + sp_att->layout, sp_att->stencil_layout, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + } + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) { + transition_attachment(cmd_buffer, pass->fragment_density_map.attachment, + subpass->view_mask, + pass->fragment_density_map.layout, + VK_IMAGE_LAYOUT_UNDEFINED, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + } + assert(image_barrier_count <= max_image_barrier_count); + + if (needs_mem_barrier || image_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = needs_mem_barrier ? 1 : 0, + .pMemoryBarriers = needs_mem_barrier ? &mem_barrier : NULL, + .imageMemoryBarrierCount = image_barrier_count, + .pImageMemoryBarriers = image_barrier_count > 0 ? + image_barriers : NULL, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(image_barriers); + + /* Next, handle any VK_ATTACHMENT_LOAD_OP_CLEAR that we couldn't handle + * directly by emitting a quick vkCmdBegin/EndRendering to do the load. + */ + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, + sp_att->layout, sp_att->stencil_layout); + } + + /* TODO: Handle preserve attachments + * + * For immediate renderers, this isn't a big deal as LOAD_OP_LOAD and + * STORE_OP_STORE are effectively free. However, before this gets used on + * a tiling GPU, we should really hook up preserve attachments and use them + * to determine when we can use LOAD/STORE_OP_DONT_CARE between subpasses. 
+ */ + + VkRenderingInfo rendering = { + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .renderArea = cmd_buffer->render_area, + .layerCount = pass->is_multiview ? 1 : framebuffer->layers, + .viewMask = pass->is_multiview ? subpass->view_mask : 0, + .colorAttachmentCount = subpass->color_count, + .pColorAttachments = color_attachments, + .pDepthAttachment = &depth_attachment, + .pStencilAttachment = &stencil_attachment, + }; + + VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_attachment; + if (subpass->fragment_shading_rate_attachment) { + const struct vk_subpass_attachment *sp_att = + subpass->fragment_shading_rate_attachment; + + assert(sp_att->attachment < pass->attachment_count); + struct vk_attachment_state *att_state = + &cmd_buffer->attachments[sp_att->attachment]; + + /* Fragment shading rate attachments have no loadOp (it's implicitly + * LOAD_OP_LOAD) so we need to ensure the load op happens. + */ + load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, + sp_att->layout, sp_att->stencil_layout); + + fsr_attachment = (VkRenderingFragmentShadingRateAttachmentInfoKHR) { + .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, + .imageView = vk_image_view_to_handle(att_state->image_view), + .imageLayout = sp_att->layout, + .shadingRateAttachmentTexelSize = + subpass->fragment_shading_rate_attachment_texel_size, + }; + __vk_append_struct(&rendering, &fsr_attachment); + } + + VkRenderingFragmentDensityMapAttachmentInfoEXT fdm_attachment; + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) { + assert(pass->fragment_density_map.attachment < pass->attachment_count); + struct vk_attachment_state *att_state = + &cmd_buffer->attachments[pass->fragment_density_map.attachment]; + + /* From the Vulkan 1.3.125 spec: + * + * VUID-VkRenderPassFragmentDensityMapCreateInfoEXT-fragmentDensityMapAttachment-02550 + * + * If fragmentDensityMapAttachment is not VK_ATTACHMENT_UNUSED, + * fragmentDensityMapAttachment must reference an attachment with a + * loadOp equal to VK_ATTACHMENT_LOAD_OP_LOAD or + * VK_ATTACHMENT_LOAD_OP_DONT_CARE + * + * This means we don't have to implement the load op. + */ + + fdm_attachment = (VkRenderingFragmentDensityMapAttachmentInfoEXT) { + .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT, + .imageView = vk_image_view_to_handle(att_state->image_view), + .imageLayout = pass->fragment_density_map.layout, + }; + __vk_append_struct(&rendering, &fdm_attachment); + } + + VkSampleLocationsInfoEXT sample_locations_tmp; + if (sample_locations) { + sample_locations_tmp = *sample_locations; + __vk_append_struct(&rendering, &sample_locations_tmp); + } + + /* Append this one last because it lives in the subpass and we don't want + * to be changed by appending other structures later. 
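/*
 * [Editor's illustrative note, not part of the original file.] When every
 * optional piece is present, the VkRenderingInfo assembled above ends up
 * with a pNext chain in append order:
 * VkRenderingFragmentShadingRateAttachmentInfoKHR ->
 * VkRenderingFragmentDensityMapAttachmentInfoEXT ->
 * VkSampleLocationsInfoEXT -> VkMultisampledRenderToSingleSampledInfoEXT,
 * the MRTSS struct (appended immediately below) going last because it is
 * stored in the subpass and must not be modified by later appends.
 */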
+ */ + if (subpass->mrtss.multisampledRenderToSingleSampledEnable) + __vk_append_struct(&rendering, (void *)&subpass->mrtss); + + disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd_buffer), + &rendering); + + STACK_ARRAY_FINISH(color_attachments); + STACK_ARRAY_FINISH(color_attachment_initial_layouts); +} + +static void +end_subpass(struct vk_command_buffer *cmd_buffer, + const VkSubpassEndInfo *end_info) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + const uint32_t subpass_idx = cmd_buffer->subpass_idx; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + disp->CmdEndRendering(vk_command_buffer_to_handle(cmd_buffer)); + + bool needs_mem_barrier = false; + VkMemoryBarrier2 mem_barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + }; + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->src_subpass != subpass_idx) + continue; + + if (dep->dst_subpass != VK_SUBPASS_EXTERNAL) + continue; + + needs_mem_barrier = true; + mem_barrier.srcStageMask |= dep->src_stage_mask; + mem_barrier.srcAccessMask |= dep->src_access_mask; + mem_barrier.dstStageMask |= dep->dst_stage_mask; + mem_barrier.dstAccessMask |= dep->dst_access_mask; + } + + if (subpass_idx == pass->subpass_count - 1) { + /* From the Vulkan 1.3.232 spec: + * + * "Similarly, if there is no subpass dependency from the last + * subpass that uses an attachment to VK_SUBPASS_EXTERNAL, then an + * implicit subpass dependency exists from the last subpass it is + * used in to VK_SUBPASS_EXTERNAL. The implicit subpass dependency + * only exists if there exists an automatic layout transition into + * finalLayout. The subpass dependency operates as if defined with + * the following parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = lastSubpass; // Last subpass attachment is used in + * .dstSubpass = VK_SUBPASS_EXTERNAL; + * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .dstStageMask = VK_PIPELINE_STAGE_NONE; + * .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dstAccessMask = 0; + * .dependencyFlags = 0; + * };" + * + * We could track individual subpasses and attachments and views to make + * sure we only insert this barrier when it's absolutely necessary. + * However, this is only going to happen for the last subpass and + * you're probably going to take a stall in EndRenderPass() anyway. + * If this is ever a perf problem, we can re-evaluate and do something + * more intellegent at that time. 
+ */ + needs_mem_barrier = true; + mem_barrier.srcStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + mem_barrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + if (needs_mem_barrier) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = 1, + .pMemoryBarriers = &mem_barrier, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBeginInfo, + const VkSubpassBeginInfo *pSubpassBeginInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(vk_render_pass, pass, pRenderPassBeginInfo->renderPass); + VK_FROM_HANDLE(vk_framebuffer, framebuffer, + pRenderPassBeginInfo->framebuffer); + + assert(cmd_buffer->render_pass == NULL); + cmd_buffer->render_pass = pass; + cmd_buffer->subpass_idx = 0; + + assert(cmd_buffer->framebuffer == NULL); + cmd_buffer->framebuffer = framebuffer; + + cmd_buffer->render_area = pRenderPassBeginInfo->renderArea; + + assert(cmd_buffer->attachments == NULL); + if (pass->attachment_count > ARRAY_SIZE(cmd_buffer->_attachments)) { + cmd_buffer->attachments = malloc(pass->attachment_count * + sizeof(*cmd_buffer->attachments)); + } else { + cmd_buffer->attachments = cmd_buffer->_attachments; + } + + const VkRenderPassAttachmentBeginInfo *attach_begin = + vk_find_struct_const(pRenderPassBeginInfo, + RENDER_PASS_ATTACHMENT_BEGIN_INFO); + if (!attach_begin) + assert(pass->attachment_count == framebuffer->attachment_count); + + const VkImageView *image_views; + if (attach_begin && attach_begin->attachmentCount != 0) { + assert(attach_begin->attachmentCount == pass->attachment_count); + image_views = attach_begin->pAttachments; + } else { + assert(framebuffer->attachment_count >= pass->attachment_count); + image_views = framebuffer->attachments; + } + + for (uint32_t a = 0; a < pass->attachment_count; ++a) { + VK_FROM_HANDLE(vk_image_view, image_view, image_views[a]); + const struct vk_render_pass_attachment *pass_att = &pass->attachments[a]; + struct vk_attachment_state *att_state = &cmd_buffer->attachments[a]; + + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkFramebufferCreateInfo-pAttachments-00880 + * + * "If renderpass is not VK_NULL_HANDLE and flags does not include + * VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT, each element of pAttachments + * must have been created with a VkFormat value that matches the + * VkFormat specified by the corresponding VkAttachmentDescription in + * renderPass" + * + * and + * + * VUID-VkRenderPassBeginInfo-framebuffer-03216 + * + * "If framebuffer was created with a VkFramebufferCreateInfo::flags + * value that included VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT, each + * element of the pAttachments member of a + * VkRenderPassAttachmentBeginInfo structure included in the pNext + * chain must be a VkImageView of an image created with a value of + * VkImageViewCreateInfo::format equal to the corresponding value of + * VkAttachmentDescription::format in renderPass" + */ + assert(image_view->format == pass_att->format); + + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkFramebufferCreateInfo-pAttachments-00881 + * + * "If renderpass is not VK_NULL_HANDLE and flags does not include + * VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT, 
each element of pAttachments + * must have been created with a samples value that matches the + * samples value specified by the corresponding + * VkAttachmentDescription in renderPass" + * + * and + * + * UID-VkRenderPassBeginInfo-framebuffer-03217 + * + * "If framebuffer was created with a VkFramebufferCreateInfo::flags + * value that included VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT, each + * element of the pAttachments member of a + * VkRenderPassAttachmentBeginInfo structure included in the pNext + * chain must be a VkImageView of an image created with a value of + * VkImageCreateInfo::samples equal to the corresponding value of + * VkAttachmentDescription::samples in renderPass" + */ + assert(image_view->image->samples == pass_att->samples); + + /* From the Vulkan 1.3.204 spec: + * + * If multiview is enabled and the shading rate attachment has + * multiple layers, the shading rate attachment texel is selected + * from the layer determined by the ViewIndex built-in. If multiview + * is disabled, and both the shading rate attachment and the + * framebuffer have multiple layers, the shading rate attachment + * texel is selected from the layer determined by the Layer built-in. + * Otherwise, the texel is unconditionally selected from the first + * layer of the attachment. + */ + if (!(image_view->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) + assert(util_last_bit(pass_att->view_mask) <= image_view->layer_count); + + *att_state = (struct vk_attachment_state) { + .image_view = image_view, + .views_loaded = 0, + }; + + for (uint32_t v = 0; v < MESA_VK_MAX_MULTIVIEW_VIEW_COUNT; v++) { + att_state->views[v] = (struct vk_attachment_view_state) { + .layout = pass_att->initial_layout, + .stencil_layout = pass_att->initial_stencil_layout, + }; + } + + if (a < pRenderPassBeginInfo->clearValueCount) + att_state->clear_value = pRenderPassBeginInfo->pClearValues[a]; + } + + const VkRenderPassSampleLocationsBeginInfoEXT *rp_sl_info = + vk_find_struct_const(pRenderPassBeginInfo->pNext, + RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT); + if (rp_sl_info) { + cmd_buffer->pass_sample_locations = clone_rp_sample_locations(rp_sl_info); + assert(cmd_buffer->pass_sample_locations); + + for (uint32_t i = 0; i < rp_sl_info->attachmentInitialSampleLocationsCount; i++) { + const VkAttachmentSampleLocationsEXT *att_sl = + &rp_sl_info->pAttachmentInitialSampleLocations[i]; + + assert(att_sl->attachmentIndex < pass->attachment_count); + struct vk_attachment_state *att_state = + &cmd_buffer->attachments[att_sl->attachmentIndex]; + + /* Sample locations only matter for depth/stencil images created with + * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT + */ + if (vk_format_is_depth_or_stencil(att_state->image_view->format) && + (att_state->image_view->image->create_flags & + VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)) { + for (uint32_t v = 0; v < MESA_VK_MAX_MULTIVIEW_VIEW_COUNT; v++) + att_state->views[v].sample_locations = &att_sl->sampleLocationsInfo; + } + } + } + + begin_subpass(cmd_buffer, pSubpassBeginInfo); +} + +void +vk_command_buffer_reset_render_pass(struct vk_command_buffer *cmd_buffer) +{ + cmd_buffer->render_pass = NULL; + cmd_buffer->subpass_idx = 0; + cmd_buffer->framebuffer = NULL; + if (cmd_buffer->attachments != cmd_buffer->_attachments) + free(cmd_buffer->attachments); + cmd_buffer->attachments = NULL; + if (cmd_buffer->pass_sample_locations != NULL) + vk_free(vk_default_allocator(), cmd_buffer->pass_sample_locations); + cmd_buffer->pass_sample_locations = 
NULL; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdNextSubpass2(VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo *pSubpassBeginInfo, + const VkSubpassEndInfo *pSubpassEndInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + + end_subpass(cmd_buffer, pSubpassEndInfo); + cmd_buffer->subpass_idx++; + begin_subpass(cmd_buffer, pSubpassBeginInfo); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdEndRenderPass2(VkCommandBuffer commandBuffer, + const VkSubpassEndInfo *pSubpassEndInfo) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + end_subpass(cmd_buffer, pSubpassEndInfo); + + /* Make sure all our attachments end up in their finalLayout */ + + uint32_t max_image_barrier_count = 0; + for (uint32_t a = 0; a < pass->attachment_count; a++) { + const struct vk_render_pass_attachment *rp_att = &pass->attachments[a]; + + max_image_barrier_count += util_bitcount(pass->view_mask) * + util_bitcount(rp_att->aspects); + } + STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, max_image_barrier_count); + uint32_t image_barrier_count = 0; + + for (uint32_t a = 0; a < pass->attachment_count; a++) { + const struct vk_render_pass_attachment *rp_att = &pass->attachments[a]; + + transition_attachment(cmd_buffer, a, pass->view_mask, + rp_att->final_layout, + rp_att->final_stencil_layout, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + } + assert(image_barrier_count <= max_image_barrier_count); + + if (image_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .imageMemoryBarrierCount = image_barrier_count, + .pImageMemoryBarriers = image_barriers, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(image_barriers); + + vk_command_buffer_reset_render_pass(cmd_buffer); +} diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h new file mode 100644 index 00000000000..9acd65aa3ad --- /dev/null +++ b/src/vulkan/runtime/vk_render_pass.h @@ -0,0 +1,461 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_RENDER_PASS_H +#define VK_RENDER_PASS_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_command_buffer; +struct vk_image; + +/** + * Pseudo-extension struct that may be chained into VkRenderingAttachmentInfo + * to indicate an initial layout for the attachment. This is only allowed if + * all of the following conditions are met: + * + * 1. VkRenderingAttachmentInfo::loadOp == LOAD_OP_CLEAR + * + * 2. VkRenderingInfo::renderArea is the entire image view LOD + * + * 3. For 3D image attachments, VkRenderingInfo::viewMask == 0 AND + * VkRenderingInfo::layerCount references the entire bound image view + * OR VkRenderingInfo::viewMask is dense (no holes) and references the + * entire bound image view. (2D and 2D array images have no such + * requirement.) + * + * If this struct is included in the pNext chain of a + * VkRenderingAttachmentInfo, the driver is responsible for transitioning the + * bound region of the image from + * VkRenderingAttachmentInitialLayoutInfoMESA::initialLayout to + * VkRenderingAttachmentInfo::imageLayout prior to rendering. + */ +typedef struct VkRenderingAttachmentInitialLayoutInfoMESA { + VkStructureType sType; +#define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA (VkStructureType)1000044901 +#define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA_cast VkRenderingAttachmentInitialLayoutInfoMESA + const void* pNext; + + /** Initial layout of the attachment */ + VkImageLayout initialLayout; +} VkRenderingAttachmentInitialLayoutInfoMESA; + +/***/ +struct vk_subpass_attachment { + /** VkAttachmentReference2::attachment */ + uint32_t attachment; + + /** Aspects referenced by this attachment + * + * For an input attachment, this is VkAttachmentReference2::aspectMask. + * For all others, it's equal to the vk_render_pass_attachment::aspects. + */ + VkImageAspectFlags aspects; + + /** Usage for this attachment + * + * This is a single VK_IMAGE_USAGE_* describing the usage of this subpass + * attachment. Resolve attachments are VK_IMAGE_USAGE_TRANSFER_DST_BIT. + */ + VkImageUsageFlagBits usage; + + /** VkAttachmentReference2::layout */ + VkImageLayout layout; + + /** VkAttachmentReferenceStencilLayout::stencilLayout + * + * If VK_KHR_separate_depth_stencil_layouts is not used, this will be + * layout if the attachment contains stencil and VK_IMAGE_LAYOUT_UNDEFINED + * otherwise. + */ + VkImageLayout stencil_layout; + + /** A per-view mask indicating whether this is the last use of this attachment + * + * If the same render pass attachment is used multiple ways within a + * subpass, corresponding last_subpass bits will be set in all of them. + * For the non-multiview case, only the first bit is used.
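+ * + * As an illustration (not normative): with a subpass view mask of 0x3, a + * last_subpass value of 0x3 means this subpass contains the final use of the + * attachment in both views, while 0x2 would mean only view 1 sees its last + * use here.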
+ */ + uint32_t last_subpass; + + /** Resolve attachment, if any */ + struct vk_subpass_attachment *resolve; +}; + +/***/ +struct vk_subpass { + /** Count of all attachments referenced by this subpass */ + uint32_t attachment_count; + + /** Array of all attachments referenced by this subpass */ + struct vk_subpass_attachment *attachments; + + /** VkSubpassDescription2::inputAttachmentCount */ + uint32_t input_count; + + /** VkSubpassDescription2::pInputAttachments */ + struct vk_subpass_attachment *input_attachments; + + /** VkSubpassDescription2::colorAttachmentCount */ + uint32_t color_count; + + /** VkSubpassDescription2::pColorAttachments */ + struct vk_subpass_attachment *color_attachments; + + /** VkSubpassDescription2::colorAttachmentCount or zero */ + uint32_t color_resolve_count; + + /** VkSubpassDescription2::pResolveAttachments */ + struct vk_subpass_attachment *color_resolve_attachments; + + /** VkSubpassDescription2::pDepthStencilAttachment */ + struct vk_subpass_attachment *depth_stencil_attachment; + + /** VkSubpassDescriptionDepthStencilResolve::pDepthStencilResolveAttachment */ + struct vk_subpass_attachment *depth_stencil_resolve_attachment; + + /** VkFragmentShadingRateAttachmentInfoKHR::pFragmentShadingRateAttachment */ + struct vk_subpass_attachment *fragment_shading_rate_attachment; + + /** VkSubpassDescription2::viewMask or 1 for non-multiview + * + * For all view masks in the vk_render_pass data structure, we use a mask + * of 1 for non-multiview instead of a mask of 0. To determine if the + * render pass is multiview or not, see vk_render_pass::is_multiview. + */ + uint32_t view_mask; + + /** VkSubpassDescriptionDepthStencilResolve::depthResolveMode */ + VkResolveModeFlagBits depth_resolve_mode; + + /** VkSubpassDescriptionDepthStencilResolve::stencilResolveMode */ + VkResolveModeFlagBits stencil_resolve_mode; + + /** VkFragmentShadingRateAttachmentInfoKHR::shadingRateAttachmentTexelSize */ + VkExtent2D fragment_shading_rate_attachment_texel_size; + + /** Extra VkPipelineCreateFlags for this subpass */ + VkPipelineCreateFlagBits2KHR pipeline_flags; + + /** VkAttachmentSampleCountInfoAMD for this subpass + * + * This is in the pNext chain of pipeline_info and inheritance_info. + */ + VkAttachmentSampleCountInfoAMD sample_count_info_amd; + + /** VkPipelineRenderingCreateInfo for this subpass + * + * Returned by vk_get_pipeline_rendering_create_info() if + * VkGraphicsPipelineCreateInfo::renderPass != VK_NULL_HANDLE. + */ + VkPipelineRenderingCreateInfo pipeline_info; + + /** VkCommandBufferInheritanceRenderingInfo for this subpass + * + * Returned by vk_get_command_buffer_inheritance_rendering_info() if + * VkCommandBufferInheritanceInfo::renderPass != VK_NULL_HANDLE. + */ + VkCommandBufferInheritanceRenderingInfo inheritance_info; + + /** VkMultisampledRenderToSingleSampledInfoEXT for this subpass */ + VkMultisampledRenderToSingleSampledInfoEXT mrtss; +}; + +/***/ +struct vk_render_pass_attachment { + /** VkAttachmentDescription2::format */ + VkFormat format; + + /** Aspects contained in format */ + VkImageAspectFlags aspects; + + /** VkAttachmentDescription2::samples */ + uint32_t samples; + + /** Views in which this attachment is used, 0 for unused + * + * For non-multiview, this will be 1 if the attachment is used. 
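+ * + * For example (illustrative), an attachment referenced only by subpasses + * whose view mask is 0x5 has view_mask == 0x5; in a non-multiview render + * pass a used attachment simply has view_mask == 0x1.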
+ */ + uint32_t view_mask; + + /** VkAttachmentDescription2::loadOp */ + VkAttachmentLoadOp load_op; + + /** VkAttachmentDescription2::storeOp */ + VkAttachmentStoreOp store_op; + + /** VkAttachmentDescription2::stencilLoadOp */ + VkAttachmentLoadOp stencil_load_op; + + /** VkAttachmentDescription2::stencilStoreOp */ + VkAttachmentStoreOp stencil_store_op; + + /** VkAttachmentDescription2::initialLayout */ + VkImageLayout initial_layout; + + /** VkAttachmentDescription2::finalLayout */ + VkImageLayout final_layout; + + /** VkAttachmentDescriptionStencilLayout::stencilInitialLayout + * + * If VK_KHR_separate_depth_stencil_layouts is not used, this will be + * initial_layout if format contains stencil and VK_IMAGE_LAYOUT_UNDEFINED + * otherwise. + */ + VkImageLayout initial_stencil_layout; + + /** VkAttachmentDescriptionStencilLayout::stencilFinalLayout + * + * If VK_KHR_separate_depth_stencil_layouts is not used, this will be + * final_layout if format contains stencil and VK_IMAGE_LAYOUT_UNDEFINED + * otherwise. + */ + VkImageLayout final_stencil_layout; +}; + +/***/ +struct vk_subpass_dependency { + /** VkSubpassDependency2::dependencyFlags */ + VkDependencyFlags flags; + + /** VkSubpassDependency2::srcSubpass */ + uint32_t src_subpass; + + /** VkSubpassDependency2::dstSubpass */ + uint32_t dst_subpass; + + /** VkSubpassDependency2::srcStageMask */ + VkPipelineStageFlags2 src_stage_mask; + + /** VkSubpassDependency2::dstStageMask */ + VkPipelineStageFlags2 dst_stage_mask; + + /** VkSubpassDependency2::srcAccessMask */ + VkAccessFlags2 src_access_mask; + + /** VkSubpassDependency2::dstAccessMask */ + VkAccessFlags2 dst_access_mask; + + /** VkSubpassDependency2::viewOffset */ + int32_t view_offset; +}; + +/***/ +struct vk_render_pass { + struct vk_object_base base; + + /** True if this render pass uses multiview + * + * This is true if all subpasses have viewMask != 0. + */ + bool is_multiview; + + /** Views used by this render pass or 1 for non-multiview */ + uint32_t view_mask; + + /** VkRenderPassCreateInfo2::attachmentCount */ + uint32_t attachment_count; + + /** VkRenderPassCreateInfo2::pAttachments */ + struct vk_render_pass_attachment *attachments; + + /** VkRenderPassCreateInfo2::subpassCount */ + uint32_t subpass_count; + + /** VkRenderPassCreateInfo2::subpasses */ + struct vk_subpass *subpasses; + + /** VkRenderPassCreateInfo2::dependencyCount */ + uint32_t dependency_count; + + /** VkRenderPassFragmentDensityMapCreateInfoEXT::fragmentDensityMapAttachment */ + VkAttachmentReference fragment_density_map; + + /** VkRenderPassCreateInfo2::pDependencies */ + struct vk_subpass_dependency *dependencies; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_render_pass, base, VkRenderPass, + VK_OBJECT_TYPE_RENDER_PASS); + +/** Returns the VkPipelineRenderingCreateInfo for a graphics pipeline + * + * For render-pass-free drivers, this can be used in the implementation of + * vkCreateGraphicsPipelines to get the VkPipelineRenderingCreateInfo. If + * VkGraphicsPipelineCreateInfo::renderPass is not VK_NULL_HANDLE, it will + * return a representation of the specified subpass as a + * VkPipelineRenderingCreateInfo. If VkGraphicsPipelineCreateInfo::renderPass + * is VK_NULL_HANDLE and there is a VkPipelineRenderingCreateInfo in the pNext + * chain of VkGraphicsPipelineCreateInfo, it will return that. 
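+ * + * A sketch of the intended use from a driver's vkCreateGraphicsPipelines + * implementation (local names are illustrative, not part of this API): + * + * const VkPipelineRenderingCreateInfo *r_info = + * vk_get_pipeline_rendering_create_info(pCreateInfo); + * uint32_t color_count = r_info != NULL ? r_info->colorAttachmentCount : 0;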
+ * + * :param info: |in| One of the pCreateInfos from vkCreateGraphicsPipelines + */ +const VkPipelineRenderingCreateInfo * +vk_get_pipeline_rendering_create_info(const VkGraphicsPipelineCreateInfo *info); + +/** Returns any extra VkPipelineCreateFlags from the render pass + * + * For render-pass-free drivers, this can be used to get any extra pipeline + * create flags implied by the render pass. In particular, a render pass may + * want to add one or more of the following: + * + * - VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT + * - VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT + * - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR + * - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT + * + * If VkGraphicsPipelineCreateInfo::renderPass is VK_NULL_HANDLE, the relevant + * flags from VkGraphicsPipelineCreateInfo::flags will be returned. + * + * :param info: |in| One of the pCreateInfos from vkCreateGraphicsPipelines + */ +VkPipelineCreateFlags2KHR +vk_get_pipeline_rendering_flags(const VkGraphicsPipelineCreateInfo *info); + +/** Returns the VkAttachmentSampleCountInfoAMD for a graphics pipeline + * + * For render-pass-free drivers, this can be used in the implementation of + * vkCreateGraphicsPipelines to get the VkAttachmentSampleCountInfoAMD. If + * VkGraphicsPipelineCreateInfo::renderPass is not VK_NULL_HANDLE, it will + * return the sample counts from the specified subpass as a + * VkAttachmentSampleCountInfoAMD. If VkGraphicsPipelineCreateInfo::renderPass + * is VK_NULL_HANDLE and there is a VkAttachmentSampleCountInfoAMD in the pNext + * chain of VkGraphicsPipelineCreateInfo, it will return that. + * + * :param info: |in| One of the pCreateInfos from vkCreateGraphicsPipelines + */ +const VkAttachmentSampleCountInfoAMD * +vk_get_pipeline_sample_count_info_amd(const VkGraphicsPipelineCreateInfo *info); + +/** + * Returns the VkCommandBufferInheritanceRenderingInfo for secondary command + * buffer execution + * + * For render-pass-free drivers, this can be used in the implementation of + * vkCmdExecuteCommands to get the VkCommandBufferInheritanceRenderingInfo. + * If VkCommandBufferInheritanceInfo::renderPass is not VK_NULL_HANDLE, it + * will return a representation of the specified subpass as a + * VkCommandBufferInheritanceRenderingInfo. If + * VkCommandBufferInheritanceInfo::renderPass is VK_NULL_HANDLE and there + * is a VkCommandBufferInheritanceRenderingInfo in the pNext chain of + * VkCommandBufferBeginInfo, it will return that.
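+ * + * A sketch of the intended use when beginning a secondary command buffer + * (local names are illustrative): + * + * const VkCommandBufferInheritanceRenderingInfo *inheritance = + * vk_get_command_buffer_inheritance_rendering_info(level, pBeginInfo); + * if (inheritance != NULL) + * view_mask = inheritance->viewMask;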
+ * + * :param level: |in| The nesting level of this command buffer + * :param pBeginInfo: |in| The pBeginInfo from vkBeginCommandBuffer + */ +const VkCommandBufferInheritanceRenderingInfo * +vk_get_command_buffer_inheritance_rendering_info( + VkCommandBufferLevel level, + const VkCommandBufferBeginInfo *pBeginInfo); + +struct vk_gcbiarr_data { + VkRenderingInfo rendering; + VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_att; + VkRenderingAttachmentInfo attachments[]; +}; + +#define VK_GCBIARR_DATA_SIZE(max_color_rts) (\ + sizeof(struct vk_gcbiarr_data) + \ + sizeof(VkRenderingAttachmentInfo) * ((max_color_rts) + 2) \ +) + +/** + * Constructs a VkRenderingInfo for the inheritance rendering info + * + * For render-pass-free drivers, this can be used in the implementaiton of + * vkCmdExecuteCommands to get a VkRenderingInfo representing the subpass and + * framebuffer provided via the inheritance info for a command buffer created + * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT. The mental model + * here is that VkExecuteCommands() implicitly suspends the render pass and + * VkBeginCommandBuffer() resumes it. If a VkRenderingInfo cannot be + * constructed due to a missing framebuffer or similar, NULL will be + * returned. + * + * :param level: |in| The nesting level of this command buffer + * :param pBeginInfo: |in| The pBeginInfo from vkBeginCommandBuffer + * :param stack_data: |out| An opaque blob of data which will be overwritten by + * this function, passed in from the caller to avoid + * heap allocations. It must be at least + * VK_GCBIARR_DATA_SIZE(max_color_rts) bytes. + */ +const VkRenderingInfo * +vk_get_command_buffer_inheritance_as_rendering_resume( + VkCommandBufferLevel level, + const VkCommandBufferBeginInfo *pBeginInfo, + void *stack_data); + +/** + * Return true if the subpass dependency is framebuffer-local. + */ +static bool +vk_subpass_dependency_is_fb_local(const VkSubpassDependency2 *dep, + VkPipelineStageFlags2 src_stage_mask, + VkPipelineStageFlags2 dst_stage_mask) +{ + if (dep->srcSubpass == VK_SUBPASS_EXTERNAL || + dep->dstSubpass == VK_SUBPASS_EXTERNAL) + return true; + + /* This is straight from the Vulkan 1.2 spec, section 7.1.4 "Framebuffer + * Region Dependencies": + */ + const VkPipelineStageFlags2 framebuffer_space_stages = + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + + const VkPipelineStageFlags2 src_framebuffer_space_stages = + framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT; + const VkPipelineStageFlags2 dst_framebuffer_space_stages = + framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; + + /* Check for frambuffer-space dependency. */ + if ((src_stage_mask & ~src_framebuffer_space_stages) || + (dst_stage_mask & ~dst_framebuffer_space_stages)) + return false; + + /* Check for framebuffer-local dependency. 
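Per the spec, a dependency between framebuffer-space stages is framebuffer-local only when VK_DEPENDENCY_BY_REGION_BIT is also set; otherwise it is framebuffer-global.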
*/ + return dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT; +} + +uint32_t +vk_command_buffer_get_attachment_layout(const struct vk_command_buffer *cmd_buffer, + const struct vk_image *image, + VkImageLayout *out_layout, + VkImageLayout *out_stencil_layout); + +void +vk_command_buffer_set_attachment_layout(struct vk_command_buffer *cmd_buffer, + uint32_t att_idx, + VkImageLayout layout, + VkImageLayout stencil_layout); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_RENDER_PASS_H */ diff --git a/src/vulkan/runtime/vk_sampler.c b/src/vulkan/runtime/vk_sampler.c new file mode 100644 index 00000000000..bda852ebf90 --- /dev/null +++ b/src/vulkan/runtime/vk_sampler.c @@ -0,0 +1,169 @@ +/* + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_sampler.h" + +#include "vk_format.h" +#include "vk_util.h" +#include "vk_ycbcr_conversion.h" + +VkClearColorValue +vk_border_color_value(VkBorderColor color) +{ + switch (color) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + return (VkClearColorValue) { .float32 = { 0, 0, 0, 0 } }; + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + return (VkClearColorValue) { .int32 = { 0, 0, 0, 0 } }; + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + return (VkClearColorValue) { .float32 = { 0, 0, 0, 1 } }; + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + return (VkClearColorValue) { .int32 = { 0, 0, 0, 1 } }; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + return (VkClearColorValue) { .float32 = { 1, 1, 1, 1 } }; + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + return (VkClearColorValue) { .int32 = { 1, 1, 1, 1 } }; + default: + unreachable("Invalid or custom border color enum"); + } +} + +bool +vk_border_color_is_int(VkBorderColor color) +{ + switch (color) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + return false; + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + return true; + default: + unreachable("Invalid border color enum"); + } +} + +VkClearColorValue +vk_sampler_border_color_value(const VkSamplerCreateInfo *pCreateInfo, + VkFormat *format_out) +{ + if (vk_border_color_is_custom(pCreateInfo->borderColor)) { + const VkSamplerCustomBorderColorCreateInfoEXT *border_color_info = + vk_find_struct_const(pCreateInfo->pNext, + SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + if (format_out) + *format_out = border_color_info->format; + + return border_color_info->customBorderColor; + } else { + if (format_out) + *format_out = VK_FORMAT_UNDEFINED; + + return vk_border_color_value(pCreateInfo->borderColor); + } +} + +void * +vk_sampler_create(struct vk_device *device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size) +{ + struct vk_sampler *sampler; + + sampler = vk_object_zalloc(device, alloc, size, VK_OBJECT_TYPE_SAMPLER); + if (!sampler) + return NULL; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler->format = VK_FORMAT_UNDEFINED; + sampler->border_color = pCreateInfo->borderColor; + sampler->reduction_mode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; + + if (!vk_border_color_is_custom(pCreateInfo->borderColor)) { + sampler->border_color_value = + vk_border_color_value(pCreateInfo->borderColor); + } + + vk_foreach_struct_const(ext, pCreateInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT: { + const VkSamplerCustomBorderColorCreateInfoEXT *cbc_info = (void *)ext; + if (!vk_border_color_is_custom(pCreateInfo->borderColor)) + break; + + sampler->border_color_value = cbc_info->customBorderColor; + if (cbc_info->format != VK_FORMAT_UNDEFINED) + sampler->format = cbc_info->format; + break; + } + + case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO: { + const VkSamplerReductionModeCreateInfo *rm_info = (void *)ext; + sampler->reduction_mode = rm_info->reductionMode; + break; + } + + case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: { + const VkSamplerYcbcrConversionInfo *yc_info = (void *)ext; + VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, yc_info->conversion); + + /* From the Vulkan 1.2.259 spec: + * + * "A 
VkSamplerYcbcrConversionInfo must be provided for samplers + * to be used with image views that access + * VK_IMAGE_ASPECT_COLOR_BIT if the format is one of the formats + * that require a sampler YCbCr conversion, or if the image view + * has an external format." + * + * This means that on Android we can end up with one of these even if + * YCbCr isn't being used at all. Leave sampler->ycbcr_conversion NULL + * if it isn't a YCbCr format. + */ + if (vk_format_get_ycbcr_info(conversion->state.format) == NULL) + break; + + sampler->ycbcr_conversion = conversion; + sampler->format = conversion->state.format; + break; + } + default: + break; + } + } + + return sampler; +} + +void +vk_sampler_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_sampler *sampler) +{ + vk_object_free(device, alloc, sampler); +} diff --git a/src/vulkan/runtime/vk_sampler.h b/src/vulkan/runtime/vk_sampler.h new file mode 100644 index 00000000000..541b02916c2 --- /dev/null +++ b/src/vulkan/runtime/vk_sampler.h @@ -0,0 +1,98 @@ +/* + * Copyright © 2022 Collabora, LTD + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_SAMPLER_H +#define VK_SAMPLER_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline bool +vk_border_color_is_custom(VkBorderColor color) +{ + return color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || + color == VK_BORDER_COLOR_INT_CUSTOM_EXT; +} + +VkClearColorValue vk_border_color_value(VkBorderColor color); +bool vk_border_color_is_int(VkBorderColor color); + +VkClearColorValue +vk_sampler_border_color_value(const VkSamplerCreateInfo *pCreateInfo, + VkFormat *format_out); + +struct vk_sampler { + struct vk_object_base base; + + /** Format of paired image views or VK_FORMAT_UNDEFINED + * + * This is taken either from VkSamplerYcbcrConversionCreateInfo::format or + * VkSamplerCustomBorderColorCreateInfoEXT::format. + */ + VkFormat format; + + /** VkSamplerCreateInfo::borderColor */ + VkBorderColor border_color; + + /** Border color value + * + * If VkSamplerCreateInfo::borderColor is one of the Vulkan 1.0 enumerated + * border colors, this will be the VkClearColorValue representation of that + * value. VkSamplerCreateInfo::borderColor is VK_BORDER_COLOR_*_CUSTOM_EXT, + * this is VkSamplerCustomBorderColorCreateInfoEXT::customBorderColor. 
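+ * + * For example, VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE corresponds to a float32 + * value of { 1, 1, 1, 1 } and VK_BORDER_COLOR_INT_TRANSPARENT_BLACK to an + * int32 value of { 0, 0, 0, 0 }.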
+ */ + VkClearColorValue border_color_value; + + /** + * VkSamplerReductionModeCreateInfo::reductionMode or + * VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE. + */ + VkSamplerReductionMode reduction_mode; + + /** VkSamplerYcbcrConversionInfo::conversion or NULL + * + * We ensure that this is always NULL whenever vk_sampler::format is not a + * YCbCr format. This is important on Android where YCbCr conversion + * objects are required for all EXTERNAL formats, even if they are not + * YCbCr formats. + */ + struct vk_ycbcr_conversion *ycbcr_conversion; +}; +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_sampler, base, VkSampler, + VK_OBJECT_TYPE_SAMPLER); + +void *vk_sampler_create(struct vk_device *device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_sampler_destroy(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_sampler *sampler); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SAMPLER_H */ diff --git a/src/vulkan/runtime/vk_semaphore.c b/src/vulkan/runtime/vk_semaphore.c new file mode 100644 index 00000000000..7044ed9aea2 --- /dev/null +++ b/src/vulkan/runtime/vk_semaphore.c @@ -0,0 +1,723 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_semaphore.h" + +#include "util/os_time.h" +#include "util/perf/cpu_trace.h" + +#ifdef _WIN32 +#include <windows.h> +#else +#include <unistd.h> +#endif + +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" +#include "vk_physical_device.h" +#include "vk_util.h" + +static VkExternalSemaphoreHandleTypeFlags +vk_sync_semaphore_import_types(const struct vk_sync_type *type, + VkSemaphoreType semaphore_type) +{ + VkExternalSemaphoreHandleTypeFlags handle_types = 0; + + if (type->import_opaque_fd) + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + if (type->export_sync_file && semaphore_type == VK_SEMAPHORE_TYPE_BINARY) + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + + if (type->import_win32_handle) { + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; + if (type->features & VK_SYNC_FEATURE_TIMELINE) + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT; + } + + return handle_types; +} + +static VkExternalSemaphoreHandleTypeFlags +vk_sync_semaphore_export_types(const struct vk_sync_type *type, + VkSemaphoreType semaphore_type) +{ + VkExternalSemaphoreHandleTypeFlags handle_types = 0; + + if (type->export_opaque_fd) + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + if (type->export_sync_file && semaphore_type == VK_SEMAPHORE_TYPE_BINARY) + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + + if (type->export_win32_handle) { + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; + if (type->features & VK_SYNC_FEATURE_TIMELINE) + handle_types |= VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT; + } + + return handle_types; +} + +static VkExternalSemaphoreHandleTypeFlags +vk_sync_semaphore_handle_types(const struct vk_sync_type *type, + VkSemaphoreType semaphore_type) +{ + return vk_sync_semaphore_export_types(type, semaphore_type) & + vk_sync_semaphore_import_types(type, semaphore_type); +} + +static const struct vk_sync_type * +get_semaphore_sync_type(struct vk_physical_device *pdevice, + VkSemaphoreType semaphore_type, + VkExternalSemaphoreHandleTypeFlags handle_types) +{ + assert(semaphore_type == VK_SEMAPHORE_TYPE_BINARY || + semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE); + + enum vk_sync_features req_features = VK_SYNC_FEATURE_GPU_WAIT; + if (semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) { + req_features |= VK_SYNC_FEATURE_TIMELINE | + VK_SYNC_FEATURE_CPU_WAIT; + } else { + req_features |= VK_SYNC_FEATURE_BINARY; + } + + for (const struct vk_sync_type *const *t = + pdevice->supported_sync_types; *t; t++) { + if (req_features & ~(*t)->features) + continue; + + if (handle_types & ~vk_sync_semaphore_handle_types(*t, semaphore_type)) + continue; + + return *t; + } + + return NULL; +} + +static VkSemaphoreType +get_semaphore_type(const void *pNext, uint64_t *initial_value) +{ + const VkSemaphoreTypeCreateInfo *type_info = + vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO); + + if (!type_info) + return VK_SEMAPHORE_TYPE_BINARY; + + if (initial_value) + *initial_value = type_info->initialValue; + return type_info->semaphoreType; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateSemaphore(VkDevice _device, + const VkSemaphoreCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSemaphore *pSemaphore) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_semaphore *semaphore; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO); + + uint64_t initial_value = 0; + const 
VkSemaphoreType semaphore_type = + get_semaphore_type(pCreateInfo->pNext, &initial_value); + + if (semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) + assert(device->timeline_mode != VK_DEVICE_TIMELINE_MODE_NONE); + + const VkExportSemaphoreCreateInfo *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); + VkExternalSemaphoreHandleTypeFlags handle_types = + export ? export->handleTypes : 0; + + const struct vk_sync_type *sync_type = + get_semaphore_sync_type(device->physical, semaphore_type, handle_types); + if (sync_type == NULL) { + /* We should always be able to get a semaphore type for internal */ + assert(get_semaphore_sync_type(device->physical, semaphore_type, 0) != NULL); + return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE, + "Combination of external handle types is unsupported " + "for VkSemaphore creation."); + } + + /* If the timeline mode is ASSISTED, then any permanent binary semaphore + * types need to be able to support move. We don't require this for + * temporary unless that temporary is also used as a semaphore signal + * operation which is much trickier to assert early. + */ + if (semaphore_type == VK_SEMAPHORE_TYPE_BINARY && + vk_device_supports_threaded_submit(device)) + assert(sync_type->move); + + /* Allocate a vk_semaphore + vk_sync implementation. Because the permanent + * field of vk_semaphore is the base field of the vk_sync implementation, + * we can make the 2 structures overlap. + */ + size_t size = offsetof(struct vk_semaphore, permanent) + sync_type->size; + semaphore = vk_object_zalloc(device, pAllocator, size, + VK_OBJECT_TYPE_SEMAPHORE); + if (semaphore == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + semaphore->type = semaphore_type; + + enum vk_sync_flags sync_flags = 0; + if (semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) + sync_flags |= VK_SYNC_IS_TIMELINE; + if (handle_types) + sync_flags |= VK_SYNC_IS_SHAREABLE; + + VkResult result = vk_sync_init(device, &semaphore->permanent, + sync_type, sync_flags, initial_value); + if (result != VK_SUCCESS) { + vk_object_free(device, pAllocator, semaphore); + return result; + } + +#ifdef _WIN32 + const VkExportSemaphoreWin32HandleInfoKHR *export_win32 = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR); + if (export_win32) { + result = vk_sync_set_win32_export_params(device, &semaphore->permanent, export_win32->pAttributes, + export_win32->dwAccess, export_win32->name); + if (result != VK_SUCCESS) { + vk_sync_finish(device, &semaphore->permanent); + vk_object_free(device, pAllocator, semaphore); + return result; + } + } +#endif + + *pSemaphore = vk_semaphore_to_handle(semaphore); + + return VK_SUCCESS; +} + +void +vk_semaphore_reset_temporary(struct vk_device *device, + struct vk_semaphore *semaphore) +{ + if (semaphore->temporary == NULL) + return; + + vk_sync_destroy(device, semaphore->temporary); + semaphore->temporary = NULL; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroySemaphore(VkDevice _device, + VkSemaphore _semaphore, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, _semaphore); + + if (semaphore == NULL) + return; + + vk_semaphore_reset_temporary(device, semaphore); + vk_sync_finish(device, &semaphore->permanent); + + vk_object_free(device, pAllocator, semaphore); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetPhysicalDeviceExternalSemaphoreProperties( + VkPhysicalDevice physicalDevice, + const 
VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, + VkExternalSemaphoreProperties *pExternalSemaphoreProperties) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + assert(pExternalSemaphoreInfo->sType == + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO); + const VkExternalSemaphoreHandleTypeFlagBits handle_type = + pExternalSemaphoreInfo->handleType; + + const VkSemaphoreType semaphore_type = + get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL); + + const struct vk_sync_type *sync_type = + get_semaphore_sync_type(pdevice, semaphore_type, handle_type); + if (sync_type == NULL) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; + pExternalSemaphoreProperties->compatibleHandleTypes = 0; + pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; + return; + } + + VkExternalSemaphoreHandleTypeFlagBits import = + vk_sync_semaphore_import_types(sync_type, semaphore_type); + VkExternalSemaphoreHandleTypeFlagBits export = + vk_sync_semaphore_export_types(sync_type, semaphore_type); + + VkExternalSemaphoreHandleTypeFlagBits opaque_types[] = { + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + }; + for (uint32_t i = 0; i < ARRAY_SIZE(opaque_types); ++i) { + if (handle_type != opaque_types[i]) { + const struct vk_sync_type *opaque_sync_type = + get_semaphore_sync_type(pdevice, semaphore_type, opaque_types[i]); + + /* If we're a different vk_sync_type than the one selected when only + * an opaque type is set, then we can't import/export that opaque type. Put + * differently, there can only be one OPAQUE_FD/WIN32_HANDLE sync type. + */ + if (sync_type != opaque_sync_type) { + import &= ~opaque_types[i]; + export &= ~opaque_types[i]; + } + } + } + + VkExternalSemaphoreHandleTypeFlags compatible = import & export; + VkExternalSemaphoreFeatureFlags features = 0; + if (handle_type & export) + features |= VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT; + if (handle_type & import) + features |= VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; + + pExternalSemaphoreProperties->exportFromImportedHandleTypes = export; + pExternalSemaphoreProperties->compatibleHandleTypes = compatible; + pExternalSemaphoreProperties->externalSemaphoreFeatures = features; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetSemaphoreCounterValue(VkDevice _device, + VkSemaphore _semaphore, + uint64_t *pValue) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, _semaphore); + + if (vk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore); + return vk_sync_get_value(device, sync, pValue); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_WaitSemaphores(VkDevice _device, + const VkSemaphoreWaitInfo *pWaitInfo, + uint64_t timeout) +{ + MESA_TRACE_FUNC(); + + VK_FROM_HANDLE(vk_device, device, _device); + + if (vk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + if (pWaitInfo->semaphoreCount == 0) + return VK_SUCCESS; + + uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout); + + const uint32_t wait_count = pWaitInfo->semaphoreCount; + STACK_ARRAY(struct vk_sync_wait, waits, pWaitInfo->semaphoreCount); + + for (uint32_t i = 0; i < wait_count; i++) { + VK_FROM_HANDLE(vk_semaphore, semaphore, pWaitInfo->pSemaphores[i]); + assert(semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE); + + waits[i] = (struct vk_sync_wait) { + .sync = vk_semaphore_get_active_sync(semaphore), + .stage_mask = 
~(VkPipelineStageFlags2)0, + .wait_value = pWaitInfo->pValues[i], + }; + } + + enum vk_sync_wait_flags wait_flags = VK_SYNC_WAIT_COMPLETE; + if (pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT) + wait_flags |= VK_SYNC_WAIT_ANY; + + VkResult result = vk_sync_wait_many(device, wait_count, waits, + wait_flags, abs_timeout_ns); + + STACK_ARRAY_FINISH(waits); + + VkResult device_status = vk_device_check_status(device); + if (device_status != VK_SUCCESS) + return device_status; + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_SignalSemaphore(VkDevice _device, + const VkSemaphoreSignalInfo *pSignalInfo) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, pSignalInfo->semaphore); + struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore); + VkResult result; + + /* From the Vulkan 1.2.194 spec: + * + * UID-VkSemaphoreSignalInfo-semaphore-03257 + * + * "semaphore must have been created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_TIMELINE." + */ + assert(semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE); + + /* From the Vulkan 1.2.194 spec: + * + * VUID-VkSemaphoreSignalInfo-value-03258 + * + * "value must have a value greater than the current value of the + * semaphore" + * + * Since 0 is the lowest possible semaphore timeline value, we can assert + * that a non-zero signal value is provided. + */ + if (unlikely(pSignalInfo->value == 0)) { + return vk_device_set_lost(device, + "Tried to signal a timeline with value 0"); + } + + result = vk_sync_signal(device, sync, pSignalInfo->value); + if (unlikely(result != VK_SUCCESS)) + return result; + + if (device->submit_mode == VK_QUEUE_SUBMIT_MODE_DEFERRED) { + result = vk_device_flush(device); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + return VK_SUCCESS; +} + +#ifdef _WIN32 + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_ImportSemaphoreWin32HandleKHR(VkDevice _device, + const VkImportSemaphoreWin32HandleInfoKHR *pImportSemaphoreWin32HandleInfo) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, pImportSemaphoreWin32HandleInfo->semaphore); + + assert(pImportSemaphoreWin32HandleInfo->sType == + VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR); + + const HANDLE handle = pImportSemaphoreWin32HandleInfo->handle; + const wchar_t *name = pImportSemaphoreWin32HandleInfo->name; + const VkExternalSemaphoreHandleTypeFlagBits handle_type = + pImportSemaphoreWin32HandleInfo->handleType; + + struct vk_sync *temporary = NULL, *sync; + if (pImportSemaphoreWin32HandleInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) { + /* From the Vulkan 1.2.194 spec: + * + * VUID-VkImportSemaphoreWin32HandleInfoKHR-flags-03322 + * + * "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the + * VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore + * from which handle or name was exported must not be + * VK_SEMAPHORE_TYPE_TIMELINE" + */ + if (unlikely(semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE)) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "Cannot temporarily import into a timeline " + "semaphore"); + } + + const struct vk_sync_type *sync_type = + get_semaphore_sync_type(device->physical, semaphore->type, handle_type); + + VkResult result = vk_sync_create(device, sync_type, 0 /* flags */, + 0 /* initial_value */, &temporary); + if (result != VK_SUCCESS) + return result; + + sync = temporary; + } else { + sync = &semaphore->permanent; + } + assert(handle_type & + vk_sync_semaphore_handle_types(sync->type, semaphore->type)); + + VkResult result; 
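+ /* Both opaque Win32 handles and D3D12 fence handles are imported through + * the same vk_sync_import_win32_handle() path below; any other handle type + * is rejected as VK_ERROR_INVALID_EXTERNAL_HANDLE. + */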
+ switch (pImportSemaphoreWin32HandleInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT: + result = vk_sync_import_win32_handle(device, sync, handle, name); + break; + + default: + result = vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + + if (result != VK_SUCCESS) { + if (temporary != NULL) + vk_sync_destroy(device, temporary); + return result; + } + + /* From a spec correctness point of view, we could probably replace the + * semaphore's temporary payload with the new vk_sync at the top. However, + * we choose to be nice to applications and only replace the semaphore if + * the import succeeded. + */ + if (temporary) { + vk_semaphore_reset_temporary(device, semaphore); + semaphore->temporary = temporary; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetSemaphoreWin32HandleKHR(VkDevice _device, + const VkSemaphoreGetWin32HandleInfoKHR *pGetWin32HandleInfo, + HANDLE *pHandle) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, pGetWin32HandleInfo->semaphore); + + assert(pGetWin32HandleInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR); + + struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore); + + VkResult result; + switch (pGetWin32HandleInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT: + result = vk_sync_export_win32_handle(device, sync, pHandle); + if (result != VK_SUCCESS) + return result; + break; + + default: + unreachable("Invalid semaphore export handle type"); + } + + /* From the Vulkan 1.2.194 spec: + * + * "Export operations have the same transference as the specified + * handle type’s import operations. [...] If the semaphore was using + * a temporarily imported payload, the semaphore’s prior permanent + * payload will be restored." 
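+ * + * Dropping the temporary payload below is how that restore is implemented.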
+ */ + vk_semaphore_reset_temporary(device, semaphore); + + return VK_SUCCESS; +} + +#else + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_ImportSemaphoreFdKHR(VkDevice _device, + const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, pImportSemaphoreFdInfo->semaphore); + + assert(pImportSemaphoreFdInfo->sType == + VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR); + + const int fd = pImportSemaphoreFdInfo->fd; + const VkExternalSemaphoreHandleTypeFlagBits handle_type = + pImportSemaphoreFdInfo->handleType; + + struct vk_sync *temporary = NULL, *sync; + if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) { + /* From the Vulkan 1.2.194 spec: + * + * VUID-VkImportSemaphoreFdInfoKHR-flags-03323 + * + * "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the + * VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore + * from which handle or name was exported must not be + * VK_SEMAPHORE_TYPE_TIMELINE" + */ + if (unlikely(semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE)) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "Cannot temporarily import into a timeline " + "semaphore"); + } + + const struct vk_sync_type *sync_type = + get_semaphore_sync_type(device->physical, semaphore->type, handle_type); + + VkResult result = vk_sync_create(device, sync_type, 0 /* flags */, + 0 /* initial_value */, &temporary); + if (result != VK_SUCCESS) + return result; + + sync = temporary; + } else { + sync = &semaphore->permanent; + } + assert(handle_type & + vk_sync_semaphore_handle_types(sync->type, semaphore->type)); + + VkResult result; + switch (pImportSemaphoreFdInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: + result = vk_sync_import_opaque_fd(device, sync, fd); + break; + + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: + result = vk_sync_import_sync_file(device, sync, fd); + break; + + default: + result = vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + + if (result != VK_SUCCESS) { + if (temporary != NULL) + vk_sync_destroy(device, temporary); + return result; + } + + /* From the Vulkan 1.2.194 spec: + * + * "Importing a semaphore payload from a file descriptor transfers + * ownership of the file descriptor from the application to the Vulkan + * implementation. The application must not perform any operations on + * the file descriptor after a successful import." + * + * If the import fails, we leave the file descriptor open. + */ + if (fd != -1) + close(fd); + + /* From a spec correctness point of view, we could probably replace the + * semaphore's temporary payload with the new vk_sync at the top. However, + * we choose to be nice to applications and only replace the semaphore if + * the import succeeded. 
+ */ + if (temporary) { + vk_semaphore_reset_temporary(device, semaphore); + semaphore->temporary = temporary; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetSemaphoreFdKHR(VkDevice _device, + const VkSemaphoreGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_semaphore, semaphore, pGetFdInfo->semaphore); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR); + + struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore); + + VkResult result; + switch (pGetFdInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: + result = vk_sync_export_opaque_fd(device, sync, pFd); + if (result != VK_SUCCESS) + return result; + break; + + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: + /* From the Vulkan 1.2.194 spec: + * + * VUID-VkSemaphoreGetFdInfoKHR-handleType-03253 + * + * "If handleType refers to a handle type with copy payload + * transference semantics, semaphore must have been created with a + * VkSemaphoreType of VK_SEMAPHORE_TYPE_BINARY." + */ + if (unlikely(semaphore->type != VK_SEMAPHORE_TYPE_BINARY)) { + return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE, + "Cannot export a timeline semaphore as SYNC_FD"); + } + + /* From the Vulkan 1.2.194 spec: + * VUID-VkSemaphoreGetFdInfoKHR-handleType-03254 + * + * "If handleType refers to a handle type with copy payload + * transference semantics, semaphore must have an associated + * semaphore signal operation that has been submitted for execution + * and any semaphore signal operations on which it depends (if any) + * must have also been submitted for execution." + * + * If we have real timelines, it's possible that the time point doesn't + * exist yet and is waiting for one of our submit threads to trigger. + * However, thanks to the above bit of spec text, that wait should never + * block for long. + */ + if (vk_device_supports_threaded_submit(device)) { + result = vk_sync_wait(device, sync, 0, + VK_SYNC_WAIT_PENDING, + UINT64_MAX); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + result = vk_sync_export_sync_file(device, sync, pFd); + if (unlikely(result != VK_SUCCESS)) + return result; + + /* From the Vulkan 1.2.194 spec: + * + * "Export operations have the same transference as the specified + * handle type’s import operations. Additionally, exporting a + * semaphore payload to a handle with copy transference has the same + * side effects on the source semaphore’s payload as executing a + * semaphore wait operation." + * + * In other words, exporting a sync file also resets the semaphore. We + * only care about this for the permanent payload because the temporary + * payload will be destroyed below. + */ + if (sync == &semaphore->permanent) { + result = vk_sync_reset(device, sync); + if (unlikely(result != VK_SUCCESS)) + return result; + } + break; + + default: + unreachable("Invalid semaphore export handle type"); + } + + /* From the Vulkan 1.2.194 spec: + * + * "Export operations have the same transference as the specified + * handle type’s import operations. [...] If the semaphore was using + * a temporarily imported payload, the semaphore’s prior permanent + * payload will be restored." 
+ */ + vk_semaphore_reset_temporary(device, semaphore); + + return VK_SUCCESS; +} + +#endif /* !defined(_WIN32) */ diff --git a/src/vulkan/runtime/vk_semaphore.h b/src/vulkan/runtime/vk_semaphore.h new file mode 100644 index 00000000000..141f39a3f4d --- /dev/null +++ b/src/vulkan/runtime/vk_semaphore.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_SEMAPHORE_H +#define VK_SEMAPHORE_H + +#include "vk_object.h" +#include "vk_sync.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_sync; + +struct vk_semaphore { + struct vk_object_base base; + + /** VkSemaphoreTypeCreateInfo::semaphoreType */ + VkSemaphoreType type; + + /* Temporary semaphore state. + * + * A semaphore *may* have temporary state. That state is added to the + * semaphore by an import operation and is reset back to NULL when the + * semaphore is reset. A semaphore with temporary state cannot be signaled + * because the semaphore must already be signaled before the temporary + * state can be exported from the semaphore in the other process and + * imported here. + */ + struct vk_sync *temporary; + + /** Permanent semaphore state. + * + * Every semaphore has some form of permanent state. + * + * This field must be last + */ + alignas(8) struct vk_sync permanent; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_semaphore, base, VkSemaphore, + VK_OBJECT_TYPE_SEMAPHORE); + +void vk_semaphore_reset_temporary(struct vk_device *device, + struct vk_semaphore *semaphore); + +static inline struct vk_sync * +vk_semaphore_get_active_sync(struct vk_semaphore *semaphore) +{ + return semaphore->temporary ? semaphore->temporary : &semaphore->permanent; +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SEMAPHORE_H */ + diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c new file mode 100644 index 00000000000..d124113a0cc --- /dev/null +++ b/src/vulkan/runtime/vk_shader.c @@ -0,0 +1,573 @@ +/* + * Copyright © 2024 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_shader.h" + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_common_entrypoints.h" +#include "vk_descriptor_set_layout.h" +#include "vk_device.h" +#include "vk_nir.h" +#include "vk_physical_device.h" +#include "vk_pipeline.h" + +#include "util/mesa-sha1.h" + +void * +vk_shader_zalloc(struct vk_device *device, + const struct vk_shader_ops *ops, + gl_shader_stage stage, + const VkAllocationCallbacks *alloc, + size_t size) +{ + /* For internal allocations, we need to allocate from the device scope + * because they might be put in pipeline caches. Importantly, it is + * impossible for the client to get at this pointer and we apply this + * heuristic before we account for allocation fallbacks so this will only + * ever happen for internal shader objects. + */ + const VkSystemAllocationScope alloc_scope = + alloc == &device->alloc ?
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE + : VK_SYSTEM_ALLOCATION_SCOPE_OBJECT; + + struct vk_shader *shader = vk_zalloc2(&device->alloc, alloc, size, 8, + alloc_scope); + if (shader == NULL) + return NULL; + + vk_object_base_init(device, &shader->base, VK_OBJECT_TYPE_SHADER_EXT); + shader->ops = ops; + shader->stage = stage; + + return shader; +} + +void +vk_shader_free(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_shader *shader) +{ + vk_object_base_finish(&shader->base); + vk_free2(&device->alloc, alloc, shader); +} + +int +vk_shader_cmp_graphics_stages(gl_shader_stage a, gl_shader_stage b) +{ + static const int stage_order[MESA_SHADER_MESH + 1] = { + [MESA_SHADER_VERTEX] = 1, + [MESA_SHADER_TESS_CTRL] = 2, + [MESA_SHADER_TESS_EVAL] = 3, + [MESA_SHADER_GEOMETRY] = 4, + [MESA_SHADER_TASK] = 5, + [MESA_SHADER_MESH] = 6, + [MESA_SHADER_FRAGMENT] = 7, + }; + + assert(a < ARRAY_SIZE(stage_order) && stage_order[a] > 0); + assert(b < ARRAY_SIZE(stage_order) && stage_order[b] > 0); + + return stage_order[a] - stage_order[b]; +} + +struct stage_idx { + gl_shader_stage stage; + uint32_t idx; +}; + +static int +cmp_stage_idx(const void *_a, const void *_b) +{ + const struct stage_idx *a = _a, *b = _b; + return vk_shader_cmp_graphics_stages(a->stage, b->stage); +} + +static nir_shader * +vk_shader_to_nir(struct vk_device *device, + const VkShaderCreateInfoEXT *info, + const struct vk_pipeline_robustness_state *rs) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + + const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage); + const nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage, rs); + struct spirv_to_nir_options spirv_options = + ops->get_spirv_options(device->physical, stage, rs); + + enum gl_subgroup_size subgroup_size = vk_get_subgroup_size( + vk_spirv_version(info->pCode, info->codeSize), + stage, info->pNext, + info->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, + info->flags &VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT); + + nir_shader *nir = vk_spirv_to_nir(device, + info->pCode, info->codeSize, + stage, info->pName, + subgroup_size, + info->pSpecializationInfo, + &spirv_options, nir_options, + false /* internal */, NULL); + if (nir == NULL) + return NULL; + + if (ops->preprocess_nir != NULL) + ops->preprocess_nir(device->physical, nir); + + return nir; +} + +struct set_layouts { + struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS]; +}; + +static void +vk_shader_compile_info_init(struct vk_shader_compile_info *info, + struct set_layouts *set_layouts, + const VkShaderCreateInfoEXT *vk_info, + const struct vk_pipeline_robustness_state *rs, + nir_shader *nir) +{ + for (uint32_t sl = 0; sl < vk_info->setLayoutCount; sl++) { + set_layouts->set_layouts[sl] = + vk_descriptor_set_layout_from_handle(vk_info->pSetLayouts[sl]); + } + + *info = (struct vk_shader_compile_info) { + .stage = nir->info.stage, + .flags = vk_info->flags, + .next_stage_mask = vk_info->nextStage, + .nir = nir, + .robustness = rs, + .set_layout_count = vk_info->setLayoutCount, + .set_layouts = set_layouts->set_layouts, + .push_constant_range_count = vk_info->pushConstantRangeCount, + .push_constant_ranges = vk_info->pPushConstantRanges, + }; +} + +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) +struct vk_shader_bin_header { + char mesavkshaderbin[16]; + VkDriverId driver_id; + uint8_t uuid[VK_UUID_SIZE]; + uint32_t version; + uint64_t size; + uint8_t sha1[SHA1_DIGEST_LENGTH]; + 
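/* Explicit tail padding: the uint64_t size member forces 8-byte alignment,
+ * so this keeps sizeof() at exactly 72 bytes with no compiler-inserted
+ * padding for -Wpadded to flag.
+ */
+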
uint32_t _pad; +}; +PRAGMA_DIAGNOSTIC_POP +static_assert(sizeof(struct vk_shader_bin_header) == 72, + "This struct has no holes"); + +static void +vk_shader_bin_header_init(struct vk_shader_bin_header *header, + struct vk_physical_device *device) +{ + *header = (struct vk_shader_bin_header) { + .mesavkshaderbin = "MesaVkShaderBin", + .driver_id = device->properties.driverID, + }; + + memcpy(header->uuid, device->properties.shaderBinaryUUID, VK_UUID_SIZE); + header->version = device->properties.shaderBinaryVersion; +} + +static VkResult +vk_shader_serialize(struct vk_device *device, + struct vk_shader *shader, + struct blob *blob) +{ + struct vk_shader_bin_header header; + vk_shader_bin_header_init(&header, device->physical); + + ASSERTED intptr_t header_offset = blob_reserve_bytes(blob, sizeof(header)); + assert(header_offset == 0); + + bool success = shader->ops->serialize(device, shader, blob); + if (!success || blob->out_of_memory) + return VK_INCOMPLETE; + + /* Finalize and write the header */ + header.size = blob->size; + if (blob->data != NULL) { + assert(sizeof(header) <= blob->size); + + struct mesa_sha1 sha1_ctx; + _mesa_sha1_init(&sha1_ctx); + + /* Hash the header with a zero SHA1 */ + _mesa_sha1_update(&sha1_ctx, &header, sizeof(header)); + + /* Hash the serialized data */ + _mesa_sha1_update(&sha1_ctx, blob->data + sizeof(header), + blob->size - sizeof(header)); + + _mesa_sha1_final(&sha1_ctx, header.sha1); + + blob_overwrite_bytes(blob, header_offset, &header, sizeof(header)); + } + + return VK_SUCCESS; +} + +static VkResult +vk_shader_deserialize(struct vk_device *device, + size_t data_size, const void *data, + const VkAllocationCallbacks* pAllocator, + struct vk_shader **shader_out) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + + struct blob_reader blob; + blob_reader_init(&blob, data, data_size); + + struct vk_shader_bin_header header, ref_header; + blob_copy_bytes(&blob, &header, sizeof(header)); + if (blob.overrun) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + vk_shader_bin_header_init(&ref_header, device->physical); + + if (memcmp(header.mesavkshaderbin, ref_header.mesavkshaderbin, + sizeof(header.mesavkshaderbin))) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + if (header.driver_id != ref_header.driver_id) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + if (memcmp(header.uuid, ref_header.uuid, sizeof(header.uuid))) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + /* From the Vulkan 1.3.276 spec: + * + * "Guaranteed compatibility of shader binaries is expressed through a + * combination of the shaderBinaryUUID and shaderBinaryVersion members + * of the VkPhysicalDeviceShaderObjectPropertiesEXT structure queried + * from a physical device. Binary shaders retrieved from a physical + * device with a certain shaderBinaryUUID are guaranteed to be + * compatible with all other physical devices reporting the same + * shaderBinaryUUID and the same or higher shaderBinaryVersion." + * + * We handle the version check here on behalf of the driver and then pass + * the version into the driver's deserialize callback. + * + * If a driver doesn't want to mess with versions, they can always make the + * UUID a hash and always report version 0 and that will make this check + * effectively a no-op. + */ + if (header.version > ref_header.version) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + /* Reject shader binaries that are the wrong size. 
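+ *
+ * header.size was recorded by vk_shader_serialize() as the total blob size,
+ * so any mismatch here means the client handed back a truncated or padded
+ * binary.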
*/ + if (header.size != data_size) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + assert(blob.current == (uint8_t *)data + sizeof(header)); + blob.end = (uint8_t *)data + data_size; + + struct mesa_sha1 sha1_ctx; + _mesa_sha1_init(&sha1_ctx); + + /* Hash the header with a zero SHA1 */ + struct vk_shader_bin_header sha1_header = header; + memset(sha1_header.sha1, 0, sizeof(sha1_header.sha1)); + _mesa_sha1_update(&sha1_ctx, &sha1_header, sizeof(sha1_header)); + + /* Hash the serialized data */ + _mesa_sha1_update(&sha1_ctx, (uint8_t *)data + sizeof(header), + data_size - sizeof(header)); + + _mesa_sha1_final(&sha1_ctx, ref_header.sha1); + if (memcmp(header.sha1, ref_header.sha1, sizeof(header.sha1))) + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + /* We've now verified that the header matches and that the data has the + * right SHA1 hash so it's safe to call into the driver. + */ + return ops->deserialize(device, &blob, header.version, + pAllocator, shader_out); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetShaderBinaryDataEXT(VkDevice _device, + VkShaderEXT _shader, + size_t *pDataSize, + void *pData) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_shader, shader, _shader); + VkResult result; + + /* From the Vulkan 1.3.275 spec: + * + * "If pData is NULL, then the size of the binary shader code of the + * shader object, in bytes, is returned in pDataSize. Otherwise, + * pDataSize must point to a variable set by the user to the size of the + * buffer, in bytes, pointed to by pData, and on return the variable is + * overwritten with the amount of data actually written to pData. If + * pDataSize is less than the size of the binary shader code, nothing is + * written to pData, and VK_INCOMPLETE will be returned instead of + * VK_SUCCESS." + * + * This is annoying. Unlike basically every other Vulkan data return + * method, we're not allowed to overwrite the client-provided memory region + * on VK_INCOMPLETE. This means we either need to query the blob size + * up-front by serializing twice or we need to serialize into temporary + * memory and memcpy into the client-provided region. We choose the first + * approach. + * + * In the common case, this means that vk_shader_ops::serialize will get + * called 3 times: Once for the client to get the size, once for us to + * validate the client's size, and once to actually write the data. It's a + * bit heavy-weight but this shouldn't be in a hot path and this is better + * for memory efficiency. Also, the vk_shader_ops::serialize should be + * pretty fast on a null blob. + */ + struct blob blob; + blob_init_fixed(&blob, NULL, SIZE_MAX); + result = vk_shader_serialize(device, shader, &blob); + assert(result == VK_SUCCESS); + + if (result != VK_SUCCESS) { + *pDataSize = 0; + return result; + } else if (pData == NULL) { + *pDataSize = blob.size; + return VK_SUCCESS; + } else if (blob.size > *pDataSize) { + /* No data written */ + *pDataSize = 0; + return VK_INCOMPLETE; + } + + blob_init_fixed(&blob, pData, *pDataSize); + result = vk_shader_serialize(device, shader, &blob); + assert(result == VK_SUCCESS); + + *pDataSize = blob.size; + + return result; +} + +/* The only place where we have "real" linking is graphics shaders and there + * is a limit as to how many of them can be linked together at one time. 
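+ *
+ * That bound lets vk_common_CreateShadersEXT() collect the linked stages in
+ * a small fixed-size stack array below instead of allocating.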
+ */ +#define VK_MAX_LINKED_SHADER_STAGES MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateShadersEXT(VkDevice _device, + uint32_t createInfoCount, + const VkShaderCreateInfoEXT *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkShaderEXT *pShaders) +{ + VK_FROM_HANDLE(vk_device, device, _device); + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult first_fail_or_success = VK_SUCCESS; + + struct vk_pipeline_robustness_state rs = { + .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT, + }; + + /* From the Vulkan 1.3.274 spec: + * + * "When this function returns, whether or not it succeeds, it is + * guaranteed that every element of pShaders will have been overwritten + * by either VK_NULL_HANDLE or a valid VkShaderEXT handle." + * + * Zeroing up-front makes the error path easier. + */ + memset(pShaders, 0, createInfoCount * sizeof(*pShaders)); + + bool has_linked_spirv = false; + for (uint32_t i = 0; i < createInfoCount; i++) { + if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT && + (pCreateInfos[i].flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT)) + has_linked_spirv = true; + } + + uint32_t linked_count = 0; + struct stage_idx linked[VK_MAX_LINKED_SHADER_STAGES]; + + for (uint32_t i = 0; i < createInfoCount; i++) { + const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[i]; + VkResult result = VK_SUCCESS; + + switch (vk_info->codeType) { + case VK_SHADER_CODE_TYPE_BINARY_EXT: { + /* This isn't required by Vulkan but we're allowed to fail binary + * import for basically any reason. This seems like a pretty good + * reason. 
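+ *
+ * A pre-compiled binary cannot be re-linked against SPIR-V stages we are
+ * about to compile in this same call, so reject the mix up front.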
+ */ + if (has_linked_spirv && + (vk_info->flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT)) { + result = vk_errorf(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT, + "Cannot mix linked binary and SPIR-V"); + break; + } + + struct vk_shader *shader; + result = vk_shader_deserialize(device, vk_info->codeSize, + vk_info->pCode, pAllocator, + &shader); + if (result != VK_SUCCESS) + break; + + pShaders[i] = vk_shader_to_handle(shader); + break; + } + + case VK_SHADER_CODE_TYPE_SPIRV_EXT: { + if (vk_info->flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT) { + /* Stash it and compile later */ + assert(linked_count < ARRAY_SIZE(linked)); + linked[linked_count++] = (struct stage_idx) { + .stage = vk_to_mesa_shader_stage(vk_info->stage), + .idx = i, + }; + } else { + nir_shader *nir = vk_shader_to_nir(device, vk_info, &rs); + if (nir == NULL) { + result = vk_errorf(device, VK_ERROR_UNKNOWN, + "Failed to compile shader to NIR"); + break; + } + + struct vk_shader_compile_info info; + struct set_layouts set_layouts; + vk_shader_compile_info_init(&info, &set_layouts, + vk_info, &rs, nir); + + struct vk_shader *shader; + result = ops->compile(device, 1, &info, NULL /* state */, + pAllocator, &shader); + if (result != VK_SUCCESS) + break; + + pShaders[i] = vk_shader_to_handle(shader); + } + break; + } + + default: + unreachable("Unknown shader code type"); + } + + if (first_fail_or_success == VK_SUCCESS) + first_fail_or_success = result; + } + + if (linked_count > 0) { + struct set_layouts set_layouts[VK_MAX_LINKED_SHADER_STAGES]; + struct vk_shader_compile_info infos[VK_MAX_LINKED_SHADER_STAGES]; + VkResult result = VK_SUCCESS; + + /* Sort so we guarantee the driver always gets them in-order */ + qsort(linked, linked_count, sizeof(*linked), cmp_stage_idx); + + /* Memset for easy error handling */ + memset(infos, 0, sizeof(infos)); + + for (uint32_t l = 0; l < linked_count; l++) { + const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[linked[l].idx]; + + nir_shader *nir = vk_shader_to_nir(device, vk_info, &rs); + if (nir == NULL) { + result = vk_errorf(device, VK_ERROR_UNKNOWN, + "Failed to compile shader to NIR"); + break; + } + + vk_shader_compile_info_init(&infos[l], &set_layouts[l], + vk_info, &rs, nir); + } + + if (result == VK_SUCCESS) { + struct vk_shader *shaders[VK_MAX_LINKED_SHADER_STAGES]; + + result = ops->compile(device, linked_count, infos, NULL /* state */, + pAllocator, shaders); + if (result == VK_SUCCESS) { + for (uint32_t l = 0; l < linked_count; l++) + pShaders[linked[l].idx] = vk_shader_to_handle(shaders[l]); + } + } else { + for (uint32_t l = 0; l < linked_count; l++) { + if (infos[l].nir != NULL) + ralloc_free(infos[l].nir); + } + } + + if (first_fail_or_success == VK_SUCCESS) + first_fail_or_success = result; + } + + return first_fail_or_success; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyShaderEXT(VkDevice _device, + VkShaderEXT _shader, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_shader, shader, _shader); + + if (shader == NULL) + return; + + vk_shader_destroy(device, shader, pAllocator); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdBindShadersEXT(VkCommandBuffer commandBuffer, + uint32_t stageCount, + const VkShaderStageFlagBits *pStages, + const VkShaderEXT *pShaders) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + STACK_ARRAY(gl_shader_stage, stages, stageCount); + 
STACK_ARRAY(struct vk_shader *, shaders, stageCount); + + VkShaderStageFlags vk_stages = 0; + for (uint32_t i = 0; i < stageCount; i++) { + vk_stages |= pStages[i]; + stages[i] = vk_to_mesa_shader_stage(pStages[i]); + shaders[i] = pShaders != NULL ? vk_shader_from_handle(pShaders[i]) : NULL; + } + + vk_cmd_unbind_pipelines_for_stages(cmd_buffer, vk_stages); + if (vk_stages & ~VK_SHADER_STAGE_COMPUTE_BIT) + vk_cmd_set_rp_attachments(cmd_buffer, ~0); + + ops->cmd_bind_shaders(cmd_buffer, stageCount, stages, shaders); +} diff --git a/src/vulkan/runtime/vk_shader.h b/src/vulkan/runtime/vk_shader.h new file mode 100644 index 00000000000..8fb5090b129 --- /dev/null +++ b/src/vulkan/runtime/vk_shader.h @@ -0,0 +1,260 @@ +/* + * Copyright © 2024 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VK_SHADER_H +#define VK_SHADER_H + +#include "compiler/spirv/nir_spirv.h" +#include "vk_limits.h" +#include "vk_pipeline_cache.h" + +#include "util/mesa-blake3.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct blob; +struct nir_shader; +struct vk_command_buffer; +struct vk_device; +struct vk_descriptor_set_layout; +struct vk_dynamic_graphics_state; +struct vk_graphics_pipeline_state; +struct vk_physical_device; +struct vk_pipeline; +struct vk_pipeline_robustness_state; + +int vk_shader_cmp_graphics_stages(gl_shader_stage a, gl_shader_stage b); + +#define VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA 0x1000 + +struct vk_shader_compile_info { + gl_shader_stage stage; + VkShaderCreateFlagsEXT flags; + VkShaderStageFlags next_stage_mask; + struct nir_shader *nir; + + const struct vk_pipeline_robustness_state *robustness; + + uint32_t set_layout_count; + struct vk_descriptor_set_layout * const *set_layouts; + + uint32_t push_constant_range_count; + const VkPushConstantRange *push_constant_ranges; +}; + +struct vk_shader_ops; + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wpadded" +#endif +struct vk_shader_pipeline_cache_key { + gl_shader_stage stage; + blake3_hash blake3; +}; +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +struct vk_shader { + struct vk_object_base base; + + const struct vk_shader_ops *ops; + + gl_shader_stage stage; + + /* Used for the generic VkPipeline implementation */ + struct { + struct vk_pipeline_cache_object cache_obj; + struct vk_shader_pipeline_cache_key cache_key; + } pipeline; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_shader, base, VkShaderEXT, + VK_OBJECT_TYPE_SHADER_EXT); + +struct vk_shader_ops { + /** Destroy a vk_shader_object */ + void (*destroy)(struct vk_device *device, + struct vk_shader *shader, + const VkAllocationCallbacks* pAllocator); + + /** Serialize a vk_shader_object to a blob + * + * This function shouldn't need to do any validation of the blob data + * beyond basic sanity checking. The common implementation of + * vkGetShaderBinaryEXT verifies the blobUUID and version of input data as + * well as a size and checksum to ensure integrity. This callback is only + * invoked after validation of the input binary data. + */ + bool (*serialize)(struct vk_device *device, + const struct vk_shader *shader, + struct blob *blob); + + /** Returns executable properties for this shader + * + * This is equivalent to vkGetPipelineExecutableProperties(), only for a + * single vk_shader. + */ + VkResult (*get_executable_properties)(struct vk_device *device, + const struct vk_shader *shader, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties); + + /** Returns executable statistics for this shader + * + * This is equivalent to vkGetPipelineExecutableStatistics(), only for a + * single vk_shader. + */ + VkResult (*get_executable_statistics)(struct vk_device *device, + const struct vk_shader *shader, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics); + + /** Returns executable internal representations for this shader + * + * This is equivalent to vkGetPipelineExecutableInternalRepresentations(), + * only for a single vk_shader. 
+ */ + VkResult (*get_executable_internal_representations)( + struct vk_device *device, + const struct vk_shader *shader, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR *internal_representations); +}; + +void *vk_shader_zalloc(struct vk_device *device, + const struct vk_shader_ops *ops, + gl_shader_stage stage, + const VkAllocationCallbacks *alloc, + size_t size); +void vk_shader_free(struct vk_device *device, + const VkAllocationCallbacks *alloc, + struct vk_shader *shader); + +static inline void +vk_shader_destroy(struct vk_device *device, + struct vk_shader *shader, + const VkAllocationCallbacks *alloc) +{ + shader->ops->destroy(device, shader, alloc); +} + +struct vk_device_shader_ops { + /** Retrieves a NIR compiler options struct + * + * NIR compiler options are only allowed to vary based on physical device, + * stage, and robustness state. + */ + const struct nir_shader_compiler_options *(*get_nir_options)( + struct vk_physical_device *device, + gl_shader_stage stage, + const struct vk_pipeline_robustness_state *rs); + + /** Retrieves a SPIR-V options struct + * + * SPIR-V options are only allowed to vary based on physical device, stage, + * and robustness state. + */ + struct spirv_to_nir_options (*get_spirv_options)( + struct vk_physical_device *device, + gl_shader_stage stage, + const struct vk_pipeline_robustness_state *rs); + + /** Preprocesses a NIR shader + * + * This callback is optional. + * + * If non-NULL, this callback is invoked after the SPIR-V is parsed into + * NIR and before it is handed to compile(). The driver should do as much + * generic optimization and lowering as it can here. Importantly, the + * preprocess step only knows about the NIR input and the physical device, + * not any enabled device features or pipeline state. This allows us to + * potentially cache this shader and re-use it across pipelines. + */ + void (*preprocess_nir)(struct vk_physical_device *device, nir_shader *nir); + + /** True if the driver wants geometry stages linked + * + * If set to true, geometry stages will always be compiled with + * VK_SHADER_CREATE_LINK_STAGE_BIT_EXT when pipelines are used. + */ + bool link_geom_stages; + + /** Hash a vk_graphics_state object + * + * This callback hashes whatever bits of vk_graphics_pipeline_state might + * be used to compile a shader in one of the given stages. + */ + void (*hash_graphics_state)(struct vk_physical_device *device, + const struct vk_graphics_pipeline_state *state, + VkShaderStageFlags stages, + blake3_hash blake3_out); + + /** Compile (and potentially link) a set of shaders + * + * Unlike vkCreateShadersEXT, this callback will only ever be called with + * multiple shaders if VK_SHADER_CREATE_LINK_STAGE_BIT_EXT is set on all of + * them. We also guarantee that the shaders occur in the call in Vulkan + * pipeline stage order as dictated by vk_shader_cmp_graphics_stages(). + * + * This callback consumes all input NIR shaders, regardless of whether or + * not it was successful. 
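+ *
+ * "Consumes" means the driver takes ownership of every infos[i].nir and
+ * must free it (typically with ralloc_free()) even on failure, e.g. an
+ * implementation might roughly do (illustrative sketch only):
+ *
+ *    for (uint32_t i = 0; i < shader_count; i++)
+ *       ralloc_free(infos[i].nir);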
+ */ + VkResult (*compile)(struct vk_device *device, + uint32_t shader_count, + struct vk_shader_compile_info *infos, + const struct vk_graphics_pipeline_state *state, + const VkAllocationCallbacks* pAllocator, + struct vk_shader **shaders_out); + + /** Create a vk_shader from a binary blob */ + VkResult (*deserialize)(struct vk_device *device, + struct blob_reader *blob, + uint32_t binary_version, + const VkAllocationCallbacks* pAllocator, + struct vk_shader **shader_out); + + /** Bind a set of shaders + * + * This is roughly equivalent to vkCmdBindShadersEXT() + */ + void (*cmd_bind_shaders)(struct vk_command_buffer *cmd_buffer, + uint32_t stage_count, + const gl_shader_stage *stages, + struct vk_shader ** const shaders); + + /** Sets dynamic state */ + void (*cmd_set_dynamic_graphics_state)(struct vk_command_buffer *cmd_buffer, + const struct vk_dynamic_graphics_state *state); +}; + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SHADER_H */ diff --git a/src/vulkan/runtime/vk_shader_module.c b/src/vulkan/runtime/vk_shader_module.c new file mode 100644 index 00000000000..556d4f0bf2a --- /dev/null +++ b/src/vulkan/runtime/vk_shader_module.c @@ -0,0 +1,147 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_shader_module.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_log.h" +#include "vk_nir.h" +#include "vk_pipeline.h" +#include "vk_util.h" + +void vk_shader_module_init(struct vk_device *device, + struct vk_shader_module *module, + const VkShaderModuleCreateInfo *create_info) +{ + vk_object_base_init(device, &module->base, VK_OBJECT_TYPE_SHADER_MODULE); + + module->nir = NULL; + + module->size = create_info->codeSize; + memcpy(module->data, create_info->pCode, module->size); + + _mesa_blake3_compute(module->data, module->size, module->hash); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateShaderModule(VkDevice _device, + const VkShaderModuleCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkShaderModule *pShaderModule) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = vk_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + vk_shader_module_init(device, module, pCreateInfo); + + *pShaderModule = vk_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +const uint8_t vk_shaderModuleIdentifierAlgorithmUUID[VK_UUID_SIZE] = "MESA-BLAKE3"; + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetShaderModuleIdentifierEXT(VkDevice _device, + VkShaderModule _module, + VkShaderModuleIdentifierEXT *pIdentifier) +{ + VK_FROM_HANDLE(vk_shader_module, module, _module); + memcpy(pIdentifier->identifier, module->hash, sizeof(module->hash)); + pIdentifier->identifierSize = sizeof(module->hash); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetShaderModuleCreateInfoIdentifierEXT(VkDevice _device, + const VkShaderModuleCreateInfo *pCreateInfo, + VkShaderModuleIdentifierEXT *pIdentifier) +{ + _mesa_blake3_compute(pCreateInfo->pCode, pCreateInfo->codeSize, + pIdentifier->identifier); + pIdentifier->identifierSize = sizeof(blake3_hash); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroyShaderModule(VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_shader_module, module, _module); + + if (!module) + return; + + /* NIR modules (which are only created internally by the driver) are not + * dynamically allocated so we should never call this for them. + * Instead the driver is responsible for freeing the NIR code when it is + * no longer needed. 
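+ *
+ * Hence the assert below: a module still carrying driver-internal NIR
+ * should never reach vkDestroyShaderModule().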
+ */ + assert(module->nir == NULL); + + vk_object_free(device, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +uint32_t +vk_shader_module_spirv_version(const struct vk_shader_module *mod) +{ + if (mod->nir != NULL) + return 0; + + return vk_spirv_version((uint32_t *)mod->data, mod->size); +} + +VkResult +vk_shader_module_to_nir(struct vk_device *device, + const struct vk_shader_module *mod, + gl_shader_stage stage, + const char *entrypoint_name, + const VkSpecializationInfo *spec_info, + const struct spirv_to_nir_options *spirv_options, + const nir_shader_compiler_options *nir_options, + void *mem_ctx, nir_shader **nir_out) +{ + const VkPipelineShaderStageCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = mesa_to_vk_shader_stage(stage), + .module = vk_shader_module_to_handle((struct vk_shader_module *)mod), + .pName = entrypoint_name, + .pSpecializationInfo = spec_info, + }; + return vk_pipeline_shader_stage_to_nir(device, &info, + spirv_options, nir_options, + mem_ctx, nir_out); +} diff --git a/src/vulkan/runtime/vk_shader_module.h b/src/vulkan/runtime/vk_shader_module.h new file mode 100644 index 00000000000..c5c81cf3778 --- /dev/null +++ b/src/vulkan/runtime/vk_shader_module.h @@ -0,0 +1,86 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VK_SHADER_MODULE_H +#define VK_SHADER_MODULE_H + +#include <vulkan/vulkan_core.h> + +#include "util/mesa-blake3.h" +#include "compiler/shader_enums.h" +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nir_shader; +struct nir_shader_compiler_options; +struct spirv_to_nir_options; + +struct vk_shader_module { + struct vk_object_base base; + struct nir_shader *nir; + blake3_hash hash; + uint32_t size; + char data[0]; +}; + +extern const uint8_t vk_shaderModuleIdentifierAlgorithmUUID[VK_UUID_SIZE]; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_shader_module, base, VkShaderModule, + VK_OBJECT_TYPE_SHADER_MODULE) + +void vk_shader_module_init(struct vk_device *device, + struct vk_shader_module *module, + const VkShaderModuleCreateInfo *create_info); + +uint32_t vk_shader_module_spirv_version(const struct vk_shader_module *mod); + +VkResult +vk_shader_module_to_nir(struct vk_device *device, + const struct vk_shader_module *mod, + gl_shader_stage stage, + const char *entrypoint_name, + const VkSpecializationInfo *spec_info, + const struct spirv_to_nir_options *spirv_options, + const struct nir_shader_compiler_options *nir_options, + void *mem_ctx, struct nir_shader **nir_out); + +/* this should only be used for stack-allocated, temporary objects */ +#define vk_shader_module_handle_from_nir(_nir) \ + ((VkShaderModule)(uintptr_t)&(struct vk_shader_module) { \ + .base.type = VK_OBJECT_TYPE_SHADER_MODULE, \ + .nir = _nir, \ + }) +#define vk_shader_module_from_nir(_nir) \ + (struct vk_shader_module) { \ + .base.type = VK_OBJECT_TYPE_SHADER_MODULE, \ + .nir = _nir, \ + } + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SHADER_MODULE_H */ diff --git a/src/vulkan/runtime/vk_standard_sample_locations.c b/src/vulkan/runtime/vk_standard_sample_locations.c new file mode 100644 index 00000000000..de3fa499301 --- /dev/null +++ b/src/vulkan/runtime/vk_standard_sample_locations.c @@ -0,0 +1,156 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_standard_sample_locations.h" + +#include "vk_graphics_state.h" + +/** + * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8). 
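+ *
+ * The hex pairs in these comments are positions on the 1/16th-pixel grid
+ * used by the standard sample location tables, e.g. 0.5 * 16 = 0x8.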
+ */ +static const struct vk_sample_locations_state sample_locations_state_1x = { + .per_pixel = VK_SAMPLE_COUNT_1_BIT, + .grid_size = { 1, 1 }, + .locations = { + { 0.5, 0.5 }, + }, +}; + + +/** + * 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75): + * 4 c + * 4 0 + * c 1 + */ +static const struct vk_sample_locations_state sample_locations_state_2x = { + .per_pixel = VK_SAMPLE_COUNT_2_BIT, + .grid_size = { 1, 1 }, + .locations = { + { 0.75, 0.75 }, + { 0.25, 0.25 }, + }, +}; + +/** + * Sample positions: + * 2 6 a e + * 2 0 + * 6 1 + * a 2 + * e 3 + */ +static const struct vk_sample_locations_state sample_locations_state_4x = { + .per_pixel = VK_SAMPLE_COUNT_4_BIT, + .grid_size = { 1, 1 }, + .locations = { + { 0.375, 0.125 }, + { 0.875, 0.375 }, + { 0.125, 0.625 }, + { 0.625, 0.875 }, + }, +}; + +/** + * Sample positions: + * 1 3 5 7 9 b d f + * 1 7 + * 3 3 + * 5 0 + * 7 5 + * 9 2 + * b 1 + * d 4 + * f 6 + */ +static const struct vk_sample_locations_state sample_locations_state_8x = { + .per_pixel = VK_SAMPLE_COUNT_8_BIT, + .grid_size = { 1, 1 }, + .locations = { + { 0.5625, 0.3125 }, + { 0.4375, 0.6875 }, + { 0.8125, 0.5625 }, + { 0.3125, 0.1875 }, + { 0.1875, 0.8125 }, + { 0.0625, 0.4375 }, + { 0.6875, 0.9375 }, + { 0.9375, 0.0625 }, + }, +}; + +/** + * Sample positions: + * + * 0 1 2 3 4 5 6 7 8 9 a b c d e f + * 0 15 + * 1 9 + * 2 10 + * 3 7 + * 4 13 + * 5 1 + * 6 4 + * 7 3 + * 8 12 + * 9 0 + * a 2 + * b 6 + * c 11 + * d 5 + * e 8 + * f 14 + */ +static const struct vk_sample_locations_state sample_locations_state_16x = { + .per_pixel = VK_SAMPLE_COUNT_16_BIT, + .grid_size = { 1, 1 }, + .locations = { + { 0.5625, 0.5625 }, + { 0.4375, 0.3125 }, + { 0.3125, 0.6250 }, + { 0.7500, 0.4375 }, + { 0.1875, 0.3750 }, + { 0.6250, 0.8125 }, + { 0.8125, 0.6875 }, + { 0.6875, 0.1875 }, + { 0.3750, 0.8750 }, + { 0.5000, 0.0625 }, + { 0.2500, 0.1250 }, + { 0.1250, 0.7500 }, + { 0.0000, 0.5000 }, + { 0.9375, 0.2500 }, + { 0.8750, 0.9375 }, + { 0.0625, 0.0000 }, + }, +}; + +const struct vk_sample_locations_state * +vk_standard_sample_locations_state(VkSampleCountFlagBits sample_count) +{ + switch (sample_count) { + case 1: return &sample_locations_state_1x; + case 2: return &sample_locations_state_2x; + case 4: return &sample_locations_state_4x; + case 8: return &sample_locations_state_8x; + case 16: return &sample_locations_state_16x; + default: unreachable("Sample count has no standard locations"); + } +} diff --git a/src/vulkan/runtime/vk_standard_sample_locations.h b/src/vulkan/runtime/vk_standard_sample_locations.h new file mode 100644 index 00000000000..8dc57a4acd2 --- /dev/null +++ b/src/vulkan/runtime/vk_standard_sample_locations.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_STANDARD_SAMPLE_LOCATIONS_H +#define VK_STANDARD_SAMPLE_LOCATIONS_H + +#include "vulkan/vulkan_core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_sample_locations_state; + +/** Returns standard sample locations for a given sample count + * + * These are the sample locations defined in the Vulkan spec for when + * standardSampleLocations is supported. + */ +const struct vk_sample_locations_state* +vk_standard_sample_locations_state(VkSampleCountFlagBits sample_count); + + +#ifdef __cplusplus +} +#endif + +#endif /* VK_STANDARD_SAMPLE_LOCATIONS_H */ diff --git a/src/vulkan/runtime/vk_sync.c b/src/vulkan/runtime/vk_sync.c new file mode 100644 index 00000000000..da680ca8a10 --- /dev/null +++ b/src/vulkan/runtime/vk_sync.c @@ -0,0 +1,446 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_sync.h" + +#include <assert.h> +#include <string.h> + +#include "util/u_debug.h" +#include "util/macros.h" +#include "util/os_time.h" + +#include "vk_alloc.h" +#include "vk_device.h" +#include "vk_log.h" + +static void +vk_sync_type_validate(const struct vk_sync_type *type) +{ + assert(type->init); + assert(type->finish); + + assert(type->features & (VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_TIMELINE)); + + if (type->features & VK_SYNC_FEATURE_TIMELINE) { + assert(type->features & VK_SYNC_FEATURE_GPU_WAIT); + assert(type->features & VK_SYNC_FEATURE_CPU_WAIT); + assert(type->features & VK_SYNC_FEATURE_CPU_SIGNAL); + assert(type->features & (VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL | + VK_SYNC_FEATURE_WAIT_PENDING)); + assert(type->signal); + assert(type->get_value); + } + + if (!(type->features & VK_SYNC_FEATURE_BINARY)) { + assert(!(type->features & (VK_SYNC_FEATURE_GPU_MULTI_WAIT | + VK_SYNC_FEATURE_CPU_RESET))); + assert(!type->import_sync_file); + assert(!type->export_sync_file); + } + + if (type->features & VK_SYNC_FEATURE_CPU_WAIT) { + assert(type->wait || type->wait_many); + } else { + assert(!(type->features & (VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_PENDING))); + } + + if (type->features & VK_SYNC_FEATURE_GPU_MULTI_WAIT) + assert(type->features & VK_SYNC_FEATURE_GPU_WAIT); + + if (type->features & VK_SYNC_FEATURE_CPU_RESET) + assert(type->reset); + + if (type->features & VK_SYNC_FEATURE_CPU_SIGNAL) + assert(type->signal); +} + +VkResult +vk_sync_init(struct vk_device *device, + struct vk_sync *sync, + const struct vk_sync_type *type, + enum vk_sync_flags flags, + uint64_t initial_value) +{ + vk_sync_type_validate(type); + + if (flags & VK_SYNC_IS_TIMELINE) + assert(type->features & VK_SYNC_FEATURE_TIMELINE); + else + assert(type->features & VK_SYNC_FEATURE_BINARY); + + assert(type->size >= sizeof(*sync)); + memset(sync, 0, type->size); + sync->type = type; + sync->flags = flags; + + return type->init(device, sync, initial_value); +} + +void +vk_sync_finish(struct vk_device *device, + struct vk_sync *sync) +{ + sync->type->finish(device, sync); +} + +VkResult +vk_sync_create(struct vk_device *device, + const struct vk_sync_type *type, + enum vk_sync_flags flags, + uint64_t initial_value, + struct vk_sync **sync_out) +{ + struct vk_sync *sync; + + sync = vk_alloc(&device->alloc, type->size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (sync == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = vk_sync_init(device, sync, type, flags, initial_value); + if (result != VK_SUCCESS) { + vk_free(&device->alloc, sync); + return result; + } + + *sync_out = sync; + + return VK_SUCCESS; +} + +void +vk_sync_destroy(struct vk_device *device, + struct vk_sync *sync) +{ + vk_sync_finish(device, sync); + vk_free(&device->alloc, sync); +} + +VkResult +vk_sync_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value) +{ + assert(sync->type->features & VK_SYNC_FEATURE_CPU_SIGNAL); + + if (sync->flags & VK_SYNC_IS_TIMELINE) + assert(value > 0); + else + assert(value == 0); + + return sync->type->signal(device, sync, value); +} + +VkResult +vk_sync_get_value(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value) +{ + assert(sync->flags & VK_SYNC_IS_TIMELINE); + return sync->type->get_value(device, sync, value); +} + +VkResult +vk_sync_reset(struct vk_device *device, + struct vk_sync *sync) +{ + assert(sync->type->features & VK_SYNC_FEATURE_CPU_RESET); + assert(!(sync->flags & VK_SYNC_IS_TIMELINE)); + return 
sync->type->reset(device, sync); +} + +VkResult vk_sync_move(struct vk_device *device, + struct vk_sync *dst, + struct vk_sync *src) +{ + assert(!(dst->flags & VK_SYNC_IS_TIMELINE)); + assert(!(src->flags & VK_SYNC_IS_TIMELINE)); + assert(dst->type == src->type); + + return src->type->move(device, dst, src); +} + +static void +assert_valid_wait(struct vk_sync *sync, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags) +{ + assert(sync->type->features & VK_SYNC_FEATURE_CPU_WAIT); + + if (!(sync->flags & VK_SYNC_IS_TIMELINE)) + assert(wait_value == 0); + + if (wait_flags & VK_SYNC_WAIT_PENDING) + assert(sync->type->features & VK_SYNC_FEATURE_WAIT_PENDING); +} + +static uint64_t +get_max_abs_timeout_ns(void) +{ + static int max_timeout_ms = -1; + if (max_timeout_ms < 0) + max_timeout_ms = debug_get_num_option("MESA_VK_MAX_TIMEOUT", 0); + + if (max_timeout_ms == 0) + return UINT64_MAX; + else + return os_time_get_absolute_timeout(max_timeout_ms * 1000000ull); +} + +static VkResult +__vk_sync_wait(struct vk_device *device, + struct vk_sync *sync, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + assert_valid_wait(sync, wait_value, wait_flags); + + /* This doesn't make sense for a single wait */ + assert(!(wait_flags & VK_SYNC_WAIT_ANY)); + + if (sync->type->wait) { + return sync->type->wait(device, sync, wait_value, + wait_flags, abs_timeout_ns); + } else { + struct vk_sync_wait wait = { + .sync = sync, + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = wait_value, + }; + return sync->type->wait_many(device, 1, &wait, wait_flags, + abs_timeout_ns); + } +} + +VkResult +vk_sync_wait(struct vk_device *device, + struct vk_sync *sync, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + uint64_t max_abs_timeout_ns = get_max_abs_timeout_ns(); + if (abs_timeout_ns > max_abs_timeout_ns) { + VkResult result = + __vk_sync_wait(device, sync, wait_value, wait_flags, + max_abs_timeout_ns); + if (unlikely(result == VK_TIMEOUT)) + return vk_device_set_lost(device, "Maximum timeout exceeded!"); + return result; + } else { + return __vk_sync_wait(device, sync, wait_value, wait_flags, + abs_timeout_ns); + } +} + +static bool +can_wait_many(uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags) +{ + if (waits[0].sync->type->wait_many == NULL) + return false; + + if ((wait_flags & VK_SYNC_WAIT_ANY) && + !(waits[0].sync->type->features & VK_SYNC_FEATURE_WAIT_ANY)) + return false; + + for (uint32_t i = 0; i < wait_count; i++) { + assert_valid_wait(waits[i].sync, waits[i].wait_value, wait_flags); + if (waits[i].sync->type != waits[0].sync->type) + return false; + } + + return true; +} + +static VkResult +__vk_sync_wait_many(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + if (wait_count == 0) + return VK_SUCCESS; + + if (wait_count == 1) { + return __vk_sync_wait(device, waits[0].sync, waits[0].wait_value, + wait_flags & ~VK_SYNC_WAIT_ANY, abs_timeout_ns); + } + + if (can_wait_many(wait_count, waits, wait_flags)) { + return waits[0].sync->type->wait_many(device, wait_count, waits, + wait_flags, abs_timeout_ns); + } else if (wait_flags & VK_SYNC_WAIT_ANY) { + /* If we have multiple syncs and they don't support wait_any or they're + * not all the same type, there's nothing better we can do than spin. 
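+ *
+ * Each pass below polls every sync with a zero timeout and we keep looping
+ * until one of them stops returning VK_TIMEOUT or abs_timeout_ns expires.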
+ */ + do { + for (uint32_t i = 0; i < wait_count; i++) { + VkResult result = __vk_sync_wait(device, waits[i].sync, + waits[i].wait_value, + wait_flags & ~VK_SYNC_WAIT_ANY, + 0 /* abs_timeout_ns */); + if (result != VK_TIMEOUT) + return result; + } + } while (os_time_get_nano() < abs_timeout_ns); + + return VK_TIMEOUT; + } else { + for (uint32_t i = 0; i < wait_count; i++) { + VkResult result = __vk_sync_wait(device, waits[i].sync, + waits[i].wait_value, + wait_flags, abs_timeout_ns); + if (result != VK_SUCCESS) + return result; + } + return VK_SUCCESS; + } +} + +VkResult +vk_sync_wait_many(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + uint64_t max_abs_timeout_ns = get_max_abs_timeout_ns(); + if (abs_timeout_ns > max_abs_timeout_ns) { + VkResult result = + __vk_sync_wait_many(device, wait_count, waits, wait_flags, + max_abs_timeout_ns); + if (unlikely(result == VK_TIMEOUT)) + return vk_device_set_lost(device, "Maximum timeout exceeded!"); + return result; + } else { + return __vk_sync_wait_many(device, wait_count, waits, wait_flags, + abs_timeout_ns); + } +} + +VkResult +vk_sync_import_opaque_fd(struct vk_device *device, + struct vk_sync *sync, + int fd) +{ + VkResult result = sync->type->import_opaque_fd(device, sync, fd); + if (unlikely(result != VK_SUCCESS)) + return result; + + sync->flags |= VK_SYNC_IS_SHAREABLE | + VK_SYNC_IS_SHARED; + + return VK_SUCCESS; +} + +VkResult +vk_sync_export_opaque_fd(struct vk_device *device, + struct vk_sync *sync, + int *fd) +{ + assert(sync->flags & VK_SYNC_IS_SHAREABLE); + + VkResult result = sync->type->export_opaque_fd(device, sync, fd); + if (unlikely(result != VK_SUCCESS)) + return result; + + sync->flags |= VK_SYNC_IS_SHARED; + + return VK_SUCCESS; +} + +VkResult +vk_sync_import_sync_file(struct vk_device *device, + struct vk_sync *sync, + int sync_file) +{ + assert(!(sync->flags & VK_SYNC_IS_TIMELINE)); + + /* Silently handle negative file descriptors in case the driver doesn't + * want to bother. 
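+ *
+ * A sync_file of -1 means "already signaled", so signaling the payload
+ * directly is equivalent to importing it.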
+ */ + if (sync_file < 0 && sync->type->signal) + return sync->type->signal(device, sync, 0); + + return sync->type->import_sync_file(device, sync, sync_file); +} + +VkResult +vk_sync_export_sync_file(struct vk_device *device, + struct vk_sync *sync, + int *sync_file) +{ + assert(!(sync->flags & VK_SYNC_IS_TIMELINE)); + return sync->type->export_sync_file(device, sync, sync_file); +} + +VkResult +vk_sync_import_win32_handle(struct vk_device *device, + struct vk_sync *sync, + void *handle, + const wchar_t *name) +{ + VkResult result = sync->type->import_win32_handle(device, sync, handle, name); + if (unlikely(result != VK_SUCCESS)) + return result; + + sync->flags |= VK_SYNC_IS_SHAREABLE | + VK_SYNC_IS_SHARED; + + return VK_SUCCESS; +} + +VkResult +vk_sync_export_win32_handle(struct vk_device *device, + struct vk_sync *sync, + void **handle) +{ + assert(sync->flags & VK_SYNC_IS_SHAREABLE); + + VkResult result = sync->type->export_win32_handle(device, sync, handle); + if (unlikely(result != VK_SUCCESS)) + return result; + + sync->flags |= VK_SYNC_IS_SHARED; + + return VK_SUCCESS; +} + +VkResult +vk_sync_set_win32_export_params(struct vk_device *device, + struct vk_sync *sync, + const void *security_attributes, + uint32_t access, + const wchar_t *name) +{ + assert(sync->flags & VK_SYNC_IS_SHARED); + + return sync->type->set_win32_export_params(device, sync, security_attributes, access, name); +} diff --git a/src/vulkan/runtime/vk_sync.h b/src/vulkan/runtime/vk_sync.h new file mode 100644 index 00000000000..15d85dc9253 --- /dev/null +++ b/src/vulkan/runtime/vk_sync.h @@ -0,0 +1,410 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_SYNC_H +#define VK_SYNC_H + +#include <stdbool.h> +#include <vulkan/vulkan_core.h> + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_device; +struct vk_sync; + +enum vk_sync_features { + /** Set if a sync type supports the binary mode of operation + * + * In binary mode, a vk_sync has two modes: signaled and unsignaled. If + * it supports CPU_RESET, it can be changed from signaled to unsignaled on + * the CPU via vk_sync_reset(). If it supports CPU_SIGNAL, it can be + * changed from unsignaled to signaled on the CPU via vk_sync_signal(). + * + * Binary vk_sync types may also support WAIT_PENDING in which they have a + * third hidden pending state. 
Once such a vk_sync has been submitted to + * the kernel driver for signaling, it is in the pending state and remains + * there until the work is complete at which point it enters the signaled + * state. This pending state is visible across processes for shared + * vk_sync types. This is used to by the threaded submit mode to ensure + * that everything gets submitted to the kernel driver in-order. + * + * A vk_sync operates in binary mode if VK_SYNC_IS_TIMELINE is not set + * in vk_sync::flags. + */ + VK_SYNC_FEATURE_BINARY = (1 << 0), + + /** Set if a sync type supports the timeline mode of operation + * + * In timeline mode, a vk_sync has a monotonically increasing 64-bit value + * which represents most recently signaled time point. Waits are relative + * to time points. Instead of waiting for the vk_sync to enter a signaled + * state, you wait for its 64-bit value to be at least some wait value. + * + * Timeline vk_sync types can also support WAIT_PENDING. In this case, the + * wait is not for a pending state, as such, but rather for someone to have + * submitted a kernel request which will signal a time point with at least + * that value. Logically, you can think of this as having two timelines, + * the real timeline and a pending timeline which runs slightly ahead of + * the real one. As with binary vk_sync types, this is used by threaded + * submit to re-order things so that the kernel requests happen in a valid + * linear order. + * + * A vk_sync operates in timeline mode if VK_SYNC_IS_TIMELINE is set in + * vk_sync::flags. + */ + VK_SYNC_FEATURE_TIMELINE = (1 << 1), + + /** Set if this sync supports GPU waits */ + VK_SYNC_FEATURE_GPU_WAIT = (1 << 2), + + /** Set if a sync type supports multiple GPU waits on one signal state + * + * The Vulkan spec for VkSemaphore requires GPU wait and signal operations + * to have a one-to-one relationship. This formally described by saying + * that the VkSemaphore gets implicitly reset on wait. However, it is + * often useful to have well-defined multi-wait. If binary vk_sync + * supports multi-wait then any number of kernel requests can be submitted + * which wait on one signal operation. This also implies that you can + * signal twice back-to-back (there are 0 waits on the first signal). + * + * This feature only applies to binary vk_sync objects. + */ + VK_SYNC_FEATURE_GPU_MULTI_WAIT = (1 << 3), + + /** Set if a sync type supports vk_sync_wait() and vk_sync_wait_many() */ + VK_SYNC_FEATURE_CPU_WAIT = (1 << 4), + + /** Set if a sync type supports vk_sync_reset() + * + * This feature only applies to binary vk_sync objects. + */ + VK_SYNC_FEATURE_CPU_RESET = (1 << 5), + + /** Set if a sync type supports vk_sync_signal() */ + VK_SYNC_FEATURE_CPU_SIGNAL = (1 << 6), + + /** Set if sync_type::wait_many supports the VK_SYNC_WAIT_ANY bit + * + * vk_sync_wait_many() will support the bit regardless. If the sync type + * doesn't support it natively, it will be emulated. + */ + VK_SYNC_FEATURE_WAIT_ANY = (1 << 7), + + /** Set if a sync type supports the VK_SYNC_WAIT_PENDING bit + * + * See VK_SYNC_FEATURE_BINARY and VK_SYNC_FEATURE_TIMELINE for descriptions + * of what this does in each case. + */ + VK_SYNC_FEATURE_WAIT_PENDING = (1 << 8), + + /** Set if a sync type natively supports wait-before-signal + * + * If this is set then the underlying OS primitive supports submitting + * kernel requests which wait on the vk_sync before submitting a kernel + * request which would cause that wait to unblock. 
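+ *
+ * Without this, the runtime has to fall back to a submit thread and delay
+ * submissions on the CPU until their waits can actually unblock.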
+ +struct vk_sync_wait; + +enum vk_sync_wait_flags { + /** Placeholder for 0 to make vk_sync_wait() calls more clear */ + VK_SYNC_WAIT_COMPLETE = 0, + + /** If set, only wait for the vk_sync operation to be pending + * + * See VK_SYNC_FEATURE_BINARY and VK_SYNC_FEATURE_TIMELINE for descriptions + * of what this does in each case. + */ + VK_SYNC_WAIT_PENDING = (1 << 0), + + /** If set, wait for any of the vk_sync operations to complete + * + * This is as opposed to waiting for all of them. There is no guarantee + * that vk_sync_wait_many() will return immediately after the first + * operation completes but it will make a best effort to return as soon as + * possible. + */ + VK_SYNC_WAIT_ANY = (1 << 1), +}; + +struct vk_sync_type { + /** Size of this sync type */ + size_t size; + + /** Features supported by this sync type */ + enum vk_sync_features features; + + /** Initialize a vk_sync + * + * The base vk_sync will already be initialized and the sync type set + * before this function is called. If any OS primitives need to be + * allocated, that should be done here. + */ + VkResult (*init)(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value); + + /** Finish a vk_sync + * + * This should free any internal data stored in this vk_sync. + */ + void (*finish)(struct vk_device *device, + struct vk_sync *sync); + + /** Signal a vk_sync + * + * For non-timeline sync types, value == 0. + */ + VkResult (*signal)(struct vk_device *device, + struct vk_sync *sync, + uint64_t value); + + /** Get the timeline value for a vk_sync */ + VkResult (*get_value)(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value); + + /** Reset a non-timeline vk_sync */ + VkResult (*reset)(struct vk_device *device, + struct vk_sync *sync); + + /** Moves the guts of one binary vk_sync to another + * + * This moves the current binary vk_sync event from src to dst and resets + * src. If dst contained an event, it is discarded. + * + * This is required for all binary vk_sync types that can be used for a + * semaphore wait in conjunction with real timeline semaphores. + */ + VkResult (*move)(struct vk_device *device, + struct vk_sync *dst, + struct vk_sync *src); + + /** Wait on a vk_sync + * + * For a timeline vk_sync, wait_value is the timeline value to wait for. + * This function should not return VK_SUCCESS until get_value on that + * vk_sync would return a value >= wait_value. A wait_value of zero is + * allowed in which case the wait is a no-op. For a non-timeline vk_sync, + * wait_value should be ignored. + * + * This function is optional. If the sync type needs to support CPU waits, + * at least one of wait or wait_many must be provided. If one is missing, + * it will be implemented in terms of the other. + */ + VkResult (*wait)(struct vk_device *device, + struct vk_sync *sync, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns); + + /** Wait for multiple vk_sync events + * + * If VK_SYNC_WAIT_ANY is set, it will return after at least one of the + * wait events is complete instead of waiting for all of them. + * + * See wait for more details. + */ + VkResult (*wait_many)(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns); +
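For example, waiting for whichever of several payloads lands first goes through vk_sync_wait_many(), declared later in this header; a hedged sketch with hypothetical sync_a/sync_b pointers:

   struct vk_sync_wait waits[2] = {
      { .sync = sync_a, .wait_value = 0 },   /* binary: value is ignored */
      { .sync = sync_b, .wait_value = 5 },   /* timeline: wait for >= 5 */
   };
   /* Returns once either wait is satisfied; if the underlying type lacks
    * VK_SYNC_FEATURE_WAIT_ANY, the behavior is emulated by the runtime.
    */
   VkResult result = vk_sync_wait_many(device, 2, waits, VK_SYNC_WAIT_ANY,
                                       abs_timeout_ns);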
+ /** Permanently imports the given FD into this vk_sync + * + * This replaces the guts of the given vk_sync with whatever is in the FD. + * In a sense, this vk_sync now aliases whatever vk_sync the FD was + * exported from. + */ + VkResult (*import_opaque_fd)(struct vk_device *device, + struct vk_sync *sync, + int fd); + + /** Export the guts of this vk_sync to an FD */ + VkResult (*export_opaque_fd)(struct vk_device *device, + struct vk_sync *sync, + int *fd); + + /** Imports a sync file into this binary vk_sync + * + * If this completes successfully, the vk_sync will now signal whenever + * the sync file signals. + * + * If sync_file == -1, the vk_sync should be signaled immediately. If + * the vk_sync_type implements signal, sync_file will never be -1. + */ + VkResult (*import_sync_file)(struct vk_device *device, + struct vk_sync *sync, + int sync_file); + + /** Exports the current binary vk_sync state as a sync file. + * + * The resulting sync file will contain the current event stored in this + * binary vk_sync. If the vk_sync is later + * modified to contain a new event, the sync file is unaffected. + */ + VkResult (*export_sync_file)(struct vk_device *device, + struct vk_sync *sync, + int *sync_file); + + /** Permanently imports the given handle or name into this vk_sync + * + * This replaces the guts of the given vk_sync with whatever is in the object. + * In a sense, this vk_sync now aliases whatever vk_sync the handle was + * exported from. + */ + VkResult (*import_win32_handle)(struct vk_device *device, + struct vk_sync *sync, + void *handle, + const wchar_t *name); + + /** Export the guts of this vk_sync to a handle and/or name */ + VkResult (*export_win32_handle)(struct vk_device *device, + struct vk_sync *sync, + void **handle); + + /** Vulkan puts these as creation params instead of export params */ + VkResult (*set_win32_export_params)(struct vk_device *device, + struct vk_sync *sync, + const void *security_attributes, + uint32_t access, + const wchar_t *name); +}; + +enum vk_sync_flags { + /** Set if the vk_sync is a timeline */ + VK_SYNC_IS_TIMELINE = (1 << 0), + + /** Set if the vk_sync can have its payload shared */ + VK_SYNC_IS_SHAREABLE = (1 << 1), + + /** Set if the vk_sync has a shared payload */ + VK_SYNC_IS_SHARED = (1 << 2), +}; + +struct vk_sync { + const struct vk_sync_type *type; + enum vk_sync_flags flags; +}; + +/* See VkSemaphoreSubmitInfo */ +struct vk_sync_wait { + struct vk_sync *sync; + VkPipelineStageFlags2 stage_mask; + uint64_t wait_value; +}; + +/* See VkSemaphoreSubmitInfo */ +struct vk_sync_signal { + struct vk_sync *sync; + VkPipelineStageFlags2 stage_mask; + uint64_t signal_value; +}; + +VkResult MUST_CHECK vk_sync_init(struct vk_device *device, + struct vk_sync *sync, + const struct vk_sync_type *type, + enum vk_sync_flags flags, + uint64_t initial_value); + +void vk_sync_finish(struct vk_device *device, + struct vk_sync *sync); + +VkResult MUST_CHECK vk_sync_create(struct vk_device *device, + const struct vk_sync_type *type, + enum vk_sync_flags flags, + uint64_t initial_value, + struct vk_sync **sync_out); + +void vk_sync_destroy(struct vk_device *device, + struct vk_sync *sync); + +VkResult MUST_CHECK vk_sync_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value); + +VkResult MUST_CHECK vk_sync_get_value(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value); + +VkResult MUST_CHECK vk_sync_reset(struct vk_device *device, + struct vk_sync *sync); + +VkResult MUST_CHECK vk_sync_wait(struct vk_device *device, + struct vk_sync *sync, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags, + uint64_t
abs_timeout_ns); + +VkResult MUST_CHECK vk_sync_wait_many(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns); + +VkResult MUST_CHECK vk_sync_import_opaque_fd(struct vk_device *device, + struct vk_sync *sync, + int fd); + +VkResult MUST_CHECK vk_sync_export_opaque_fd(struct vk_device *device, + struct vk_sync *sync, + int *fd); + +VkResult MUST_CHECK vk_sync_import_sync_file(struct vk_device *device, + struct vk_sync *sync, + int sync_file); + +VkResult MUST_CHECK vk_sync_export_sync_file(struct vk_device *device, + struct vk_sync *sync, + int *sync_file); + +VkResult MUST_CHECK vk_sync_import_win32_handle(struct vk_device *device, + struct vk_sync *sync, + void *handle, + const wchar_t *name); + +VkResult MUST_CHECK vk_sync_export_win32_handle(struct vk_device *device, + struct vk_sync *sync, + void **handle); + +VkResult MUST_CHECK vk_sync_set_win32_export_params(struct vk_device *device, + struct vk_sync *sync, + const void *security_attributes, + uint32_t access, + const wchar_t *name); + +VkResult MUST_CHECK vk_sync_move(struct vk_device *device, + struct vk_sync *dst, + struct vk_sync *src); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SYNC_H */ diff --git a/src/vulkan/runtime/vk_sync_binary.c b/src/vulkan/runtime/vk_sync_binary.c new file mode 100644 index 00000000000..c10cabe348a --- /dev/null +++ b/src/vulkan/runtime/vk_sync_binary.c @@ -0,0 +1,141 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
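A sketch of cross-process sharing through the opaque-FD path declared above, assuming both vk_syncs use a type that implements import_opaque_fd/export_opaque_fd; error handling is omitted:

   /* Exporting side */
   int fd;
   vk_sync_export_opaque_fd(device, src_sync, &fd);
   /* ... pass fd to another process, or to another vk_sync here ... */

   /* Importing side: dst_sync's payload is replaced so that it now aliases
    * whatever src_sync was exported from.
    */
   vk_sync_import_opaque_fd(device, dst_sync, fd);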
+ */ + +#include "vk_sync_binary.h" + +#include "vk_util.h" + +static struct vk_sync_binary * +to_vk_sync_binary(struct vk_sync *sync) +{ + assert(sync->type->init == vk_sync_binary_init); + + return container_of(sync, struct vk_sync_binary, sync); +} + +VkResult +vk_sync_binary_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value) +{ + struct vk_sync_binary *binary = to_vk_sync_binary(sync); + + const struct vk_sync_binary_type *btype = + container_of(binary->sync.type, struct vk_sync_binary_type, sync); + + assert(!(sync->flags & VK_SYNC_IS_TIMELINE)); + assert(!(sync->flags & VK_SYNC_IS_SHAREABLE)); + + binary->next_point = (initial_value == 0); + + return vk_sync_init(device, &binary->timeline, btype->timeline_type, + VK_SYNC_IS_TIMELINE, 0 /* initial_value */); +} + +static void +vk_sync_binary_finish(struct vk_device *device, + struct vk_sync *sync) +{ + struct vk_sync_binary *binary = to_vk_sync_binary(sync); + + vk_sync_finish(device, &binary->timeline); +} + +static VkResult +vk_sync_binary_reset(struct vk_device *device, + struct vk_sync *sync) +{ + struct vk_sync_binary *binary = to_vk_sync_binary(sync); + + binary->next_point++; + + return VK_SUCCESS; +} + +static VkResult +vk_sync_binary_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value) +{ + struct vk_sync_binary *binary = to_vk_sync_binary(sync); + + assert(value == 0); + + return vk_sync_signal(device, &binary->timeline, binary->next_point); +} + +static VkResult +vk_sync_binary_wait_many(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + if (wait_count == 0) + return VK_SUCCESS; + + STACK_ARRAY(struct vk_sync_wait, timeline_waits, wait_count); + + for (uint32_t i = 0; i < wait_count; i++) { + struct vk_sync_binary *binary = to_vk_sync_binary(waits[i].sync); + + timeline_waits[i] = (struct vk_sync_wait) { + .sync = &binary->timeline, + .stage_mask = waits[i].stage_mask, + .wait_value = binary->next_point, + }; + } + + VkResult result = vk_sync_wait_many(device, wait_count, timeline_waits, + wait_flags, abs_timeout_ns); + + STACK_ARRAY_FINISH(timeline_waits); + + return result; +} + +struct vk_sync_binary_type +vk_sync_binary_get_type(const struct vk_sync_type *timeline_type) +{ + assert(timeline_type->features & VK_SYNC_FEATURE_TIMELINE); + + return (struct vk_sync_binary_type) { + .sync = { + .size = offsetof(struct vk_sync_binary, timeline) + + timeline_type->size, + .features = VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET | + VK_SYNC_FEATURE_CPU_SIGNAL | + VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_PENDING, + .init = vk_sync_binary_init, + .finish = vk_sync_binary_finish, + .reset = vk_sync_binary_reset, + .signal = vk_sync_binary_signal, + .wait_many = vk_sync_binary_wait_many, + }, + .timeline_type = timeline_type, + }; +} diff --git a/src/vulkan/runtime/vk_sync_binary.h b/src/vulkan/runtime/vk_sync_binary.h new file mode 100644 index 00000000000..8a4ceebd77d --- /dev/null +++ b/src/vulkan/runtime/vk_sync_binary.h @@ -0,0 +1,79 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_SYNC_BINARY_H +#define VK_SYNC_BINARY_H + +#include "vk_sync.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_sync_binary_type { + struct vk_sync_type sync; + const struct vk_sync_type *timeline_type; +}; + +struct vk_sync_binary_type +vk_sync_binary_get_type(const struct vk_sync_type *timeline_type); + +/** Implements a binary vk_sync type on top of a timeline vk_sync + * + * This is useful when targeting Windows APIs such as D3D12 which only have + * timelines and have no concept of a binary synchronization object. Because + * binary vk_sync emulation requires tracking additional state (the next time + * point), fences and semaphores created from this type cannot support any of + * the sharing APIs. + */ +struct vk_sync_binary { + struct vk_sync sync; + + uint64_t next_point; + + struct vk_sync timeline; +}; + +VkResult vk_sync_binary_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value); + +static inline bool +vk_sync_type_is_vk_sync_binary(const struct vk_sync_type *type) +{ + return type->init == vk_sync_binary_init; +} + +static inline struct vk_sync_binary * +vk_sync_as_binary(struct vk_sync *sync) +{ + if (!vk_sync_type_is_vk_sync_binary(sync->type)) + return NULL; + + return container_of(sync, struct vk_sync_binary, sync); +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_TIMELINE_H */ diff --git a/src/vulkan/runtime/vk_sync_dummy.c b/src/vulkan/runtime/vk_sync_dummy.c new file mode 100644 index 00000000000..1cab72f491b --- /dev/null +++ b/src/vulkan/runtime/vk_sync_dummy.c @@ -0,0 +1,59 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
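How a driver might use this wrapper, sketched with hypothetical names; vk_sync_binary_get_type() returns the type by value, so it is typically filled in once at driver initialization:

   extern const struct vk_sync_type drv_timeline_sync_type;
   static struct vk_sync_binary_type drv_binary_sync_type;

   void drv_init_sync_types(void)
   {
      /* Each binary vk_sync of this type embeds a timeline of
       * drv_timeline_sync_type and signals successive points on it.
       */
      drv_binary_sync_type = vk_sync_binary_get_type(&drv_timeline_sync_type);
      /* drv_binary_sync_type.sync is what gets advertised as a sync type. */
   }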
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_sync_dummy.h" + +static VkResult +vk_sync_dummy_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value) +{ + return VK_SUCCESS; +} + +static void +vk_sync_dummy_finish(struct vk_device *device, + struct vk_sync *sync) +{ } + +static VkResult +vk_sync_dummy_wait_many(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + return VK_SUCCESS; +} + +const struct vk_sync_type vk_sync_dummy_type = { + .size = sizeof(struct vk_sync), + .features = VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_PENDING, + .init = vk_sync_dummy_init, + .finish = vk_sync_dummy_finish, + .wait_many = vk_sync_dummy_wait_many, +}; diff --git a/src/vulkan/runtime/vk_sync_dummy.h b/src/vulkan/runtime/vk_sync_dummy.h new file mode 100644 index 00000000000..55c6f169d30 --- /dev/null +++ b/src/vulkan/runtime/vk_sync_dummy.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
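The dummy type never blocks: it has no signal or reset hooks and its wait_many returns immediately, so from the runtime's point of view it acts as an always-satisfied binary sync. A small sketch, with device standing in for a hypothetical vk_device:

   struct vk_sync *no_op;
   vk_sync_create(device, &vk_sync_dummy_type, 0 /* flags */,
                  0 /* initial_value */, &no_op);
   /* Completes immediately; there is nothing to actually wait on. */
   vk_sync_wait(device, no_op, 0, VK_SYNC_WAIT_COMPLETE, 0 /* abs_timeout_ns */);
   vk_sync_destroy(device, no_op);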
+ */ +#ifndef VK_SYNC_DUMMY_H +#define VK_SYNC_DUMMY_H + +#include "vk_sync.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const struct vk_sync_type vk_sync_dummy_type; + +static inline bool +vk_sync_type_is_dummy(const struct vk_sync_type *type) +{ + return type == &vk_sync_dummy_type; +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SYNC_H */ diff --git a/src/vulkan/runtime/vk_sync_timeline.c b/src/vulkan/runtime/vk_sync_timeline.c new file mode 100644 index 00000000000..d2d712daa84 --- /dev/null +++ b/src/vulkan/runtime/vk_sync_timeline.c @@ -0,0 +1,541 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_sync_timeline.h" + +#include <inttypes.h> + +#include "util/os_time.h" +#include "util/timespec.h" + +#include "vk_alloc.h" +#include "vk_device.h" +#include "vk_log.h" + +static struct vk_sync_timeline * +to_vk_sync_timeline(struct vk_sync *sync) +{ + assert(sync->type->init == vk_sync_timeline_init); + + return container_of(sync, struct vk_sync_timeline, sync); +} + +static void +vk_sync_timeline_type_validate(const struct vk_sync_timeline_type *ttype) +{ + ASSERTED const enum vk_sync_features req_features = + VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_GPU_MULTI_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET; + + assert(!(req_features & ~ttype->point_sync_type->features)); +} + +VkResult +vk_sync_timeline_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value) +{ + struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync); + int ret; + + ASSERTED const struct vk_sync_timeline_type *ttype = + container_of(timeline->sync.type, struct vk_sync_timeline_type, sync); + vk_sync_timeline_type_validate(ttype); + + ret = mtx_init(&timeline->mutex, mtx_plain); + if (ret != thrd_success) + return vk_errorf(device, VK_ERROR_UNKNOWN, "mtx_init failed"); + + ret = cnd_init(&timeline->cond); + if (ret != thrd_success) { + mtx_destroy(&timeline->mutex); + return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_init failed"); + } + + timeline->highest_past = + timeline->highest_pending = initial_value; + list_inithead(&timeline->pending_points); + list_inithead(&timeline->free_points); + + return VK_SUCCESS; +} + +static void +vk_sync_timeline_finish(struct vk_device *device, + struct vk_sync *sync) +{ + struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync); + + list_for_each_entry_safe(struct 
vk_sync_timeline_point, point, + &timeline->free_points, link) { + list_del(&point->link); + vk_sync_finish(device, &point->sync); + vk_free(&device->alloc, point); + } + list_for_each_entry_safe(struct vk_sync_timeline_point, point, + &timeline->pending_points, link) { + list_del(&point->link); + vk_sync_finish(device, &point->sync); + vk_free(&device->alloc, point); + } + + cnd_destroy(&timeline->cond); + mtx_destroy(&timeline->mutex); +} + +static struct vk_sync_timeline_point * +vk_sync_timeline_first_point(struct vk_sync_timeline *timeline) +{ + struct vk_sync_timeline_point *point = + list_first_entry(&timeline->pending_points, + struct vk_sync_timeline_point, link); + + assert(point->value <= timeline->highest_pending); + assert(point->value > timeline->highest_past); + + return point; +} + +static VkResult +vk_sync_timeline_gc_locked(struct vk_device *device, + struct vk_sync_timeline *timeline, + bool drain); + +static VkResult +vk_sync_timeline_alloc_point_locked(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t value, + struct vk_sync_timeline_point **point_out) +{ + struct vk_sync_timeline_point *point; + VkResult result; + + result = vk_sync_timeline_gc_locked(device, timeline, false); + if (unlikely(result != VK_SUCCESS)) + return result; + + if (list_is_empty(&timeline->free_points)) { + const struct vk_sync_timeline_type *ttype = + container_of(timeline->sync.type, struct vk_sync_timeline_type, sync); + const struct vk_sync_type *point_sync_type = ttype->point_sync_type; + + size_t size = offsetof(struct vk_sync_timeline_point, sync) + + point_sync_type->size; + + point = vk_zalloc(&device->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!point) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + point->timeline = timeline; + + result = vk_sync_init(device, &point->sync, point_sync_type, + 0 /* flags */, 0 /* initial_value */); + if (unlikely(result != VK_SUCCESS)) { + vk_free(&device->alloc, point); + return result; + } + } else { + point = list_first_entry(&timeline->free_points, + struct vk_sync_timeline_point, link); + + if (point->sync.type->reset) { + result = vk_sync_reset(device, &point->sync); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + list_del(&point->link); + } + + point->value = value; + *point_out = point; + + return VK_SUCCESS; +} + +VkResult +vk_sync_timeline_alloc_point(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t value, + struct vk_sync_timeline_point **point_out) +{ + VkResult result; + + mtx_lock(&timeline->mutex); + result = vk_sync_timeline_alloc_point_locked(device, timeline, value, point_out); + mtx_unlock(&timeline->mutex); + + return result; +} + +static void +vk_sync_timeline_point_free_locked(struct vk_sync_timeline *timeline, + struct vk_sync_timeline_point *point) +{ + assert(point->refcount == 0 && !point->pending); + list_add(&point->link, &timeline->free_points); +} + +void +vk_sync_timeline_point_free(struct vk_device *device, + struct vk_sync_timeline_point *point) +{ + struct vk_sync_timeline *timeline = point->timeline; + + mtx_lock(&timeline->mutex); + vk_sync_timeline_point_free_locked(timeline, point); + mtx_unlock(&timeline->mutex); +} + +static void +vk_sync_timeline_point_ref(struct vk_sync_timeline_point *point) +{ + point->refcount++; +} + +static void +vk_sync_timeline_point_unref(struct vk_sync_timeline *timeline, + struct vk_sync_timeline_point *point) +{ + assert(point->refcount > 0); + point->refcount--; + if (point->refcount == 0 && 
!point->pending) + vk_sync_timeline_point_free_locked(timeline, point); +} + +static void +vk_sync_timeline_point_complete(struct vk_sync_timeline *timeline, + struct vk_sync_timeline_point *point) +{ + if (!point->pending) + return; + + assert(timeline->highest_past < point->value); + timeline->highest_past = point->value; + + point->pending = false; + list_del(&point->link); + + if (point->refcount == 0) + vk_sync_timeline_point_free_locked(timeline, point); +} + +static VkResult +vk_sync_timeline_gc_locked(struct vk_device *device, + struct vk_sync_timeline *timeline, + bool drain) +{ + list_for_each_entry_safe(struct vk_sync_timeline_point, point, + &timeline->pending_points, link) { + /* timeline->higest_pending is only incremented once submission has + * happened. If this point has a greater serial, it means the point + * hasn't been submitted yet. + */ + if (point->value > timeline->highest_pending) + return VK_SUCCESS; + + /* If someone is waiting on this time point, consider it busy and don't + * try to recycle it. There's a slim possibility that it's no longer + * busy by the time we look at it but we would be recycling it out from + * under a waiter and that can lead to weird races. + * + * We walk the list in-order so if this time point is still busy so is + * every following time point + */ + assert(point->refcount >= 0); + if (point->refcount > 0 && !drain) + return VK_SUCCESS; + + /* Garbage collect any signaled point. */ + VkResult result = vk_sync_wait(device, &point->sync, 0, + VK_SYNC_WAIT_COMPLETE, + 0 /* abs_timeout_ns */); + if (result == VK_TIMEOUT) { + /* We walk the list in-order so if this time point is still busy so + * is every following time point + */ + return VK_SUCCESS; + } else if (result != VK_SUCCESS) { + return result; + } + + vk_sync_timeline_point_complete(timeline, point); + } + + return VK_SUCCESS; +} + +VkResult +vk_sync_timeline_point_install(struct vk_device *device, + struct vk_sync_timeline_point *point) +{ + struct vk_sync_timeline *timeline = point->timeline; + + mtx_lock(&timeline->mutex); + + assert(point->value > timeline->highest_pending); + timeline->highest_pending = point->value; + + assert(point->refcount == 0); + point->pending = true; + list_addtail(&point->link, &timeline->pending_points); + + int ret = cnd_broadcast(&timeline->cond); + + mtx_unlock(&timeline->mutex); + + if (ret == thrd_error) + return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed"); + + return VK_SUCCESS; +} + +static VkResult +vk_sync_timeline_get_point_locked(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t wait_value, + struct vk_sync_timeline_point **point_out) +{ + if (timeline->highest_past >= wait_value) { + /* Nothing to wait on */ + *point_out = NULL; + return VK_SUCCESS; + } + + list_for_each_entry(struct vk_sync_timeline_point, point, + &timeline->pending_points, link) { + if (point->value >= wait_value) { + vk_sync_timeline_point_ref(point); + *point_out = point; + return VK_SUCCESS; + } + } + + return VK_NOT_READY; +} + +VkResult +vk_sync_timeline_get_point(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t wait_value, + struct vk_sync_timeline_point **point_out) +{ + mtx_lock(&timeline->mutex); + VkResult result = vk_sync_timeline_get_point_locked(device, timeline, + wait_value, point_out); + mtx_unlock(&timeline->mutex); + + return result; +} + +void +vk_sync_timeline_point_release(struct vk_device *device, + struct vk_sync_timeline_point *point) +{ + struct vk_sync_timeline *timeline = 
point->timeline; + + mtx_lock(&timeline->mutex); + vk_sync_timeline_point_unref(timeline, point); + mtx_unlock(&timeline->mutex); +} + +static VkResult +vk_sync_timeline_signal_locked(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t value) +{ + VkResult result = vk_sync_timeline_gc_locked(device, timeline, true); + if (unlikely(result != VK_SUCCESS)) + return result; + + if (unlikely(value <= timeline->highest_past)) { + return vk_device_set_lost(device, "Timeline values must only ever " + "strictly increase."); + } + + assert(list_is_empty(&timeline->pending_points)); + assert(timeline->highest_pending == timeline->highest_past); + timeline->highest_pending = timeline->highest_past = value; + + int ret = cnd_broadcast(&timeline->cond); + if (ret == thrd_error) + return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed"); + + return VK_SUCCESS; +} + +static VkResult +vk_sync_timeline_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value) +{ + struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync); + + mtx_lock(&timeline->mutex); + VkResult result = vk_sync_timeline_signal_locked(device, timeline, value); + mtx_unlock(&timeline->mutex); + + return result; +} + +static VkResult +vk_sync_timeline_get_value(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value) +{ + struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync); + + mtx_lock(&timeline->mutex); + VkResult result = vk_sync_timeline_gc_locked(device, timeline, true); + mtx_unlock(&timeline->mutex); + + if (result != VK_SUCCESS) + return result; + + *value = timeline->highest_past; + + return VK_SUCCESS; +} + +static VkResult +vk_sync_timeline_wait_locked(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + /* Wait on the queue_submit condition variable until the timeline has a + * time point pending that's at least as high as wait_value. + */ + uint64_t now_ns = os_time_get_nano(); + while (timeline->highest_pending < wait_value) { + if (now_ns >= abs_timeout_ns) + return VK_TIMEOUT; + + int ret; + if (abs_timeout_ns >= INT64_MAX) { + /* Common infinite wait case */ + ret = cnd_wait(&timeline->cond, &timeline->mutex); + } else { + /* This is really annoying. The C11 threads API uses CLOCK_REALTIME + * while all our absolute timeouts are in CLOCK_MONOTONIC. Best + * thing we can do is to convert and hope the system admin doesn't + * change the time out from under us. + */ + uint64_t rel_timeout_ns = abs_timeout_ns - now_ns; + + struct timespec now_ts, abs_timeout_ts; + timespec_get(&now_ts, TIME_UTC); + if (timespec_add_nsec(&abs_timeout_ts, &now_ts, rel_timeout_ns)) { + /* Overflowed; may as well be infinite */ + ret = cnd_wait(&timeline->cond, &timeline->mutex); + } else { + ret = cnd_timedwait(&timeline->cond, &timeline->mutex, + &abs_timeout_ts); + } + } + if (ret == thrd_error) + return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_timedwait failed"); + + /* We don't trust the timeout condition on cnd_timedwait() because of + * the potential clock issues caused by using CLOCK_REALTIME. Instead, + * update now_ns, go back to the top of the loop, and re-check. 
+ */ + now_ns = os_time_get_nano(); + } + + if (wait_flags & VK_SYNC_WAIT_PENDING) + return VK_SUCCESS; + + VkResult result = vk_sync_timeline_gc_locked(device, timeline, false); + if (result != VK_SUCCESS) + return result; + + while (timeline->highest_past < wait_value) { + struct vk_sync_timeline_point *point = vk_sync_timeline_first_point(timeline); + + /* Drop the lock while we wait. */ + vk_sync_timeline_point_ref(point); + mtx_unlock(&timeline->mutex); + + result = vk_sync_wait(device, &point->sync, 0, + VK_SYNC_WAIT_COMPLETE, + abs_timeout_ns); + + /* Pick the mutex back up */ + mtx_lock(&timeline->mutex); + vk_sync_timeline_point_unref(timeline, point); + + /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */ + if (result != VK_SUCCESS) + return result; + + vk_sync_timeline_point_complete(timeline, point); + } + + return VK_SUCCESS; +} + +static VkResult +vk_sync_timeline_wait(struct vk_device *device, + struct vk_sync *sync, + uint64_t wait_value, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync); + + mtx_lock(&timeline->mutex); + VkResult result = vk_sync_timeline_wait_locked(device, timeline, + wait_value, wait_flags, + abs_timeout_ns); + mtx_unlock(&timeline->mutex); + + return result; +} + +struct vk_sync_timeline_type +vk_sync_timeline_get_type(const struct vk_sync_type *point_sync_type) +{ + return (struct vk_sync_timeline_type) { + .sync = { + .size = sizeof(struct vk_sync_timeline), + .features = VK_SYNC_FEATURE_TIMELINE | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_SIGNAL | + VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_PENDING, + .init = vk_sync_timeline_init, + .finish = vk_sync_timeline_finish, + .signal = vk_sync_timeline_signal, + .get_value = vk_sync_timeline_get_value, + .wait = vk_sync_timeline_wait, + }, + .point_sync_type = point_sync_type, + }; +} diff --git a/src/vulkan/runtime/vk_sync_timeline.h b/src/vulkan/runtime/vk_sync_timeline.h new file mode 100644 index 00000000000..d1fcf8c12ea --- /dev/null +++ b/src/vulkan/runtime/vk_sync_timeline.h @@ -0,0 +1,133 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
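The intended per-submission flow for an emulated timeline, sketched with a hypothetical drv_queue_submit_signal() helper; the point's embedded vk_sync (point->sync) is what the driver actually hands to the kernel:

   struct vk_sync_timeline_point *point;
   vk_sync_timeline_alloc_point(device, timeline, signal_value, &point);

   /* Submit GPU work that will signal point->sync when it completes. */
   VkResult result = drv_queue_submit_signal(queue, &point->sync);
   if (result != VK_SUCCESS) {
      /* Nothing was installed, so the point can go straight back. */
      vk_sync_timeline_point_free(device, point);
   } else {
      /* Publish the point: highest_pending advances to signal_value and any
       * CPU waiters blocked on the timeline's condition variable wake up.
       */
      vk_sync_timeline_point_install(device, point);
   }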
+ */ +#ifndef VK_SYNC_TIMELINE_H +#define VK_SYNC_TIMELINE_H + +#include "c11/threads.h" +#include "util/list.h" +#include "util/macros.h" + +#include "vk_sync.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_sync_timeline_type { + struct vk_sync_type sync; + + /* Type of each individual time point */ + const struct vk_sync_type *point_sync_type; +}; + +struct vk_sync_timeline_type +vk_sync_timeline_get_type(const struct vk_sync_type *point_sync_type); + +struct vk_sync_timeline_point { + struct vk_sync_timeline *timeline; + + struct list_head link; + + uint64_t value; + + int refcount; + bool pending; + + struct vk_sync sync; +}; + +/** Implements a timeline vk_sync type on top of a binary vk_sync + * + * This is used for emulating VK_KHR_timeline_semaphores for implementations + * whose kernel driver do not yet support timeline syncobj. Since it's a + * requirement for Vulkan 1.2, it's useful to have an emulation like this. + * + * The driver should never see a vk_sync_timeline object. Instead, converting + * from vk_sync_timeline to a binary vk_sync for a particular time point is + * handled by common code. All a driver needs to do is declare its preferred + * binary vk_sync_type for emulation as follows: + * + * const struct vk_sync_type anv_bo_sync_type = { + * ... + * }; + * VK_DECL_TIMELINE_TYPE(anv_bo_timeline_sync_type, &anv_bo_sync_type); + * + * and then anv_bo_timeline_sync_type.sync can be used as a sync type to + * provide timelines. + */ +struct vk_sync_timeline { + struct vk_sync sync; + + mtx_t mutex; + cnd_t cond; + + uint64_t highest_past; + uint64_t highest_pending; + + struct list_head pending_points; + struct list_head free_points; +}; + +VkResult vk_sync_timeline_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value); + +VkResult vk_sync_timeline_alloc_point(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t value, + struct vk_sync_timeline_point **point_out); + +void vk_sync_timeline_point_free(struct vk_device *device, + struct vk_sync_timeline_point *point); + +VkResult vk_sync_timeline_point_install(struct vk_device *device, + struct vk_sync_timeline_point *point); + +VkResult vk_sync_timeline_get_point(struct vk_device *device, + struct vk_sync_timeline *timeline, + uint64_t wait_value, + struct vk_sync_timeline_point **point_out); + +void vk_sync_timeline_point_release(struct vk_device *device, + struct vk_sync_timeline_point *point); + +static inline bool +vk_sync_type_is_vk_sync_timeline(const struct vk_sync_type *type) +{ + return type->init == vk_sync_timeline_init; +} + +static inline struct vk_sync_timeline * +vk_sync_as_timeline(struct vk_sync *sync) +{ + if (!vk_sync_type_is_vk_sync_timeline(sync->type)) + return NULL; + + return container_of(sync, struct vk_sync_timeline, sync); +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SYNC_TIMELINE_H */ diff --git a/src/vulkan/runtime/vk_synchronization.c b/src/vulkan/runtime/vk_synchronization.c new file mode 100644 index 00000000000..701474164e4 --- /dev/null +++ b/src/vulkan/runtime/vk_synchronization.c @@ -0,0 +1,473 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + 
* Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_synchronization.h" + +#include "vk_alloc.h" +#include "vk_command_buffer.h" +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_queue.h" +#include "vk_util.h" +#include "../wsi/wsi_common.h" + +VkAccessFlags2 +vk_filter_src_access_flags2(VkPipelineStageFlags2 stages, + VkAccessFlags2 access) +{ + const VkPipelineStageFlags2 all_write_access = + vk_write_access2_for_pipeline_stage_flags2(stages); + + if (access & VK_ACCESS_2_MEMORY_WRITE_BIT) + access |= all_write_access; + + if (access & VK_ACCESS_2_SHADER_WRITE_BIT) + access |= VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT; + + /* We only care about write access in src flags */ + return access & all_write_access; +} + +VkAccessFlags2 +vk_filter_dst_access_flags2(VkPipelineStageFlags2 stages, + VkAccessFlags2 access) +{ + const VkPipelineStageFlags2 all_read_access = + vk_read_access2_for_pipeline_stage_flags2(stages); + + if (access & VK_ACCESS_2_MEMORY_READ_BIT) + access |= all_read_access; + + if (access & VK_ACCESS_2_SHADER_READ_BIT) + access |= VK_ACCESS_2_SHADER_SAMPLED_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR; + + /* We only care about read access in dst flags */ + return access & all_read_access; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdWriteTimestamp( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + + device->dispatch_table.CmdWriteTimestamp2(commandBuffer, + (VkPipelineStageFlags2) pipelineStage, + queryPool, + query); +} + +static VkMemoryBarrier2 +upgrade_memory_barrier(const VkMemoryBarrier *barrier, + VkPipelineStageFlags2 src_stage_mask2, + VkPipelineStageFlags2 dst_stage_mask2) +{ + return (VkMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .pNext = barrier->pNext, + .srcStageMask = src_stage_mask2, + .srcAccessMask = (VkAccessFlags2) barrier->srcAccessMask, + .dstStageMask = dst_stage_mask2, + .dstAccessMask = (VkAccessFlags2) barrier->dstAccessMask, + }; +} + +static VkBufferMemoryBarrier2 +upgrade_buffer_memory_barrier(const VkBufferMemoryBarrier *barrier, + VkPipelineStageFlags2 src_stage_mask2, + VkPipelineStageFlags2 dst_stage_mask2) +{ + return (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .pNext = barrier->pNext, + .srcStageMask = src_stage_mask2, + .srcAccessMask = (VkAccessFlags2) barrier->srcAccessMask, + .dstStageMask = dst_stage_mask2, + .dstAccessMask = (VkAccessFlags2) barrier->dstAccessMask, + .srcQueueFamilyIndex = barrier->srcQueueFamilyIndex, + .dstQueueFamilyIndex = barrier->dstQueueFamilyIndex, + .buffer = barrier->buffer, + .offset = 
barrier->offset, + .size = barrier->size, + }; +} + +static VkImageMemoryBarrier2 +upgrade_image_memory_barrier(const VkImageMemoryBarrier *barrier, + VkPipelineStageFlags2 src_stage_mask2, + VkPipelineStageFlags2 dst_stage_mask2) +{ + return (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = barrier->pNext, + .srcStageMask = src_stage_mask2, + .srcAccessMask = (VkAccessFlags2) barrier->srcAccessMask, + .dstStageMask = dst_stage_mask2, + .dstAccessMask = (VkAccessFlags2) barrier->dstAccessMask, + .oldLayout = barrier->oldLayout, + .newLayout = barrier->newLayout, + .srcQueueFamilyIndex = barrier->srcQueueFamilyIndex, + .dstQueueFamilyIndex = barrier->dstQueueFamilyIndex, + .image = barrier->image, + .subresourceRange = barrier->subresourceRange, + }; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdPipelineBarrier( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + + STACK_ARRAY(VkMemoryBarrier2, memory_barriers, memoryBarrierCount); + STACK_ARRAY(VkBufferMemoryBarrier2, buffer_barriers, bufferMemoryBarrierCount); + STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, imageMemoryBarrierCount); + + VkPipelineStageFlags2 src_stage_mask2 = (VkPipelineStageFlags2) srcStageMask; + VkPipelineStageFlags2 dst_stage_mask2 = (VkPipelineStageFlags2) dstStageMask; + + for (uint32_t i = 0; i < memoryBarrierCount; i++) { + memory_barriers[i] = upgrade_memory_barrier(&pMemoryBarriers[i], + src_stage_mask2, + dst_stage_mask2); + } + for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { + buffer_barriers[i] = upgrade_buffer_memory_barrier(&pBufferMemoryBarriers[i], + src_stage_mask2, + dst_stage_mask2); + } + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + image_barriers[i] = upgrade_image_memory_barrier(&pImageMemoryBarriers[i], + src_stage_mask2, + dst_stage_mask2); + } + + VkDependencyInfo dep_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .memoryBarrierCount = memoryBarrierCount, + .pMemoryBarriers = memory_barriers, + .bufferMemoryBarrierCount = bufferMemoryBarrierCount, + .pBufferMemoryBarriers = buffer_barriers, + .imageMemoryBarrierCount = imageMemoryBarrierCount, + .pImageMemoryBarriers = image_barriers, + }; + + device->dispatch_table.CmdPipelineBarrier2(commandBuffer, &dep_info); + + STACK_ARRAY_FINISH(memory_barriers); + STACK_ARRAY_FINISH(buffer_barriers); + STACK_ARRAY_FINISH(image_barriers); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + + VkMemoryBarrier2 mem_barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = (VkPipelineStageFlags2) stageMask, + .dstStageMask = (VkPipelineStageFlags2) stageMask, + }; + VkDependencyInfo dep_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .memoryBarrierCount = 1, + .pMemoryBarriers = &mem_barrier, + }; + + device->dispatch_table.CmdSetEvent2(commandBuffer, event, &dep_info); +} + +VKAPI_ATTR 
void VKAPI_CALL +vk_common_CmdResetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + + device->dispatch_table.CmdResetEvent2(commandBuffer, + event, + (VkPipelineStageFlags2) stageMask); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdWaitEvents( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + + if (eventCount == 0) + return; + + STACK_ARRAY(VkDependencyInfo, deps, eventCount); + + /* Note that dstStageMask and srcStageMask in the CmdWaitEvent2() call + * are the same. This is to match the CmdSetEvent2() call from + * vk_common_CmdSetEvent(). The actual src->dst stage barrier will + * happen as part of the CmdPipelineBarrier() call below. + */ + VkMemoryBarrier2 stage_barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = srcStageMask, + .dstStageMask = srcStageMask, + }; + + for (uint32_t i = 0; i < eventCount; i++) { + deps[i] = (VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .memoryBarrierCount = 1, + .pMemoryBarriers = &stage_barrier, + }; + } + device->dispatch_table.CmdWaitEvents2(commandBuffer, eventCount, pEvents, deps); + + STACK_ARRAY_FINISH(deps); + + /* Setting dependency to 0 because : + * + * - For BY_REGION_BIT and VIEW_LOCAL_BIT, events are not allowed inside a + * render pass so these don't apply. + * + * - For DEVICE_GROUP_BIT, we have the following bit of spec text: + * + * "Semaphore and event dependencies are device-local and only + * execute on the one physical device that performs the + * dependency." 
+ */ + const VkDependencyFlags dep_flags = 0; + + device->dispatch_table.CmdPipelineBarrier(commandBuffer, + srcStageMask, destStageMask, + dep_flags, + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdWriteBufferMarkerAMD( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + uint32_t marker) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + + device->dispatch_table.CmdWriteBufferMarker2AMD(commandBuffer, + (VkPipelineStageFlags2) pipelineStage, + dstBuffer, + dstOffset, + marker); +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_GetQueueCheckpointDataNV( + VkQueue queue, + uint32_t* pCheckpointDataCount, + VkCheckpointDataNV* pCheckpointData) +{ + unreachable("Entrypoint not implemented"); +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_QueueSubmit( + VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence) +{ + VK_FROM_HANDLE(vk_queue, queue, _queue); + struct vk_device *device = queue->base.device; + + STACK_ARRAY(VkSubmitInfo2, submit_info_2, submitCount); + STACK_ARRAY(VkPerformanceQuerySubmitInfoKHR, perf_query_submit_info, submitCount); + STACK_ARRAY(struct wsi_memory_signal_submit_info, wsi_mem_submit_info, submitCount); + + uint32_t n_wait_semaphores = 0; + uint32_t n_command_buffers = 0; + uint32_t n_signal_semaphores = 0; + for (uint32_t s = 0; s < submitCount; s++) { + n_wait_semaphores += pSubmits[s].waitSemaphoreCount; + n_command_buffers += pSubmits[s].commandBufferCount; + n_signal_semaphores += pSubmits[s].signalSemaphoreCount; + } + + STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphores, n_wait_semaphores); + STACK_ARRAY(VkCommandBufferSubmitInfo, command_buffers, n_command_buffers); + STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphores, n_signal_semaphores); + + n_wait_semaphores = 0; + n_command_buffers = 0; + n_signal_semaphores = 0; + + for (uint32_t s = 0; s < submitCount; s++) { + const VkTimelineSemaphoreSubmitInfo *timeline_info = + vk_find_struct_const(pSubmits[s].pNext, + TIMELINE_SEMAPHORE_SUBMIT_INFO); + const uint64_t *wait_values = NULL; + const uint64_t *signal_values = NULL; + + if (timeline_info && timeline_info->waitSemaphoreValueCount) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkSubmitInfo-pNext-03240 + * + * "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure + * and any element of pSignalSemaphores was created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_TIMELINE, then its signalSemaphoreValueCount member must equal + * signalSemaphoreCount" + */ + assert(timeline_info->waitSemaphoreValueCount == pSubmits[s].waitSemaphoreCount); + wait_values = timeline_info->pWaitSemaphoreValues; + } + + if (timeline_info && timeline_info->signalSemaphoreValueCount) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkSubmitInfo-pNext-03241 + * + * "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure + * and any element of pWaitSemaphores was created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_TIMELINE, then its waitSemaphoreValueCount member must equal + * waitSemaphoreCount" + */ + assert(timeline_info->signalSemaphoreValueCount == pSubmits[s].signalSemaphoreCount); + signal_values = timeline_info->pSignalSemaphoreValues; + } + + const VkDeviceGroupSubmitInfo *group_info = + 
vk_find_struct_const(pSubmits[s].pNext, DEVICE_GROUP_SUBMIT_INFO); + + for (uint32_t i = 0; i < pSubmits[s].waitSemaphoreCount; i++) { + wait_semaphores[n_wait_semaphores + i] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = pSubmits[s].pWaitSemaphores[i], + .value = wait_values ? wait_values[i] : 0, + .stageMask = pSubmits[s].pWaitDstStageMask[i], + .deviceIndex = group_info ? group_info->pWaitSemaphoreDeviceIndices[i] : 0, + }; + } + for (uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++) { + command_buffers[n_command_buffers + i] = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = pSubmits[s].pCommandBuffers[i], + .deviceMask = group_info ? group_info->pCommandBufferDeviceMasks[i] : 0, + }; + } + for (uint32_t i = 0; i < pSubmits[s].signalSemaphoreCount; i++) { + signal_semaphores[n_signal_semaphores + i] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = pSubmits[s].pSignalSemaphores[i], + .value = signal_values ? signal_values[i] : 0, + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .deviceIndex = group_info ? group_info->pSignalSemaphoreDeviceIndices[i] : 0, + }; + } + + const VkProtectedSubmitInfo *protected_info = + vk_find_struct_const(pSubmits[s].pNext, PROTECTED_SUBMIT_INFO); + + submit_info_2[s] = (VkSubmitInfo2) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .flags = ((protected_info && protected_info->protectedSubmit) ? + VK_SUBMIT_PROTECTED_BIT : 0), + .waitSemaphoreInfoCount = pSubmits[s].waitSemaphoreCount, + .pWaitSemaphoreInfos = &wait_semaphores[n_wait_semaphores], + .commandBufferInfoCount = pSubmits[s].commandBufferCount, + .pCommandBufferInfos = &command_buffers[n_command_buffers], + .signalSemaphoreInfoCount = pSubmits[s].signalSemaphoreCount, + .pSignalSemaphoreInfos = &signal_semaphores[n_signal_semaphores], + }; + + const VkPerformanceQuerySubmitInfoKHR *query_info = + vk_find_struct_const(pSubmits[s].pNext, + PERFORMANCE_QUERY_SUBMIT_INFO_KHR); + if (query_info) { + perf_query_submit_info[s] = *query_info; + perf_query_submit_info[s].pNext = NULL; + __vk_append_struct(&submit_info_2[s], &perf_query_submit_info[s]); + } + + const struct wsi_memory_signal_submit_info *mem_signal_info = + vk_find_struct_const(pSubmits[s].pNext, + WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA); + if (mem_signal_info) { + wsi_mem_submit_info[s] = *mem_signal_info; + wsi_mem_submit_info[s].pNext = NULL; + __vk_append_struct(&submit_info_2[s], &wsi_mem_submit_info[s]); + } + + n_wait_semaphores += pSubmits[s].waitSemaphoreCount; + n_command_buffers += pSubmits[s].commandBufferCount; + n_signal_semaphores += pSubmits[s].signalSemaphoreCount; + } + + VkResult result = device->dispatch_table.QueueSubmit2(_queue, + submitCount, + submit_info_2, + fence); + + STACK_ARRAY_FINISH(wait_semaphores); + STACK_ARRAY_FINISH(command_buffers); + STACK_ARRAY_FINISH(signal_semaphores); + STACK_ARRAY_FINISH(submit_info_2); + STACK_ARRAY_FINISH(perf_query_submit_info); + STACK_ARRAY_FINISH(wsi_mem_submit_info); + + return result; +} diff --git a/src/vulkan/runtime/vk_synchronization.h b/src/vulkan/runtime/vk_synchronization.h new file mode 100644 index 00000000000..5c3fd1f4992 --- /dev/null +++ b/src/vulkan/runtime/vk_synchronization.h @@ -0,0 +1,109 @@ +/* + * Copyright © 2023 Collabora, Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_SYNCHRONIZATION_H +#define VK_SYNCHRONIZATION_H + +#include <vulkan/vulkan_core.h> + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static inline bool +vk_pipeline_stage_flags2_has_graphics_shader(VkPipelineStageFlags2 stages) +{ + return stages & (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | + VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | + VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT); +} + +static inline bool +vk_pipeline_stage_flags2_has_compute_shader(VkPipelineStageFlags2 stages) +{ + return stages & (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); +} + +/** Expands pipeline stage group flags + * + * Some stages like VK_PIPELINE_SHADER_STAGE_2_ALL_GRAPHICS_BIT represent more + * than one stage. This helper expands any such bits out to the full set of + * individual stages bits they represent. + * + * Note: This helper does not handle BOTTOM/TOP_OF_PIPE. You probably want to + * use vk_expand_src/dst_stage_flags2() instead. 
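A small illustration of the expansion helpers; the exact resulting bit sets depend on which optional stages the runtime knows about, so treat the comments as approximate:

   /* ALL_GRAPHICS is a group bit; expansion adds the individual graphics
    * stages (vertex input, the shader stages, color attachment output, ...).
    */
   VkPipelineStageFlags2 gfx =
      vk_expand_pipeline_stage_flags2(VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT);

   /* As a source scope, BOTTOM_OF_PIPE means "everything before", so it is
    * first promoted to ALL_COMMANDS and then expanded.
    */
   VkPipelineStageFlags2 src =
      vk_expand_src_stage_flags2(VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT);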
+ */ +VkPipelineStageFlags2 +vk_expand_pipeline_stage_flags2(VkPipelineStageFlags2 stages); + +static inline VkPipelineStageFlags2 +vk_expand_src_stage_flags2(VkPipelineStageFlags2 stages) +{ + if (stages & VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT) + stages |= VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + + return vk_expand_pipeline_stage_flags2(stages); +} + +static inline VkPipelineStageFlags2 +vk_expand_dst_stage_flags2(VkPipelineStageFlags2 stages) +{ + if (stages & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) + stages |= VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + + return vk_expand_pipeline_stage_flags2(stages); +} + +/** Returns the set of read accesses allowed in the given stages */ +VkAccessFlags2 +vk_read_access2_for_pipeline_stage_flags2(VkPipelineStageFlags2 stages); + +/** Returns the set of write accesses allowed in the given stages */ +VkAccessFlags2 +vk_write_access2_for_pipeline_stage_flags2(VkPipelineStageFlags2 stages); + +VkAccessFlags2 +vk_filter_src_access_flags2(VkPipelineStageFlags2 stages, + VkAccessFlags2 access); + +VkAccessFlags2 +vk_filter_dst_access_flags2(VkPipelineStageFlags2 stages, + VkAccessFlags2 access); + +#ifdef __cplusplus +} +#endif + +#endif /* VK_SYNCHRONIZATION_H */ diff --git a/src/vulkan/runtime/vk_texcompress_astc.c b/src/vulkan/runtime/vk_texcompress_astc.c new file mode 100644 index 00000000000..a11bdca89f9 --- /dev/null +++ b/src/vulkan/runtime/vk_texcompress_astc.c @@ -0,0 +1,637 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "vk_texcompress_astc.h" +#include "util/texcompress_astc_luts_wrap.h" +#include "vk_alloc.h" +#include "vk_buffer.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_image.h" +#include "vk_physical_device.h" + +/* type_indexes_mask bits are set/clear for support memory type index as per + * struct VkPhysicalDeviceMemoryProperties.memoryTypes[] */ +static uint32_t +get_mem_type_index(struct vk_device *device, uint32_t type_indexes_mask, + VkMemoryPropertyFlags mem_property) +{ + const struct vk_physical_device_dispatch_table *disp = &device->physical->dispatch_table; + VkPhysicalDevice _phy_device = vk_physical_device_to_handle(device->physical); + + VkPhysicalDeviceMemoryProperties2 props2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2, + .pNext = NULL, + }; + disp->GetPhysicalDeviceMemoryProperties2(_phy_device, &props2); + + for (uint32_t i = 0; i < props2.memoryProperties.memoryTypeCount; i++) { + if ((type_indexes_mask & (1 << i)) && + ((props2.memoryProperties.memoryTypes[i].propertyFlags & mem_property) == mem_property)) { + return i; + } + } + + return -1; +} + +static VkResult +vk_create_buffer(struct vk_device *device, VkAllocationCallbacks *allocator, + VkDeviceSize size, VkMemoryPropertyFlags mem_prop_flags, + VkBufferUsageFlags usage_flags, VkBuffer *vk_buf, + VkDeviceMemory *vk_mem) +{ + VkResult result; + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + + VkBufferCreateInfo buffer_create_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = size, + .usage = usage_flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + result = + disp->CreateBuffer(_device, &buffer_create_info, allocator, vk_buf); + if (unlikely(result != VK_SUCCESS)) + return result; + + VkBufferMemoryRequirementsInfo2 mem_req_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + .buffer = *vk_buf, + }; + VkMemoryRequirements2 mem_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + }; + disp->GetBufferMemoryRequirements2(_device, &mem_req_info, &mem_req); + + uint32_t mem_type_index = get_mem_type_index( + device, mem_req.memoryRequirements.memoryTypeBits, mem_prop_flags); + if (mem_type_index == -1) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = mem_req.memoryRequirements.size, + .memoryTypeIndex = mem_type_index, + }; + result = disp->AllocateMemory(_device, &alloc_info, allocator, vk_mem); + if (unlikely(result != VK_SUCCESS)) + return result; + + disp->BindBufferMemory(_device, *vk_buf, *vk_mem, 0); + + return result; +} + +static VkResult +create_buffer_view(struct vk_device *device, VkAllocationCallbacks *allocator, + VkBufferView *buf_view, VkBuffer buf, VkFormat format, VkDeviceSize size, + VkDeviceSize offset) +{ + VkResult result; + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + + VkBufferViewCreateInfo buffer_view_create_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buf, + .format = format, + .offset = offset, + .range = size, + }; + result = disp->CreateBufferView(_device, &buffer_view_create_info, + allocator, buf_view); + return result; +} + +static uint8_t +get_partition_table_index(VkFormat format) +{ + switch (format) { + case VK_FORMAT_ASTC_4x4_UNORM_BLOCK: + case VK_FORMAT_ASTC_4x4_SRGB_BLOCK: + return 0; + case 
VK_FORMAT_ASTC_5x4_UNORM_BLOCK: + case VK_FORMAT_ASTC_5x4_SRGB_BLOCK: + return 1; + case VK_FORMAT_ASTC_5x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_5x5_SRGB_BLOCK: + return 2; + case VK_FORMAT_ASTC_6x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_6x5_SRGB_BLOCK: + return 3; + case VK_FORMAT_ASTC_6x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_6x6_SRGB_BLOCK: + return 4; + case VK_FORMAT_ASTC_8x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x5_SRGB_BLOCK: + return 5; + case VK_FORMAT_ASTC_8x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x6_SRGB_BLOCK: + return 6; + case VK_FORMAT_ASTC_8x8_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x8_SRGB_BLOCK: + return 7; + case VK_FORMAT_ASTC_10x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x5_SRGB_BLOCK: + return 8; + case VK_FORMAT_ASTC_10x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x6_SRGB_BLOCK: + return 9; + case VK_FORMAT_ASTC_10x8_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x8_SRGB_BLOCK: + return 10; + case VK_FORMAT_ASTC_10x10_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x10_SRGB_BLOCK: + return 11; + case VK_FORMAT_ASTC_12x10_UNORM_BLOCK: + case VK_FORMAT_ASTC_12x10_SRGB_BLOCK: + return 12; + case VK_FORMAT_ASTC_12x12_UNORM_BLOCK: + case VK_FORMAT_ASTC_12x12_SRGB_BLOCK: + return 13; + default: + unreachable("bad astc format\n"); + return 0; + } +} + +static VkResult +astc_prepare_buffer(struct vk_device *device, + struct vk_texcompress_astc_state *astc, + VkAllocationCallbacks *allocator, + VkDeviceSize minTexelBufferOffsetAlignment, + uint8_t *single_buf_ptr, + VkDeviceSize *single_buf_size) +{ + VkResult result; + astc_decoder_lut_holder astc_lut_holder; + VkDeviceSize offset = 0; + + _mesa_init_astc_decoder_luts(&astc_lut_holder); + + const astc_decoder_lut *luts[] = { + &astc_lut_holder.color_endpoint, + &astc_lut_holder.color_endpoint_unquant, + &astc_lut_holder.weights, + &astc_lut_holder.weights_unquant, + &astc_lut_holder.trits_quints, + }; + + for (unsigned i = 0; i < ARRAY_SIZE(luts); i++) { + offset = align(offset, minTexelBufferOffsetAlignment); + if (single_buf_ptr) { + memcpy(single_buf_ptr + offset, luts[i]->data, luts[i]->size_B); + result = create_buffer_view(device, allocator, &astc->luts_buf_view[i], astc->luts_buf, + vk_format_from_pipe_format(luts[i]->format), luts[i]->size_B, + offset); + if (result != VK_SUCCESS) + return result; + } + offset += luts[i]->size_B; + } + + const VkFormat formats[] = { + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) { + unsigned lut_width; + unsigned lut_height; + const void *lut_data = _mesa_get_astc_decoder_partition_table( + vk_format_get_blockwidth(formats[i]), + vk_format_get_blockheight(formats[i]), + &lut_width, &lut_height); + const unsigned lut_size = lut_width * lut_height; + + offset = align(offset, minTexelBufferOffsetAlignment); + if (single_buf_ptr) { + memcpy(single_buf_ptr + offset, lut_data, lut_width * lut_height); + + result = create_buffer_view(device, allocator, &astc->partition_tbl_buf_view[i], + astc->luts_buf, VK_FORMAT_R8_UINT, lut_width * lut_height, + offset); + if (result != VK_SUCCESS) + return result; + } + offset += lut_size; + } + + 
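+   /* Report the total size: the first (sizing) pass runs with a NULL
+    * single_buf_ptr so the caller knows how much memory to allocate, and the
+    * second pass fills the buffer and creates the views.
+    */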
*single_buf_size = offset; + return result; +} + +static VkResult +create_fill_all_luts_vulkan(struct vk_device *device, + VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc) +{ + VkResult result; + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkPhysicalDevice _phy_device = vk_physical_device_to_handle(device->physical); + const struct vk_physical_device_dispatch_table *phy_disp = &device->physical->dispatch_table; + VkDeviceSize single_buf_size; + uint8_t *single_buf_ptr; + + VkPhysicalDeviceProperties2 phy_dev_prop = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = NULL, + }; + phy_disp->GetPhysicalDeviceProperties2(_phy_device, &phy_dev_prop); + + /* get the single_buf_size */ + result = astc_prepare_buffer(device, astc, allocator, + phy_dev_prop.properties.limits.minTexelBufferOffsetAlignment, + NULL, &single_buf_size); + + /* create gpu buffer for all the luts */ + result = vk_create_buffer(device, allocator, single_buf_size, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + &astc->luts_buf, &astc->luts_mem); + if (unlikely(result != VK_SUCCESS)) + return result; + + disp->MapMemory(_device, astc->luts_mem, 0, VK_WHOLE_SIZE, 0, (void*)&single_buf_ptr); + + /* fill all the luts and create views */ + result = astc_prepare_buffer(device, astc, allocator, + phy_dev_prop.properties.limits.minTexelBufferOffsetAlignment, + single_buf_ptr, &single_buf_size); + + disp->UnmapMemory(_device, astc->luts_mem); + return result; +} + +static VkResult +create_layout(struct vk_device *device, VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc) +{ + VkResult result; + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + + VkDescriptorSetLayoutBinding bindings[] = { + { + .binding = 0, /* OutputImage2DArray */ + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 1, /* PayloadInput2DArray */ + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 2, /* LUTRemainingBitsToEndpointQuantizer */ + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 3, /* LUTEndpointUnquantize */ + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 4, /* LUTWeightQuantizer */ + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 5, /* LUTWeightUnquantize */ + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 6, /* LUTTritQuintDecode */ + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + { + .binding = 7, /* LUTPartitionTable */ + .descriptorType = 
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL, + }, + }; + + VkDescriptorSetLayoutCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + + result = disp->CreateDescriptorSetLayout(_device, &ds_create_info, + allocator, &astc->ds_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineLayoutCreateInfo pl_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &astc->ds_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, + }; + result = disp->CreatePipelineLayout(_device, &pl_create_info, allocator, + &astc->p_layout); +fail: + return result; +} + +static const uint32_t astc_spv[] = { +#include "astc_spv.h" +}; + +static VkResult +vk_astc_create_shader_module(struct vk_device *device, + VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc) +{ + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + + VkShaderModuleCreateInfo shader_module_create_info = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .codeSize = sizeof(astc_spv), + .pCode = astc_spv, + }; + + return disp->CreateShaderModule(_device, &shader_module_create_info, + allocator, &astc->shader_module); +} + +static VkResult +create_astc_decode_pipeline(struct vk_device *device, + VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc, + VkPipelineCache pipeline_cache, VkFormat format) +{ + VkResult result; + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkPipeline pipeline; + uint8_t t_i; + + t_i = get_partition_table_index(format); + + uint32_t special_data[3] = { + vk_format_get_blockwidth(format), + vk_format_get_blockheight(format), + true, + }; + VkSpecializationMapEntry special_map_entry[3] = {{ + .constantID = 0, + .offset = 0, + .size = 4, + }, + { + .constantID = 1, + .offset = 4, + .size = 4, + }, + { + .constantID = 2, + .offset = 8, + .size = 4, + }}; + + VkSpecializationInfo specialization_info = { + .mapEntryCount = 3, + .pMapEntries = special_map_entry, + .dataSize = 12, + .pData = special_data, + }; + + /* compute shader */ + VkPipelineShaderStageCreateInfo pipeline_shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = astc->shader_module, + .pName = "main", + .pSpecializationInfo = &specialization_info, + }; + + VkComputePipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = pipeline_shader_stage, + .flags = 0, + .layout = astc->p_layout, + }; + + result = disp->CreateComputePipelines( + _device, pipeline_cache, 1, &vk_pipeline_info, allocator, &pipeline); + if (result != VK_SUCCESS) + return result; + + astc->pipeline[t_i] = pipeline; + astc->pipeline_mask |= (1 << t_i); + + return result; +} + +VkPipeline +vk_texcompress_astc_get_decode_pipeline(struct vk_device *device, VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc, VkPipelineCache pipeline_cache, + VkFormat format) +{ + VkResult result; + uint8_t t_i = 
get_partition_table_index(format); + + simple_mtx_lock(&astc->mutex); + + if (astc->pipeline[t_i]) + goto unlock; + + if (!astc->shader_module) { + result = vk_astc_create_shader_module(device, allocator, astc); + if (result != VK_SUCCESS) + goto unlock; + } + + create_astc_decode_pipeline(device, allocator, astc, pipeline_cache, format); + +unlock: + simple_mtx_unlock(&astc->mutex); + return astc->pipeline[t_i]; +} + +static inline void +fill_desc_image_info_struct(VkDescriptorImageInfo *info, VkImageView img_view, + VkImageLayout img_layout) +{ + info->sampler = VK_NULL_HANDLE; + info->imageView = img_view; + info->imageLayout = img_layout; +} + +static inline void +fill_write_descriptor_set_image(VkWriteDescriptorSet *set, uint8_t bind_i, + VkDescriptorType desc_type, VkDescriptorImageInfo *image_info) +{ + set->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + set->pNext = NULL; + set->dstSet = VK_NULL_HANDLE; + set->dstBinding = bind_i; + set->dstArrayElement = 0; + set->descriptorCount = 1; + set->descriptorType = desc_type; + set->pImageInfo = image_info; + set->pBufferInfo = NULL; + set->pTexelBufferView = NULL; +} + +static inline void +fill_write_descriptor_set_uniform_texel(VkWriteDescriptorSet *set, + uint8_t bind_i, + VkBufferView *buf_view) +{ + set->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + set->pNext = NULL; + set->dstSet = VK_NULL_HANDLE; + set->dstBinding = bind_i; + set->dstArrayElement = 0; + set->descriptorCount = 1; + set->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + set->pImageInfo = NULL; + set->pBufferInfo = NULL; + set->pTexelBufferView = buf_view; +} + +void +vk_texcompress_astc_fill_write_descriptor_sets(struct vk_texcompress_astc_state *astc, + struct vk_texcompress_astc_write_descriptor_set *set, + VkImageView src_img_view, VkImageLayout src_img_layout, + VkImageView dst_img_view, + VkFormat format) +{ + unsigned desc_i; + + desc_i = 0; + fill_desc_image_info_struct(&set->dst_desc_image_info, dst_img_view, VK_IMAGE_LAYOUT_GENERAL); + fill_write_descriptor_set_image(&set->descriptor_set[desc_i], desc_i, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &set->dst_desc_image_info); + desc_i++; + fill_desc_image_info_struct(&set->src_desc_image_info, src_img_view, src_img_layout); + fill_write_descriptor_set_image(&set->descriptor_set[desc_i], desc_i, + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &set->src_desc_image_info); + /* fill luts descriptor */ + desc_i++; + for (unsigned i = 0; i < VK_TEXCOMPRESS_ASTC_NUM_LUTS; i++) { + fill_write_descriptor_set_uniform_texel(&set->descriptor_set[desc_i + i], desc_i + i, + &astc->luts_buf_view[i]); + } + desc_i += VK_TEXCOMPRESS_ASTC_NUM_LUTS; + uint8_t t_i = get_partition_table_index(format); + fill_write_descriptor_set_uniform_texel(&set->descriptor_set[desc_i], desc_i, + &astc->partition_tbl_buf_view[t_i]); + desc_i++; + assert(desc_i == ARRAY_SIZE(set->descriptor_set)); +} + +VkResult +vk_texcompress_astc_init(struct vk_device *device, VkAllocationCallbacks *allocator, + VkPipelineCache pipeline_cache, + struct vk_texcompress_astc_state **astc) +{ + VkResult result; + + /* astc memory to be freed as part of vk_astc_decode_finish() */ + *astc = vk_zalloc(allocator, sizeof(struct vk_texcompress_astc_state), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*astc == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + simple_mtx_init(&(*astc)->mutex, mtx_plain); + + result = create_fill_all_luts_vulkan(device, allocator, *astc); + if (result != VK_SUCCESS) + goto fail; + + result = create_layout(device, allocator, *astc); + 
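+   /* create_layout() is the last initialization step; fall through to the
+    * shared exit below with its result.
+    */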
+fail: + return result; +} + +void +vk_texcompress_astc_finish(struct vk_device *device, + VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc) +{ + VkDevice _device = vk_device_to_handle(device); + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + + while (astc->pipeline_mask) { + uint8_t t_i = u_bit_scan(&astc->pipeline_mask); + disp->DestroyPipeline(_device, astc->pipeline[t_i], allocator); + } + + disp->DestroyPipelineLayout(_device, astc->p_layout, allocator); + disp->DestroyShaderModule(_device, astc->shader_module, allocator); + disp->DestroyDescriptorSetLayout(_device, astc->ds_layout, allocator); + + for (unsigned i = 0; i < VK_TEXCOMPRESS_ASTC_NUM_LUTS; i++) + disp->DestroyBufferView(_device, astc->luts_buf_view[i], allocator); + + for (unsigned i = 0; i < VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES; i++) + disp->DestroyBufferView(_device, astc->partition_tbl_buf_view[i], allocator); + + disp->DestroyBuffer(_device, astc->luts_buf, allocator); + disp->FreeMemory(_device, astc->luts_mem, allocator); + + vk_free(allocator, astc); +} diff --git a/src/vulkan/runtime/vk_texcompress_astc.h b/src/vulkan/runtime/vk_texcompress_astc.h new file mode 100644 index 00000000000..e307af5c84e --- /dev/null +++ b/src/vulkan/runtime/vk_texcompress_astc.h @@ -0,0 +1,121 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VK_TEXCOMPRESS_ASTC_H +#define VK_TEXCOMPRESS_ASTC_H + +#include "vk_device.h" + +/* luts order matching astc glsl shader below, + * 0 - color endpoint + * 1 - color endpoint unquant + * 2 - weights + * 3 - weights unquant + * 4 - trits quints + */ +#define VK_TEXCOMPRESS_ASTC_NUM_LUTS 5 +#define VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES 14 +#define VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT 8 + +struct vk_texcompress_astc_state { + /* single buffer is allocated for all luts */ + VkDeviceMemory luts_mem; + VkBuffer luts_buf; + + VkBufferView luts_buf_view[VK_TEXCOMPRESS_ASTC_NUM_LUTS]; + VkBufferView partition_tbl_buf_view[VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES]; + + simple_mtx_t mutex; + VkDescriptorSetLayout ds_layout; + VkPipelineLayout p_layout; + VkPipeline pipeline[VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES]; + uint32_t pipeline_mask; + VkShaderModule shader_module; +}; + +struct vk_texcompress_astc_write_descriptor_set { + VkWriteDescriptorSet descriptor_set[VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT]; + VkDescriptorImageInfo dst_desc_image_info; + VkDescriptorImageInfo src_desc_image_info; +}; + +void +vk_texcompress_astc_fill_write_descriptor_sets(struct vk_texcompress_astc_state *astc, + struct vk_texcompress_astc_write_descriptor_set *set, + VkImageView src_img_view, VkImageLayout src_img_layout, + VkImageView dst_img_view, + VkFormat format); +VkPipeline vk_texcompress_astc_get_decode_pipeline(struct vk_device *device, + VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc, + VkPipelineCache pipeline_cache, + VkFormat format); +VkResult vk_texcompress_astc_init(struct vk_device *device, + VkAllocationCallbacks *allocator, + VkPipelineCache pipeline_cache, + struct vk_texcompress_astc_state **astc); +void vk_texcompress_astc_finish(struct vk_device *device, + VkAllocationCallbacks *allocator, + struct vk_texcompress_astc_state *astc); + +static inline VkFormat +vk_texcompress_astc_emulation_format(VkFormat format) +{ + /* TODO: From VK_EXT_astc_Decode_mode spec, VK_FORMAT_R16G16B16A16_SFLOAT is the default + * option. VK_FORMAT_R8G8B8A8_UNORM is only acceptable image quality option. 
+ */ + switch (format) { + case VK_FORMAT_ASTC_4x4_UNORM_BLOCK: + case VK_FORMAT_ASTC_5x4_UNORM_BLOCK: + case VK_FORMAT_ASTC_5x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_6x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_6x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x8_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x8_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x10_UNORM_BLOCK: + case VK_FORMAT_ASTC_12x10_UNORM_BLOCK: + case VK_FORMAT_ASTC_12x12_UNORM_BLOCK: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_ASTC_4x4_SRGB_BLOCK: + case VK_FORMAT_ASTC_5x4_SRGB_BLOCK: + case VK_FORMAT_ASTC_5x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_6x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_6x6_SRGB_BLOCK: + case VK_FORMAT_ASTC_8x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_8x6_SRGB_BLOCK: + case VK_FORMAT_ASTC_8x8_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x6_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x8_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x10_SRGB_BLOCK: + case VK_FORMAT_ASTC_12x10_SRGB_BLOCK: + case VK_FORMAT_ASTC_12x12_SRGB_BLOCK: + return VK_FORMAT_R8G8B8A8_SRGB; + default: + return VK_FORMAT_UNDEFINED; + } +} + +#endif /* VK_TEXCOMPRESS_ASTC_H */ diff --git a/src/vulkan/runtime/vk_texcompress_etc2.c b/src/vulkan/runtime/vk_texcompress_etc2.c new file mode 100644 index 00000000000..558d91e95dd --- /dev/null +++ b/src/vulkan/runtime/vk_texcompress_etc2.c @@ -0,0 +1,565 @@ +/* + * Copyright 2023 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "vk_texcompress_etc2.h" + +#include "compiler/nir/nir_builder.h" +#include "vk_shader_module.h" + +/* Based on + * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/etc2.comp + * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/eac.comp + * + * With some differences: + * - Use the vk format to do all the settings. + * - Combine the ETC2 and EAC shaders. + * - Since we combined the above, reuse the function for the ETC2 A8 component. + * - the EAC shader doesn't do SNORM correctly, so this has that fixed. + */ + +static nir_def * +flip_endian(nir_builder *b, nir_def *src, unsigned cnt) +{ + nir_def *v[2]; + for (unsigned i = 0; i < cnt; ++i) { + nir_def *intermediate[4]; + nir_def *chan = cnt == 1 ? src : nir_channel(b, src, i); + for (unsigned j = 0; j < 4; ++j) + intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8); + v[i] = nir_ior(b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)), + nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0))); + } + return cnt == 1 ? 
v[0] : nir_vec(b, v, cnt); +} + +static nir_def * +etc1_color_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y) +{ + const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}}; + nir_def *upper = nir_ieq_imm(b, y, 1); + nir_def *result = NULL; + for (unsigned i = 0; i < 8; ++i) { + nir_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0])); + if (result) + result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result); + else + result = tmp; + } + return result; +} + +static nir_def * +etc2_distance_lookup(nir_builder *b, nir_def *x) +{ + const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64}; + nir_def *result = NULL; + for (unsigned i = 0; i < 8; ++i) { + if (result) + result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result); + else + result = nir_imm_int(b, table[i]); + } + return result; +} + +static nir_def * +etc1_alpha_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y) +{ + const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742, + 0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642}; + nir_def *result = NULL; + for (unsigned i = 0; i < 16; ++i) { + nir_def *tmp = nir_imm_int(b, table[i]); + if (result) + result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result); + else + result = tmp; + } + return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4)); +} + +static nir_def * +etc_extend(nir_builder *b, nir_def *v, int bits) +{ + if (bits == 4) + return nir_imul_imm(b, v, 0x11); + return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits))); +} + +static nir_def * +decode_etc2_alpha(struct nir_builder *b, nir_def *alpha_payload, nir_def *linear_pixel, bool eac, nir_def *is_signed) +{ + alpha_payload = flip_endian(b, alpha_payload, 2); + nir_def *alpha_x = nir_channel(b, alpha_payload, 1); + nir_def *alpha_y = nir_channel(b, alpha_payload, 0); + nir_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3)); + nir_def *base = nir_ubfe_imm(b, alpha_y, 24, 8); + nir_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4); + nir_def *table = nir_ubfe_imm(b, alpha_y, 16, 4); + + if (eac) { + nir_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8); + signed_base = nir_imul_imm(b, signed_base, 8); + base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4); + base = nir_bcsel(b, is_signed, signed_base, base); + multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1)); + } + + nir_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x), + nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2)); + bit_offset = nir_iadd_imm(b, bit_offset, 2); + nir_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x), + nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1)); + nir_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1)); + nir_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier)); + + nir_def *low_bound = nir_imm_int(b, 0); + nir_def *high_bound = nir_imm_int(b, 255); + nir_def *final_mult = nir_imm_float(b, 1 / 255.0); + if (eac) { + low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound); + high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047)); + final_mult = nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0)); + } + + return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult); +} + 
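+/* Compute the global invocation ID for the first num_components dimensions:
+ * workgroup_id * workgroup_size + local_invocation_id.
+ */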
+static nir_def * +get_global_ids(nir_builder *b, unsigned num_components) +{ + unsigned mask = BITFIELD_MASK(num_components); + + nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask); + nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b), mask); + nir_def *block_size = + nir_channels(b, + nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1], + b->shader->info.workgroup_size[2], 0), + mask); + + return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids); +} + +static nir_shader * +etc2_build_shader(struct vk_device *dev, const struct nir_shader_compiler_options *nir_options) +{ + const struct glsl_type *sampler_type_2d = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT); + const struct glsl_type *sampler_type_3d = glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_UINT); + const struct glsl_type *img_type_2d = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT); + const struct glsl_type *img_type_3d = glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT); + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, nir_options, "meta_decode_etc"); + b.shader->info.workgroup_size[0] = 8; + b.shader->info.workgroup_size[1] = 8; + + nir_variable *input_img_2d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d"); + input_img_2d->data.descriptor_set = 0; + input_img_2d->data.binding = 0; + + nir_variable *input_img_3d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d"); + input_img_3d->data.descriptor_set = 0; + input_img_3d->data.binding = 0; + + nir_variable *output_img_2d = nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d"); + output_img_2d->data.descriptor_set = 0; + output_img_2d->data.binding = 1; + + nir_variable *output_img_3d = nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d"); + output_img_3d->data.descriptor_set = 0; + output_img_3d->data.binding = 1; + + nir_def *global_id = get_global_ids(&b, 3); + + nir_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16); + nir_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4); + nir_def *offset = nir_channels(&b, consts, 7); + nir_def *format = nir_channel(&b, consts, 3); + nir_def *image_type = nir_channel(&b, consts2, 0); + nir_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D); + nir_def *coord = nir_iadd(&b, global_id, offset); + nir_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2), + nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2)); + + nir_variable *payload_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload"); + nir_push_if(&b, is_3d); + { + nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0)); + nir_store_var(&b, payload_var, color, 0xf); + } + nir_push_else(&b, NULL); + { + nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0)); + nir_store_var(&b, payload_var, color, 0xf); + } + nir_pop_if(&b, NULL); + + nir_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3); + nir_def *linear_pixel = + nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), nir_channel(&b, pixel_coord, 1)); + + nir_def *payload = nir_load_var(&b, payload_var); + nir_variable *color = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color"); + 
nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf); + nir_push_if(&b, nir_ilt_imm(&b, format, VK_FORMAT_EAC_R11_UNORM_BLOCK)); + { + nir_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK); + nir_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK), + nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)); + + nir_def *color_payload = + nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3)); + color_payload = flip_endian(&b, color_payload, 2); + nir_def *color_y = nir_channel(&b, color_payload, 0); + nir_def *color_x = nir_channel(&b, color_payload, 1); + nir_def *flip = nir_test_mask(&b, color_y, 1); + nir_def *subblock = + nir_ushr_imm(&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 1); + + nir_variable *punchthrough = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough"); + nir_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2))); + nir_store_var(&b, punchthrough, punchthrough_init, 0x1); + + nir_variable *etc1_compat = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat"); + nir_store_var(&b, etc1_compat, nir_imm_false(&b), 0x1); + + nir_variable *alpha_result = + nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result"); + nir_push_if(&b, alpha_bits_8); + { + nir_store_var(&b, alpha_result, decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), + 1); + } + nir_push_else(&b, NULL); + { + nir_store_var(&b, alpha_result, nir_imm_float(&b, 1.0), 1); + } + nir_pop_if(&b, NULL); + + const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3); + nir_variable *rgb_result = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result"); + nir_variable *base_rgb = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb"); + nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7); + + nir_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2); + nir_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1); + + nir_push_if(&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2)))); + { + nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1); + nir_def *tmp[3]; + for (unsigned i = 0; i < 3; ++i) + tmp[i] = etc_extend( + &b, + nir_iand_imm(&b, nir_ushr(&b, color_y, nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))), + 0xf), + 4); + nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7); + } + nir_push_else(&b, NULL); + { + nir_def *rb = nir_ubfe_imm(&b, color_y, 27, 5); + nir_def *rd = nir_ibfe_imm(&b, color_y, 24, 3); + nir_def *gb = nir_ubfe_imm(&b, color_y, 19, 5); + nir_def *gd = nir_ibfe_imm(&b, color_y, 16, 3); + nir_def *bb = nir_ubfe_imm(&b, color_y, 11, 5); + nir_def *bd = nir_ibfe_imm(&b, color_y, 8, 3); + nir_def *r1 = nir_iadd(&b, rb, rd); + nir_def *g1 = nir_iadd(&b, gb, gd); + nir_def *b1 = nir_iadd(&b, bb, bd); + + nir_push_if(&b, nir_ugt_imm(&b, r1, 31)); + { + nir_def *r0 = + nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2)); + nir_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4); + nir_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4); + nir_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4); + nir_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4); + nir_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4); + 
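+        /* T mode (base R plus its delta falls out of range): below, index 0
+         * selects base color 0 directly, while the other indices offset base
+         * color 2 by the distance-table value.
+         */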
nir_def *da = + nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), nir_iand_imm(&b, color_y, 1)); + nir_def *dist = etc2_distance_lookup(&b, da); + nir_def *index = nir_ior(&b, lsb, msb); + + nir_store_var(&b, punchthrough, + nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), + 0x1); + nir_push_if(&b, nir_ieq_imm(&b, index, 0)); + { + nir_store_var(&b, rgb_result, etc_extend(&b, nir_vec3(&b, r0, g0, b0), 4), 0x7); + } + nir_push_else(&b, NULL); + { + + nir_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4), + nir_imul(&b, dist, nir_isub_imm(&b, 2, index))); + nir_store_var(&b, rgb_result, tmp, 0x7); + } + nir_pop_if(&b, NULL); + } + nir_push_else(&b, NULL); + nir_push_if(&b, nir_ugt_imm(&b, g1, 31)); + { + nir_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4); + nir_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1), + nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1)); + nir_def *b0 = + nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8)); + nir_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4); + nir_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4); + nir_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4); + nir_def *da = nir_iand_imm(&b, color_y, 4); + nir_def *db = nir_iand_imm(&b, color_y, 1); + nir_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2)); + nir_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0)); + nir_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2)); + d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d); + nir_def *dist = etc2_distance_lookup(&b, d); + nir_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0)); + base = etc_extend(&b, base, 4); + base = nir_iadd(&b, base, nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2)))); + nir_store_var(&b, rgb_result, base, 0x7); + nir_store_var(&b, punchthrough, + nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), + 0x1); + } + nir_push_else(&b, NULL); + nir_push_if(&b, nir_ugt_imm(&b, b1, 31)); + { + nir_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6); + nir_def *g0 = + nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40)); + nir_def *b0 = nir_ior( + &b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3), + nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), nir_ubfe_imm(&b, color_y, 7, 3))); + nir_def *rh = + nir_ior(&b, nir_iand_imm(&b, color_y, 1), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1)); + nir_def *rv = nir_ubfe_imm(&b, color_x, 13, 6); + nir_def *gh = nir_ubfe_imm(&b, color_x, 25, 7); + nir_def *gv = nir_ubfe_imm(&b, color_x, 6, 7); + nir_def *bh = nir_ubfe_imm(&b, color_x, 19, 6); + nir_def *bv = nir_ubfe_imm(&b, color_x, 0, 6); + + r0 = etc_extend(&b, r0, 6); + g0 = etc_extend(&b, g0, 7); + b0 = etc_extend(&b, b0, 6); + rh = etc_extend(&b, rh, 6); + rv = etc_extend(&b, rv, 6); + gh = etc_extend(&b, gh, 7); + gv = etc_extend(&b, gv, 7); + bh = etc_extend(&b, bh, 6); + bv = etc_extend(&b, bv, 6); + + nir_def *rgb = nir_vec3(&b, r0, g0, b0); + nir_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0)); + nir_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1)); + rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2)); + 
nir_store_var(&b, rgb_result, rgb, 0x7); + nir_store_var(&b, punchthrough, nir_imm_false(&b), 0x1); + } + nir_push_else(&b, NULL); + { + nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1); + nir_def *subblock_b = nir_ine_imm(&b, subblock, 0); + nir_def *tmp[] = { + nir_bcsel(&b, subblock_b, r1, rb), + nir_bcsel(&b, subblock_b, g1, gb), + nir_bcsel(&b, subblock_b, b1, bb), + }; + nir_store_var(&b, base_rgb, etc_extend(&b, nir_vec(&b, tmp, 3), 5), 0x7); + } + nir_pop_if(&b, NULL); + nir_pop_if(&b, NULL); + nir_pop_if(&b, NULL); + } + nir_pop_if(&b, NULL); + nir_push_if(&b, nir_load_var(&b, etc1_compat)); + { + nir_def *etc1_table_index = + nir_ubfe(&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3)); + nir_def *sgn = nir_isub_imm(&b, 1, msb); + sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn); + nir_store_var(&b, punchthrough, + nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1); + nir_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn); + nir_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off); + nir_store_var(&b, rgb_result, result, 0x7); + } + nir_pop_if(&b, NULL); + nir_push_if(&b, nir_load_var(&b, punchthrough)); + { + nir_store_var(&b, alpha_result, nir_imm_float(&b, 0), 0x1); + nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7); + } + nir_pop_if(&b, NULL); + nir_def *col[4]; + for (unsigned i = 0; i < 3; ++i) + col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 255.0); + col[3] = nir_load_var(&b, alpha_result); + nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf); + } + nir_push_else(&b, NULL); + { /* EAC */ + nir_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK), + nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK)); + nir_def *val[4]; + for (int i = 0; i < 2; ++i) { + val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, is_signed); + } + val[2] = nir_imm_float(&b, 0.0); + val[3] = nir_imm_float(&b, 1.0); + nir_store_var(&b, color, nir_vec(&b, val, 4), 0xf); + } + nir_pop_if(&b, NULL); + + nir_def *outval = nir_load_var(&b, color); + nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, coord, 2), + nir_undef(&b, 1, 32)); + + nir_push_if(&b, is_3d); + { + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->def, img_coord, nir_undef(&b, 1, 32), outval, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_3D); + } + nir_push_else(&b, NULL); + { + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->def, img_coord, nir_undef(&b, 1, 32), outval, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); + } + nir_pop_if(&b, NULL); + return b.shader; +} + +static VkResult +etc2_init_pipeline(struct vk_device *device, struct vk_texcompress_etc2_state *etc2) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + nir_shader *cs = etc2_build_shader(device, etc2->nir_options); + + const VkComputePipelineCreateInfo pipeline_create_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = vk_shader_module_handle_from_nir(cs), + .pName = "main", + }, + .layout = 
etc2->pipeline_layout, + }; + + return disp->CreateComputePipelines(_device, etc2->pipeline_cache, 1, &pipeline_create_info, etc2->allocator, + &etc2->pipeline); +} + +static VkResult +etc2_init_pipeline_layout(struct vk_device *device, struct vk_texcompress_etc2_state *etc2) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + const VkPipelineLayoutCreateInfo pipeline_layout_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &etc2->ds_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = + &(VkPushConstantRange){ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .size = 20, + }, + }; + + return disp->CreatePipelineLayout(_device, &pipeline_layout_create_info, etc2->allocator, &etc2->pipeline_layout); +} + +static VkResult +etc2_init_ds_layout(struct vk_device *device, struct vk_texcompress_etc2_state *etc2) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + const VkDescriptorSetLayoutCreateInfo ds_layout_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = + (VkDescriptorSetLayoutBinding[]){ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }, + }; + + return disp->CreateDescriptorSetLayout(_device, &ds_layout_create_info, etc2->allocator, &etc2->ds_layout); +} + +void +vk_texcompress_etc2_init(struct vk_device *device, struct vk_texcompress_etc2_state *etc2) +{ + simple_mtx_init(&etc2->mutex, mtx_plain); +} + +VkResult +vk_texcompress_etc2_late_init(struct vk_device *device, struct vk_texcompress_etc2_state *etc2) +{ + VkResult result = VK_SUCCESS; + + simple_mtx_lock(&etc2->mutex); + + if (!etc2->pipeline) { + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + result = etc2_init_ds_layout(device, etc2); + if (result != VK_SUCCESS) + goto out; + + result = etc2_init_pipeline_layout(device, etc2); + if (result != VK_SUCCESS) { + disp->DestroyDescriptorSetLayout(_device, etc2->ds_layout, etc2->allocator); + goto out; + } + + result = etc2_init_pipeline(device, etc2); + if (result != VK_SUCCESS) { + disp->DestroyPipelineLayout(_device, etc2->pipeline_layout, etc2->allocator); + disp->DestroyDescriptorSetLayout(_device, etc2->ds_layout, etc2->allocator); + goto out; + } + } + +out: + simple_mtx_unlock(&etc2->mutex); + return result; +} + +void +vk_texcompress_etc2_finish(struct vk_device *device, struct vk_texcompress_etc2_state *etc2) +{ + const struct vk_device_dispatch_table *disp = &device->dispatch_table; + VkDevice _device = vk_device_to_handle(device); + + if (etc2->pipeline != VK_NULL_HANDLE) + disp->DestroyPipeline(_device, etc2->pipeline, etc2->allocator); + + if (etc2->pipeline_layout != VK_NULL_HANDLE) + disp->DestroyPipelineLayout(_device, etc2->pipeline_layout, etc2->allocator); + if (etc2->ds_layout != VK_NULL_HANDLE) + disp->DestroyDescriptorSetLayout(_device, etc2->ds_layout, etc2->allocator); + + simple_mtx_destroy(&etc2->mutex); +} diff --git a/src/vulkan/runtime/vk_texcompress_etc2.h 
b/src/vulkan/runtime/vk_texcompress_etc2.h new file mode 100644 index 00000000000..f8e6269ebec --- /dev/null +++ b/src/vulkan/runtime/vk_texcompress_etc2.h @@ -0,0 +1,127 @@ +/* + * Copyright 2023 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef VK_TEXCOMPRESS_ETC2_H +#define VK_TEXCOMPRESS_ETC2_H + +#include "util/simple_mtx.h" + +#include "vk_device.h" +#include "vk_format.h" +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nir_shader_compiler_options; + +struct vk_texcompress_etc2_state { + /* these are specified by the driver */ + const VkAllocationCallbacks *allocator; + const struct nir_shader_compiler_options *nir_options; + VkPipelineCache pipeline_cache; + + /* + * The pipeline is a compute pipeline with + * + * - layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + * - layout(set = 0, binding = 0) uniform utexture2DArray s_tex_2d; + * - layout(set = 0, binding = 0) uniform utexture3D s_tex_3d; + * - layout(set = 0, binding = 1) uniform image2DArray out_img_2d; + * - layout(set = 0, binding = 1) uniform image3D out_img_3d; + * - layout(push_constant) uniform Registers { + * ivec3 offset; + * int vk_format; + * int vk_image_type; + * } registers; + * + * There are other implications, such as + * + * - to make sure vkCmdCopyBufferToImage and vkCmdCopyImage are the only + * means to initialize the image data, + * - the format feature flags should not include flags that allow + * modifying the image data + * - the image tiling should be VK_IMAGE_TILING_OPTIMAL + * - the image usage flags should not include + * VK_IMAGE_USAGE_STORAGE_BIT, which can be made valid via + * VK_IMAGE_CREATE_EXTENDED_USAGE_BIT + * - the image create flags are assumed to include + * VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT and + * VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT + * - the image usage flags are assumed to include + * VK_IMAGE_USAGE_SAMPLED_BIT (for src) or VK_IMAGE_USAGE_STORAGE_BIT + * (for dst) + */ + simple_mtx_t mutex; + VkDescriptorSetLayout ds_layout; + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; +}; + +void vk_texcompress_etc2_init(struct vk_device *device, struct vk_texcompress_etc2_state *etc2); + +VkResult vk_texcompress_etc2_late_init(struct vk_device *device, struct vk_texcompress_etc2_state *etc2); + +void vk_texcompress_etc2_finish(struct vk_device *device, struct vk_texcompress_etc2_state *etc2); + +static inline VkImageViewType +vk_texcompress_etc2_image_view_type(VkImageType image_type) +{ + switch (image_type) { + case VK_IMAGE_TYPE_2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VK_IMAGE_TYPE_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("bad image type"); + } +} + +static inline VkFormat +vk_texcompress_etc2_emulation_format(VkFormat etc2_format) +{ + switch (etc2_format) { + case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + return VK_FORMAT_R8G8B8A8_SRGB; + case VK_FORMAT_EAC_R11_UNORM_BLOCK: + return VK_FORMAT_R16_UNORM; + case VK_FORMAT_EAC_R11_SNORM_BLOCK: + return VK_FORMAT_R16_SNORM; + case VK_FORMAT_EAC_R11G11_UNORM_BLOCK: + return VK_FORMAT_R16G16_UNORM; + case VK_FORMAT_EAC_R11G11_SNORM_BLOCK: + return VK_FORMAT_R16G16_SNORM; + default: + return VK_FORMAT_UNDEFINED; + } +} + +static inline VkFormat +vk_texcompress_etc2_load_format(VkFormat etc2_format) 
+{ + return vk_format_get_blocksize(etc2_format) == 16 ? VK_FORMAT_R32G32B32A32_UINT : VK_FORMAT_R32G32_UINT; +} + +static inline VkFormat +vk_texcompress_etc2_store_format(VkFormat etc2_format) +{ + VkFormat format = vk_texcompress_etc2_emulation_format(etc2_format); + if (format == VK_FORMAT_R8G8B8A8_SRGB) + format = VK_FORMAT_R8G8B8A8_UNORM; + return format; +} + +#ifdef __cplusplus +} +#endif + +#endif /* VK_TEXCOMPRESS_ETC2_H */ diff --git a/src/vulkan/runtime/vk_video.c b/src/vulkan/runtime/vk_video.c new file mode 100644 index 00000000000..cf96c1e64de --- /dev/null +++ b/src/vulkan/runtime/vk_video.c @@ -0,0 +1,2072 @@ +/* + * Copyright © 2021 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vk_video.h" +#include "vk_util.h" +#include "vk_log.h" +#include "vk_alloc.h" +#include "vk_device.h" +#include "util/vl_rbsp.h" +#include "util/vl_bitstream.h" + +VkResult +vk_video_session_init(struct vk_device *device, + struct vk_video_session *vid, + const VkVideoSessionCreateInfoKHR *create_info) +{ + vk_object_base_init(device, &vid->base, VK_OBJECT_TYPE_VIDEO_SESSION_KHR); + + vid->flags = create_info->flags; + vid->op = create_info->pVideoProfile->videoCodecOperation; + vid->max_coded = create_info->maxCodedExtent; + vid->picture_format = create_info->pictureFormat; + vid->ref_format = create_info->referencePictureFormat; + vid->max_dpb_slots = create_info->maxDpbSlots; + vid->max_active_ref_pics = create_info->maxActiveReferencePictures; + + switch (vid->op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { + const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile = + vk_find_struct_const(create_info->pVideoProfile->pNext, + VIDEO_DECODE_H264_PROFILE_INFO_KHR); + vid->h264.profile_idc = h264_profile->stdProfileIdc; + break; + } + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile = + vk_find_struct_const(create_info->pVideoProfile->pNext, + VIDEO_DECODE_H265_PROFILE_INFO_KHR); + vid->h265.profile_idc = h265_profile->stdProfileIdc; + break; + } + case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: { + const struct VkVideoDecodeAV1ProfileInfoKHR *av1_profile = + vk_find_struct_const(create_info->pVideoProfile->pNext, + VIDEO_DECODE_AV1_PROFILE_INFO_KHR); + vid->av1.profile = av1_profile->stdProfile; + vid->av1.film_grain_support = av1_profile->filmGrainSupport; + break; + }; + case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: { + const struct VkVideoEncodeH264ProfileInfoKHR *h264_profile = + vk_find_struct_const(create_info->pVideoProfile->pNext, VIDEO_ENCODE_H264_PROFILE_INFO_KHR); + vid->h264.profile_idc = h264_profile->stdProfileIdc; + break; + } + case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: { + const struct VkVideoEncodeH265ProfileInfoKHR *h265_profile = + vk_find_struct_const(create_info->pVideoProfile->pNext, VIDEO_ENCODE_H265_PROFILE_INFO_KHR); + vid->h265.profile_idc = h265_profile->stdProfileIdc; + break; + } + default: + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + if (vid->op == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR || + vid->op == VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR) { + const struct VkVideoEncodeUsageInfoKHR *encode_usage_profile = + vk_find_struct_const(create_info->pVideoProfile->pNext, VIDEO_ENCODE_USAGE_INFO_KHR); + if (encode_usage_profile) { + vid->enc_usage.video_usage_hints = encode_usage_profile->videoUsageHints; + vid->enc_usage.video_content_hints = encode_usage_profile->videoContentHints; + vid->enc_usage.tuning_mode = encode_usage_profile->tuningMode; + } else { + vid->enc_usage.video_usage_hints = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR; + vid->enc_usage.video_content_hints = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR; + vid->enc_usage.tuning_mode = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR; + } + } + + return VK_SUCCESS; +} + +static void +vk_video_deep_copy_h264_sps(struct vk_video_h264_sps *dst, + const StdVideoH264SequenceParameterSet *src) +{ + memcpy(&dst->base, src, sizeof(StdVideoH264SequenceParameterSet)); + if (src->num_ref_frames_in_pic_order_cnt_cycle && src->pOffsetForRefFrame) { + memcpy(dst->offsets_for_ref_frame, src->pOffsetForRefFrame, sizeof(int32_t) * src->num_ref_frames_in_pic_order_cnt_cycle); + dst->base.pOffsetForRefFrame = 
dst->offsets_for_ref_frame; + } + if (src->flags.seq_scaling_matrix_present_flag && src->pScalingLists) { + memcpy(&dst->scaling_lists, src->pScalingLists, sizeof(StdVideoH264ScalingLists)); + dst->base.pScalingLists = &dst->scaling_lists; + } + if (src->flags.vui_parameters_present_flag && src->pSequenceParameterSetVui) { + memcpy(&dst->vui, src->pSequenceParameterSetVui, sizeof(StdVideoH264SequenceParameterSetVui)); + dst->base.pSequenceParameterSetVui = &dst->vui; + + if (src->pSequenceParameterSetVui->pHrdParameters) { + memcpy(&dst->vui_hrd_parameters, src->pSequenceParameterSetVui->pHrdParameters, + sizeof(StdVideoH264HrdParameters)); + dst->vui.pHrdParameters = &dst->vui_hrd_parameters; + } + } +} + +static void +vk_video_deep_copy_h264_pps(struct vk_video_h264_pps *dst, + const StdVideoH264PictureParameterSet *src) +{ + memcpy(&dst->base, src, sizeof(StdVideoH264PictureParameterSet)); + if (src->flags.pic_scaling_matrix_present_flag && src->pScalingLists) { + memcpy(&dst->scaling_lists, src->pScalingLists, sizeof(StdVideoH264ScalingLists)); + dst->base.pScalingLists = &dst->scaling_lists; + } +} + +static void +vk_video_deep_copy_h265_vps(struct vk_video_h265_vps *dst, + const StdVideoH265VideoParameterSet *src) +{ + memcpy(&dst->base, src, sizeof(StdVideoH265VideoParameterSet)); + if (src->pDecPicBufMgr) { + memcpy(&dst->dec_pic_buf_mgr, src->pDecPicBufMgr, sizeof(StdVideoH265DecPicBufMgr)); + dst->base.pDecPicBufMgr = &dst->dec_pic_buf_mgr; + } + if (src->pHrdParameters) { + memcpy(&dst->hrd_parameters, src->pHrdParameters, sizeof(StdVideoH265HrdParameters)); + dst->base.pHrdParameters = &dst->hrd_parameters; + if (src->pHrdParameters->pSubLayerHrdParametersNal) { + memcpy(&dst->hrd_parameters_nal, src->pHrdParameters->pSubLayerHrdParametersNal, + sizeof(StdVideoH265SubLayerHrdParameters)); + dst->hrd_parameters.pSubLayerHrdParametersNal = &dst->hrd_parameters_nal; + } + if (src->pHrdParameters->pSubLayerHrdParametersVcl) { + memcpy(&dst->hrd_parameters_vcl, src->pHrdParameters->pSubLayerHrdParametersVcl, + sizeof(StdVideoH265SubLayerHrdParameters)); + dst->hrd_parameters.pSubLayerHrdParametersVcl = &dst->hrd_parameters_vcl; + } + } + + if (src->pProfileTierLevel) { + memcpy(&dst->tier_level, src->pProfileTierLevel, sizeof(StdVideoH265ProfileTierLevel)); + dst->base.pProfileTierLevel = &dst->tier_level; + } +} + +static void +vk_video_deep_copy_h265_sps(struct vk_video_h265_sps *dst, + const StdVideoH265SequenceParameterSet *src) +{ + memcpy(&dst->base, src, sizeof(StdVideoH265SequenceParameterSet)); + if (src->pProfileTierLevel) { + memcpy(&dst->tier_level, src->pProfileTierLevel, sizeof(StdVideoH265ProfileTierLevel)); + dst->base.pProfileTierLevel = &dst->tier_level; + } + if (src->pDecPicBufMgr) { + memcpy(&dst->dec_pic_buf_mgr, src->pDecPicBufMgr, sizeof(StdVideoH265DecPicBufMgr)); + dst->base.pDecPicBufMgr = &dst->dec_pic_buf_mgr; + } + if (src->flags.sps_scaling_list_data_present_flag && src->pScalingLists) { + memcpy(&dst->scaling_lists, src->pScalingLists, sizeof(StdVideoH265ScalingLists)); + dst->base.pScalingLists = &dst->scaling_lists; + } + + if (src->pShortTermRefPicSet) { + memcpy(&dst->short_term_ref_pic_set, src->pShortTermRefPicSet, sizeof(StdVideoH265ShortTermRefPicSet)); + dst->base.pShortTermRefPicSet = &dst->short_term_ref_pic_set; + } + + if (src->pLongTermRefPicsSps) { + memcpy(&dst->long_term_ref_pics_sps, src->pLongTermRefPicsSps, sizeof(StdVideoH265LongTermRefPicsSps)); + dst->base.pLongTermRefPicsSps = &dst->long_term_ref_pics_sps; + } + + if 
(src->pSequenceParameterSetVui) { + memcpy(&dst->vui, src->pSequenceParameterSetVui, sizeof(StdVideoH265SequenceParameterSetVui)); + dst->base.pSequenceParameterSetVui = &dst->vui; + + if (src->pSequenceParameterSetVui->pHrdParameters) { + memcpy(&dst->hrd_parameters, src->pSequenceParameterSetVui->pHrdParameters, sizeof(StdVideoH265HrdParameters)); + dst->vui.pHrdParameters = &dst->hrd_parameters; + if (src->pSequenceParameterSetVui->pHrdParameters->pSubLayerHrdParametersNal) { + memcpy(&dst->hrd_parameters_nal, src->pSequenceParameterSetVui->pHrdParameters->pSubLayerHrdParametersNal, + sizeof(StdVideoH265SubLayerHrdParameters)); + dst->hrd_parameters.pSubLayerHrdParametersNal = &dst->hrd_parameters_nal; + } + if (src->pSequenceParameterSetVui->pHrdParameters->pSubLayerHrdParametersVcl) { + memcpy(&dst->hrd_parameters_vcl, src->pSequenceParameterSetVui->pHrdParameters->pSubLayerHrdParametersVcl, + sizeof(StdVideoH265SubLayerHrdParameters)); + dst->hrd_parameters.pSubLayerHrdParametersVcl = &dst->hrd_parameters_vcl; + } + } + } + if (src->flags.sps_palette_predictor_initializers_present_flag && src->pPredictorPaletteEntries) { + memcpy(&dst->palette_entries, src->pPredictorPaletteEntries, sizeof(StdVideoH265PredictorPaletteEntries)); + dst->base.pPredictorPaletteEntries = &dst->palette_entries; + } +} + +static void +vk_video_deep_copy_h265_pps(struct vk_video_h265_pps *dst, + const StdVideoH265PictureParameterSet *src) +{ + memcpy(&dst->base, src, sizeof(StdVideoH265PictureParameterSet)); + if (src->flags.pps_scaling_list_data_present_flag && src->pScalingLists) { + memcpy(&dst->scaling_lists, src->pScalingLists, sizeof(StdVideoH265ScalingLists)); + dst->base.pScalingLists = &dst->scaling_lists; + } + + if (src->flags.pps_palette_predictor_initializers_present_flag && src->pPredictorPaletteEntries) { + memcpy(&dst->palette_entries, src->pPredictorPaletteEntries, sizeof(StdVideoH265PredictorPaletteEntries)); + dst->base.pPredictorPaletteEntries = &dst->palette_entries; + } +} + + +#define FIND(PARAMSET, SS, SET, ID) \ + static struct vk_video_##SET *find_##SS##_##SET(const struct vk_video_session_parameters *params, uint32_t id) { \ + for (unsigned i = 0; i < params->SS.SET##_count; i++) { \ + if (params->SS.SET[i].base.ID == id) \ + return &params->SS.SET[i]; \ + } \ + return NULL; \ + } \ + \ + static void add_##SS##_##SET(struct vk_video_session_parameters *params, \ + const PARAMSET *new_set, bool noreplace) { \ + struct vk_video_##SET *set = find_##SS##_##SET(params, new_set->ID); \ + if (set) { \ + if (noreplace) \ + return; \ + vk_video_deep_copy_##SET(set, new_set); \ + } else \ + vk_video_deep_copy_##SET(&params->SS.SET[params->SS.SET##_count++], new_set); \ + } \ + \ + static VkResult update_##SS##_##SET(struct vk_video_session_parameters *params, \ + uint32_t count, const PARAMSET *updates) { \ + if (params->SS.SET##_count + count >= params->SS.max_##SET##_count) \ + return VK_ERROR_TOO_MANY_OBJECTS; \ + for (unsigned _c = 0; _c < count; _c++) \ + vk_video_deep_copy_##SET(&params->SS.SET[params->SS.SET##_count + _c], &updates[_c]); \ + params->SS.SET##_count += count; \ + return VK_SUCCESS; \ + } + +FIND(StdVideoH264SequenceParameterSet, h264_dec, h264_sps, seq_parameter_set_id) +FIND(StdVideoH264PictureParameterSet, h264_dec, h264_pps, pic_parameter_set_id) +FIND(StdVideoH265VideoParameterSet, h265_dec, h265_vps, vps_video_parameter_set_id) +FIND(StdVideoH265SequenceParameterSet, h265_dec, h265_sps, sps_seq_parameter_set_id) +FIND(StdVideoH265PictureParameterSet, h265_dec, h265_pps,
pps_pic_parameter_set_id) + +FIND(StdVideoH264SequenceParameterSet, h264_enc, h264_sps, seq_parameter_set_id) +FIND(StdVideoH264PictureParameterSet, h264_enc, h264_pps, pic_parameter_set_id) + +FIND(StdVideoH265VideoParameterSet, h265_enc, h265_vps, vps_video_parameter_set_id) +FIND(StdVideoH265SequenceParameterSet, h265_enc, h265_sps, sps_seq_parameter_set_id) +FIND(StdVideoH265PictureParameterSet, h265_enc, h265_pps, pps_pic_parameter_set_id) + +static void +init_add_h264_dec_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoDecodeH264SessionParametersAddInfoKHR *h264_add, + const struct vk_video_session_parameters *templ) +{ + unsigned i; + + if (h264_add) { + for (i = 0; i < h264_add->stdSPSCount; i++) { + add_h264_dec_h264_sps(params, &h264_add->pStdSPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h264_dec.h264_sps_count; i++) { + add_h264_dec_h264_sps(params, &templ->h264_dec.h264_sps[i].base, true); + } + } + + if (h264_add) { + for (i = 0; i < h264_add->stdPPSCount; i++) { + add_h264_dec_h264_pps(params, &h264_add->pStdPPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h264_dec.h264_pps_count; i++) { + add_h264_dec_h264_pps(params, &templ->h264_dec.h264_pps[i].base, true); + } + } +} + +static void +init_add_h264_enc_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoEncodeH264SessionParametersAddInfoKHR *h264_add, + const struct vk_video_session_parameters *templ) +{ + unsigned i; + if (h264_add) { + for (i = 0; i < h264_add->stdSPSCount; i++) { + add_h264_enc_h264_sps(params, &h264_add->pStdSPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h264_enc.h264_sps_count; i++) { + add_h264_enc_h264_sps(params, &templ->h264_enc.h264_sps[i].base, true); + } + } + + if (h264_add) { + for (i = 0; i < h264_add->stdPPSCount; i++) { + add_h264_enc_h264_pps(params, &h264_add->pStdPPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h264_enc.h264_pps_count; i++) { + add_h264_enc_h264_pps(params, &templ->h264_enc.h264_pps[i].base, true); + } + } +} + +static void +init_add_h265_dec_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoDecodeH265SessionParametersAddInfoKHR *h265_add, + const struct vk_video_session_parameters *templ) +{ + unsigned i; + + if (h265_add) { + for (i = 0; i < h265_add->stdVPSCount; i++) { + add_h265_dec_h265_vps(params, &h265_add->pStdVPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h265_dec.h265_vps_count; i++) { + add_h265_dec_h265_vps(params, &templ->h265_dec.h265_vps[i].base, true); + } + } + if (h265_add) { + for (i = 0; i < h265_add->stdSPSCount; i++) { + add_h265_dec_h265_sps(params, &h265_add->pStdSPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h265_dec.h265_sps_count; i++) { + add_h265_dec_h265_sps(params, &templ->h265_dec.h265_sps[i].base, true); + } + } + + if (h265_add) { + for (i = 0; i < h265_add->stdPPSCount; i++) { + add_h265_dec_h265_pps(params, &h265_add->pStdPPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h265_dec.h265_pps_count; i++) { + add_h265_dec_h265_pps(params, &templ->h265_dec.h265_pps[i].base, true); + } + } +} + +static void +init_add_h265_enc_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoEncodeH265SessionParametersAddInfoKHR *h265_add, + const struct vk_video_session_parameters *templ) +{ + unsigned i; + + if (h265_add) { + for (i = 0; i < h265_add->stdVPSCount; i++) { +
add_h265_enc_h265_vps(params, &h265_add->pStdVPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h265_enc.h265_vps_count; i++) { + add_h265_enc_h265_vps(params, &templ->h265_enc.h265_vps[i].base, true); + } + } + if (h265_add) { + for (i = 0; i < h265_add->stdSPSCount; i++) { + add_h265_enc_h265_sps(params, &h265_add->pStdSPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h265_enc.h265_sps_count; i++) { + add_h265_enc_h265_sps(params, &templ->h265_enc.h265_sps[i].base, true); + } + } + + if (h265_add) { + for (i = 0; i < h265_add->stdPPSCount; i++) { + add_h265_enc_h265_pps(params, &h265_add->pStdPPSs[i], false); + } + } + if (templ) { + for (i = 0; i < templ->h265_enc.h265_pps_count; i++) { + add_h265_enc_h265_pps(params, &templ->h265_enc.h265_pps[i].base, true); + } + } +} + +static void +vk_video_deep_copy_av1_seq_hdr(struct vk_video_av1_seq_hdr *dst, + const StdVideoAV1SequenceHeader *src) +{ + memcpy(&dst->base, src, sizeof(StdVideoAV1SequenceHeader)); + if (src->pColorConfig) { + memcpy(&dst->color_config, src->pColorConfig, sizeof(StdVideoAV1ColorConfig)); + dst->base.pColorConfig = &dst->color_config; + } + if (src->pTimingInfo) { + memcpy(&dst->timing_info, src->pTimingInfo, sizeof(StdVideoAV1TimingInfo)); + dst->base.pTimingInfo = &dst->timing_info; + } +} + +VkResult +vk_video_session_parameters_init(struct vk_device *device, + struct vk_video_session_parameters *params, + const struct vk_video_session *vid, + const struct vk_video_session_parameters *templ, + const VkVideoSessionParametersCreateInfoKHR *create_info) +{ + memset(params, 0, sizeof(*params)); + vk_object_base_init(device, &params->base, VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR); + + params->op = vid->op; + + switch (vid->op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { + const struct VkVideoDecodeH264SessionParametersCreateInfoKHR *h264_create = + vk_find_struct_const(create_info->pNext, VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR); + + params->h264_dec.max_h264_sps_count = h264_create->maxStdSPSCount; + params->h264_dec.max_h264_pps_count = h264_create->maxStdPPSCount; + + uint32_t sps_size = params->h264_dec.max_h264_sps_count * sizeof(struct vk_video_h264_sps); + uint32_t pps_size = params->h264_dec.max_h264_pps_count * sizeof(struct vk_video_h264_pps); + + params->h264_dec.h264_sps = vk_alloc(&device->alloc, sps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + params->h264_dec.h264_pps = vk_alloc(&device->alloc, pps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!params->h264_dec.h264_sps || !params->h264_dec.h264_pps) { + vk_free(&device->alloc, params->h264_dec.h264_sps); + vk_free(&device->alloc, params->h264_dec.h264_pps); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + init_add_h264_dec_session_parameters(params, h264_create->pParametersAddInfo, templ); + break; + } + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + const struct VkVideoDecodeH265SessionParametersCreateInfoKHR *h265_create = + vk_find_struct_const(create_info->pNext, VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR); + + params->h265_dec.max_h265_vps_count = h265_create->maxStdVPSCount; + params->h265_dec.max_h265_sps_count = h265_create->maxStdSPSCount; + params->h265_dec.max_h265_pps_count = h265_create->maxStdPPSCount; + + uint32_t vps_size = params->h265_dec.max_h265_vps_count * sizeof(struct vk_video_h265_vps); + uint32_t sps_size = params->h265_dec.max_h265_sps_count * sizeof(struct vk_video_h265_sps); + uint32_t pps_size = params->h265_dec.max_h265_pps_count
* sizeof(struct vk_video_h265_pps); + + params->h265_dec.h265_vps = vk_alloc(&device->alloc, vps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + params->h265_dec.h265_sps = vk_alloc(&device->alloc, sps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + params->h265_dec.h265_pps = vk_alloc(&device->alloc, pps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!params->h265_dec.h265_sps || !params->h265_dec.h265_pps || !params->h265_dec.h265_vps) { + vk_free(&device->alloc, params->h265_dec.h265_vps); + vk_free(&device->alloc, params->h265_dec.h265_sps); + vk_free(&device->alloc, params->h265_dec.h265_pps); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + init_add_h265_dec_session_parameters(params, h265_create->pParametersAddInfo, templ); + break; + } + case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: { + const struct VkVideoDecodeAV1SessionParametersCreateInfoKHR *av1_create = + vk_find_struct_const(create_info->pNext, VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR); + if (av1_create && av1_create->pStdSequenceHeader) { + vk_video_deep_copy_av1_seq_hdr(&params->av1_dec.seq_hdr, + av1_create->pStdSequenceHeader); + } + break; + } + case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: { + const struct VkVideoEncodeH264SessionParametersCreateInfoKHR *h264_create = + vk_find_struct_const(create_info->pNext, VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR); + + params->h264_enc.max_h264_sps_count = h264_create->maxStdSPSCount; + params->h264_enc.max_h264_pps_count = h264_create->maxStdPPSCount; + + uint32_t sps_size = params->h264_enc.max_h264_sps_count * sizeof(struct vk_video_h264_sps); + uint32_t pps_size = params->h264_enc.max_h264_pps_count * sizeof(struct vk_video_h264_pps); + + params->h264_enc.h264_sps = vk_alloc(&device->alloc, sps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + params->h264_enc.h264_pps = vk_alloc(&device->alloc, pps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!params->h264_enc.h264_sps || !params->h264_enc.h264_pps) { + vk_free(&device->alloc, params->h264_enc.h264_sps); + vk_free(&device->alloc, params->h264_enc.h264_pps); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + params->h264_enc.profile_idc = vid->h264.profile_idc; + init_add_h264_enc_session_parameters(params, h264_create->pParametersAddInfo, templ); + break; + } + case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: { + const struct VkVideoEncodeH265SessionParametersCreateInfoKHR *h265_create = + vk_find_struct_const(create_info->pNext, VIDEO_ENCODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR); + + params->h265_enc.max_h265_vps_count = h265_create->maxStdVPSCount; + params->h265_enc.max_h265_sps_count = h265_create->maxStdSPSCount; + params->h265_enc.max_h265_pps_count = h265_create->maxStdPPSCount; + + uint32_t vps_size = params->h265_enc.max_h265_vps_count * sizeof(struct vk_video_h265_vps); + uint32_t sps_size = params->h265_enc.max_h265_sps_count * sizeof(struct vk_video_h265_sps); + uint32_t pps_size = params->h265_enc.max_h265_pps_count * sizeof(struct vk_video_h265_pps); + + params->h265_enc.h265_vps = vk_alloc(&device->alloc, vps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + params->h265_enc.h265_sps = vk_alloc(&device->alloc, sps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + params->h265_enc.h265_pps = vk_alloc(&device->alloc, pps_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!params->h265_enc.h265_sps || !params->h265_enc.h265_pps || !params->h265_enc.h265_vps) { + vk_free(&device->alloc, params->h265_enc.h265_vps); + vk_free(&device->alloc,
params->h265_enc.h265_sps); + vk_free(&device->alloc, params->h265_enc.h265_pps); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + init_add_h265_enc_session_parameters(params, h265_create->pParametersAddInfo, templ); + break; + } + default: + unreachable("Unsupported video codec operation"); + break; + } + return VK_SUCCESS; +} + +void +vk_video_session_parameters_finish(struct vk_device *device, + struct vk_video_session_parameters *params) +{ + switch (params->op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: + vk_free(&device->alloc, params->h264_dec.h264_sps); + vk_free(&device->alloc, params->h264_dec.h264_pps); + break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + vk_free(&device->alloc, params->h265_dec.h265_vps); + vk_free(&device->alloc, params->h265_dec.h265_sps); + vk_free(&device->alloc, params->h265_dec.h265_pps); + break; + case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: + vk_free(&device->alloc, params->h264_enc.h264_sps); + vk_free(&device->alloc, params->h264_enc.h264_pps); + break; + case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: + vk_free(&device->alloc, params->h265_enc.h265_vps); + vk_free(&device->alloc, params->h265_enc.h265_sps); + vk_free(&device->alloc, params->h265_enc.h265_pps); + break; + default: + break; + } + vk_object_base_finish(&params->base); +} + +static VkResult +update_h264_dec_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoDecodeH264SessionParametersAddInfoKHR *h264_add) +{ + VkResult result = VK_SUCCESS; + + result = update_h264_dec_h264_sps(params, h264_add->stdSPSCount, h264_add->pStdSPSs); + if (result != VK_SUCCESS) + return result; + + result = update_h264_dec_h264_pps(params, h264_add->stdPPSCount, h264_add->pStdPPSs); + return result; +} + +static VkResult +update_h264_enc_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoEncodeH264SessionParametersAddInfoKHR *h264_add) +{ + VkResult result = VK_SUCCESS; + result = update_h264_enc_h264_sps(params, h264_add->stdSPSCount, h264_add->pStdSPSs); + if (result != VK_SUCCESS) + return result; + + result = update_h264_enc_h264_pps(params, h264_add->stdPPSCount, h264_add->pStdPPSs); + return result; +} + +static VkResult +update_h265_enc_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoEncodeH265SessionParametersAddInfoKHR *h265_add) +{ + VkResult result = VK_SUCCESS; + + result = update_h265_enc_h265_vps(params, h265_add->stdVPSCount, h265_add->pStdVPSs); + if (result != VK_SUCCESS) + return result; + + result = update_h265_enc_h265_sps(params, h265_add->stdSPSCount, h265_add->pStdSPSs); + if (result != VK_SUCCESS) + return result; + + result = update_h265_enc_h265_pps(params, h265_add->stdPPSCount, h265_add->pStdPPSs); + return result; +} + +static VkResult +update_h265_session_parameters(struct vk_video_session_parameters *params, + const struct VkVideoDecodeH265SessionParametersAddInfoKHR *h265_add) +{ + VkResult result = VK_SUCCESS; + result = update_h265_dec_h265_vps(params, h265_add->stdVPSCount, h265_add->pStdVPSs); + if (result != VK_SUCCESS) + return result; + + result = update_h265_dec_h265_sps(params, h265_add->stdSPSCount, h265_add->pStdSPSs); + if (result != VK_SUCCESS) + return result; + + result = update_h265_dec_h265_pps(params, h265_add->stdPPSCount, h265_add->pStdPPSs); + return result; +} + +VkResult +vk_video_session_parameters_update(struct vk_video_session_parameters *params, + const VkVideoSessionParametersUpdateInfoKHR *update) +{ +
/* 39.6.5. Decoder Parameter Sets - + * "The provided H.264 SPS/PPS parameters must be within the limits specified during decoder + * creation for the decoder specified in VkVideoSessionParametersCreateInfoKHR." + */ + + /* + * There is no need to deduplicate here. + * videoSessionParameters must not already contain a StdVideoH264PictureParameterSet entry with + * both seq_parameter_set_id and pic_parameter_set_id matching any of the elements of + * VkVideoDecodeH264SessionParametersAddInfoKHR::pStdPPS + */ + VkResult result = VK_SUCCESS; + + switch (params->op) { + case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { + const struct VkVideoDecodeH264SessionParametersAddInfoKHR *h264_add = + vk_find_struct_const(update->pNext, VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR); + return update_h264_dec_session_parameters(params, h264_add); + } + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + const struct VkVideoDecodeH265SessionParametersAddInfoKHR *h265_add = + vk_find_struct_const(update->pNext, VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR); + + return update_h265_session_parameters(params, h265_add); + } + case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: { + const struct VkVideoEncodeH264SessionParametersAddInfoKHR *h264_add = + vk_find_struct_const(update->pNext, VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR); + return update_h264_enc_session_parameters(params, h264_add); + } + case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: { + const struct VkVideoEncodeH265SessionParametersAddInfoKHR *h265_add = + vk_find_struct_const(update->pNext, VIDEO_ENCODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR); + return update_h265_enc_session_parameters(params, h265_add); + } + default: + unreachable("Unknown codec\n"); + } + return result; +} + +const uint8_t h264_scaling_list_default_4x4_intra[] = +{ + /* Table 7-3 - Default_4x4_Intra */ + 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42 +}; + +const uint8_t h264_scaling_list_default_4x4_inter[] = +{ + /* Table 7-3 - Default_4x4_Inter */ + 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34 +}; + +const uint8_t h264_scaling_list_default_8x8_intra[] = +{ + /* Table 7-4 - Default_8x8_Intra */ + 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23, + 23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, + 27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, + 31, 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42, +}; + +const uint8_t h264_scaling_list_default_8x8_inter[] = +{ + /* Table 7-4 - Default_8x8_Inter */ + 9 , 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21, + 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24, + 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, + 27, 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35, +}; + +void +vk_video_derive_h264_scaling_list(const StdVideoH264SequenceParameterSet *sps, + const StdVideoH264PictureParameterSet *pps, + StdVideoH264ScalingLists *list) +{ + StdVideoH264ScalingLists temp; + + /* derive SPS scaling list first, because PPS may depend on it in fall-back + * rule B */ + if (sps->flags.seq_scaling_matrix_present_flag) + { + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + { + if (sps->pScalingLists->scaling_list_present_mask & (1 << i)) + memcpy(temp.ScalingList4x4[i], + sps->pScalingLists->ScalingList4x4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else /* fall-back rule A */ + { + if (i == 0) + memcpy(temp.ScalingList4x4[i], + 
h264_scaling_list_default_4x4_intra, + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else if (i == 3) + memcpy(temp.ScalingList4x4[i], + h264_scaling_list_default_4x4_inter, + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else + memcpy(temp.ScalingList4x4[i], + temp.ScalingList4x4[i - 1], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + } + } + + for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; j++) + { + int i = j + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; + if (sps->pScalingLists->scaling_list_present_mask & (1 << i)) + memcpy(temp.ScalingList8x8[j], sps->pScalingLists->ScalingList8x8[j], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else /* fall-back rule A */ + { + if (i == 6) + memcpy(temp.ScalingList8x8[j], + h264_scaling_list_default_8x8_intra, + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else if (i == 7) + memcpy(temp.ScalingList8x8[j], + h264_scaling_list_default_8x8_inter, + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else + memcpy(temp.ScalingList8x8[j], temp.ScalingList8x8[j - 2], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + } + } + } + else + { + memset(temp.ScalingList4x4, 0x10, + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS * + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + memset(temp.ScalingList8x8, 0x10, + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS * + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + } + + if (pps->flags.pic_scaling_matrix_present_flag) + { + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + { + if (pps->pScalingLists->scaling_list_present_mask & (1 << i)) + memcpy(list->ScalingList4x4[i], pps->pScalingLists->ScalingList4x4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else if (sps->flags.seq_scaling_matrix_present_flag) /* fall-back rule B */ + { + if (i == 0 || i == 3) + memcpy(list->ScalingList4x4[i], temp.ScalingList4x4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else + memcpy(list->ScalingList4x4[i], list->ScalingList4x4[i - 1], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + } + else /* fall-back rule A */ + { + if (i == 0) + memcpy(list->ScalingList4x4[i], + h264_scaling_list_default_4x4_intra, + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else if (i == 3) + memcpy(list->ScalingList4x4[i], + h264_scaling_list_default_4x4_inter, + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + else + memcpy(list->ScalingList4x4[i], + list->ScalingList4x4[i - 1], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + } + } + + for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; j++) + { + int i = j + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; + if (pps->pScalingLists->scaling_list_present_mask & (1 << i)) + memcpy(list->ScalingList8x8[j], pps->pScalingLists->ScalingList8x8[j], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else if (sps->flags.seq_scaling_matrix_present_flag) /* fall-back rule B */ + { + if (i == 6 || i == 7) + memcpy(list->ScalingList8x8[j], temp.ScalingList8x8[j], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else + memcpy(list->ScalingList8x8[j], list->ScalingList8x8[j - 2], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + } + else /* fall-back rule A */ + { + if (i == 6) + memcpy(list->ScalingList8x8[j], + h264_scaling_list_default_8x8_intra, + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else if (i == 7) + memcpy(list->ScalingList8x8[j], + h264_scaling_list_default_8x8_inter, + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + else + memcpy(list->ScalingList8x8[j], list->ScalingList8x8[j - 2], + 
STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + } + } + } + else + { + memcpy(list->ScalingList4x4, temp.ScalingList4x4, + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS * + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS); + memcpy(list->ScalingList8x8, temp.ScalingList8x8, + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS * + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS); + } +} + +const StdVideoH264SequenceParameterSet * +vk_video_find_h264_dec_std_sps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h264_dec_h264_sps(params, id)->base; +} + +const StdVideoH264PictureParameterSet * +vk_video_find_h264_dec_std_pps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h264_dec_h264_pps(params, id)->base; +} + +const StdVideoH265VideoParameterSet * +vk_video_find_h265_dec_std_vps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h265_dec_h265_vps(params, id)->base; +} + +const StdVideoH265SequenceParameterSet * +vk_video_find_h265_dec_std_sps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h265_dec_h265_sps(params, id)->base; +} + +const StdVideoH265PictureParameterSet * +vk_video_find_h265_dec_std_pps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h265_dec_h265_pps(params, id)->base; +} + +int +vk_video_h265_poc_by_slot(const struct VkVideoDecodeInfoKHR *frame_info, int slot) +{ + for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { + const VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot_info = + vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR); + if (frame_info->pReferenceSlots[i].slotIndex == slot) + return dpb_slot_info->pStdReferenceInfo->PicOrderCntVal; + } + + assert(0); + + return 0; +} + +void +vk_fill_video_h265_reference_info(const VkVideoDecodeInfoKHR *frame_info, + const struct VkVideoDecodeH265PictureInfoKHR *pic, + const struct vk_video_h265_slice_params *slice_params, + struct vk_video_h265_reference ref_slots[][8]) +{ + uint8_t list_cnt = slice_params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B ? 2 : 1; + uint8_t list_idx; + int i, j; + + for (list_idx = 0; list_idx < list_cnt; list_idx++) { + /* The order is + * L0: Short term current before set - Short term current after set - long term current + * L1: Short term current after set - short term current before set - long term current + */ + const uint8_t *rps[3] = { + list_idx ? pic->pStdPictureInfo->RefPicSetStCurrAfter : pic->pStdPictureInfo->RefPicSetStCurrBefore, + list_idx ? pic->pStdPictureInfo->RefPicSetStCurrBefore : pic->pStdPictureInfo->RefPicSetStCurrAfter, + pic->pStdPictureInfo->RefPicSetLtCurr + }; + + uint8_t ref_idx = 0; + for (i = 0; i < 3; i++) { + const uint8_t *cur_rps = rps[i]; + + for (j = 0; (cur_rps[j] != 0xff) && ((j + ref_idx) < 8); j++) { + ref_slots[list_idx][j + ref_idx].slot_index = cur_rps[j]; + ref_slots[list_idx][j + ref_idx].pic_order_cnt = vk_video_h265_poc_by_slot(frame_info, cur_rps[j]); + } + ref_idx += j; + } + + /* TODO: should handle cases where rpl_modification_flag is true. */ + assert(!slice_params->rpl_modification_flag[0] && !slice_params->rpl_modification_flag[1]); + } +} + +static void +h265_pred_weight_table(struct vk_video_h265_slice_params *params, + struct vl_rbsp *rbsp, + const StdVideoH265SequenceParameterSet *sps, + StdVideoH265SliceType slice_type) +{ + unsigned chroma_array_type = sps->flags.separate_colour_plane_flag ? 
0 : sps->chroma_format_idc; + unsigned i, j; + + params->luma_log2_weight_denom = vl_rbsp_ue(rbsp); + + assert(params->luma_log2_weight_denom >= 0 && params->luma_log2_weight_denom < 8); + + if (chroma_array_type != 0) { + params->chroma_log2_weight_denom = params->luma_log2_weight_denom + vl_rbsp_se(rbsp); + assert(params->chroma_log2_weight_denom >= 0 && params->chroma_log2_weight_denom < 8); + } + + for (i = 0; i < params->num_ref_idx_l0_active; ++i) { + params->luma_weight_l0_flag[i] = vl_rbsp_u(rbsp, 1); + if (!params->luma_weight_l0_flag[i]) { + params->luma_weight_l0[i] = 1 << params->luma_log2_weight_denom; + params->luma_offset_l0[i] = 0; + } + } + + for (i = 0; i < params->num_ref_idx_l0_active; ++i) { + if (chroma_array_type == 0) { + params->chroma_weight_l0_flag[i] = 0; + } else { + params->chroma_weight_l0_flag[i] = vl_rbsp_u(rbsp, 1); + } + } + + for (i = 0; i < params->num_ref_idx_l0_active; ++i) { + if (params->luma_weight_l0_flag[i]) { + params->delta_luma_weight_l0[i] = vl_rbsp_se(rbsp); + params->luma_weight_l0[i] = (1 << params->luma_log2_weight_denom) + params->delta_luma_weight_l0[i]; + params->luma_offset_l0[i] = vl_rbsp_se(rbsp); + } + + if (params->chroma_weight_l0_flag[i]) { + for (j = 0; j < 2; j++) { + params->delta_chroma_weight_l0[i][j] = vl_rbsp_se(rbsp); + params->delta_chroma_offset_l0[i][j] = vl_rbsp_se(rbsp); + + params->chroma_weight_l0[i][j] = + (1 << params->chroma_log2_weight_denom) + params->delta_chroma_weight_l0[i][j]; + params->chroma_offset_l0[i][j] = CLAMP(params->delta_chroma_offset_l0[i][j] - + ((128 * params->chroma_weight_l0[i][j]) >> params->chroma_log2_weight_denom) + 128, -128, 127); + } + } else { + for (j = 0; j < 2; j++) { + params->chroma_weight_l0[i][j] = 1 << params->chroma_log2_weight_denom; + params->chroma_offset_l0[i][j] = 0; + } + } + } + + if (slice_type == STD_VIDEO_H265_SLICE_TYPE_B) { + for (i = 0; i < params->num_ref_idx_l1_active; ++i) { + params->luma_weight_l1_flag[i] = vl_rbsp_u(rbsp, 1); + if (!params->luma_weight_l1_flag[i]) { + params->luma_weight_l1[i] = 1 << params->luma_log2_weight_denom; + params->luma_offset_l1[i] = 0; + } + } + + for (i = 0; i < params->num_ref_idx_l1_active; ++i) { + if (chroma_array_type == 0) { + params->chroma_weight_l1_flag[i] = 0; + } else { + params->chroma_weight_l1_flag[i] = vl_rbsp_u(rbsp, 1); + } + } + + for (i = 0; i < params->num_ref_idx_l1_active; ++i) { + if (params->luma_weight_l1_flag[i]) { + params->delta_luma_weight_l1[i] = vl_rbsp_se(rbsp); + params->luma_weight_l1[i] = + (1 << params->luma_log2_weight_denom) + params->delta_luma_weight_l1[i]; + params->luma_offset_l1[i] = vl_rbsp_se(rbsp); + } + + if (params->chroma_weight_l1_flag[i]) { + for (j = 0; j < 2; j++) { + params->delta_chroma_weight_l1[i][j] = vl_rbsp_se(rbsp); + params->delta_chroma_offset_l1[i][j] = vl_rbsp_se(rbsp); + + params->chroma_weight_l1[i][j] = + (1 << params->chroma_log2_weight_denom) + params->delta_chroma_weight_l1[i][j]; + params->chroma_offset_l1[i][j] = CLAMP(params->delta_chroma_offset_l1[i][j] - + ((128 * params->chroma_weight_l1[i][j]) >> params->chroma_log2_weight_denom) + 128, -128, 127); + } + } else { + for (j = 0; j < 2; j++) { + params->chroma_weight_l1[i][j] = 1 << params->chroma_log2_weight_denom; + params->chroma_offset_l1[i][j] = 0; + } + } + } + } +} + +void +vk_video_parse_h265_slice_header(const struct VkVideoDecodeInfoKHR *frame_info, + const VkVideoDecodeH265PictureInfoKHR *pic_info, + const StdVideoH265SequenceParameterSet *sps, + const StdVideoH265PictureParameterSet *pps, + 
void *slice_data, + uint32_t slice_size, + struct vk_video_h265_slice_params *params) +{ + struct vl_vlc vlc; + const void *slice_headers[1] = { slice_data }; + vl_vlc_init(&vlc, 1, slice_headers, &slice_size); + + assert(vl_vlc_peekbits(&vlc, 24) == 0x000001); + + vl_vlc_eatbits(&vlc, 24); + + /* forbidden_zero_bit */ + vl_vlc_eatbits(&vlc, 1); + + if (vl_vlc_valid_bits(&vlc) < 15) + vl_vlc_fillbits(&vlc); + + vl_vlc_get_uimsbf(&vlc, 6); /* nal_unit_type */ + vl_vlc_get_uimsbf(&vlc, 6); /* nuh_layer_id */ + vl_vlc_get_uimsbf(&vlc, 3); /* nuh_temporal_id_plus1 */ + + struct vl_rbsp rbsp; + vl_rbsp_init(&rbsp, &vlc, 128, /* emulation_bytes */ true); + + memset(params, 0, sizeof(*params)); + + params->slice_size = slice_size; + params->first_slice_segment_in_pic_flag = vl_rbsp_u(&rbsp, 1); + + /* no_output_of_prior_pics_flag */ + if (pic_info->pStdPictureInfo->flags.IrapPicFlag) + vl_rbsp_u(&rbsp, 1); + + /* pps id */ + vl_rbsp_ue(&rbsp); + + if (!params->first_slice_segment_in_pic_flag) { + int size, num; + int bits_slice_segment_address = 0; + + if (pps->flags.dependent_slice_segments_enabled_flag) + params->dependent_slice_segment = vl_rbsp_u(&rbsp, 1); + + size = 1 << (sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size); + + num = ((sps->pic_width_in_luma_samples + size - 1) / size) * + ((sps->pic_height_in_luma_samples + size - 1) / size); + + while (num > (1 << bits_slice_segment_address)) + bits_slice_segment_address++; + + /* slice_segment_address */ + params->slice_segment_address = vl_rbsp_u(&rbsp, bits_slice_segment_address); + } + + if (params->dependent_slice_segment) + return; + + for (unsigned i = 0; i < pps->num_extra_slice_header_bits; ++i) + /* slice_reserved_flag */ + vl_rbsp_u(&rbsp, 1); + + /* slice_type */ + params->slice_type = vl_rbsp_ue(&rbsp); + + if (pps->flags.output_flag_present_flag) + /* pic output flag */ + vl_rbsp_u(&rbsp, 1); + + if (sps->flags.separate_colour_plane_flag) + /* colour_plane_id */ + vl_rbsp_u(&rbsp, 2); + + if (!pic_info->pStdPictureInfo->flags.IdrPicFlag) { + /* slice_pic_order_cnt_lsb */ + params->pic_order_cnt_lsb = + vl_rbsp_u(&rbsp, sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + + /* short_term_ref_pic_set_sps_flag */ + if (!vl_rbsp_u(&rbsp, 1)) { + uint8_t rps_predict = 0; + + if (sps->num_short_term_ref_pic_sets) + rps_predict = vl_rbsp_u(&rbsp, 1); + + if (rps_predict) { + /* delta_idx */ + vl_rbsp_ue(&rbsp); + /* delta_rps_sign */ + vl_rbsp_u(&rbsp, 1); + /* abs_delta_rps */ + vl_rbsp_ue(&rbsp); + + for (int i = 0 ; i <= pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx; i++) { + uint8_t used = vl_rbsp_u(&rbsp, 1); + if (!used) + vl_rbsp_u(&rbsp, 1); + } + } else { + /* num_negative_pics */ + unsigned num_neg_pics = vl_rbsp_ue(&rbsp); + /* num_positive_pics */ + unsigned num_pos_pics = vl_rbsp_ue(&rbsp); + + for(unsigned i = 0 ; i < num_neg_pics; ++i) { + /* delta_poc_s0_minus1 */ + vl_rbsp_ue(&rbsp); + /* used_by_curr_pic_s0_flag */ + vl_rbsp_u(&rbsp, 1); + } + + for(unsigned i = 0; i < num_pos_pics; ++i) { + /* delta_poc_s1_minus1 */ + vl_rbsp_ue(&rbsp); + /* used_by_curr_pic_s0_flag */ + vl_rbsp_u(&rbsp, 1); + } + } + + } else { + unsigned num_st_rps = sps->num_short_term_ref_pic_sets; + + int numbits = util_logbase2_ceil(num_st_rps); + if (numbits > 0) + /* short_term_ref_pic_set_idx */ + vl_rbsp_u(&rbsp, numbits); + } + + if (sps->flags.long_term_ref_pics_present_flag) { + unsigned num_lt_sps = 0; + + if (sps->num_long_term_ref_pics_sps > 0) + num_lt_sps = vl_rbsp_ue(&rbsp); + + 
unsigned num_lt_pics = vl_rbsp_ue(&rbsp); + unsigned num_refs = num_lt_pics + num_lt_sps; + + for (unsigned i = 0; i < num_refs; i++) { + if (i < num_lt_sps) { + if (sps->num_long_term_ref_pics_sps > 1) + /* lt_idx_sps */ + vl_rbsp_u(&rbsp, + util_logbase2_ceil(sps->num_long_term_ref_pics_sps)); + } else { + /* poc_lsb_lt */ + vl_rbsp_u(&rbsp, sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + /* used_by_curr_pic_lt_flag */ + vl_rbsp_u(&rbsp, 1); + } + + /* poc_msb_present */ + if (vl_rbsp_u(&rbsp, 1)) { + /* delta_poc_msb_cycle_lt */ + vl_rbsp_ue(&rbsp); + } + } + } + + if (sps->flags.sps_temporal_mvp_enabled_flag) + params->temporal_mvp_enable = vl_rbsp_u(&rbsp, 1); + } + + if (sps->flags.sample_adaptive_offset_enabled_flag) { + params->sao_luma_flag = vl_rbsp_u(&rbsp, 1); + if (sps->chroma_format_idc) + params->sao_chroma_flag = vl_rbsp_u(&rbsp, 1); + } + + params->max_num_merge_cand = 5; + + if (params->slice_type != STD_VIDEO_H265_SLICE_TYPE_I) { + + params->num_ref_idx_l0_active = pps->num_ref_idx_l0_default_active_minus1 + 1; + + if (params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B) + params->num_ref_idx_l1_active = pps->num_ref_idx_l1_default_active_minus1 + 1; + else + params->num_ref_idx_l1_active = 0; + + /* num_ref_idx_active_override_flag */ + if (vl_rbsp_u(&rbsp, 1)) { + params->num_ref_idx_l0_active = vl_rbsp_ue(&rbsp) + 1; + if (params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B) + params->num_ref_idx_l1_active = vl_rbsp_ue(&rbsp) + 1; + } + + if (pps->flags.lists_modification_present_flag) { + params->rpl_modification_flag[0] = vl_rbsp_u(&rbsp, 1); + if (params->rpl_modification_flag[0]) { + for (int i = 0; i < params->num_ref_idx_l0_active; i++) { + /* list_entry_l0 */ + vl_rbsp_u(&rbsp, + util_logbase2_ceil(params->num_ref_idx_l0_active + params->num_ref_idx_l1_active)); + } + } + + if (params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B) { + params->rpl_modification_flag[1] = vl_rbsp_u(&rbsp, 1); + if (params->rpl_modification_flag[1]) { + for (int i = 0; i < params->num_ref_idx_l1_active; i++) { + /* list_entry_l1 */ + vl_rbsp_u(&rbsp, + util_logbase2_ceil(params->num_ref_idx_l0_active + params->num_ref_idx_l1_active)); + } + } + } + } + + if (params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B) + params->mvd_l1_zero_flag = vl_rbsp_u(&rbsp, 1); + + if (pps->flags.cabac_init_present_flag) + /* cabac_init_flag */ + params->cabac_init_idc = vl_rbsp_u(&rbsp, 1); + + if (params->temporal_mvp_enable) { + if (params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B) + params->collocated_list = !vl_rbsp_u(&rbsp, 1); + + if (params->collocated_list == 0) { + if (params->num_ref_idx_l0_active > 1) + params->collocated_ref_idx = vl_rbsp_ue(&rbsp); + } else if (params->collocated_list == 1) { + if (params->num_ref_idx_l1_active > 1) + params->collocated_ref_idx = vl_rbsp_ue(&rbsp); + } + } + + if ((pps->flags.weighted_pred_flag && params->slice_type == STD_VIDEO_H265_SLICE_TYPE_P) || + (pps->flags.weighted_bipred_flag && params->slice_type == STD_VIDEO_H265_SLICE_TYPE_B)) { + h265_pred_weight_table(params, &rbsp, sps, params->slice_type); + } + + params->max_num_merge_cand -= vl_rbsp_ue(&rbsp); + } + + params->slice_qp_delta = vl_rbsp_se(&rbsp); + + if (pps->flags.pps_slice_chroma_qp_offsets_present_flag) { + params->slice_cb_qp_offset = vl_rbsp_se(&rbsp); + params->slice_cr_qp_offset = vl_rbsp_se(&rbsp); + } + + if (pps->flags.chroma_qp_offset_list_enabled_flag) + /* cu_chroma_qp_offset_enabled_flag */ + vl_rbsp_u(&rbsp, 1); + + if (pps->flags.deblocking_filter_control_present_flag) { + if 
(pps->flags.deblocking_filter_override_enabled_flag) { + /* deblocking_filter_override_flag */ + if (vl_rbsp_u(&rbsp, 1)) { + params->disable_deblocking_filter_idc = vl_rbsp_u(&rbsp, 1); + + if (!params->disable_deblocking_filter_idc) { + params->beta_offset_div2 = vl_rbsp_se(&rbsp); + params->tc_offset_div2 = vl_rbsp_se(&rbsp); + } + } else { + params->disable_deblocking_filter_idc = + pps->flags.pps_deblocking_filter_disabled_flag; + } + } + } + + if (pps->flags.pps_loop_filter_across_slices_enabled_flag && + (params->sao_luma_flag || params->sao_chroma_flag || + !params->disable_deblocking_filter_idc)) + params->loop_filter_across_slices_enable = vl_rbsp_u(&rbsp, 1); + + if (pps->flags.tiles_enabled_flag || pps->flags.entropy_coding_sync_enabled_flag) { + unsigned num_entry_points_offsets = vl_rbsp_ue(&rbsp); + + if (num_entry_points_offsets > 0) { + unsigned offset_len = vl_rbsp_ue(&rbsp) + 1; + for (unsigned i = 0; i < num_entry_points_offsets; i++) { + /* entry_point_offset_minus1 */ + vl_rbsp_u(&rbsp, offset_len); + } + } + } + + if (pps->flags.pps_extension_present_flag) { + unsigned length = vl_rbsp_ue(&rbsp); + for (unsigned i = 0; i < length; i++) + /* slice_reserved_undetermined_flag */ + vl_rbsp_u(&rbsp, 1); + } + + unsigned header_bits = + (slice_size * 8 - 24 /* start code */) - vl_vlc_bits_left(&rbsp.nal) - rbsp.removed; + params->slice_data_bytes_offset = (header_bits + 8) / 8; +} + +void +vk_video_get_profile_alignments(const VkVideoProfileListInfoKHR *profile_list, + uint32_t *width_align_out, uint32_t *height_align_out) +{ + uint32_t width_align = 1, height_align = 1; + for (unsigned i = 0; i < profile_list->profileCount; i++) { + if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR || + profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR + ) { + width_align = MAX2(width_align, VK_VIDEO_H264_MACROBLOCK_WIDTH); + height_align = MAX2(height_align, VK_VIDEO_H264_MACROBLOCK_HEIGHT); + } + if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR || + profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR + ) { + width_align = MAX2(width_align, VK_VIDEO_H265_CTU_MAX_WIDTH); + height_align = MAX2(height_align, VK_VIDEO_H265_CTU_MAX_HEIGHT); + } + if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + width_align = MAX2(width_align, VK_VIDEO_AV1_BLOCK_WIDTH); + height_align = MAX2(height_align, VK_VIDEO_AV1_BLOCK_HEIGHT); + } + } + *width_align_out = width_align; + *height_align_out = height_align; +} + +static const uint8_t vk_video_h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62}; +uint8_t +vk_video_get_h264_level(StdVideoH264LevelIdc level) +{ + assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2); + return vk_video_h264_levels[level]; +} + +const StdVideoH264SequenceParameterSet * +vk_video_find_h264_enc_std_sps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h264_enc_h264_sps(params, id)->base; +} + +const StdVideoH264PictureParameterSet * +vk_video_find_h264_enc_std_pps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h264_enc_h264_pps(params, id)->base; +} + +const StdVideoH265VideoParameterSet * +vk_video_find_h265_enc_std_vps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h265_enc_h265_vps(params, id)->base; +} + 
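+/*
+ * Illustrative sketch only, not code from this file: a driver might pair the
+ * std-header lookups above with the vl_bitstream based writers further below
+ * (vk_video_encode_h264_sps / vk_video_encode_h264_pps) when it has to return
+ * raw parameter set NALUs, e.g. for vkGetEncodedVideoSessionParametersKHR.
+ * The second argument of vk_video_encode_h264_pps selects the High-profile
+ * syntax elements; sps_id, pps_id, max_size, size and data are placeholder
+ * names, not identifiers defined here.
+ *
+ *    size_t size = 0;
+ *    const StdVideoH264SequenceParameterSet *sps =
+ *       vk_video_find_h264_enc_std_sps(params, sps_id);
+ *    const StdVideoH264PictureParameterSet *pps =
+ *       vk_video_find_h264_enc_std_pps(params, pps_id);
+ *    vk_video_encode_h264_sps(sps, max_size, &size, data);
+ *    vk_video_encode_h264_pps(pps, true, max_size, &size, data);
+ */
+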
+const StdVideoH265SequenceParameterSet * +vk_video_find_h265_enc_std_sps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h265_enc_h265_sps(params, id)->base; +} + +const StdVideoH265PictureParameterSet * +vk_video_find_h265_enc_std_pps(const struct vk_video_session_parameters *params, + uint32_t id) +{ + return &find_h265_enc_h265_pps(params, id)->base; +} + +enum H264NALUType +{ + H264_NAL_UNSPECIFIED = 0, + H264_NAL_SLICE = 1, + H264_NAL_SLICEDATA_A = 2, + H264_NAL_SLICEDATA_B = 3, + H264_NAL_SLICEDATA_C = 4, + H264_NAL_IDR = 5, + H264_NAL_SEI = 6, + H264_NAL_SPS = 7, + H264_NAL_PPS = 8, + H264_NAL_ACCESS_UNIT_DEMILITER = 9, + H264_NAL_END_OF_SEQUENCE = 10, + H264_NAL_END_OF_STREAM = 11, + H264_NAL_FILLER_DATA = 12, + H264_NAL_SPS_EXTENSION = 13, + H264_NAL_PREFIX = 14, + /* 15...18 RESERVED */ + H264_NAL_AUXILIARY_SLICE = 19, + /* 20...23 RESERVED */ + /* 24...31 UNSPECIFIED */ +}; + +enum HEVCNALUnitType { + HEVC_NAL_TRAIL_N = 0, + HEVC_NAL_TRAIL_R = 1, + HEVC_NAL_TSA_N = 2, + HEVC_NAL_TSA_R = 3, + HEVC_NAL_STSA_N = 4, + HEVC_NAL_STSA_R = 5, + HEVC_NAL_RADL_N = 6, + HEVC_NAL_RADL_R = 7, + HEVC_NAL_RASL_N = 8, + HEVC_NAL_RASL_R = 9, + HEVC_NAL_VCL_N10 = 10, + HEVC_NAL_VCL_R11 = 11, + HEVC_NAL_VCL_N12 = 12, + HEVC_NAL_VCL_R13 = 13, + HEVC_NAL_VCL_N14 = 14, + HEVC_NAL_VCL_R15 = 15, + HEVC_NAL_BLA_W_LP = 16, + HEVC_NAL_BLA_W_RADL = 17, + HEVC_NAL_BLA_N_LP = 18, + HEVC_NAL_IDR_W_RADL = 19, + HEVC_NAL_IDR_N_LP = 20, + HEVC_NAL_CRA_NUT = 21, + HEVC_NAL_VPS_NUT = 32, + HEVC_NAL_SPS_NUT = 33, + HEVC_NAL_PPS_NUT = 34, +}; + +unsigned +vk_video_get_h265_nal_unit(const StdVideoEncodeH265PictureInfo *pic_info) +{ + switch (pic_info->pic_type) { + case STD_VIDEO_H265_PICTURE_TYPE_IDR: + return HEVC_NAL_IDR_W_RADL; + case STD_VIDEO_H265_PICTURE_TYPE_I: + return HEVC_NAL_CRA_NUT; + case STD_VIDEO_H265_PICTURE_TYPE_P: + return HEVC_NAL_TRAIL_R; + case STD_VIDEO_H265_PICTURE_TYPE_B: + if (pic_info->flags.IrapPicFlag) + if (pic_info->flags.is_reference) + return HEVC_NAL_RASL_R; + else + return HEVC_NAL_RASL_N; + else + if (pic_info->flags.is_reference) + return HEVC_NAL_TRAIL_R; + else + return HEVC_NAL_TRAIL_N; + break; + default: + assert(0); + break; + } + return 0; +} + +static const uint8_t vk_video_h265_levels[] = {10, 20, 21, 30, 31, 40, 41, 50, 51, 52, 60, 61, 62}; + +static uint8_t +vk_video_get_h265_level(StdVideoH265LevelIdc level) +{ + assert(level <= STD_VIDEO_H265_LEVEL_IDC_6_2); + return vk_video_h265_levels[level]; +} + +static void +emit_nalu_header(struct vl_bitstream_encoder *enc, + int nal_ref, int nal_unit) +{ + enc->prevent_start_code = false; + + vl_bitstream_put_bits(enc, 24, 0); + vl_bitstream_put_bits(enc, 8, 1); + vl_bitstream_put_bits(enc, 1, 0); + vl_bitstream_put_bits(enc, 2, nal_ref); /* SPS NAL REF */ + vl_bitstream_put_bits(enc, 5, nal_unit); /* SPS NAL UNIT */ + vl_bitstream_flush(enc); + + enc->prevent_start_code = true; +} + +static void +encode_hrd_params(struct vl_bitstream_encoder *enc, + const StdVideoH264HrdParameters *hrd) +{ + vl_bitstream_exp_golomb_ue(enc, hrd->cpb_cnt_minus1); + vl_bitstream_put_bits(enc, 4, hrd->bit_rate_scale); + vl_bitstream_put_bits(enc, 4, hrd->cpb_size_scale); + for (int sched_sel_idx = 0; sched_sel_idx <= hrd->cpb_cnt_minus1; sched_sel_idx++) { + vl_bitstream_exp_golomb_ue(enc, hrd->bit_rate_value_minus1[sched_sel_idx]); + vl_bitstream_exp_golomb_ue(enc, hrd->cpb_size_value_minus1[sched_sel_idx]); + vl_bitstream_put_bits(enc, 1, hrd->cbr_flag[sched_sel_idx]); + } + vl_bitstream_put_bits(enc, 5, 
hrd->initial_cpb_removal_delay_length_minus1); + vl_bitstream_put_bits(enc, 5, hrd->cpb_removal_delay_length_minus1); + vl_bitstream_put_bits(enc, 5, hrd->dpb_output_delay_length_minus1); + vl_bitstream_put_bits(enc, 5, hrd->time_offset_length); +} + +void +vk_video_encode_h264_sps(const StdVideoH264SequenceParameterSet *sps, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr) +{ + struct vl_bitstream_encoder enc; + uint32_t data_size = *data_size_ptr; + + vl_bitstream_encoder_clear(&enc, data_ptr, data_size, size_limit); + + emit_nalu_header(&enc, 3, H264_NAL_SPS); + + vl_bitstream_put_bits(&enc, 8, sps->profile_idc); + vl_bitstream_put_bits(&enc, 1, sps->flags.constraint_set0_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.constraint_set1_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.constraint_set2_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.constraint_set3_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.constraint_set4_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.constraint_set5_flag); + vl_bitstream_put_bits(&enc, 2, 0); + vl_bitstream_put_bits(&enc, 8, vk_video_get_h264_level(sps->level_idc)); + vl_bitstream_exp_golomb_ue(&enc, sps->seq_parameter_set_id); + + if (sps->profile_idc == STD_VIDEO_H264_PROFILE_IDC_HIGH /* high10 as well */) { + vl_bitstream_exp_golomb_ue(&enc, sps->chroma_format_idc); + vl_bitstream_exp_golomb_ue(&enc, sps->bit_depth_luma_minus8); + vl_bitstream_exp_golomb_ue(&enc, sps->bit_depth_chroma_minus8); + vl_bitstream_put_bits(&enc, 1, sps->flags.qpprime_y_zero_transform_bypass_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.seq_scaling_matrix_present_flag); + } + + vl_bitstream_exp_golomb_ue(&enc, sps->log2_max_frame_num_minus4); + + vl_bitstream_exp_golomb_ue(&enc, sps->pic_order_cnt_type); + if (sps->pic_order_cnt_type == 0) + vl_bitstream_exp_golomb_ue(&enc, sps->log2_max_pic_order_cnt_lsb_minus4); + + vl_bitstream_exp_golomb_ue(&enc, sps->max_num_ref_frames); + vl_bitstream_put_bits(&enc, 1, sps->flags.gaps_in_frame_num_value_allowed_flag); + vl_bitstream_exp_golomb_ue(&enc, sps->pic_width_in_mbs_minus1); + vl_bitstream_exp_golomb_ue(&enc, sps->pic_height_in_map_units_minus1); + + vl_bitstream_put_bits(&enc, 1, sps->flags.frame_mbs_only_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.direct_8x8_inference_flag); + + vl_bitstream_put_bits(&enc, 1, sps->flags.frame_cropping_flag); + if (sps->flags.frame_cropping_flag) { + vl_bitstream_exp_golomb_ue(&enc, sps->frame_crop_left_offset); + vl_bitstream_exp_golomb_ue(&enc, sps->frame_crop_right_offset); + vl_bitstream_exp_golomb_ue(&enc, sps->frame_crop_top_offset); + vl_bitstream_exp_golomb_ue(&enc, sps->frame_crop_bottom_offset); + } + + vl_bitstream_put_bits(&enc, 1, sps->flags.vui_parameters_present_flag); /* vui parameters present flag */ + if (sps->flags.vui_parameters_present_flag) { + const StdVideoH264SequenceParameterSetVui *vui = sps->pSequenceParameterSetVui; + vl_bitstream_put_bits(&enc, 1, vui->flags.aspect_ratio_info_present_flag); + + if (vui->flags.aspect_ratio_info_present_flag) { + vl_bitstream_put_bits(&enc, 8, vui->aspect_ratio_idc); + if (vui->aspect_ratio_idc == STD_VIDEO_H264_ASPECT_RATIO_IDC_EXTENDED_SAR) { + vl_bitstream_put_bits(&enc, 16, vui->sar_width); + vl_bitstream_put_bits(&enc, 16, vui->sar_height); + } + } + + vl_bitstream_put_bits(&enc, 1, vui->flags.overscan_info_present_flag); + if (vui->flags.overscan_info_present_flag) + vl_bitstream_put_bits(&enc, 1, vui->flags.overscan_appropriate_flag); + vl_bitstream_put_bits(&enc, 1, 
vui->flags.video_signal_type_present_flag); + if (vui->flags.video_signal_type_present_flag) { + vl_bitstream_put_bits(&enc, 3, vui->video_format); + vl_bitstream_put_bits(&enc, 1, vui->flags.video_full_range_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.color_description_present_flag); + if (vui->flags.color_description_present_flag) { + vl_bitstream_put_bits(&enc, 8, vui->colour_primaries); + vl_bitstream_put_bits(&enc, 8, vui->transfer_characteristics); + vl_bitstream_put_bits(&enc, 8, vui->matrix_coefficients); + } + } + + vl_bitstream_put_bits(&enc, 1, vui->flags.chroma_loc_info_present_flag); + if (vui->flags.chroma_loc_info_present_flag) { + vl_bitstream_exp_golomb_ue(&enc, vui->chroma_sample_loc_type_top_field); + vl_bitstream_exp_golomb_ue(&enc, vui->chroma_sample_loc_type_bottom_field); + } + vl_bitstream_put_bits(&enc, 1, vui->flags.timing_info_present_flag); + if (vui->flags.timing_info_present_flag) { + vl_bitstream_put_bits(&enc, 32, vui->num_units_in_tick); + vl_bitstream_put_bits(&enc, 32, vui->time_scale); + vl_bitstream_put_bits(&enc, 1, vui->flags.fixed_frame_rate_flag); + } + vl_bitstream_put_bits(&enc, 1, vui->flags.nal_hrd_parameters_present_flag); + if (vui->flags.nal_hrd_parameters_present_flag) + encode_hrd_params(&enc, vui->pHrdParameters); + vl_bitstream_put_bits(&enc, 1, vui->flags.vcl_hrd_parameters_present_flag); + if (vui->flags.vcl_hrd_parameters_present_flag) + encode_hrd_params(&enc, vui->pHrdParameters); + if (vui->flags.nal_hrd_parameters_present_flag || vui->flags.vcl_hrd_parameters_present_flag) + vl_bitstream_put_bits(&enc, 1, 0); + vl_bitstream_put_bits(&enc, 1, 0); + vl_bitstream_put_bits(&enc, 1, vui->flags.bitstream_restriction_flag); + if (vui->flags.bitstream_restriction_flag) { + vl_bitstream_put_bits(&enc, 1, 0); + vl_bitstream_exp_golomb_ue(&enc, 0); + vl_bitstream_exp_golomb_ue(&enc, 0); + vl_bitstream_exp_golomb_ue(&enc, 0); + vl_bitstream_exp_golomb_ue(&enc, 0); + vl_bitstream_exp_golomb_ue(&enc, vui->max_num_reorder_frames); + vl_bitstream_exp_golomb_ue(&enc, vui->max_dec_frame_buffering); + } + } + + vl_bitstream_rbsp_trailing(&enc); + + vl_bitstream_flush(&enc); + *data_size_ptr += vl_bitstream_get_byte_count(&enc); + vl_bitstream_encoder_free(&enc); +} + +void +vk_video_encode_h264_pps(const StdVideoH264PictureParameterSet *pps, + bool high_profile, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr) +{ + struct vl_bitstream_encoder enc; + uint32_t data_size = *data_size_ptr; + + vl_bitstream_encoder_clear(&enc, data_ptr, data_size, size_limit); + + emit_nalu_header(&enc, 3, H264_NAL_PPS); + + vl_bitstream_exp_golomb_ue(&enc, pps->pic_parameter_set_id); + vl_bitstream_exp_golomb_ue(&enc, pps->seq_parameter_set_id); + vl_bitstream_put_bits(&enc, 1, pps->flags.entropy_coding_mode_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.bottom_field_pic_order_in_frame_present_flag); + vl_bitstream_exp_golomb_ue(&enc, 0); /* num_slice_groups_minus1 */ + + vl_bitstream_exp_golomb_ue(&enc, pps->num_ref_idx_l0_default_active_minus1); + vl_bitstream_exp_golomb_ue(&enc, pps->num_ref_idx_l1_default_active_minus1); + vl_bitstream_put_bits(&enc, 1, pps->flags.weighted_pred_flag); + vl_bitstream_put_bits(&enc, 2, pps->weighted_bipred_idc); + vl_bitstream_exp_golomb_se(&enc, pps->pic_init_qp_minus26); + vl_bitstream_exp_golomb_se(&enc, pps->pic_init_qs_minus26); + vl_bitstream_exp_golomb_se(&enc, pps->chroma_qp_index_offset); + vl_bitstream_put_bits(&enc, 1, pps->flags.deblocking_filter_control_present_flag); +
vl_bitstream_put_bits(&enc, 1, pps->flags.constrained_intra_pred_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.redundant_pic_cnt_present_flag); + + /* high profile */ + if (high_profile) { + vl_bitstream_put_bits(&enc, 1, pps->flags.transform_8x8_mode_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.pic_scaling_matrix_present_flag); + vl_bitstream_exp_golomb_se(&enc, pps->second_chroma_qp_index_offset); + } + vl_bitstream_rbsp_trailing(&enc); + + vl_bitstream_flush(&enc); + *data_size_ptr += vl_bitstream_get_byte_count(&enc); + vl_bitstream_encoder_free(&enc); +} + +static void +emit_nalu_h265_header(struct vl_bitstream_encoder *enc, + int nal_unit_type) +{ + enc->prevent_start_code = false; + + vl_bitstream_put_bits(enc, 24, 0); + vl_bitstream_put_bits(enc, 8, 1); + vl_bitstream_put_bits(enc, 1, 0); + vl_bitstream_put_bits(enc, 6, nal_unit_type); /* SPS NAL REF */ + vl_bitstream_put_bits(enc, 6, 0);//nuh_layer_id + vl_bitstream_put_bits(enc, 3, 1);//nuh_temporal_id_plus1; + vl_bitstream_flush(enc); + + enc->prevent_start_code = true; +} + +static void +encode_h265_profile_tier_level(struct vl_bitstream_encoder *enc, + const StdVideoH265ProfileTierLevel *ptl) +{ + vl_bitstream_put_bits(enc, 2, 0); + vl_bitstream_put_bits(enc, 1, ptl->flags.general_tier_flag); + vl_bitstream_put_bits(enc, 5, ptl->general_profile_idc); + + for (int j = 0; j < 32; j++) + vl_bitstream_put_bits(enc, 1, j == ptl->general_profile_idc); + + vl_bitstream_put_bits(enc, 1, ptl->flags.general_progressive_source_flag); + vl_bitstream_put_bits(enc, 1, ptl->flags.general_interlaced_source_flag); + vl_bitstream_put_bits(enc, 1, ptl->flags.general_non_packed_constraint_flag); + vl_bitstream_put_bits(enc, 1, ptl->flags.general_frame_only_constraint_flag); + vl_bitstream_put_bits(enc, 31, 0); + vl_bitstream_put_bits(enc, 13, 0); + vl_bitstream_put_bits(enc, 8, vk_video_get_h265_level(ptl->general_level_idc)); +} + +void +vk_video_encode_h265_vps(const StdVideoH265VideoParameterSet *vps, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr) +{ + struct vl_bitstream_encoder enc; + uint32_t data_size = *data_size_ptr; + + vl_bitstream_encoder_clear(&enc, data_ptr, data_size, size_limit); + + emit_nalu_h265_header(&enc, HEVC_NAL_VPS_NUT); + + vl_bitstream_put_bits(&enc, 4, vps->vps_video_parameter_set_id); + vl_bitstream_put_bits(&enc, 2, 3); + vl_bitstream_put_bits(&enc, 6, 0);//vps->vps_max_layers_minus1); + vl_bitstream_put_bits(&enc, 3, vps->vps_max_sub_layers_minus1); + vl_bitstream_put_bits(&enc, 1, vps->flags.vps_temporal_id_nesting_flag); + vl_bitstream_put_bits(&enc, 16, 0xffff); + + encode_h265_profile_tier_level(&enc, vps->pProfileTierLevel); + + vl_bitstream_put_bits(&enc, 1, vps->flags.vps_sub_layer_ordering_info_present_flag); + + for (int i = 0; i <= vps->vps_max_sub_layers_minus1; i++) { + vl_bitstream_exp_golomb_ue(&enc, vps->pDecPicBufMgr->max_dec_pic_buffering_minus1[i]); + vl_bitstream_exp_golomb_ue(&enc, vps->pDecPicBufMgr->max_num_reorder_pics[i]); + vl_bitstream_exp_golomb_ue(&enc, vps->pDecPicBufMgr->max_latency_increase_plus1[i]); + } + + + vl_bitstream_put_bits(&enc, 6, 0);//vps->vps_max_layer_id); + vl_bitstream_exp_golomb_ue(&enc, 0);//vps->vps_num_layer_sets_minus1); + vl_bitstream_put_bits(&enc, 1, vps->flags.vps_timing_info_present_flag); + + if (vps->flags.vps_timing_info_present_flag) { + vl_bitstream_put_bits(&enc, 32, vps->vps_num_units_in_tick); + vl_bitstream_put_bits(&enc, 32, vps->vps_time_scale); + vl_bitstream_put_bits(&enc, 1, 
vps->flags.vps_poc_proportional_to_timing_flag); + if (vps->flags.vps_poc_proportional_to_timing_flag) + vl_bitstream_exp_golomb_ue(&enc, vps->vps_num_ticks_poc_diff_one_minus1); + vl_bitstream_exp_golomb_ue(&enc, 0); /* vps_num_hrd_parameters */ + } + + vl_bitstream_put_bits(&enc, 1, 0); /* vps extension flag */ + vl_bitstream_rbsp_trailing(&enc); + + vl_bitstream_flush(&enc); + *data_size_ptr += vl_bitstream_get_byte_count(&enc); + vl_bitstream_encoder_free(&enc); +} + +static void +encode_rps(struct vl_bitstream_encoder *enc, + const StdVideoH265SequenceParameterSet *sps, + int st_rps_idx) +{ + const StdVideoH265ShortTermRefPicSet *rps = &sps->pShortTermRefPicSet[st_rps_idx]; + if (st_rps_idx != 0) + vl_bitstream_put_bits(enc, 1, rps->flags.inter_ref_pic_set_prediction_flag); + + if (rps->flags.inter_ref_pic_set_prediction_flag) { + int ref_rps_idx = st_rps_idx - (rps->delta_idx_minus1 + 1); + vl_bitstream_put_bits(enc, 1, rps->flags.delta_rps_sign); + vl_bitstream_exp_golomb_ue(enc, rps->abs_delta_rps_minus1); + + const StdVideoH265ShortTermRefPicSet *rps_ref = &sps->pShortTermRefPicSet[ref_rps_idx]; + int num_delta_pocs = rps_ref->num_negative_pics + rps_ref->num_positive_pics; + + /* the spec loop is inclusive: j <= NumDeltaPocs[RefRpsIdx] */ + for (int j = 0; j <= num_delta_pocs; j++) { + vl_bitstream_put_bits(enc, 1, !!(rps->used_by_curr_pic_flag & (1 << j))); + if (!(rps->used_by_curr_pic_flag & (1 << j))) { + vl_bitstream_put_bits(enc, 1, !!(rps->use_delta_flag & (1 << j))); + } + } + } else { + vl_bitstream_exp_golomb_ue(enc, rps->num_negative_pics); + vl_bitstream_exp_golomb_ue(enc, rps->num_positive_pics); + + for (int i = 0; i < rps->num_negative_pics; i++) { + vl_bitstream_exp_golomb_ue(enc, rps->delta_poc_s0_minus1[i]); + vl_bitstream_put_bits(enc, 1, !!(rps->used_by_curr_pic_s0_flag & (1 << i))); + } + for (int i = 0; i < rps->num_positive_pics; i++) { + vl_bitstream_exp_golomb_ue(enc, rps->delta_poc_s1_minus1[i]); + vl_bitstream_put_bits(enc, 1, !!(rps->used_by_curr_pic_s1_flag & (1 << i))); + } + } +} + +void +vk_video_encode_h265_sps(const StdVideoH265SequenceParameterSet *sps, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr) +{ + struct vl_bitstream_encoder enc; + uint32_t data_size = *data_size_ptr; + + vl_bitstream_encoder_clear(&enc, data_ptr, data_size, size_limit); + + emit_nalu_h265_header(&enc, HEVC_NAL_SPS_NUT); + + vl_bitstream_put_bits(&enc, 4, sps->sps_video_parameter_set_id); + vl_bitstream_put_bits(&enc, 3, sps->sps_max_sub_layers_minus1); + vl_bitstream_put_bits(&enc, 1, sps->flags.sps_temporal_id_nesting_flag); + + encode_h265_profile_tier_level(&enc, sps->pProfileTierLevel); + + vl_bitstream_exp_golomb_ue(&enc, sps->sps_seq_parameter_set_id); + vl_bitstream_exp_golomb_ue(&enc, sps->chroma_format_idc); /* separate_colour_plane_flag only follows when chroma_format_idc == 3 */ + + vl_bitstream_exp_golomb_ue(&enc, sps->pic_width_in_luma_samples); + vl_bitstream_exp_golomb_ue(&enc, sps->pic_height_in_luma_samples); + + vl_bitstream_put_bits(&enc, 1, sps->flags.conformance_window_flag); + + if (sps->flags.conformance_window_flag) { + vl_bitstream_exp_golomb_ue(&enc, sps->conf_win_left_offset); + vl_bitstream_exp_golomb_ue(&enc, sps->conf_win_right_offset); + vl_bitstream_exp_golomb_ue(&enc, sps->conf_win_top_offset); + vl_bitstream_exp_golomb_ue(&enc, sps->conf_win_bottom_offset); + } + + vl_bitstream_exp_golomb_ue(&enc, sps->bit_depth_luma_minus8); + vl_bitstream_exp_golomb_ue(&enc, sps->bit_depth_chroma_minus8); + + vl_bitstream_exp_golomb_ue(&enc, sps->log2_max_pic_order_cnt_lsb_minus4); + vl_bitstream_put_bits(&enc, 1, sps->flags.sps_sub_layer_ordering_info_present_flag); + + for (int i = 0; i <= 
sps->sps_max_sub_layers_minus1; i++) { + vl_bitstream_exp_golomb_ue(&enc, sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[i]); + vl_bitstream_exp_golomb_ue(&enc, sps->pDecPicBufMgr->max_num_reorder_pics[i]); + vl_bitstream_exp_golomb_ue(&enc, sps->pDecPicBufMgr->max_latency_increase_plus1[i]); + } + + vl_bitstream_exp_golomb_ue(&enc, sps->log2_min_luma_coding_block_size_minus3); + vl_bitstream_exp_golomb_ue(&enc, sps->log2_diff_max_min_luma_coding_block_size); + vl_bitstream_exp_golomb_ue(&enc, sps->log2_min_luma_transform_block_size_minus2); + vl_bitstream_exp_golomb_ue(&enc, sps->log2_diff_max_min_luma_transform_block_size); + + vl_bitstream_exp_golomb_ue(&enc, sps->max_transform_hierarchy_depth_inter); + vl_bitstream_exp_golomb_ue(&enc, sps->max_transform_hierarchy_depth_intra); + + vl_bitstream_put_bits(&enc, 1, sps->flags.scaling_list_enabled_flag); /* no SPS scaling list data is emitted */ + + vl_bitstream_put_bits(&enc, 1, sps->flags.amp_enabled_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.sample_adaptive_offset_enabled_flag); + + vl_bitstream_put_bits(&enc, 1, sps->flags.pcm_enabled_flag); + + if (sps->flags.pcm_enabled_flag) { + /* PCM bit depths and block sizes are derived from the coded bit depth and luma CB sizes */ + vl_bitstream_put_bits(&enc, 4, sps->bit_depth_luma_minus8 + 7); + vl_bitstream_put_bits(&enc, 4, sps->bit_depth_chroma_minus8 + 7); + vl_bitstream_exp_golomb_ue(&enc, sps->log2_min_luma_coding_block_size_minus3); + vl_bitstream_exp_golomb_ue(&enc, sps->log2_diff_max_min_luma_coding_block_size); + vl_bitstream_put_bits(&enc, 1, sps->flags.pcm_loop_filter_disabled_flag); + } + + vl_bitstream_exp_golomb_ue(&enc, sps->num_short_term_ref_pic_sets); + for (int i = 0; i < sps->num_short_term_ref_pic_sets; i++) + encode_rps(&enc, sps, i); + + vl_bitstream_put_bits(&enc, 1, sps->flags.long_term_ref_pics_present_flag); + if (sps->flags.long_term_ref_pics_present_flag) { + vl_bitstream_exp_golomb_ue(&enc, sps->num_long_term_ref_pics_sps); + for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { + vl_bitstream_put_bits(&enc, sps->log2_max_pic_order_cnt_lsb_minus4 + 4, sps->pLongTermRefPicsSps->lt_ref_pic_poc_lsb_sps[i]); + vl_bitstream_put_bits(&enc, 1, !!(sps->pLongTermRefPicsSps->used_by_curr_pic_lt_sps_flag & (1 << i))); + } + } + + vl_bitstream_put_bits(&enc, 1, sps->flags.sps_temporal_mvp_enabled_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.strong_intra_smoothing_enabled_flag); + vl_bitstream_put_bits(&enc, 1, sps->flags.vui_parameters_present_flag); + + if (sps->flags.vui_parameters_present_flag) { + const StdVideoH265SequenceParameterSetVui *vui = sps->pSequenceParameterSetVui; + vl_bitstream_put_bits(&enc, 1, vui->flags.aspect_ratio_info_present_flag); + if (vui->flags.aspect_ratio_info_present_flag) { + vl_bitstream_put_bits(&enc, 8, vui->aspect_ratio_idc); + if (vui->aspect_ratio_idc == STD_VIDEO_H265_ASPECT_RATIO_IDC_EXTENDED_SAR) { + vl_bitstream_put_bits(&enc, 16, vui->sar_width); + vl_bitstream_put_bits(&enc, 16, vui->sar_height); + } + } + vl_bitstream_put_bits(&enc, 1, vui->flags.overscan_info_present_flag); + if (vui->flags.overscan_info_present_flag) + vl_bitstream_put_bits(&enc, 1, vui->flags.overscan_appropriate_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.video_signal_type_present_flag); + if (vui->flags.video_signal_type_present_flag) { + vl_bitstream_put_bits(&enc, 3, vui->video_format); + vl_bitstream_put_bits(&enc, 1, vui->flags.video_full_range_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.colour_description_present_flag); + if (vui->flags.colour_description_present_flag) { + vl_bitstream_put_bits(&enc, 8, vui->colour_primaries); + vl_bitstream_put_bits(&enc, 8, 
vui->transfer_characteristics); + vl_bitstream_put_bits(&enc, 8, vui->matrix_coeffs); + } + } + vl_bitstream_put_bits(&enc, 1, vui->flags.chroma_loc_info_present_flag); + if (vui->flags.chroma_loc_info_present_flag) { + vl_bitstream_exp_golomb_ue(&enc, vui->chroma_sample_loc_type_top_field); + vl_bitstream_exp_golomb_ue(&enc, vui->chroma_sample_loc_type_bottom_field); + } + vl_bitstream_put_bits(&enc, 1, vui->flags.neutral_chroma_indication_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.field_seq_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.frame_field_info_present_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.default_display_window_flag); + if (vui->flags.default_display_window_flag) { + vl_bitstream_exp_golomb_ue(&enc, vui->def_disp_win_left_offset); + vl_bitstream_exp_golomb_ue(&enc, vui->def_disp_win_right_offset); + vl_bitstream_exp_golomb_ue(&enc, vui->def_disp_win_top_offset); + vl_bitstream_exp_golomb_ue(&enc, vui->def_disp_win_bottom_offset); + } + vl_bitstream_put_bits(&enc, 1, vui->flags.vui_timing_info_present_flag); + if (vui->flags.vui_timing_info_present_flag) { + vl_bitstream_put_bits(&enc, 32, vui->vui_num_units_in_tick); + vl_bitstream_put_bits(&enc, 32, vui->vui_time_scale); + vl_bitstream_put_bits(&enc, 1, vui->flags.vui_poc_proportional_to_timing_flag); + if (vui->flags.vui_poc_proportional_to_timing_flag) + vl_bitstream_exp_golomb_ue(&enc, vui->vui_num_ticks_poc_diff_one_minus1); + vl_bitstream_put_bits(&enc, 1, 0); /* vui_hrd_parameters_present_flag - HRD parameters are not emitted */ + } + + vl_bitstream_put_bits(&enc, 1, vui->flags.bitstream_restriction_flag); + if (vui->flags.bitstream_restriction_flag) { + vl_bitstream_put_bits(&enc, 1, vui->flags.tiles_fixed_structure_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.motion_vectors_over_pic_boundaries_flag); + vl_bitstream_put_bits(&enc, 1, vui->flags.restricted_ref_pic_lists_flag); + vl_bitstream_exp_golomb_ue(&enc, vui->min_spatial_segmentation_idc); + vl_bitstream_exp_golomb_ue(&enc, vui->max_bytes_per_pic_denom); + vl_bitstream_exp_golomb_ue(&enc, vui->max_bits_per_min_cu_denom); + vl_bitstream_exp_golomb_ue(&enc, vui->log2_max_mv_length_horizontal); + vl_bitstream_exp_golomb_ue(&enc, vui->log2_max_mv_length_vertical); + } + } + + vl_bitstream_put_bits(&enc, 1, 0); /* sps extension flag */ + vl_bitstream_rbsp_trailing(&enc); + + vl_bitstream_flush(&enc); + *data_size_ptr += vl_bitstream_get_byte_count(&enc); + vl_bitstream_encoder_free(&enc); +} + +void +vk_video_encode_h265_pps(const StdVideoH265PictureParameterSet *pps, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr) +{ + struct vl_bitstream_encoder enc; + uint32_t data_size = *data_size_ptr; + + vl_bitstream_encoder_clear(&enc, data_ptr, data_size, size_limit); + + emit_nalu_h265_header(&enc, HEVC_NAL_PPS_NUT); + vl_bitstream_exp_golomb_ue(&enc, pps->pps_pic_parameter_set_id); + vl_bitstream_exp_golomb_ue(&enc, pps->pps_seq_parameter_set_id); + + vl_bitstream_put_bits(&enc, 1, pps->flags.dependent_slice_segments_enabled_flag); + + vl_bitstream_put_bits(&enc, 1, pps->flags.output_flag_present_flag); + vl_bitstream_put_bits(&enc, 3, pps->num_extra_slice_header_bits); + + vl_bitstream_put_bits(&enc, 1, pps->flags.sign_data_hiding_enabled_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.cabac_init_present_flag); + + vl_bitstream_exp_golomb_ue(&enc, pps->num_ref_idx_l0_default_active_minus1); + vl_bitstream_exp_golomb_ue(&enc, pps->num_ref_idx_l1_default_active_minus1); + + vl_bitstream_exp_golomb_se(&enc, pps->init_qp_minus26); + + 
vl_bitstream_put_bits(&enc, 1, pps->flags.constrained_intra_pred_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.transform_skip_enabled_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.cu_qp_delta_enabled_flag); + + if (pps->flags.cu_qp_delta_enabled_flag) + vl_bitstream_exp_golomb_ue(&enc, pps->diff_cu_qp_delta_depth); + + vl_bitstream_exp_golomb_se(&enc, pps->pps_cb_qp_offset); + vl_bitstream_exp_golomb_se(&enc, pps->pps_cr_qp_offset); + + vl_bitstream_put_bits(&enc, 1, pps->flags.pps_slice_chroma_qp_offsets_present_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.weighted_pred_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.weighted_bipred_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.transquant_bypass_enabled_flag); + + vl_bitstream_put_bits(&enc, 1, pps->flags.tiles_enabled_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.entropy_coding_sync_enabled_flag); + + assert (!pps->flags.tiles_enabled_flag); + + vl_bitstream_put_bits(&enc, 1, pps->flags.pps_loop_filter_across_slices_enabled_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.deblocking_filter_control_present_flag); + + if (pps->flags.deblocking_filter_control_present_flag) { + vl_bitstream_put_bits(&enc, 1, pps->flags.deblocking_filter_override_enabled_flag); + vl_bitstream_put_bits(&enc, 1, pps->flags.pps_deblocking_filter_disabled_flag); + if (!pps->flags.pps_deblocking_filter_disabled_flag) { + vl_bitstream_exp_golomb_se(&enc, pps->pps_beta_offset_div2); + vl_bitstream_exp_golomb_se(&enc, pps->pps_tc_offset_div2); + } + } + + vl_bitstream_put_bits(&enc, 1, pps->flags.pps_scaling_list_data_present_flag); + assert (!pps->flags.pps_scaling_list_data_present_flag); + + vl_bitstream_put_bits(&enc, 1, pps->flags.lists_modification_present_flag); + vl_bitstream_exp_golomb_ue(&enc, pps->log2_parallel_merge_level_minus2); + vl_bitstream_put_bits(&enc, 1, pps->flags.slice_segment_header_extension_present_flag); + + vl_bitstream_put_bits(&enc, 1, 0); /* pps extension flag */ + vl_bitstream_rbsp_trailing(&enc); + + vl_bitstream_flush(&enc); + *data_size_ptr += vl_bitstream_get_byte_count(&enc); + vl_bitstream_encoder_free(&enc); +} diff --git a/src/vulkan/runtime/vk_video.h b/src/vulkan/runtime/vk_video.h new file mode 100644 index 00000000000..8eb8814a81c --- /dev/null +++ b/src/vulkan/runtime/vk_video.h @@ -0,0 +1,348 @@ +/* + * Copyright © 2021 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef VK_VIDEO_H +#define VK_VIDEO_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_video_h264_sps { + StdVideoH264SequenceParameterSet base; + int32_t offsets_for_ref_frame[256]; + StdVideoH264ScalingLists scaling_lists; + StdVideoH264SequenceParameterSetVui vui; + StdVideoH264HrdParameters vui_hrd_parameters; +}; + +struct vk_video_h264_pps { + StdVideoH264PictureParameterSet base; + StdVideoH264ScalingLists scaling_lists; +}; + +struct vk_video_h265_vps { + StdVideoH265VideoParameterSet base; + StdVideoH265DecPicBufMgr dec_pic_buf_mgr; + StdVideoH265SubLayerHrdParameters hrd_parameters_nal; + StdVideoH265SubLayerHrdParameters hrd_parameters_vcl; + StdVideoH265HrdParameters hrd_parameters; + StdVideoH265ProfileTierLevel tier_level; +}; + +struct vk_video_h265_sps { + StdVideoH265SequenceParameterSet base; + StdVideoH265ProfileTierLevel tier_level; + StdVideoH265DecPicBufMgr dec_pic_buf_mgr; + StdVideoH265ScalingLists scaling_lists; + StdVideoH265ShortTermRefPicSet short_term_ref_pic_set; + StdVideoH265LongTermRefPicsSps long_term_ref_pics_sps; + StdVideoH265SubLayerHrdParameters hrd_parameters_nal; + StdVideoH265SubLayerHrdParameters hrd_parameters_vcl; + StdVideoH265HrdParameters hrd_parameters; + StdVideoH265SequenceParameterSetVui vui; + StdVideoH265PredictorPaletteEntries palette_entries; +}; + +struct vk_video_h265_pps { + StdVideoH265PictureParameterSet base; + StdVideoH265ScalingLists scaling_lists; + StdVideoH265PredictorPaletteEntries palette_entries; +}; + +struct vk_video_av1_seq_hdr { + StdVideoAV1SequenceHeader base; + StdVideoAV1ColorConfig color_config; + StdVideoAV1TimingInfo timing_info; +}; + +struct vk_video_session { + struct vk_object_base base; + VkVideoSessionCreateFlagsKHR flags; + VkVideoCodecOperationFlagsKHR op; + VkExtent2D max_coded; + VkFormat picture_format; + VkFormat ref_format; + uint32_t max_dpb_slots; + uint32_t max_active_ref_pics; + + struct { + VkVideoEncodeUsageFlagsKHR video_usage_hints; + VkVideoEncodeContentFlagsKHR video_content_hints; + VkVideoEncodeTuningModeKHR tuning_mode; + } enc_usage; + union { + struct { + StdVideoH264ProfileIdc profile_idc; + } h264; + struct { + StdVideoH265ProfileIdc profile_idc; + } h265; + struct { + StdVideoAV1Profile profile; + int film_grain_support; + } av1; + }; +}; + +struct vk_video_session_parameters { + struct vk_object_base base; + VkVideoCodecOperationFlagsKHR op; + union { + struct { + uint32_t max_h264_sps_count; + uint32_t max_h264_pps_count; + + uint32_t h264_sps_count; + struct vk_video_h264_sps *h264_sps; + uint32_t h264_pps_count; + struct vk_video_h264_pps *h264_pps; + } h264_dec; + + struct { + uint32_t max_h265_vps_count; + uint32_t max_h265_sps_count; + uint32_t max_h265_pps_count; + + uint32_t h265_vps_count; + struct vk_video_h265_vps *h265_vps; + uint32_t h265_sps_count; + struct vk_video_h265_sps *h265_sps; + uint32_t h265_pps_count; + struct vk_video_h265_pps *h265_pps; + } h265_dec; + + struct { + struct vk_video_av1_seq_hdr seq_hdr; + } av1_dec; + + struct { + uint32_t max_h264_sps_count; + uint32_t max_h264_pps_count; + + uint32_t h264_sps_count; + struct vk_video_h264_sps *h264_sps; + uint32_t h264_pps_count; + struct vk_video_h264_pps *h264_pps; + StdVideoH264ProfileIdc profile_idc; + } h264_enc; + + struct { + uint32_t max_h265_vps_count; + uint32_t max_h265_sps_count; + uint32_t max_h265_pps_count; + + uint32_t h265_vps_count; + struct vk_video_h265_vps *h265_vps; + uint32_t h265_sps_count; + struct vk_video_h265_sps *h265_sps; + uint32_t 
h265_pps_count; + struct vk_video_h265_pps *h265_pps; + } h265_enc; + }; +}; + +VkResult vk_video_session_init(struct vk_device *device, + struct vk_video_session *vid, + const VkVideoSessionCreateInfoKHR *create_info); + +VkResult vk_video_session_parameters_init(struct vk_device *device, + struct vk_video_session_parameters *params, + const struct vk_video_session *vid, + const struct vk_video_session_parameters *templ, + const VkVideoSessionParametersCreateInfoKHR *create_info); + +VkResult vk_video_session_parameters_update(struct vk_video_session_parameters *params, + const VkVideoSessionParametersUpdateInfoKHR *update); + +void vk_video_session_parameters_finish(struct vk_device *device, + struct vk_video_session_parameters *params); + +void vk_video_derive_h264_scaling_list(const StdVideoH264SequenceParameterSet *sps, + const StdVideoH264PictureParameterSet *pps, + StdVideoH264ScalingLists *list); + +const StdVideoH264SequenceParameterSet * +vk_video_find_h264_dec_std_sps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH264PictureParameterSet * +vk_video_find_h264_dec_std_pps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH265VideoParameterSet * +vk_video_find_h265_dec_std_vps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH265SequenceParameterSet * +vk_video_find_h265_dec_std_sps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH265PictureParameterSet * +vk_video_find_h265_dec_std_pps(const struct vk_video_session_parameters *params, + uint32_t id); + +struct vk_video_h265_slice_params { + uint32_t slice_size; + + uint8_t first_slice_segment_in_pic_flag; + StdVideoH265SliceType slice_type; + uint8_t dependent_slice_segment; + uint8_t temporal_mvp_enable; + uint8_t loop_filter_across_slices_enable; + int32_t pic_order_cnt_lsb; + uint8_t sao_luma_flag; + uint8_t sao_chroma_flag; + uint8_t collocated_list; + uint32_t collocated_ref_idx; + uint8_t mvd_l1_zero_flag; + + uint8_t num_ref_idx_l0_active; + uint8_t num_ref_idx_l1_active; + uint8_t rpl_modification_flag[2]; + uint8_t cabac_init_idc; + int8_t slice_qp_delta; + int8_t slice_cb_qp_offset; + int8_t slice_cr_qp_offset; + int8_t max_num_merge_cand; + uint32_t slice_data_bytes_offset; + uint8_t disable_deblocking_filter_idc; + int8_t tc_offset_div2; + int8_t beta_offset_div2; + uint32_t slice_segment_address; + + uint8_t luma_log2_weight_denom; + uint8_t chroma_log2_weight_denom; + uint8_t luma_weight_l0_flag[16]; + int16_t luma_weight_l0[16]; + int16_t luma_offset_l0[16]; + uint8_t chroma_weight_l0_flag[16]; + int16_t chroma_weight_l0[16][2]; + int16_t chroma_offset_l0[16][2]; + uint8_t luma_weight_l1_flag[16]; + int16_t luma_weight_l1[16]; + int16_t luma_offset_l1[16]; + uint8_t chroma_weight_l1_flag[16]; + int16_t chroma_weight_l1[16][2]; + int16_t chroma_offset_l1[16][2]; + + int8_t delta_luma_weight_l0[16]; + int8_t delta_luma_weight_l1[16]; + int8_t delta_chroma_weight_l0[16][2]; + int8_t delta_chroma_weight_l1[16][2]; + int16_t delta_chroma_offset_l0[16][2]; + int16_t delta_chroma_offset_l1[16][2]; +}; + +void +vk_video_parse_h265_slice_header(const struct VkVideoDecodeInfoKHR *frame_info, + const VkVideoDecodeH265PictureInfoKHR *pic_info, + const StdVideoH265SequenceParameterSet *sps, + const StdVideoH265PictureParameterSet *pps, + void *slice_data, + uint32_t slice_size, + struct vk_video_h265_slice_params *params); + + +struct vk_video_h265_reference { + const VkVideoPictureResourceInfoKHR 
*pPictureResource; + StdVideoDecodeH265ReferenceInfoFlags flags; + uint32_t slot_index; + int32_t pic_order_cnt; +}; + +int vk_video_h265_poc_by_slot(const struct VkVideoDecodeInfoKHR *frame_info, int slot); + +void vk_fill_video_h265_reference_info(const VkVideoDecodeInfoKHR *frame_info, + const struct VkVideoDecodeH265PictureInfoKHR *pic, + const struct vk_video_h265_slice_params *slice_params, + struct vk_video_h265_reference ref_slots[][8]); + +#define VK_VIDEO_H264_MACROBLOCK_WIDTH 16 +#define VK_VIDEO_H264_MACROBLOCK_HEIGHT 16 + +#define VK_VIDEO_H265_CTU_MAX_WIDTH 64 +#define VK_VIDEO_H265_CTU_MAX_HEIGHT 64 + +#define VK_VIDEO_AV1_BLOCK_WIDTH 128 +#define VK_VIDEO_AV1_BLOCK_HEIGHT 128 + +void +vk_video_get_profile_alignments(const VkVideoProfileListInfoKHR *profile_list, + uint32_t *width_align_out, uint32_t *height_align_out); + +uint8_t +vk_video_get_h264_level(StdVideoH264LevelIdc level); + +const StdVideoH264SequenceParameterSet * +vk_video_find_h264_enc_std_sps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH264PictureParameterSet * +vk_video_find_h264_enc_std_pps(const struct vk_video_session_parameters *params, + uint32_t id); + +const StdVideoH265VideoParameterSet * +vk_video_find_h265_enc_std_vps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH265SequenceParameterSet * +vk_video_find_h265_enc_std_sps(const struct vk_video_session_parameters *params, + uint32_t id); +const StdVideoH265PictureParameterSet * +vk_video_find_h265_enc_std_pps(const struct vk_video_session_parameters *params, + uint32_t id); + +void +vk_video_encode_h264_sps(const StdVideoH264SequenceParameterSet *sps, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr); + +void +vk_video_encode_h264_pps(const StdVideoH264PictureParameterSet *pps, + bool high_profile, + size_t size_limit, + size_t *data_size_ptr, + void *data_ptr); + +unsigned +vk_video_get_h265_nal_unit(const StdVideoEncodeH265PictureInfo *pic_info); + +void +vk_video_encode_h265_vps(const StdVideoH265VideoParameterSet *vps, + size_t size_limit, + size_t *data_size, + void *data_ptr); +void +vk_video_encode_h265_sps(const StdVideoH265SequenceParameterSet *sps, + size_t size_limit, + size_t* pDataSize, + void* pData); + +void +vk_video_encode_h265_pps(const StdVideoH265PictureParameterSet *pps, + size_t size_limit, + size_t *data_size, + void *data_ptr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/vulkan/runtime/vk_ycbcr_conversion.c b/src/vulkan/runtime/vk_ycbcr_conversion.c new file mode 100644 index 00000000000..9c1da39c357 --- /dev/null +++ b/src/vulkan/runtime/vk_ycbcr_conversion.c @@ -0,0 +1,112 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vk_ycbcr_conversion.h" + +#include <vulkan/vulkan_android.h> + +#include "vk_common_entrypoints.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_util.h" + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateSamplerYcbcrConversion(VkDevice _device, + const VkSamplerYcbcrConversionCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSamplerYcbcrConversion *pYcbcrConversion) +{ + VK_FROM_HANDLE(vk_device, device, _device); + struct vk_ycbcr_conversion *conversion; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO); + + conversion = vk_object_zalloc(device, pAllocator, sizeof(*conversion), + VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION); + if (!conversion) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + struct vk_ycbcr_conversion_state *state = &conversion->state; + + state->format = pCreateInfo->format; + state->ycbcr_model = pCreateInfo->ycbcrModel; + state->ycbcr_range = pCreateInfo->ycbcrRange; + + /* Search for VkExternalFormatANDROID and resolve the format. */ + const VkExternalFormatANDROID *android_ext_info = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_FORMAT_ANDROID); + + /* We assume that Android externalFormat is just a VkFormat */ + if (android_ext_info && android_ext_info->externalFormat) { + assert(pCreateInfo->format == VK_FORMAT_UNDEFINED); + state->format = android_ext_info->externalFormat; + } else { + /* The Vulkan 1.1.95 spec says: + * + * "When creating an external format conversion, the value of + * components is ignored." 
+ */ + state->mapping[0] = pCreateInfo->components.r; + state->mapping[1] = pCreateInfo->components.g; + state->mapping[2] = pCreateInfo->components.b; + state->mapping[3] = pCreateInfo->components.a; + } + + state->chroma_offsets[0] = pCreateInfo->xChromaOffset; + state->chroma_offsets[1] = pCreateInfo->yChromaOffset; + state->chroma_filter = pCreateInfo->chromaFilter; + + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(state->format); + + bool has_chroma_subsampled = false; + if (ycbcr_info) { + for (uint32_t p = 0; p < ycbcr_info->n_planes; p++) { + if (ycbcr_info->planes[p].has_chroma && + (ycbcr_info->planes[p].denominator_scales[0] > 1 || + ycbcr_info->planes[p].denominator_scales[1] > 1)) + has_chroma_subsampled = true; + } + } + state->chroma_reconstruction = has_chroma_subsampled && + (state->chroma_offsets[0] == VK_CHROMA_LOCATION_COSITED_EVEN || + state->chroma_offsets[1] == VK_CHROMA_LOCATION_COSITED_EVEN); + + *pYcbcrConversion = vk_ycbcr_conversion_to_handle(conversion); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_DestroySamplerYcbcrConversion(VkDevice _device, + VkSamplerYcbcrConversion YcbcrConversion, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, YcbcrConversion); + + if (!conversion) + return; + + vk_object_free(device, pAllocator, conversion); +} diff --git a/src/vulkan/runtime/vk_ycbcr_conversion.h b/src/vulkan/runtime/vk_ycbcr_conversion.h new file mode 100644 index 00000000000..cc4ed3eb22b --- /dev/null +++ b/src/vulkan/runtime/vk_ycbcr_conversion.h @@ -0,0 +1,55 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef VK_YCBCR_CONVERSION_H +#define VK_YCBCR_CONVERSION_H + +#include "vk_object.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct vk_ycbcr_conversion_state { + VkFormat format; + VkSamplerYcbcrModelConversion ycbcr_model; + VkSamplerYcbcrRange ycbcr_range; + VkComponentSwizzle mapping[4]; + VkChromaLocation chroma_offsets[2]; + VkFilter chroma_filter; + bool chroma_reconstruction; +}; + +struct vk_ycbcr_conversion { + struct vk_object_base base; + struct vk_ycbcr_conversion_state state; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(vk_ycbcr_conversion, base, + VkSamplerYcbcrConversion, + VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION) + +#ifdef __cplusplus +} +#endif + +#endif /* VK_YCBCR_CONVERSION_H */ |
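Usage sketch: the vk_video_encode_h265_* helpers declared in vk_video.h above take a size limit, a running byte count and a destination buffer; their implementations seed the bitstream writer at the current *data_size offset and then add the bytes they wrote, so the calls appear to be chainable to pack the VPS, SPS and PPS NAL units back to back in one buffer. A minimal sketch under that assumption follows; emit_h265_headers, buf and total are illustrative names and not part of the runtime API, and the parameter-set pointers are assumed to come from the session parameters (for example via the vk_video_find_h265_enc_std_* lookups).

#include "vk_video.h"  /* assumed to pull in the Vulkan std video headers it needs */

/* Illustrative helper (not part of the diff): emit the three H.265 parameter
 * sets into one buffer and return how many bytes were written. */
static size_t
emit_h265_headers(const StdVideoH265VideoParameterSet *vps,
                  const StdVideoH265SequenceParameterSet *sps,
                  const StdVideoH265PictureParameterSet *pps,
                  void *buf, size_t buf_size)
{
   size_t total = 0; /* each helper advances this by the number of bytes it wrote */

   vk_video_encode_h265_vps(vps, buf_size, &total, buf);
   vk_video_encode_h265_sps(sps, buf_size, &total, buf);
   vk_video_encode_h265_pps(pps, buf_size, &total, buf);

   return total; /* buf[0..total) now holds the start-code-prefixed VPS, SPS and PPS NAL units */
}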