diff options
Diffstat (limited to 'src/broadcom/vulkan/v3dv_device.c')
-rw-r--r-- | src/broadcom/vulkan/v3dv_device.c | 2583 |
1 files changed, 1449 insertions, 1134 deletions
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index fec53ec38c5..827e2ce2944 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -1,5 +1,5 @@ /* - * Copyright © 2019 Raspberry Pi + * Copyright © 2019 Raspberry Pi Ltd * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -44,12 +44,18 @@ #include "compiler/v3d_compiler.h" #include "drm-uapi/v3d_drm.h" -#include "format/u_format.h" +#include "vk_drm_syncobj.h" #include "vk_util.h" +#include "git_sha1.h" #include "util/build_id.h" -#include "util/debug.h" -#include "util/u_cpu_detect.h" +#include "util/os_file.h" +#include "util/u_debug.h" +#include "util/format/u_format.h" + +#if DETECT_OS_ANDROID +#include "vk_android.h" +#endif #ifdef VK_USE_PLATFORM_XCB_KHR #include <xcb/xcb.h> @@ -62,11 +68,15 @@ #include "wayland-drm-client-protocol.h" #endif -#ifdef USE_V3D_SIMULATOR -#include "drm-uapi/i915_drm.h" -#endif +#define V3DV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION) -#define V3DV_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION) +#ifdef ANDROID_STRICT +#if ANDROID_API_LEVEL <= 32 +/* Android 12.1 and lower support only Vulkan API v1.1 */ +#undef V3DV_API_VERSION +#define V3DV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION) +#endif +#endif VKAPI_ATTR VkResult VKAPI_CALL v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion) @@ -75,25 +85,32 @@ v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion) return VK_SUCCESS; } -#define V3DV_HAS_SURFACE (VK_USE_PLATFORM_WIN32_KHR || \ - VK_USE_PLATFORM_WAYLAND_KHR || \ - VK_USE_PLATFORM_XCB_KHR || \ - VK_USE_PLATFORM_XLIB_KHR || \ - VK_USE_PLATFORM_DISPLAY_KHR) +#if defined(VK_USE_PLATFORM_WIN32_KHR) || \ + defined(VK_USE_PLATFORM_WAYLAND_KHR) || \ + defined(VK_USE_PLATFORM_XCB_KHR) || \ + defined(VK_USE_PLATFORM_XLIB_KHR) || \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) +#define V3DV_USE_WSI_PLATFORM +#endif static const struct vk_instance_extension_table instance_extensions = { .KHR_device_group_creation = true, #ifdef VK_USE_PLATFORM_DISPLAY_KHR .KHR_display = true, + .KHR_get_display_properties2 = true, + .EXT_direct_mode_display = true, + .EXT_acquire_drm_display = true, #endif .KHR_external_fence_capabilities = true, .KHR_external_memory_capabilities = true, .KHR_external_semaphore_capabilities = true, - .KHR_get_display_properties2 = true, .KHR_get_physical_device_properties2 = true, -#ifdef V3DV_HAS_SURFACE +#ifdef V3DV_USE_WSI_PLATFORM .KHR_get_surface_capabilities2 = true, .KHR_surface = true, + .KHR_surface_protected_capabilities = true, + .EXT_surface_maintenance1 = true, + .EXT_swapchain_colorspace = true, #endif #ifdef VK_USE_PLATFORM_WAYLAND_KHR .KHR_wayland_surface = true, @@ -104,7 +121,14 @@ static const struct vk_instance_extension_table instance_extensions = { #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, #endif +#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT + .EXT_acquire_xlib_display = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif .EXT_debug_report = true, + .EXT_debug_utils = true, }; static void @@ -112,43 +136,350 @@ get_device_extensions(const struct v3dv_physical_device *device, struct vk_device_extension_table *ext) { *ext = (struct vk_device_extension_table) { - .KHR_bind_memory2 = true, - .KHR_copy_commands2 = true, - .KHR_dedicated_allocation = true, - .KHR_device_group = true, - .KHR_descriptor_update_template = true, - .KHR_external_fence = true, - .KHR_external_fence_fd = true, - .KHR_external_memory = true, - .KHR_external_memory_fd = true, - .KHR_external_semaphore = true, - .KHR_external_semaphore_fd = true, - .KHR_get_memory_requirements2 = true, - .KHR_image_format_list = true, - .KHR_relaxed_block_layout = true, - .KHR_maintenance1 = true, - .KHR_maintenance2 = true, - .KHR_maintenance3 = true, - .KHR_multiview = true, - .KHR_shader_non_semantic_info = true, - .KHR_sampler_mirror_clamp_to_edge = true, - .KHR_storage_buffer_storage_class = true, - .KHR_uniform_buffer_standard_layout = true, -#ifdef V3DV_HAS_SURFACE - .KHR_swapchain = true, - .KHR_incremental_present = true, + .KHR_8bit_storage = true, + .KHR_16bit_storage = true, + .KHR_bind_memory2 = true, + .KHR_buffer_device_address = true, + .KHR_copy_commands2 = true, + .KHR_create_renderpass2 = true, + .KHR_dedicated_allocation = true, + .KHR_device_group = true, + .KHR_driver_properties = true, + .KHR_descriptor_update_template = true, + .KHR_depth_stencil_resolve = true, + .KHR_dynamic_rendering = true, + .KHR_external_fence = true, + .KHR_external_fence_fd = true, + .KHR_external_memory = true, + .KHR_external_memory_fd = true, + .KHR_external_semaphore = true, + .KHR_external_semaphore_fd = true, + .KHR_format_feature_flags2 = true, + .KHR_get_memory_requirements2 = true, + .KHR_image_format_list = true, + .KHR_imageless_framebuffer = true, + .KHR_index_type_uint8 = true, + .KHR_line_rasterization = true, + .KHR_load_store_op_none = true, + .KHR_performance_query = device->caps.perfmon, + .KHR_relaxed_block_layout = true, + .KHR_maintenance1 = true, + .KHR_maintenance2 = true, + .KHR_maintenance3 = true, + .KHR_maintenance4 = true, + .KHR_multiview = true, + .KHR_pipeline_executable_properties = true, + .KHR_separate_depth_stencil_layouts = true, + .KHR_shader_expect_assume = true, + .KHR_shader_float_controls = true, + .KHR_shader_non_semantic_info = true, + .KHR_sampler_mirror_clamp_to_edge = true, + .KHR_sampler_ycbcr_conversion = true, + .KHR_spirv_1_4 = true, + .KHR_storage_buffer_storage_class = true, + .KHR_timeline_semaphore = true, + .KHR_uniform_buffer_standard_layout = true, + .KHR_shader_integer_dot_product = true, + .KHR_shader_terminate_invocation = true, + .KHR_synchronization2 = true, + .KHR_workgroup_memory_explicit_layout = true, +#ifdef V3DV_USE_WSI_PLATFORM + .KHR_swapchain = true, + .KHR_swapchain_mutable_format = true, + .KHR_incremental_present = true, +#endif + .KHR_variable_pointers = true, + .KHR_vertex_attribute_divisor = true, + .KHR_vulkan_memory_model = true, + .KHR_zero_initialize_workgroup_memory = true, + .EXT_4444_formats = true, + .EXT_attachment_feedback_loop_layout = true, + .EXT_border_color_swizzle = true, + .EXT_color_write_enable = true, + .EXT_custom_border_color = true, + .EXT_depth_clip_control = true, + .EXT_depth_clip_enable = device->devinfo.ver >= 71, + .EXT_load_store_op_none = true, + .EXT_inline_uniform_block = true, + .EXT_external_memory_dma_buf = true, + .EXT_host_query_reset = true, + .EXT_image_drm_format_modifier = true, + .EXT_image_robustness = true, + .EXT_index_type_uint8 = true, + .EXT_line_rasterization = true, + .EXT_memory_budget = true, + .EXT_multi_draw = true, + .EXT_physical_device_drm = true, + .EXT_pipeline_creation_cache_control = true, + .EXT_pipeline_creation_feedback = true, + .EXT_pipeline_robustness = true, + .EXT_primitive_topology_list_restart = true, + .EXT_private_data = true, + .EXT_provoking_vertex = true, + .EXT_separate_stencil_usage = true, + .EXT_shader_demote_to_helper_invocation = true, + .EXT_shader_module_identifier = true, + .EXT_subgroup_size_control = true, +#ifdef V3DV_USE_WSI_PLATFORM + .EXT_swapchain_maintenance1 = true, +#endif + .EXT_texel_buffer_alignment = true, + .EXT_tooling_info = true, + .EXT_vertex_attribute_divisor = true, +#if DETECT_OS_ANDROID + .ANDROID_external_memory_android_hardware_buffer = true, + .ANDROID_native_buffer = true, + .EXT_queue_family_foreign = true, +#endif + }; +} + +static void +get_features(const struct v3dv_physical_device *physical_device, + struct vk_features *features) +{ + *features = (struct vk_features) { + /* Vulkan 1.0 */ + .robustBufferAccess = true, /* This feature is mandatory */ + .fullDrawIndexUint32 = physical_device->devinfo.ver >= 71, + .imageCubeArray = true, + .independentBlend = true, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = true, + .dualSrcBlend = false, + .logicOp = true, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = true, + .depthClamp = physical_device->devinfo.ver >= 71, + .depthBiasClamp = true, + .fillModeNonSolid = true, + .depthBounds = physical_device->devinfo.ver >= 71, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = false, + .samplerAnisotropy = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + /* Note that textureCompressionBC requires that the driver support all + * the BC formats. V3D 4.2 only support the BC1-3, so we can't claim + * that we support it. + */ + .textureCompressionBC = false, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = true, + .shaderStorageImageMultisample = false, + .shaderStorageImageReadWithoutFormat = true, + .shaderStorageImageWriteWithoutFormat = false, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = true, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = true, + + /* Vulkan 1.1 */ + .storageBuffer16BitAccess = true, + .uniformAndStorageBuffer16BitAccess = true, + .storagePushConstant16 = true, + .storageInputOutput16 = false, + .multiview = true, + .multiviewGeometryShader = false, + .multiviewTessellationShader = false, + .variablePointersStorageBuffer = true, + /* FIXME: this needs support for non-constant index on UBO/SSBO */ + .variablePointers = false, + .protectedMemory = false, + .samplerYcbcrConversion = true, + .shaderDrawParameters = false, + + /* Vulkan 1.2 */ + .hostQueryReset = true, + .uniformAndStorageBuffer8BitAccess = true, + .uniformBufferStandardLayout = true, + /* V3D 4.2 wraps TMU vector accesses to 16-byte boundaries, so loads and + * stores of vectors that cross these boundaries would not work correctly + * with scalarBlockLayout and would need to be split into smaller vectors + * (and/or scalars) that don't cross these boundaries. For load/stores + * with dynamic offsets where we can't identify if the offset is + * problematic, we would always have to scalarize. Overall, this would + * not lead to best performance so let's just not support it. + */ + .scalarBlockLayout = physical_device->devinfo.ver >= 71, + /* This tells applications 2 things: + * + * 1. If they can select just one aspect for barriers. For us barriers + * decide if we need to split a job and we don't care if it is only + * for one of the aspects of the image or both, so we don't really + * benefit from seeing barriers that select just one aspect. + * + * 2. If they can program different layouts for each aspect. We + * generally don't care about layouts, so again, we don't get any + * benefits from this to limit the scope of image layout transitions. + * + * Still, Vulkan 1.2 requires this feature to be supported so we + * advertise it even though we don't really take advantage of it. + */ + .separateDepthStencilLayouts = true, + .storageBuffer8BitAccess = true, + .storagePushConstant8 = true, + .imagelessFramebuffer = true, + .timelineSemaphore = true, + + .samplerMirrorClampToEdge = true, + + /* Extended subgroup types is mandatory by Vulkan 1.2, however, it is + * only in effect if the implementation supports non 32-bit types, which + * we don't, so in practice setting it to true doesn't have any + * implications for us. + */ + .shaderSubgroupExtendedTypes = true, + .subgroupBroadcastDynamicId = true, + + .vulkanMemoryModel = true, + .vulkanMemoryModelDeviceScope = true, + .vulkanMemoryModelAvailabilityVisibilityChains = true, + + .bufferDeviceAddress = true, + .bufferDeviceAddressCaptureReplay = false, + .bufferDeviceAddressMultiDevice = false, + + /* Vulkan 1.3 */ + .inlineUniformBlock = true, + /* Inline buffers work like push constants, so after their are bound + * some of their contents may be copied into the uniform stream as soon + * as the next draw/dispatch is recorded in the command buffer. This means + * that if the client updates the buffer contents after binding it to + * a command buffer, the next queue submit of that command buffer may + * not use the latest update to the buffer contents, but the data that + * was present in the buffer at the time it was bound to the command + * buffer. + */ + .descriptorBindingInlineUniformBlockUpdateAfterBind = false, + .pipelineCreationCacheControl = true, + .privateData = true, + .maintenance4 = true, + .shaderZeroInitializeWorkgroupMemory = true, + .synchronization2 = true, + .robustImageAccess = true, + .shaderIntegerDotProduct = true, + + /* VK_EXT_4444_formats */ + .formatA4R4G4B4 = true, + .formatA4B4G4R4 = true, + + /* VK_EXT_custom_border_color */ + .customBorderColors = true, + .customBorderColorWithoutFormat = false, + + /* VK_EXT_index_type_uint8 */ + .indexTypeUint8 = true, + + /* VK_EXT_line_rasterization */ + .rectangularLines = true, + .bresenhamLines = true, + .smoothLines = true, + .stippledRectangularLines = false, + .stippledBresenhamLines = false, + .stippledSmoothLines = false, + + /* VK_EXT_color_write_enable */ + .colorWriteEnable = true, + + /* VK_KHR_pipeline_executable_properties */ + .pipelineExecutableInfo = true, + + /* VK_EXT_provoking_vertex */ + .provokingVertexLast = true, + /* FIXME: update when supporting EXT_transform_feedback */ + .transformFeedbackPreservesProvokingVertex = false, + + /* VK_EXT_vertex_attribute_divisor */ + .vertexAttributeInstanceRateDivisor = true, + .vertexAttributeInstanceRateZeroDivisor = false, + + /* VK_KHR_performance_query */ + .performanceCounterQueryPools = physical_device->caps.perfmon, + .performanceCounterMultipleQueryPools = false, + + /* VK_EXT_texel_buffer_alignment */ + .texelBufferAlignment = true, + + /* VK_KHR_workgroup_memory_explicit_layout */ + .workgroupMemoryExplicitLayout = true, + .workgroupMemoryExplicitLayoutScalarBlockLayout = false, + .workgroupMemoryExplicitLayout8BitAccess = true, + .workgroupMemoryExplicitLayout16BitAccess = true, + + /* VK_EXT_border_color_swizzle */ + .borderColorSwizzle = true, + .borderColorSwizzleFromImage = true, + + /* VK_EXT_shader_module_identifier */ + .shaderModuleIdentifier = true, + + /* VK_EXT_depth_clip_control */ + .depthClipControl = true, + + /* VK_EXT_depth_clip_enable */ + .depthClipEnable = physical_device->devinfo.ver >= 71, + + /* VK_EXT_attachment_feedback_loop_layout */ + .attachmentFeedbackLoopLayout = true, + + /* VK_EXT_primitive_topology_list_restart */ + .primitiveTopologyListRestart = true, + /* FIXME: we don't support tessellation shaders yet */ + .primitiveTopologyPatchListRestart = false, + + /* VK_EXT_pipeline_robustness */ + .pipelineRobustness = true, + + /* VK_EXT_multi_draw */ + .multiDraw = true, + + /* VK_KHR_shader_terminate_invocation */ + .shaderTerminateInvocation = true, + + /* VK_EXT_shader_demote_to_helper_invocation */ + .shaderDemoteToHelperInvocation = true, + + /* VK_EXT_subgroup_size_control */ + .subgroupSizeControl = true, + .computeFullSubgroups = true, + + /* VK_KHR_shader_expect_assume */ + .shaderExpectAssume = true, + + /* VK_KHR_dynamic_rendering */ + .dynamicRendering = true, + +#ifdef V3DV_USE_WSI_PLATFORM + /* VK_EXT_swapchain_maintenance1 */ + .swapchainMaintenance1 = true, #endif - .KHR_variable_pointers = true, - .EXT_color_write_enable = true, - .EXT_custom_border_color = true, - .EXT_external_memory_dma_buf = true, - .EXT_index_type_uint8 = true, - .EXT_physical_device_drm = true, - .EXT_pipeline_creation_cache_control = true, - .EXT_pipeline_creation_feedback = true, - .EXT_private_data = true, - .EXT_provoking_vertex = true, - .EXT_vertex_attribute_divisor = true, }; } @@ -165,6 +496,10 @@ v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName, &instance_extensions, pPropertyCount, pProperties); } +static VkResult enumerate_devices(struct vk_instance *vk_instance); + +static void destroy_physical_device(struct vk_physical_device *device); + VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -186,6 +521,8 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, struct vk_instance_dispatch_table dispatch_table; vk_instance_dispatch_table_from_entrypoints( &dispatch_table, &v3dv_instance_entrypoints, true); + vk_instance_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_instance_entrypoints, false); result = vk_instance_init(&instance->vk, &instance_extensions, @@ -194,12 +531,13 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, if (result != VK_SUCCESS) { vk_free(pAllocator, instance); - return vk_error(instance, result); + return vk_error(NULL, result); } v3d_process_debug_variable(); - instance->physicalDeviceCount = -1; + instance->vk.physical_devices.enumerate = enumerate_devices; + instance->vk.physical_devices.destroy = destroy_physical_device; /* We start with the default values for the pipeline_cache envvars */ instance->pipeline_cache_enabled = true; @@ -229,8 +567,6 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, } } - util_cpu_detect(); - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); *pInstance = v3dv_instance_to_handle(instance); @@ -256,11 +592,11 @@ physical_device_finish(struct v3dv_physical_device *device) v3dv_physical_device_free_disk_cache(device); v3d_compiler_free(device->compiler); + util_sparse_array_finish(&device->bo_map); + close(device->render_fd); if (device->display_fd >= 0) close(device->display_fd); - if (device->master_fd >= 0) - close(device->master_fd); free(device->name); @@ -272,6 +608,13 @@ physical_device_finish(struct v3dv_physical_device *device) mtx_destroy(&device->mutex); } +static void +destroy_physical_device(struct vk_physical_device *device) +{ + physical_device_finish((struct v3dv_physical_device *)device); + vk_free(&device->instance->alloc, device); +} + VKAPI_ATTR void VKAPI_CALL v3dv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator) @@ -281,12 +624,6 @@ v3dv_DestroyInstance(VkInstance _instance, if (!instance) return; - if (instance->physicalDeviceCount > 0) { - /* We support at most one physical device. */ - assert(instance->physicalDeviceCount == 1); - physical_device_finish(&instance->physicalDevice); - } - VG(VALGRIND_DESTROY_MEMPOOL(instance)); vk_instance_finish(&instance->vk); @@ -306,286 +643,39 @@ compute_heap_size() uint64_t total_ram = (uint64_t) v3d_simulator_get_mem_size(); #endif - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. + /* We don't want to burn too much ram with the GPU. If the user has 4GB + * or less, we use at most half. If they have more than 4GB we limit it + * to 3/4 with a max. of 4GB since the GPU cannot address more than that. */ - uint64_t available_ram; - if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) - available_ram = total_ram / 2; + const uint64_t MAX_HEAP_SIZE = 4ull * 1024ull * 1024ull * 1024ull; + uint64_t available; + if (total_ram <= MAX_HEAP_SIZE) + available = total_ram / 2; else - available_ram = total_ram * 3 / 4; - - return available_ram; -} - -#if !using_v3d_simulator -#ifdef VK_USE_PLATFORM_XCB_KHR -static int -create_display_fd_xcb(VkIcdSurfaceBase *surface) -{ - int fd = -1; - - xcb_connection_t *conn; - xcb_dri3_open_reply_t *reply = NULL; - if (surface) { - if (surface->platform == VK_ICD_WSI_PLATFORM_XLIB) - conn = XGetXCBConnection(((VkIcdSurfaceXlib *)surface)->dpy); - else - conn = ((VkIcdSurfaceXcb *)surface)->connection; - } else { - conn = xcb_connect(NULL, NULL); - } - - if (xcb_connection_has_error(conn)) - goto finish; - - const xcb_setup_t *setup = xcb_get_setup(conn); - xcb_screen_iterator_t iter = xcb_setup_roots_iterator(setup); - xcb_screen_t *screen = iter.data; - - xcb_dri3_open_cookie_t cookie; - cookie = xcb_dri3_open(conn, screen->root, None); - reply = xcb_dri3_open_reply(conn, cookie, NULL); - if (!reply) - goto finish; - - if (reply->nfd != 1) - goto finish; - - fd = xcb_dri3_open_reply_fds(conn, reply)[0]; - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - -finish: - if (!surface) - xcb_disconnect(conn); - if (reply) - free(reply); - - return fd; -} -#endif - -#ifdef VK_USE_PLATFORM_WAYLAND_KHR -struct v3dv_wayland_info { - struct wl_drm *wl_drm; - int fd; - bool is_set; - bool authenticated; -}; + available = MIN2(MAX_HEAP_SIZE, total_ram * 3 / 4); -static void -v3dv_drm_handle_device(void *data, struct wl_drm *drm, const char *device) -{ - struct v3dv_wayland_info *info = data; - info->fd = open(device, O_RDWR | O_CLOEXEC); - info->is_set = info->fd != -1; - if (!info->is_set) { - fprintf(stderr, "v3dv_drm_handle_device: could not open %s (%s)\n", - device, strerror(errno)); - return; - } - - drm_magic_t magic; - if (drmGetMagic(info->fd, &magic)) { - fprintf(stderr, "v3dv_drm_handle_device: drmGetMagic failed\n"); - close(info->fd); - info->fd = -1; - info->is_set = false; - return; - } - wl_drm_authenticate(info->wl_drm, magic); -} - -static void -v3dv_drm_handle_format(void *data, struct wl_drm *drm, uint32_t format) -{ + return available; } -static void -v3dv_drm_handle_authenticated(void *data, struct wl_drm *drm) -{ - struct v3dv_wayland_info *info = data; - info->authenticated = true; -} - -static void -v3dv_drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t value) -{ -} - -struct wl_drm_listener v3dv_drm_listener = { - .device = v3dv_drm_handle_device, - .format = v3dv_drm_handle_format, - .authenticated = v3dv_drm_handle_authenticated, - .capabilities = v3dv_drm_handle_capabilities -}; - -static void -v3dv_registry_global(void *data, - struct wl_registry *registry, - uint32_t name, - const char *interface, - uint32_t version) -{ - struct v3dv_wayland_info *info = data; - if (strcmp(interface, "wl_drm") == 0) { - info->wl_drm = wl_registry_bind(registry, name, &wl_drm_interface, - MIN2(version, 2)); - wl_drm_add_listener(info->wl_drm, &v3dv_drm_listener, data); - }; -} - -static void -v3dv_registry_global_remove_cb(void *data, - struct wl_registry *registry, - uint32_t name) -{ -} - -static int -create_display_fd_wayland(VkIcdSurfaceBase *surface) -{ - struct wl_display *display; - struct wl_registry *registry = NULL; - - struct v3dv_wayland_info info = { - .wl_drm = NULL, - .fd = -1, - .is_set = false, - .authenticated = false - }; - - if (surface) - display = ((VkIcdSurfaceWayland *) surface)->display; - else - display = wl_display_connect(NULL); - - if (!display) - return -1; - - registry = wl_display_get_registry(display); - if (!registry) { - if (!surface) - wl_display_disconnect(display); - return -1; - } - - static const struct wl_registry_listener registry_listener = { - v3dv_registry_global, - v3dv_registry_global_remove_cb - }; - wl_registry_add_listener(registry, ®istry_listener, &info); - - wl_display_roundtrip(display); /* For the registry advertisement */ - wl_display_roundtrip(display); /* For the DRM device event */ - wl_display_roundtrip(display); /* For the authentication event */ - - wl_drm_destroy(info.wl_drm); - wl_registry_destroy(registry); - - if (!surface) - wl_display_disconnect(display); - - if (!info.is_set) - return -1; - - if (!info.authenticated) - return -1; - - return info.fd; -} -#endif - -/* Acquire an authenticated display fd without a surface reference. This is the - * case where the application is making WSI allocations outside the Vulkan - * swapchain context (only Zink, for now). Since we lack information about the - * underlying surface we just try our best to figure out the correct display - * and platform to use. It should work in most cases. - */ -static void -acquire_display_device_no_surface(struct v3dv_instance *instance, - struct v3dv_physical_device *pdevice) -{ -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - pdevice->display_fd = create_display_fd_wayland(NULL); -#endif - -#ifdef VK_USE_PLATFORM_XCB_KHR - if (pdevice->display_fd == -1) - pdevice->display_fd = create_display_fd_xcb(NULL); -#endif - -#ifdef VK_USE_PLATFORM_DISPLAY_KHR - if (pdevice->display_fd == - 1 && pdevice->master_fd >= 0) - pdevice->display_fd = dup(pdevice->master_fd); -#endif -} - -/* Acquire an authenticated display fd from the surface. This is the regular - * case where the application is using swapchains to create WSI allocations. - * In this case we use the surface information to figure out the correct - * display and platform combination. - */ -static void -acquire_display_device_surface(struct v3dv_instance *instance, - struct v3dv_physical_device *pdevice, - VkIcdSurfaceBase *surface) -{ - /* Mesa will set both of VK_USE_PLATFORM_{XCB,XLIB} when building with - * platform X11, so only check for XCB and rely on XCB to get an - * authenticated device also for Xlib. - */ -#ifdef VK_USE_PLATFORM_XCB_KHR - if (surface->platform == VK_ICD_WSI_PLATFORM_XCB || - surface->platform == VK_ICD_WSI_PLATFORM_XLIB) { - pdevice->display_fd = create_display_fd_xcb(surface); - } -#endif - -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - if (surface->platform == VK_ICD_WSI_PLATFORM_WAYLAND) - pdevice->display_fd = create_display_fd_wayland(surface); -#endif - -#ifdef VK_USE_PLATFORM_DISPLAY_KHR - if (surface->platform == VK_ICD_WSI_PLATFORM_DISPLAY && - pdevice->master_fd >= 0) { - pdevice->display_fd = dup(pdevice->master_fd); - } -#endif -} -#endif /* !using_v3d_simulator */ - -/* Attempts to get an authenticated display fd from the display server that - * we can use to allocate BOs for presentable images. - */ -VkResult -v3dv_physical_device_acquire_display(struct v3dv_instance *instance, - struct v3dv_physical_device *pdevice, - VkIcdSurfaceBase *surface) +static uint64_t +compute_memory_budget(struct v3dv_physical_device *device) { - VkResult result = VK_SUCCESS; - mtx_lock(&pdevice->mutex); - - if (pdevice->display_fd != -1) - goto done; - - /* When running on the simulator we do everything on a single render node so - * we don't need to get an authenticated display fd from the display server. - */ + uint64_t heap_size = device->memory.memoryHeaps[0].size; + uint64_t heap_used = device->heap_used; + uint64_t sys_available; #if !using_v3d_simulator - if (surface) - acquire_display_device_surface(instance, pdevice, surface); - else - acquire_display_device_no_surface(instance, pdevice); - - if (pdevice->display_fd == -1) - result = VK_ERROR_INITIALIZATION_FAILED; + ASSERTED bool has_available_memory = + os_get_available_system_memory(&sys_available); + assert(has_available_memory); +#else + sys_available = (uint64_t) v3d_simulator_get_mem_free(); #endif -done: - mtx_unlock(&pdevice->mutex); - return result; + /* Let's not incite the app to starve the system: report at most 90% of + * available system memory. + */ + uint64_t heap_available = sys_available * 9 / 10; + return MIN2(heap_size, heap_used + heap_available); } static bool @@ -604,7 +694,8 @@ device_has_expected_features(struct v3dv_physical_device *device) { return v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_TFU) && v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CSD) && - v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH); + v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH) && + device->caps.multisync; } @@ -614,14 +705,14 @@ init_uuids(struct v3dv_physical_device *device) const struct build_id_note *note = build_id_find_nhdr_for_addr(init_uuids); if (!note) { - return vk_errorf((struct v3dv_instance*) device->vk.instance, + return vk_errorf(device->vk.instance, VK_ERROR_INITIALIZATION_FAILED, "Failed to find build-id"); } unsigned build_id_len = build_id_length(note); if (build_id_len < 20) { - return vk_errorf((struct v3dv_instance*) device->vk.instance, + return vk_errorf(device->vk.instance, VK_ERROR_INITIALIZATION_FAILED, "build-id too short. It needs to be a SHA"); } @@ -672,38 +763,46 @@ v3dv_physical_device_init_disk_cache(struct v3dv_physical_device *device) _mesa_sha1_format(timestamp, device->driver_build_sha1); assert(device->name); - device->disk_cache = disk_cache_create(device->name, timestamp, 0); + device->disk_cache = disk_cache_create(device->name, timestamp, v3d_mesa_debug); #else device->disk_cache = NULL; #endif } static VkResult -physical_device_init(struct v3dv_physical_device *device, - struct v3dv_instance *instance, - drmDevicePtr drm_render_device, - drmDevicePtr drm_primary_device) +create_physical_device(struct v3dv_instance *instance, + drmDevicePtr gpu_device, + drmDevicePtr display_device) { VkResult result = VK_SUCCESS; - int32_t master_fd = -1; + int32_t display_fd = -1; int32_t render_fd = -1; + struct v3dv_physical_device *device = + vk_zalloc(&instance->vk.alloc, sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (!device) + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + struct vk_physical_device_dispatch_table dispatch_table; vk_physical_device_dispatch_table_from_entrypoints (&dispatch_table, &v3dv_physical_device_entrypoints, true); + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_physical_device_entrypoints, false); - result = vk_physical_device_init(&device->vk, &instance->vk, NULL, - &dispatch_table); + result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL, + NULL, &dispatch_table); if (result != VK_SUCCESS) goto fail; - assert(drm_render_device); - const char *path = drm_render_device->nodes[DRM_NODE_RENDER]; + assert(gpu_device); + const char *path = gpu_device->nodes[DRM_NODE_RENDER]; render_fd = open(path, O_RDWR | O_CLOEXEC); if (render_fd < 0) { fprintf(stderr, "Opening %s failed: %s\n", path, strerror(errno)); - result = VK_ERROR_INCOMPATIBLE_DRIVER; + result = VK_ERROR_INITIALIZATION_FAILED; goto fail; } @@ -714,12 +813,12 @@ physical_device_init(struct v3dv_physical_device *device, const char *primary_path; #if !using_v3d_simulator - if (drm_primary_device) - primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY]; + if (display_device) + primary_path = display_device->nodes[DRM_NODE_PRIMARY]; else primary_path = NULL; #else - primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY]; + primary_path = gpu_device->nodes[DRM_NODE_PRIMARY]; #endif struct stat primary_stat = {0}, render_stat = {0}; @@ -727,8 +826,7 @@ physical_device_init(struct v3dv_physical_device *device, device->has_primary = primary_path; if (device->has_primary) { if (stat(primary_path, &primary_stat) != 0) { - result = vk_errorf(instance, - VK_ERROR_INITIALIZATION_FAILED, + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s", primary_path); goto fail; @@ -738,8 +836,7 @@ physical_device_init(struct v3dv_physical_device *device, } if (fstat(render_fd, &render_stat) != 0) { - result = vk_errorf(instance, - VK_ERROR_INITIALIZATION_FAILED, + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s", path); goto fail; @@ -747,16 +844,24 @@ physical_device_init(struct v3dv_physical_device *device, device->has_render = true; device->render_devid = render_stat.st_rdev; - if (instance->vk.enabled_extensions.KHR_display) { +#if using_v3d_simulator + device->device_id = gpu_device->deviceinfo.pci->device_id; +#endif + + if (instance->vk.enabled_extensions.KHR_display || + instance->vk.enabled_extensions.KHR_xcb_surface || + instance->vk.enabled_extensions.KHR_xlib_surface || + instance->vk.enabled_extensions.KHR_wayland_surface || + instance->vk.enabled_extensions.EXT_acquire_drm_display) { #if !using_v3d_simulator /* Open the primary node on the vc4 display device */ - assert(drm_primary_device); - master_fd = open(primary_path, O_RDWR | O_CLOEXEC); + assert(display_device); + display_fd = open(primary_path, O_RDWR | O_CLOEXEC); #else /* There is only one device with primary and render nodes. * Open its primary node. */ - master_fd = open(primary_path, O_RDWR | O_CLOEXEC); + display_fd = open(primary_path, O_RDWR | O_CLOEXEC); #endif } @@ -765,21 +870,32 @@ physical_device_init(struct v3dv_physical_device *device, #endif device->render_fd = render_fd; /* The v3d render node */ - device->display_fd = -1; /* Authenticated vc4 primary node */ - device->master_fd = master_fd; /* Master vc4 primary node */ + device->display_fd = display_fd; /* Master vc4 primary node */ if (!v3d_get_device_info(device->render_fd, &device->devinfo, &v3dv_ioctl)) { - result = VK_ERROR_INCOMPATIBLE_DRIVER; + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "Failed to get info from device."); goto fail; } if (device->devinfo.ver < 42) { - result = VK_ERROR_INCOMPATIBLE_DRIVER; + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "Device version < 42."); goto fail; } + device->caps.cpu_queue = + v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE); + + device->caps.multisync = + v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT); + + device->caps.perfmon = + v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_PERFMON); + if (!device_has_expected_features(device)) { - result = VK_ERROR_INCOMPATIBLE_DRIVER; + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "Kernel driver doesn't have required features."); goto fail; } @@ -787,12 +903,15 @@ physical_device_init(struct v3dv_physical_device *device, if (result != VK_SUCCESS) goto fail; - device->compiler = v3d_compiler_init(&device->devinfo); + device->compiler = v3d_compiler_init(&device->devinfo, + MAX_INLINE_UNIFORM_BUFFERS); device->next_program_id = 0; ASSERTED int len = - asprintf(&device->name, "V3D %d.%d", - device->devinfo.ver / 10, device->devinfo.ver % 10); + asprintf(&device->name, "V3D %d.%d.%d", + device->devinfo.ver / 10, + device->devinfo.ver % 10, + device->devinfo.rev); assert(len != -1); v3dv_physical_device_init_disk_cache(device); @@ -811,7 +930,31 @@ physical_device_init(struct v3dv_physical_device *device, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; mem->memoryTypes[0].heapIndex = 0; - device->options.merge_jobs = getenv("V3DV_NO_MERGE_JOBS") == NULL; + /* Initialize sparse array for refcounting imported BOs */ + util_sparse_array_init(&device->bo_map, sizeof(struct v3dv_bo), 512); + + device->options.merge_jobs = !V3D_DBG(NO_MERGE_JOBS); + + device->drm_syncobj_type = vk_drm_syncobj_get_type(device->render_fd); + + /* We don't support timelines in the uAPI yet and we don't want it getting + * suddenly turned on by vk_drm_syncobj_get_type() without us adding v3dv + * code for it first. + */ + device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE; + + /* Multiwait is required for emulated timeline semaphores and is supported + * by the v3d kernel interface. + */ + device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT; + + device->sync_timeline_type = + vk_sync_timeline_get_type(&device->drm_syncobj_type); + + device->sync_types[0] = &device->drm_syncobj_type; + device->sync_types[1] = &device->sync_timeline_type.sync; + device->sync_types[2] = NULL; + device->vk.supported_sync_types = device->sync_types; result = v3dv_wsi_init(device); if (result != VK_SUCCESS) { @@ -820,35 +963,46 @@ physical_device_init(struct v3dv_physical_device *device, } get_device_extensions(device, &device->vk.supported_extensions); + get_features(device, &device->vk.supported_features); - pthread_mutex_init(&device->mutex, NULL); + mtx_init(&device->mutex, mtx_plain); + + list_addtail(&device->vk.link, &instance->vk.physical_devices.list); return VK_SUCCESS; fail: vk_physical_device_finish(&device->vk); + vk_free(&instance->vk.alloc, device); if (render_fd >= 0) close(render_fd); - if (master_fd >= 0) - close(master_fd); + if (display_fd >= 0) + close(display_fd); return result; } +/* This driver hook is expected to return VK_SUCCESS (unless a memory + * allocation error happened) if no compatible device is found. If a + * compatible device is found, it may return an error code if device + * inialization failed. + */ static VkResult -enumerate_devices(struct v3dv_instance *instance) +enumerate_devices(struct vk_instance *vk_instance) { - /* TODO: Check for more devices? */ + struct v3dv_instance *instance = + container_of(vk_instance, struct v3dv_instance, vk); + + /* FIXME: Check for more devices? */ drmDevicePtr devices[8]; - VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; int max_devices; - instance->physicalDeviceCount = 0; - max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); if (max_devices < 1) - return VK_ERROR_INCOMPATIBLE_DRIVER; + return VK_SUCCESS; + + VkResult result = VK_SUCCESS; #if !using_v3d_simulator int32_t v3d_idx = -1; @@ -856,25 +1010,24 @@ enumerate_devices(struct v3dv_instance *instance) #endif for (unsigned i = 0; i < (unsigned)max_devices; i++) { #if using_v3d_simulator - /* In the simulator, we look for an Intel render node */ + /* In the simulator, we look for an Intel/AMD render node */ const int required_nodes = (1 << DRM_NODE_RENDER) | (1 << DRM_NODE_PRIMARY); if ((devices[i]->available_nodes & required_nodes) == required_nodes && devices[i]->bustype == DRM_BUS_PCI && - devices[i]->deviceinfo.pci->vendor_id == 0x8086) { - result = physical_device_init(&instance->physicalDevice, instance, - devices[i], NULL); - if (result != VK_ERROR_INCOMPATIBLE_DRIVER) + (devices[i]->deviceinfo.pci->vendor_id == 0x8086 || + devices[i]->deviceinfo.pci->vendor_id == 0x1002)) { + result = create_physical_device(instance, devices[i], NULL); + if (result == VK_SUCCESS) break; } #else - /* On actual hardware, we should have a render node (v3d) - * and a primary node (vc4). We will need to use the primary - * to allocate WSI buffers and share them with the render node - * via prime, but that is a privileged operation so we need the - * primary node to be authenticated, and for that we need the - * display server to provide the device fd (with DRI3), so we - * here we only check that the device is present but we don't - * try to open it. + /* On actual hardware, we should have a gpu device (v3d) and a display + * device (vc4). We will need to use the display device to allocate WSI + * buffers and share them with the render node via prime, but that is a + * privileged operation so we need t have an authenticated display fd + * and for that we need the display server to provide the it (with DRI3), + * so here we only check that the device is present but we don't try to + * open it. */ if (devices[i]->bustype != DRM_BUS_PLATFORM) continue; @@ -882,7 +1035,8 @@ enumerate_devices(struct v3dv_instance *instance) if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) { char **compat = devices[i]->deviceinfo.platform->compatible; while (*compat) { - if (strncmp(*compat, "brcm,2711-v3d", 13) == 0) { + if (strncmp(*compat, "brcm,2711-v3d", 13) == 0 || + strncmp(*compat, "brcm,2712-v3d", 13) == 0) { v3d_idx = i; break; } @@ -891,8 +1045,9 @@ enumerate_devices(struct v3dv_instance *instance) } else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) { char **compat = devices[i]->deviceinfo.platform->compatible; while (*compat) { - if (strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 || - strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0 ) { + if (strncmp(*compat, "brcm,bcm2712-vc6", 16) == 0 || + strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 || + strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0) { vc4_idx = i; break; } @@ -903,345 +1058,35 @@ enumerate_devices(struct v3dv_instance *instance) } #if !using_v3d_simulator - if (v3d_idx == -1 || vc4_idx == -1) - result = VK_ERROR_INCOMPATIBLE_DRIVER; - else - result = physical_device_init(&instance->physicalDevice, instance, - devices[v3d_idx], devices[vc4_idx]); + if (v3d_idx != -1) { + drmDevicePtr v3d_device = devices[v3d_idx]; + drmDevicePtr vc4_device = vc4_idx != -1 ? devices[vc4_idx] : NULL; + result = create_physical_device(instance, v3d_device, vc4_device); + } #endif drmFreeDevices(devices, max_devices); - if (result == VK_SUCCESS) - instance->physicalDeviceCount = 1; - return result; } -static VkResult -instance_ensure_physical_device(struct v3dv_instance *instance) -{ - if (instance->physicalDeviceCount < 0) { - VkResult result = enumerate_devices(instance); - if (result != VK_SUCCESS && - result != VK_ERROR_INCOMPATIBLE_DRIVER) - return result; - } - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_EnumeratePhysicalDevices(VkInstance _instance, - uint32_t *pPhysicalDeviceCount, - VkPhysicalDevice *pPhysicalDevices) -{ - V3DV_FROM_HANDLE(v3dv_instance, instance, _instance); - VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount); - - VkResult result = instance_ensure_physical_device(instance); - if (result != VK_SUCCESS) - return result; - - if (instance->physicalDeviceCount == 0) - return VK_SUCCESS; - - assert(instance->physicalDeviceCount == 1); - vk_outarray_append(&out, i) { - *i = v3dv_physical_device_to_handle(&instance->physicalDevice); - } - - return vk_outarray_status(&out); -} - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_EnumeratePhysicalDeviceGroups( - VkInstance _instance, - uint32_t *pPhysicalDeviceGroupCount, - VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties) -{ - V3DV_FROM_HANDLE(v3dv_instance, instance, _instance); - VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties, - pPhysicalDeviceGroupCount); - - VkResult result = instance_ensure_physical_device(instance); - if (result != VK_SUCCESS) - return result; - - assert(instance->physicalDeviceCount == 1); - - vk_outarray_append(&out, p) { - p->physicalDeviceCount = 1; - memset(p->physicalDevices, 0, sizeof(p->physicalDevices)); - p->physicalDevices[0] = - v3dv_physical_device_to_handle(&instance->physicalDevice); - p->subsetAllocation = false; - - vk_foreach_struct(ext, p->pNext) - v3dv_debug_ignored_stype(ext->sType); - } - - return vk_outarray_status(&out); -} - -VKAPI_ATTR void VKAPI_CALL -v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures *pFeatures) -{ - memset(pFeatures, 0, sizeof(*pFeatures)); - - *pFeatures = (VkPhysicalDeviceFeatures) { - .robustBufferAccess = true, /* This feature is mandatory */ - .fullDrawIndexUint32 = false, /* Only available since V3D 4.4.9.1 */ - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = false, - .sampleRateShading = true, - .dualSrcBlend = false, - .logicOp = true, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = true, - .depthClamp = false, - .depthBiasClamp = true, - .fillModeNonSolid = true, - .depthBounds = false, /* Only available since V3D 4.3.16.2 */ - .wideLines = true, - .largePoints = true, - .alphaToOne = true, - .multiViewport = false, - .samplerAnisotropy = true, - .textureCompressionETC2 = true, - .textureCompressionASTC_LDR = true, - /* Note that textureCompressionBC requires that the driver support all - * the BC formats. V3D 4.2 only support the BC1-3, so we can't claim - * that we support it. - */ - .textureCompressionBC = false, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = true, - .shaderImageGatherExtended = false, - .shaderStorageImageExtendedFormats = true, - .shaderStorageImageMultisample = false, - .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = false, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = true, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = true, - }; -} - -VKAPI_ATTR void VKAPI_CALL -v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2 *pFeatures) -{ - v3dv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); - - VkPhysicalDeviceVulkan11Features vk11 = { - .storageBuffer16BitAccess = false, - .uniformAndStorageBuffer16BitAccess = false, - .storagePushConstant16 = false, - .storageInputOutput16 = false, - .multiview = true, - .multiviewGeometryShader = false, - .multiviewTessellationShader = false, - .variablePointersStorageBuffer = true, - /* FIXME: this needs support for non-constant index on UBO/SSBO */ - .variablePointers = false, - .protectedMemory = false, - .samplerYcbcrConversion = false, - .shaderDrawParameters = false, - }; - - vk_foreach_struct(ext, pFeatures->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { - VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = - (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext; - features->customBorderColors = true; - features->customBorderColorWithoutFormat = false; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: { - VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features = - (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext; - features->uniformBufferStandardLayout = true; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: { - VkPhysicalDevicePrivateDataFeaturesEXT *features = - (VkPhysicalDevicePrivateDataFeaturesEXT *)ext; - features->privateData = true; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { - VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features = - (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext; - features->indexTypeUint8 = true; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: { - VkPhysicalDeviceColorWriteEnableFeaturesEXT *features = (void *) ext; - features->colorWriteEnable = true; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: { - VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features = (void *) ext; - features->pipelineCreationCacheControl = true; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: { - VkPhysicalDeviceProvokingVertexFeaturesEXT *features = (void *) ext; - features->provokingVertexLast = true; - /* FIXME: update when supporting EXT_transform_feedback */ - features->transformFeedbackPreservesProvokingVertex = false; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { - VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = - (void *) ext; - features->vertexAttributeInstanceRateDivisor = true; - features->vertexAttributeInstanceRateZeroDivisor = false; - break; - } - - /* Vulkan 1.1 */ - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: { - VkPhysicalDeviceVulkan11Features *features = - (VkPhysicalDeviceVulkan11Features *)ext; - memcpy(features, &vk11, sizeof(VkPhysicalDeviceVulkan11Features)); - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { - VkPhysicalDevice16BitStorageFeatures *features = (void *) ext; - features->storageBuffer16BitAccess = vk11.storageBuffer16BitAccess; - features->uniformAndStorageBuffer16BitAccess = - vk11.uniformAndStorageBuffer16BitAccess; - features->storagePushConstant16 = vk11.storagePushConstant16; - features->storageInputOutput16 = vk11.storageInputOutput16; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { - VkPhysicalDeviceMultiviewFeatures *features = (void *) ext; - features->multiview = vk11.multiview; - features->multiviewGeometryShader = vk11.multiviewGeometryShader; - features->multiviewTessellationShader = vk11.multiviewTessellationShader; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { - VkPhysicalDeviceProtectedMemoryFeatures *features = (void *) ext; - features->protectedMemory = vk11.protectedMemory; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { - VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = (void *) ext; - features->samplerYcbcrConversion = vk11.samplerYcbcrConversion; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { - VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *) ext; - features->shaderDrawParameters = vk11.shaderDrawParameters; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { - VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext; - features->variablePointersStorageBuffer = - vk11.variablePointersStorageBuffer; - features->variablePointers = vk11.variablePointers; - break; - } - - default: - v3dv_debug_ignored_stype(ext->sType); - break; - } - } -} - -VKAPI_ATTR void VKAPI_CALL -v3dv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, - uint32_t heapIndex, - uint32_t localDeviceIndex, - uint32_t remoteDeviceIndex, - VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) -{ - assert(localDeviceIndex == 0 && remoteDeviceIndex == 0); - *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | - VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | - VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | - VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; -} - uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev) { return 0x14E4; /* Broadcom */ } - -#if using_v3d_simulator -static bool -get_i915_param(int fd, uint32_t param, int *value) -{ - int tmp; - - struct drm_i915_getparam gp = { - .param = param, - .value = &tmp, - }; - - int ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret != 0) - return false; - - *value = tmp; - return true; -} -#endif - uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev) { #if using_v3d_simulator - int devid = 0; - - if (!get_i915_param(dev->render_fd, I915_PARAM_CHIPSET_ID, &devid)) - fprintf(stderr, "Error getting device_id\n"); - - return devid; + return dev->device_id; #else switch (dev->devinfo.ver) { case 42: return 0xBE485FD3; /* Broadcom deviceID for 2711 */ + case 71: + return 0x55701C33; /* Broadcom deviceID for 2712 */ default: unreachable("Unsupported V3D version"); } @@ -1260,18 +1105,18 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, STATIC_ASSERT(MAX_STORAGE_BUFFERS >= MAX_DYNAMIC_STORAGE_BUFFERS); const uint32_t page_size = 4096; - const uint32_t mem_size = compute_heap_size(); + const uint64_t mem_size = compute_heap_size(); const uint32_t max_varying_components = 16 * 4; - const uint32_t v3d_coord_shift = 6; - - const float v3d_point_line_granularity = 2.0f / (1 << v3d_coord_shift); - const uint32_t max_fb_size = 4096; + const float v3d_point_line_granularity = 2.0f / (1 << V3D_COORD_SHIFT); + const uint32_t max_fb_size = V3D_MAX_IMAGE_DIMENSION; const VkSampleCountFlags supported_sample_counts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; + const uint8_t max_rts = V3D_MAX_RENDER_TARGETS(pdevice->devinfo.ver); + struct timespec clock_res; clock_getres(CLOCK_MONOTONIC, &clock_res); const float timestamp_period = @@ -1279,18 +1124,18 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, /* FIXME: this will probably require an in-depth review */ VkPhysicalDeviceLimits limits = { - .maxImageDimension1D = 4096, - .maxImageDimension2D = 4096, - .maxImageDimension3D = 4096, - .maxImageDimensionCube = 4096, - .maxImageArrayLayers = 2048, + .maxImageDimension1D = V3D_MAX_IMAGE_DIMENSION, + .maxImageDimension2D = V3D_MAX_IMAGE_DIMENSION, + .maxImageDimension3D = V3D_MAX_IMAGE_DIMENSION, + .maxImageDimensionCube = V3D_MAX_IMAGE_DIMENSION, + .maxImageArrayLayers = V3D_MAX_ARRAY_LAYERS, .maxTexelBufferElements = (1ul << 28), .maxUniformBufferRange = V3D_MAX_BUFFER_RANGE, .maxStorageBufferRange = V3D_MAX_BUFFER_RANGE, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = mem_size / page_size, .maxSamplerAllocationCount = 64 * 1024, - .bufferImageGranularity = 256, /* A cache line */ + .bufferImageGranularity = V3D_NON_COHERENT_ATOM_SIZE, .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = V3D_MAX_TEXTURE_SAMPLERS, @@ -1342,7 +1187,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxFragmentInputComponents = max_varying_components, .maxFragmentOutputAttachments = 4, .maxFragmentDualSrcAttachments = 0, - .maxFragmentCombinedOutputResources = MAX_RENDER_TARGETS + + .maxFragmentCombinedOutputResources = max_rts + MAX_STORAGE_BUFFERS + MAX_STORAGE_IMAGES, @@ -1352,10 +1197,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxComputeWorkGroupInvocations = 256, .maxComputeWorkGroupSize = { 256, 256, 256 }, - .subPixelPrecisionBits = v3d_coord_shift, + .subPixelPrecisionBits = V3D_COORD_SHIFT, .subTexelPrecisionBits = 8, .mipmapPrecisionBits = 8, - .maxDrawIndexedIndexValue = 0x00ffffff, + .maxDrawIndexedIndexValue = pdevice->devinfo.ver >= 71 ? + 0xffffffff : 0x00ffffff, .maxDrawIndirectCount = 0x7fffffff, .maxSamplerLodBias = 14.0f, .maxSamplerAnisotropy = 16.0f, @@ -1365,7 +1211,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, 2.0 * max_fb_size - 1 }, .viewportSubPixelBits = 0, .minMemoryMapAlignment = page_size, - .minTexelBufferOffsetAlignment = V3D_UIFBLOCK_SIZE, + .minTexelBufferOffsetAlignment = V3D_TMU_TEXEL_ALIGN, .minUniformBufferOffsetAlignment = 32, .minStorageBufferOffsetAlignment = 32, .minTexelOffset = -8, @@ -1374,7 +1220,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxTexelGatherOffset = 7, .minInterpolationOffset = -0.5, .maxInterpolationOffset = 0.5, - .subPixelInterpolationOffsetBits = v3d_coord_shift, + .subPixelInterpolationOffsetBits = V3D_COORD_SHIFT, .maxFramebufferWidth = max_fb_size, .maxFramebufferHeight = max_fb_size, .maxFramebufferLayers = 256, @@ -1382,7 +1228,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .framebufferDepthSampleCounts = supported_sample_counts, .framebufferStencilSampleCounts = supported_sample_counts, .framebufferNoAttachmentsSampleCounts = supported_sample_counts, - .maxColorAttachments = MAX_RENDER_TARGETS, + .maxColorAttachments = max_rts, .sampledImageColorSampleCounts = supported_sample_counts, .sampledImageIntegerSampleCounts = supported_sample_counts, .sampledImageDepthSampleCounts = supported_sample_counts, @@ -1404,7 +1250,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .standardSampleLocations = false, .optimalBufferCopyOffsetAlignment = 32, .optimalBufferCopyRowPitchAlignment = 32, - .nonCoherentAtomSize = 256, + .nonCoherentAtomSize = V3D_NON_COHERENT_ATOM_SIZE, }; *pProperties = (VkPhysicalDeviceProperties) { @@ -1431,7 +1277,166 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, v3dv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); + /* We don't really have special restrictions for the maximum + * descriptors per set, other than maybe not exceeding the limits + * of addressable memory in a single allocation on either the host + * or the GPU. This will be a much larger limit than any of the + * per-stage limits already available in Vulkan though, so in practice, + * it is not expected to limit anything beyond what is already + * constrained through per-stage limits. + */ + const uint32_t max_host_descriptors = + (UINT32_MAX - sizeof(struct v3dv_descriptor_set)) / + sizeof(struct v3dv_descriptor); + const uint32_t max_gpu_descriptors = + (UINT32_MAX / v3dv_X(pdevice, max_descriptor_bo_size)()); + + VkPhysicalDeviceVulkan13Properties vk13 = { + .maxInlineUniformBlockSize = 4096, + .maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS, + .maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS, + .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = + MAX_INLINE_UNIFORM_BUFFERS, + .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = + MAX_INLINE_UNIFORM_BUFFERS, + .maxBufferSize = V3D_MAX_BUFFER_RANGE, + .storageTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN, + .storageTexelBufferOffsetSingleTexelAlignment = false, + .uniformTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN, + .uniformTexelBufferOffsetSingleTexelAlignment = false, + /* No native acceleration for integer dot product. We use NIR lowering. */ + .integerDotProduct8BitUnsignedAccelerated = false, + .integerDotProduct8BitMixedSignednessAccelerated = false, + .integerDotProduct4x8BitPackedUnsignedAccelerated = false, + .integerDotProduct4x8BitPackedSignedAccelerated = false, + .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false, + .integerDotProduct16BitUnsignedAccelerated = false, + .integerDotProduct16BitSignedAccelerated = false, + .integerDotProduct16BitMixedSignednessAccelerated = false, + .integerDotProduct32BitUnsignedAccelerated = false, + .integerDotProduct32BitSignedAccelerated = false, + .integerDotProduct32BitMixedSignednessAccelerated = false, + .integerDotProduct64BitUnsignedAccelerated = false, + .integerDotProduct64BitSignedAccelerated = false, + .integerDotProduct64BitMixedSignednessAccelerated = false, + .integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false, + .integerDotProductAccumulatingSaturating8BitSignedAccelerated = false, + .integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false, + .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false, + .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false, + .integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false, + .integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false, + .integerDotProductAccumulatingSaturating16BitSignedAccelerated = false, + .integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false, + .integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false, + .integerDotProductAccumulatingSaturating32BitSignedAccelerated = false, + .integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false, + .integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false, + .integerDotProductAccumulatingSaturating64BitSignedAccelerated = false, + .integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false, + /* VK_EXT_subgroup_size_control */ + .minSubgroupSize = V3D_CHANNELS, + .maxSubgroupSize = V3D_CHANNELS, + .maxComputeWorkgroupSubgroups = 16, /* 256 / 16 */ + .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT, + }; + + VkPhysicalDeviceVulkan12Properties vk12 = { + .driverID = VK_DRIVER_ID_MESA_V3DV, + .conformanceVersion = { + .major = 1, + .minor = 3, + .subminor = 6, + .patch = 1, + }, + .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, + .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, + /* FIXME: if we want to support independentResolveNone then we would + * need to honor attachment load operations on resolve attachments, + * which we currently ignore because the resolve makes them irrelevant, + * as it unconditionally writes all pixels in the render area. However, + * with independentResolveNone, it is possible to have one aspect of a + * D/S resolve attachment stay unresolved, in which case the attachment + * load operation is relevant. + * + * NOTE: implementing attachment load for resolve attachments isn't + * immediately trivial because these attachments are not part of the + * framebuffer and therefore we can't use the same mechanism we use + * for framebuffer attachments. Instead, we should probably have to + * emit a meta operation for that right at the start of the render + * pass (or subpass). + */ + .independentResolveNone = false, + .independentResolve = false, + .maxTimelineSemaphoreValueDifference = UINT64_MAX, + + .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + .shaderSignedZeroInfNanPreserveFloat16 = true, + .shaderSignedZeroInfNanPreserveFloat32 = true, + .shaderSignedZeroInfNanPreserveFloat64 = false, + .shaderDenormPreserveFloat16 = true, + .shaderDenormPreserveFloat32 = true, + .shaderDenormPreserveFloat64 = false, + .shaderDenormFlushToZeroFloat16 = false, + .shaderDenormFlushToZeroFloat32 = false, + .shaderDenormFlushToZeroFloat64 = false, + .shaderRoundingModeRTEFloat16 = true, + .shaderRoundingModeRTEFloat32 = true, + .shaderRoundingModeRTEFloat64 = false, + .shaderRoundingModeRTZFloat16 = false, + .shaderRoundingModeRTZFloat32 = false, + .shaderRoundingModeRTZFloat64 = false, + + /* V3D doesn't support min/max filtering */ + .filterMinmaxSingleComponentFormats = false, + .filterMinmaxImageComponentMapping = false, + + .framebufferIntegerColorSampleCounts = + VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT, + }; + memset(vk12.driverName, 0, VK_MAX_DRIVER_NAME_SIZE); + snprintf(vk12.driverName, VK_MAX_DRIVER_NAME_SIZE, "V3DV Mesa"); + memset(vk12.driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE); + snprintf(vk12.driverInfo, VK_MAX_DRIVER_INFO_SIZE, + "Mesa " PACKAGE_VERSION MESA_GIT_SHA1); + + VkSubgroupFeatureFlags subgroup_ops = VK_SUBGROUP_FEATURE_BASIC_BIT; + if (pdevice->devinfo.ver >= 71) { + subgroup_ops |= VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_QUAD_BIT; + } + + VkPhysicalDeviceVulkan11Properties vk11 = { + .deviceLUIDValid = false, + .subgroupSize = V3D_CHANNELS, + .subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT | + VK_SHADER_STAGE_FRAGMENT_BIT, + .subgroupSupportedOperations = subgroup_ops, + .subgroupQuadOperationsInAllStages = false, + .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES, + .maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT, + .maxMultiviewInstanceIndex = UINT32_MAX - 1, + .protectedNoFault = false, + .maxPerSetDescriptors = MIN2(max_host_descriptors, max_gpu_descriptors), + /* Minimum required by the spec */ + .maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE, + }; + memcpy(vk11.deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); + memcpy(vk11.driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); + + vk_foreach_struct(ext, pProperties->pNext) { + if (vk_get_physical_device_core_1_1_property_ext(ext, &vk11)) + continue; + if (vk_get_physical_device_core_1_2_property_ext(ext, &vk12)) + continue; + if (vk_get_physical_device_core_1_3_property_ext(ext, &vk13)) + continue; + switch (ext->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: { VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = @@ -1453,15 +1458,31 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, props->maxVertexAttribDivisor = 0xffff; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { - VkPhysicalDeviceIDProperties *id_props = - (VkPhysicalDeviceIDProperties *)ext; - memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); - memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); - /* The LUID is for Windows. */ - id_props->deviceLUIDValid = false; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR : { + VkPhysicalDevicePerformanceQueryPropertiesKHR *props = + (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext; + + props->allowCommandBufferQueryCopies = true; + break; + } +#if DETECT_OS_ANDROID +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: { + VkPhysicalDevicePresentationPropertiesANDROID *props = + (VkPhysicalDevicePresentationPropertiesANDROID *)ext; + uint64_t front_rendering_usage = 0; + struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO); + if (gralloc != NULL) { + u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage); + u_gralloc_destroy(&gralloc); + } + props->sharedImage = front_rendering_usage ? VK_TRUE + : VK_FALSE; break; } +#pragma GCC diagnostic pop +#endif case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: { VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext; @@ -1477,34 +1498,10 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, } break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { - VkPhysicalDeviceMaintenance3Properties *props = - (VkPhysicalDeviceMaintenance3Properties *)ext; - /* We don't really have special restrictions for the maximum - * descriptors per set, other than maybe not exceeding the limits - * of addressable memory in a single allocation on either the host - * or the GPU. This will be a much larger limit than any of the - * per-stage limits already available in Vulkan though, so in practice, - * it is not expected to limit anything beyond what is already - * constrained through per-stage limits. - */ - uint32_t max_host_descriptors = - (UINT32_MAX - sizeof(struct v3dv_descriptor_set)) / - sizeof(struct v3dv_descriptor); - uint32_t max_gpu_descriptors = - (UINT32_MAX / v3dv_X(pdevice, max_descriptor_bo_size)()); - props->maxPerSetDescriptors = - MIN2(max_host_descriptors, max_gpu_descriptors); - - /* Minimum required by the spec */ - props->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { - VkPhysicalDeviceMultiviewProperties *props = - (VkPhysicalDeviceMultiviewProperties *)ext; - props->maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT; - props->maxMultiviewInstanceIndex = UINT32_MAX - 1; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: { + VkPhysicalDeviceLineRasterizationPropertiesEXT *props = + (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext; + props->lineSubPixelPrecisionBits = V3D_COORD_SHIFT; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: @@ -1512,26 +1509,33 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, * never provide this extension. */ break; - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { - VkPhysicalDevicePointClippingProperties *props = - (VkPhysicalDevicePointClippingProperties *)ext; - props->pointClippingBehavior = - VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: { + VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props = + (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext; + STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == + sizeof(props->shaderModuleIdentifierAlgorithmUUID)); + memcpy(props->shaderModuleIdentifierAlgorithmUUID, + vk_shaderModuleIdentifierAlgorithmUUID, + sizeof(props->shaderModuleIdentifierAlgorithmUUID)); break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { - VkPhysicalDeviceProtectedMemoryProperties *props = - (VkPhysicalDeviceProtectedMemoryProperties *)ext; - props->protectedNoFault = false; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_PROPERTIES_EXT: { + VkPhysicalDevicePipelineRobustnessPropertiesEXT *props = + (VkPhysicalDevicePipelineRobustnessPropertiesEXT *)ext; + props->defaultRobustnessStorageBuffers = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + props->defaultRobustnessUniformBuffers = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + props->defaultRobustnessVertexInputs = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + props->defaultRobustnessImages = + VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: { - VkPhysicalDeviceSubgroupProperties *props = - (VkPhysicalDeviceSubgroupProperties *)ext; - props->subgroupSize = V3D_CHANNELS; - props->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT; - props->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT; - props->quadOperationsInAllStages = false; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: { + VkPhysicalDeviceMultiDrawPropertiesEXT *properties = + (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext; + properties->maxMultiDrawCount = 2048; break; } default: @@ -1553,25 +1557,14 @@ v3dv_queue_family_properties = { }; VKAPI_ATTR void VKAPI_CALL -v3dv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, - uint32_t *pCount, - VkQueueFamilyProperties *pQueueFamilyProperties) -{ - VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount); - - vk_outarray_append(&out, p) { - *p = v3dv_queue_family_properties; - } -} - -VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties2 *pQueueFamilyProperties) { - VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); + VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, + pQueueFamilyProperties, pQueueFamilyPropertyCount); - vk_outarray_append(&out, p) { + vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) { p->queueFamilyProperties = v3dv_queue_family_properties; vk_foreach_struct(s, p->pNext) { @@ -1592,11 +1585,28 @@ VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { + V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); + v3dv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties); vk_foreach_struct(ext, pMemoryProperties->pNext) { switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: { + VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = + (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext; + p->heapUsage[0] = device->heap_used; + p->heapBudget[0] = compute_memory_budget(device); + + /* The heapBudget and heapUsage values must be zero for array elements + * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount + */ + for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) { + p->heapBudget[i] = 0u; + p->heapUsage[i] = 0u; + } + break; + } default: v3dv_debug_ignored_stype(ext->sType); break; @@ -1618,11 +1628,6 @@ v3dv_GetInstanceProcAddr(VkInstance _instance, * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps. */ PUBLIC -VKAPI_ATTR PFN_vkVoidFunction -VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, - const char *pName); - -PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName) @@ -1630,23 +1635,6 @@ vk_icdGetInstanceProcAddr(VkInstance instance, return v3dv_GetInstanceProcAddr(instance, pName); } -/* With version 4+ of the loader interface the ICD should expose - * vk_icdGetPhysicalDeviceProcAddr() - */ -PUBLIC -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, - const char* pName); - -PFN_vkVoidFunction -vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, - const char* pName) -{ - V3DV_FROM_HANDLE(v3dv_instance, instance, _instance); - - return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); -} - VKAPI_ATTR VkResult VKAPI_CALL v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties) @@ -1671,30 +1659,66 @@ v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, return VK_SUCCESS; } - return vk_error((struct v3dv_instance*) physical_device->vk.instance, - VK_ERROR_LAYER_NOT_PRESENT); + return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT); +} + +static void +destroy_queue_syncs(struct v3dv_queue *queue) +{ + for (int i = 0; i < V3DV_QUEUE_COUNT; i++) { + if (queue->last_job_syncs.syncs[i]) { + drmSyncobjDestroy(queue->device->pdevice->render_fd, + queue->last_job_syncs.syncs[i]); + } + } } static VkResult -queue_init(struct v3dv_device *device, struct v3dv_queue *queue) +queue_init(struct v3dv_device *device, struct v3dv_queue *queue, + const VkDeviceQueueCreateInfo *create_info, + uint32_t index_in_family) { - vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE); + VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, + index_in_family); + if (result != VK_SUCCESS) + return result; + + result = vk_queue_enable_submit_thread(&queue->vk); + if (result != VK_SUCCESS) + goto fail_submit_thread; + queue->device = device; - queue->flags = 0; + queue->vk.driver_submit = v3dv_queue_driver_submit; + + for (int i = 0; i < V3DV_QUEUE_COUNT; i++) { + queue->last_job_syncs.first[i] = true; + int ret = drmSyncobjCreate(device->pdevice->render_fd, + DRM_SYNCOBJ_CREATE_SIGNALED, + &queue->last_job_syncs.syncs[i]); + if (ret) { + result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "syncobj create failed: %m"); + goto fail_last_job_syncs; + } + } + queue->noop_job = NULL; - list_inithead(&queue->submit_wait_list); - pthread_mutex_init(&queue->mutex, NULL); return VK_SUCCESS; + +fail_last_job_syncs: + destroy_queue_syncs(queue); +fail_submit_thread: + vk_queue_finish(&queue->vk); + return result; } static void queue_finish(struct v3dv_queue *queue) { - vk_object_base_finish(&queue->base); - assert(list_is_empty(&queue->submit_wait_list)); if (queue->noop_job) v3dv_job_destroy(queue->noop_job); - pthread_mutex_destroy(&queue->mutex); + destroy_queue_syncs(queue); + vk_queue_finish(&queue->vk); } static void @@ -1728,19 +1752,6 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - /* Check enabled features */ - if (pCreateInfo->pEnabledFeatures) { - VkPhysicalDeviceFeatures supported_features; - v3dv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); - VkBool32 *supported_feature = (VkBool32 *)&supported_features; - VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; - unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); - for (uint32_t i = 0; i < num_features; i++) { - if (enabled_feature[i] && !supported_feature[i]) - return vk_error(instance, VK_ERROR_FEATURE_NOT_PRESENT); - } - } - /* Check requested queues (we only expose one queue ) */ assert(pCreateInfo->queueCreateInfoCount == 1); for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { @@ -1759,56 +1770,46 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, struct vk_device_dispatch_table dispatch_table; vk_device_dispatch_table_from_entrypoints(&dispatch_table, &v3dv_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_device_entrypoints, false); result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { vk_free(&device->vk.alloc, device); - return vk_error(instance, result); + return vk_error(NULL, result); } +#if DETECT_OS_ANDROID + device->gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO); + assert(device->gralloc); +#endif + device->instance = instance; device->pdevice = physical_device; - if (pAllocator) - device->vk.alloc = *pAllocator; - else - device->vk.alloc = physical_device->vk.instance->alloc; + mtx_init(&device->query_mutex, mtx_plain); + cnd_init(&device->query_ended); + + device->vk.command_buffer_ops = &v3dv_cmd_buffer_ops; - pthread_mutex_init(&device->mutex, NULL); + vk_device_set_drm_fd(&device->vk, physical_device->render_fd); + vk_device_enable_threaded_submit(&device->vk); - result = queue_init(device, &device->queue); + result = queue_init(device, &device->queue, + pCreateInfo->pQueueCreateInfos, 0); if (result != VK_SUCCESS) goto fail; device->devinfo = physical_device->devinfo; - /* Vulkan 1.1 and VK_KHR_get_physical_device_properties2 added - * VkPhysicalDeviceFeatures2 which can be used in the pNext chain of - * vkDeviceCreateInfo, in which case it should be used instead of - * pEnabledFeatures. - */ - const VkPhysicalDeviceFeatures2 *features2 = - vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_FEATURES_2); - if (features2) { - memcpy(&device->features, &features2->features, - sizeof(device->features)); - } else if (pCreateInfo->pEnabledFeatures) { - memcpy(&device->features, pCreateInfo->pEnabledFeatures, - sizeof(device->features)); - } - - if (device->features.robustBufferAccess) + if (device->vk.enabled_features.robustBufferAccess) perf_debug("Device created with Robust Buffer Access enabled.\n"); - int ret = drmSyncobjCreate(physical_device->render_fd, - DRM_SYNCOBJ_CREATE_SIGNALED, - &device->last_job_sync); - if (ret) { - result = VK_ERROR_INITIALIZATION_FAILED; - goto fail; - } + if (device->vk.enabled_features.robustImageAccess) + perf_debug("Device created with Robust Image Access enabled.\n"); -#ifdef DEBUG + +#if MESA_DEBUG v3dv_X(device, device_check_prepacked_sizes)(); #endif init_device_meta(device); @@ -1816,14 +1817,42 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0, device->instance->default_pipeline_cache_enabled); device->default_attribute_float = - v3dv_pipeline_create_default_attribute_values(device, NULL); + v3dv_X(device, create_default_attribute_values)(device, NULL); + + device->device_address_mem_ctx = ralloc_context(NULL); + util_dynarray_init(&device->device_address_bo_list, + device->device_address_mem_ctx); + + mtx_init(&device->events.lock, mtx_plain); + result = v3dv_event_allocate_resources(device); + if (result != VK_SUCCESS) + goto fail; + + if (list_is_empty(&device->events.free_list)) { + result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + result = v3dv_query_allocate_resources(device); + if (result != VK_SUCCESS) + goto fail; *pDevice = v3dv_device_to_handle(device); return VK_SUCCESS; fail: + cnd_destroy(&device->query_ended); + mtx_destroy(&device->query_mutex); + queue_finish(&device->queue); + destroy_device_meta(device); + v3dv_pipeline_cache_finish(&device->default_pipeline_cache); + v3dv_event_free_resources(device); + v3dv_query_free_resources(device); vk_device_finish(&device->vk); +#if DETECT_OS_ANDROID + u_gralloc_destroy(&device->gralloc); +#endif vk_free(&device->vk.alloc, device); return result; @@ -1835,10 +1864,14 @@ v3dv_DestroyDevice(VkDevice _device, { V3DV_FROM_HANDLE(v3dv_device, device, _device); - v3dv_DeviceWaitIdle(_device); + device->vk.dispatch_table.DeviceWaitIdle(_device); queue_finish(&device->queue); - pthread_mutex_destroy(&device->mutex); - drmSyncobjDestroy(device->pdevice->render_fd, device->last_job_sync); + + v3dv_event_free_resources(device); + mtx_destroy(&device->events.lock); + + v3dv_query_free_resources(device); + destroy_device_meta(device); v3dv_pipeline_cache_finish(&device->default_pipeline_cache); @@ -1847,36 +1880,23 @@ v3dv_DestroyDevice(VkDevice _device, device->default_attribute_float = NULL; } + ralloc_free(device->device_address_mem_ctx); + /* Bo cache should be removed the last, as any other object could be * freeing their private bos */ v3dv_bo_cache_destroy(device); + cnd_destroy(&device->query_ended); + mtx_destroy(&device->query_mutex); + vk_device_finish(&device->vk); +#if DETECT_OS_ANDROID + u_gralloc_destroy(&device->gralloc); +#endif vk_free2(&device->vk.alloc, pAllocator, device); } -VKAPI_ATTR void VKAPI_CALL -v3dv_GetDeviceQueue(VkDevice _device, - uint32_t queueFamilyIndex, - uint32_t queueIndex, - VkQueue *pQueue) -{ - V3DV_FROM_HANDLE(v3dv_device, device, _device); - - assert(queueIndex == 0); - assert(queueFamilyIndex == 0); - - *pQueue = v3dv_queue_to_handle(&device->queue); -} - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_DeviceWaitIdle(VkDevice _device) -{ - V3DV_FROM_HANDLE(v3dv_device, device, _device); - return v3dv_QueueWaitIdle(v3dv_queue_to_handle(&device->queue)); -} - static VkResult device_alloc(struct v3dv_device *device, struct v3dv_device_memory *mem, @@ -1914,15 +1934,12 @@ device_free(struct v3dv_device *device, struct v3dv_device_memory *mem) * display device to free the allocated dumb BO. */ if (mem->is_for_wsi) { - assert(mem->has_bo_ownership); - device_free_wsi_dumb(device->instance->physicalDevice.display_fd, - mem->bo->dumb_handle); + device_free_wsi_dumb(device->pdevice->display_fd, mem->bo->dumb_handle); } - if (mem->has_bo_ownership) - v3dv_bo_free(device, mem->bo); - else if (mem->bo) - vk_free(&device->vk.alloc, mem->bo); + p_atomic_add(&device->pdevice->heap_used, -((int64_t)mem->bo->size)); + + v3dv_bo_free(device, mem->bo); } static void @@ -1967,21 +1984,12 @@ device_import_bo(struct v3dv_device *device, int fd, uint64_t size, struct v3dv_bo **bo) { - VkResult result; - - *bo = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(struct v3dv_bo), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*bo == NULL) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; - } + *bo = NULL; off_t real_size = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); - if (real_size < 0 || (uint64_t) real_size < size) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE; - goto fail; - } + if (real_size < 0 || (uint64_t) real_size < size) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; int render_fd = device->pdevice->render_fd; assert(render_fd >= 0); @@ -1989,31 +1997,26 @@ device_import_bo(struct v3dv_device *device, int ret; uint32_t handle; ret = drmPrimeFDToHandle(render_fd, fd, &handle); - if (ret) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE; - goto fail; - } + if (ret) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; struct drm_v3d_get_bo_offset get_offset = { .handle = handle, }; ret = v3dv_ioctl(render_fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get_offset); - if (ret) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE; - goto fail; - } + if (ret) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; assert(get_offset.offset != 0); - v3dv_bo_init(*bo, handle, size, get_offset.offset, "import", false); + *bo = v3dv_device_lookup_bo(device->pdevice, handle); + assert(*bo); - return VK_SUCCESS; + if ((*bo)->refcnt == 0) + v3dv_bo_init_import(*bo, handle, size, get_offset.offset, false); + else + p_atomic_inc(&(*bo)->refcnt); -fail: - if (*bo) { - vk_free2(&device->vk.alloc, pAllocator, *bo); - *bo = NULL; - } - return result; + return VK_SUCCESS; } static VkResult @@ -2030,19 +2033,8 @@ device_alloc_for_wsi(struct v3dv_device *device, #if using_v3d_simulator return device_alloc(device, mem, size); #else - /* If we are allocating for WSI we should have a swapchain and thus, - * we should've initialized the display device. However, Zink doesn't - * use swapchains, so in that case we can get here without acquiring the - * display device and we need to do it now. - */ VkResult result; - struct v3dv_instance *instance = device->instance; - struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; - if (unlikely(pdevice->display_fd < 0)) { - result = v3dv_physical_device_acquire_display(instance, pdevice, NULL); - if (result != VK_SUCCESS) - return result; - } + struct v3dv_physical_device *pdevice = device->pdevice; assert(pdevice->display_fd != -1); mem->is_for_wsi = true; @@ -2082,6 +2074,53 @@ fail_create: #endif } +static void +device_add_device_address_bo(struct v3dv_device *device, + struct v3dv_bo *bo) +{ + util_dynarray_append(&device->device_address_bo_list, + struct v3dv_bo *, + bo); +} + +static void +device_remove_device_address_bo(struct v3dv_device *device, + struct v3dv_bo *bo) +{ + util_dynarray_delete_unordered(&device->device_address_bo_list, + struct v3dv_bo *, + bo); +} + +static void +free_memory(struct v3dv_device *device, + struct v3dv_device_memory *mem, + const VkAllocationCallbacks *pAllocator) +{ + if (mem == NULL) + return; + + if (mem->bo->map) + device_unmap(device, mem); + + if (mem->is_for_device_address) + device_remove_device_address_bo(device, mem->bo); + + device_free(device, mem); + + vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk); +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_FreeMemory(VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks *pAllocator) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_device_memory, mem, _mem); + free_memory(device, mem, pAllocator); +} + VKAPI_ATTR VkResult VKAPI_CALL v3dv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo, @@ -2090,25 +2129,34 @@ v3dv_AllocateMemory(VkDevice _device, { V3DV_FROM_HANDLE(v3dv_device, device, _device); struct v3dv_device_memory *mem; - struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; + struct v3dv_physical_device *pdevice = device->pdevice; assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); - /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */ - assert(pAllocateInfo->allocationSize > 0); + /* We always allocate device memory in multiples of a page, so round up + * requested size to that. + */ + const VkDeviceSize alloc_size = align64(pAllocateInfo->allocationSize, 4096); + + if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE)) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + uint64_t heap_used = p_atomic_read(&pdevice->heap_used); + if (unlikely(heap_used + alloc_size > pdevice->memory.memoryHeaps[0].size)) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - mem = vk_object_zalloc(&device->vk, pAllocator, sizeof(*mem), - VK_OBJECT_TYPE_DEVICE_MEMORY); + mem = vk_device_memory_create(&device->vk, pAllocateInfo, + pAllocator, sizeof(*mem)); if (mem == NULL) return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.memoryTypeCount); mem->type = &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex]; - mem->has_bo_ownership = true; mem->is_for_wsi = false; const struct wsi_memory_allocate_info *wsi_info = NULL; const VkImportMemoryFdInfoKHR *fd_info = NULL; + const VkMemoryAllocateFlagsInfo *flags_info = NULL; vk_foreach_struct_const(ext, pAllocateInfo->pNext) { switch ((unsigned)ext->sType) { case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA: @@ -2118,16 +2166,14 @@ v3dv_AllocateMemory(VkDevice _device, fd_info = (void *)ext; break; case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: - /* We don't support VK_KHR_buffer_device_address or multiple - * devices per device group, so we can ignore this. - */ + flags_info = (void *)ext; break; - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR: + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO: /* We don't have particular optimizations associated with memory * allocations that won't be suballocated to multiple resources. */ break; - case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR: + case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO: /* The mask of handle types specified here must be supported * according to VkExternalImageFormatProperties, so it must be * fd or dmabuf, which don't have special requirements for us. @@ -2139,57 +2185,58 @@ v3dv_AllocateMemory(VkDevice _device, } } - VkResult result = VK_SUCCESS; - - /* We always allocate device memory in multiples of a page, so round up - * requested size to that. - */ - VkDeviceSize alloc_size = ALIGN(pAllocateInfo->allocationSize, 4096); + VkResult result; - if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE)) { - result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + if (wsi_info) { + result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size); + } else if (fd_info && fd_info->handleType) { + assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + result = device_import_bo(device, pAllocator, + fd_info->fd, alloc_size, &mem->bo); + if (result == VK_SUCCESS) + close(fd_info->fd); + } else if (mem->vk.ahardware_buffer) { +#if DETECT_OS_ANDROID + const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer); + assert(handle->numFds > 0); + size_t size = lseek(handle->data[0], 0, SEEK_END); + result = device_import_bo(device, pAllocator, + handle->data[0], size, &mem->bo); +#else + result = VK_ERROR_FEATURE_NOT_PRESENT; +#endif } else { - if (wsi_info) { - result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size); - } else if (fd_info && fd_info->handleType) { - assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || - fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); - result = device_import_bo(device, pAllocator, - fd_info->fd, alloc_size, &mem->bo); - mem->has_bo_ownership = false; - if (result == VK_SUCCESS) - close(fd_info->fd); - } else { - result = device_alloc(device, mem, alloc_size); - } + result = device_alloc(device, mem, alloc_size); } if (result != VK_SUCCESS) { - vk_object_free(&device->vk, pAllocator, mem); - return vk_error(device->instance, result); + vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk); + return vk_error(device, result); } - *pMem = v3dv_device_memory_to_handle(mem); - return result; -} - -VKAPI_ATTR void VKAPI_CALL -v3dv_FreeMemory(VkDevice _device, - VkDeviceMemory _mem, - const VkAllocationCallbacks *pAllocator) -{ - V3DV_FROM_HANDLE(v3dv_device, device, _device); - V3DV_FROM_HANDLE(v3dv_device_memory, mem, _mem); - - if (mem == NULL) - return; - - if (mem->bo->map) - v3dv_UnmapMemory(_device, _mem); + heap_used = p_atomic_add_return(&pdevice->heap_used, mem->bo->size); + if (heap_used > pdevice->memory.memoryHeaps[0].size) { + free_memory(device, mem, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } - device_free(device, mem); + /* If this memory can be used via VK_KHR_buffer_device_address then we + * will need to manually add the BO to any job submit that makes use of + * VK_KHR_buffer_device_address, since such jobs may produce buffer + * load/store operations that may access any buffer memory allocated with + * this flag and we don't have any means to tell which buffers will be + * accessed through this mechanism since they don't even have to be bound + * through descriptor state. + */ + if (flags_info && + (flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)) { + mem->is_for_device_address = true; + device_add_device_address_bo(device, mem->bo); + } - vk_object_free(&device->vk, pAllocator, mem); + *pMem = v3dv_device_memory_to_handle(mem); + return result; } VKAPI_ATTR VkResult VKAPI_CALL @@ -2217,7 +2264,7 @@ v3dv_MapMemory(VkDevice _device, */ VkResult result = device_map(device, mem); if (result != VK_SUCCESS) - return vk_error(device->instance, result); + return vk_error(device, result); *ppData = ((uint8_t *) mem->bo->map) + offset; return VK_SUCCESS; @@ -2252,19 +2299,30 @@ v3dv_InvalidateMappedMemoryRanges(VkDevice _device, return VK_SUCCESS; } -VKAPI_ATTR void VKAPI_CALL -v3dv_GetImageMemoryRequirements2(VkDevice device, - const VkImageMemoryRequirementsInfo2 *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) +static void +get_image_memory_requirements(struct v3dv_image *image, + VkImageAspectFlagBits planeAspect, + VkMemoryRequirements2 *pMemoryRequirements) { - V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image); - pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { .memoryTypeBits = 0x1, - .alignment = image->alignment, - .size = image->size + .alignment = image->planes[0].alignment, + .size = image->non_disjoint_size }; + if (planeAspect != VK_IMAGE_ASPECT_NONE) { + assert(image->format->plane_count > 1); + /* Disjoint images should have a 0 non_disjoint_size */ + assert(!pMemoryRequirements->memoryRequirements.size); + + uint8_t plane = v3dv_image_aspect_to_plane(image, planeAspect); + + VkMemoryRequirements *mem_reqs = + &pMemoryRequirements->memoryRequirements; + mem_reqs->alignment = image->planes[plane].alignment; + mem_reqs->size = image->planes[plane].size; + } + vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { @@ -2281,6 +2339,65 @@ v3dv_GetImageMemoryRequirements2(VkDevice device, } } +VKAPI_ATTR void VKAPI_CALL +v3dv_GetImageMemoryRequirements2(VkDevice device, + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image); + + VkImageAspectFlagBits planeAspect = VK_IMAGE_ASPECT_NONE; + vk_foreach_struct_const(ext, pInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: { + VkImagePlaneMemoryRequirementsInfo *req = + (VkImagePlaneMemoryRequirementsInfo *) ext; + planeAspect = req->planeAspect; + break; + } + default: + v3dv_debug_ignored_stype(ext->sType); + break; + } + } + + get_image_memory_requirements(image, planeAspect, pMemoryRequirements); +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_GetDeviceImageMemoryRequirements( + VkDevice _device, + const VkDeviceImageMemoryRequirements *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + + struct v3dv_image image = { 0 }; + vk_image_init(&device->vk, &image.vk, pInfo->pCreateInfo); + + ASSERTED VkResult result = + v3dv_image_init(device, pInfo->pCreateInfo, NULL, &image); + assert(result == VK_SUCCESS); + + /* From VkDeviceImageMemoryRequirements spec: + * + * " planeAspect is a VkImageAspectFlagBits value specifying the aspect + * corresponding to the image plane to query. This parameter is ignored + * unless pCreateInfo::tiling is + * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, or pCreateInfo::flags has + * VK_IMAGE_CREATE_DISJOINT_BIT set" + * + * We need to explicitly ignore that flag, or following asserts could be + * triggered. + */ + VkImageAspectFlagBits planeAspect = + pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT || + pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT ? + pInfo->planeAspect : 0; + + get_image_memory_requirements(&image, planeAspect, pMemoryRequirements); +} + static void bind_image_memory(const VkBindImageMemoryInfo *info) { @@ -2293,11 +2410,43 @@ bind_image_memory(const VkBindImageMemoryInfo *info) * the VkMemoryRequirements structure returned from a call to * vkGetImageMemoryRequirements with image" */ - assert(info->memoryOffset % image->alignment == 0); assert(info->memoryOffset < mem->bo->size); - image->mem = mem; - image->mem_offset = info->memoryOffset; + uint64_t offset = info->memoryOffset; + if (image->non_disjoint_size) { + /* We only check for plane 0 as it is the only one that actually starts + * at that offset + */ + assert(offset % image->planes[0].alignment == 0); + for (uint8_t plane = 0; plane < image->plane_count; plane++) { + image->planes[plane].mem = mem; + image->planes[plane].mem_offset = offset; + } + } else { + const VkBindImagePlaneMemoryInfo *plane_mem_info = + vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO); + assert(plane_mem_info); + + /* + * From VkBindImagePlaneMemoryInfo spec: + * + * "If the image’s tiling is VK_IMAGE_TILING_LINEAR or + * VK_IMAGE_TILING_OPTIMAL, then planeAspect must be a single valid + * format plane for the image" + * + * <skip> + * + * "If the image’s tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, + * then planeAspect must be a single valid memory plane for the + * image" + * + * So planeAspect should only refer to one plane. + */ + uint8_t plane = v3dv_plane_from_aspect(plane_mem_info->planeAspect); + assert(offset % image->planes[plane].alignment == 0); + image->planes[plane].mem = mem; + image->planes[plane].mem_offset = offset; + } } VKAPI_ATTR VkResult VKAPI_CALL @@ -2306,21 +2455,59 @@ v3dv_BindImageMemory2(VkDevice _device, const VkBindImageMemoryInfo *pBindInfos) { for (uint32_t i = 0; i < bindInfoCount; i++) { +#if DETECT_OS_ANDROID + V3DV_FROM_HANDLE(v3dv_device_memory, mem, pBindInfos[i].memory); + V3DV_FROM_HANDLE(v3dv_device, device, _device); + if (mem != NULL && mem->vk.ahardware_buffer) { + AHardwareBuffer_Desc description; + const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer); + + V3DV_FROM_HANDLE(v3dv_image, image, pBindInfos[i].image); + AHardwareBuffer_describe(mem->vk.ahardware_buffer, &description); + + struct u_gralloc_buffer_handle gr_handle = { + .handle = handle, + .pixel_stride = description.stride, + .hal_format = description.format, + }; + + VkResult result = v3dv_gralloc_to_drm_explicit_layout( + device->gralloc, + &gr_handle, + image->android_explicit_layout, + image->android_plane_layouts, + V3DV_MAX_PLANE_COUNT); + if (result != VK_SUCCESS) + return result; + + result = v3dv_update_image_layout( + device, image, image->android_explicit_layout->drmFormatModifier, + /* disjoint = */ false, image->android_explicit_layout); + if (result != VK_SUCCESS) + return result; + } +#endif + const VkBindImageMemorySwapchainInfoKHR *swapchain_info = vk_find_struct_const(pBindInfos->pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR); if (swapchain_info && swapchain_info->swapchain) { +#if !DETECT_OS_ANDROID struct v3dv_image *swapchain_image = v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain, swapchain_info->imageIndex); + /* Making the assumption that swapchain images are a single plane */ + assert(swapchain_image->plane_count == 1); VkBindImageMemoryInfo swapchain_bind = { .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, .image = pBindInfos[i].image, - .memory = v3dv_device_memory_to_handle(swapchain_image->mem), - .memoryOffset = swapchain_image->mem_offset, + .memory = v3dv_device_memory_to_handle(swapchain_image->planes[0].mem), + .memoryOffset = swapchain_image->planes[0].mem_offset, }; bind_image_memory(&swapchain_bind); - } else { +#endif + } else + { bind_image_memory(&pBindInfos[i]); } } @@ -2328,19 +2515,39 @@ v3dv_BindImageMemory2(VkDevice _device, return VK_SUCCESS; } -VKAPI_ATTR void VKAPI_CALL -v3dv_GetBufferMemoryRequirements2(VkDevice device, - const VkBufferMemoryRequirementsInfo2 *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) +void +v3dv_buffer_init(struct v3dv_device *device, + const VkBufferCreateInfo *pCreateInfo, + struct v3dv_buffer *buffer, + uint32_t alignment) { - V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer); + buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + buffer->alignment = alignment; +} +static void +get_buffer_memory_requirements(struct v3dv_buffer *buffer, + VkMemoryRequirements2 *pMemoryRequirements) +{ pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { .memoryTypeBits = 0x1, .alignment = buffer->alignment, .size = align64(buffer->size, buffer->alignment), }; + /* UBO and SSBO may be read using ldunifa, which prefetches the next + * 4 bytes after a read. If the buffer's size is exactly a multiple + * of a page size and the shader reads the last 4 bytes with ldunifa + * the prefetching would read out of bounds and cause an MMU error, + * so we allocate extra space to avoid kernel error spamming. + */ + bool can_ldunifa = buffer->usage & + (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); + if (can_ldunifa && (buffer->size % 4096 == 0)) + pMemoryRequirements->memoryRequirements.size += buffer->alignment; + vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { @@ -2357,8 +2564,30 @@ v3dv_GetBufferMemoryRequirements2(VkDevice device, } } -static void -bind_buffer_memory(const VkBindBufferMemoryInfo *info) +VKAPI_ATTR void VKAPI_CALL +v3dv_GetBufferMemoryRequirements2(VkDevice device, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer); + get_buffer_memory_requirements(buffer, pMemoryRequirements); +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_GetDeviceBufferMemoryRequirements( + VkDevice _device, + const VkDeviceBufferMemoryRequirements *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + + struct v3dv_buffer buffer = { 0 }; + v3dv_buffer_init(device, pInfo->pCreateInfo, &buffer, V3D_NON_COHERENT_ATOM_SIZE); + get_buffer_memory_requirements(&buffer, pMemoryRequirements); +} + +void +v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info) { V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->buffer); V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory); @@ -2383,7 +2612,7 @@ v3dv_BindBufferMemory2(VkDevice device, const VkBindBufferMemoryInfo *pBindInfos) { for (uint32_t i = 0; i < bindInfoCount; i++) - bind_buffer_memory(&pBindInfos[i]); + v3dv_buffer_bind_memory(&pBindInfos[i]); return VK_SUCCESS; } @@ -2406,16 +2635,16 @@ v3dv_CreateBuffer(VkDevice _device, buffer = vk_object_zalloc(&device->vk, pAllocator, sizeof(*buffer), VK_OBJECT_TYPE_BUFFER); if (buffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - buffer->size = pCreateInfo->size; - buffer->usage = pCreateInfo->usage; - buffer->alignment = 256; /* nonCoherentAtomSize */ + v3dv_buffer_init(device, pCreateInfo, buffer, V3D_NON_COHERENT_ATOM_SIZE); /* Limit allocations to 32-bit */ const VkDeviceSize aligned_size = align64(buffer->size, buffer->alignment); - if (aligned_size > UINT32_MAX || aligned_size < buffer->size) + if (aligned_size > UINT32_MAX || aligned_size < buffer->size) { + vk_free(&device->vk.alloc, buffer); return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } *pBuffer = v3dv_buffer_to_handle(buffer); @@ -2452,20 +2681,32 @@ v3dv_CreateFramebuffer(VkDevice _device, framebuffer = vk_object_zalloc(&device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER); if (framebuffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; framebuffer->has_edge_padding = true; + const VkFramebufferAttachmentsCreateInfo *imageless = + vk_find_struct_const(pCreateInfo->pNext, + FRAMEBUFFER_ATTACHMENTS_CREATE_INFO); + framebuffer->attachment_count = pCreateInfo->attachmentCount; framebuffer->color_attachment_count = 0; - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - framebuffer->attachments[i] = - v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]); - if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) - framebuffer->color_attachment_count++; + for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { + if (!imageless) { + framebuffer->attachments[i] = + v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]); + if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) + framebuffer->color_attachment_count++; + } else { + assert(i < imageless->attachmentImageInfoCount); + if (imageless->pAttachmentImageInfos[i].usage & + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + framebuffer->color_attachment_count++; + } + } } *pFramebuffer = v3dv_framebuffer_to_handle(framebuffer); @@ -2487,6 +2728,105 @@ v3dv_DestroyFramebuffer(VkDevice _device, vk_object_free(&device->vk, pAllocator, fb); } +void +v3dv_setup_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer, + const VkRenderingInfoKHR *info) +{ + struct v3dv_device *device = cmd_buffer->device; + + /* Max framebuffer attachments is max_color_RTs + D/S multiplied by two for + * MSAA resolves. + */ + const uint32_t max_attachments = + 2 * (V3D_MAX_RENDER_TARGETS(device->devinfo.ver) + 1); + const uint32_t attachments_alloc_size = + sizeof(struct v3dv_image_view *) * max_attachments; + + /* Only allocate the dynamic framebuffer once and will stay valid + * for the duration of the command buffer. + */ + struct v3dv_framebuffer *fb = cmd_buffer->state.dynamic_framebuffer; + if (!fb) { + uint32_t alloc_size = sizeof(struct v3dv_framebuffer) + + attachments_alloc_size; + fb = vk_object_zalloc(&cmd_buffer->device->vk, NULL, alloc_size, + VK_OBJECT_TYPE_FRAMEBUFFER); + if (fb == NULL) { + v3dv_flag_oom(cmd_buffer, NULL); + return; + } + cmd_buffer->state.dynamic_framebuffer = fb; + } else { + memset(fb->attachments, 0, attachments_alloc_size); + } + + fb->width = info->renderArea.offset.x + info->renderArea.extent.width; + fb->height = info->renderArea.offset.y + info->renderArea.extent.height; + + /* From the Vulkan spec for VkFramebufferCreateInfo: + * + * "If the render pass uses multiview, then layers must be one (...)" + */ + fb->layers = info->viewMask == 0 ? info->layerCount : 1; + + struct v3dv_render_pass *pass = &cmd_buffer->state.dynamic_pass; + assert(pass->subpass_count == 1 && pass->subpasses); + assert(pass->subpasses[0].color_count == info->colorAttachmentCount); + fb->color_attachment_count = info->colorAttachmentCount; + + uint32_t a = 0; + for (int i = 0; i < info->colorAttachmentCount; i++) { + if (info->pColorAttachments[i].imageView == VK_NULL_HANDLE) + continue; + fb->attachments[a++] = + v3dv_image_view_from_handle(info->pColorAttachments[i].imageView); + if (info->pColorAttachments[i].resolveMode != VK_RESOLVE_MODE_NONE) { + fb->attachments[a++] = + v3dv_image_view_from_handle(info->pColorAttachments[i].resolveImageView); + } + } + + if ((info->pDepthAttachment && info->pDepthAttachment->imageView) || + (info->pStencilAttachment && info->pStencilAttachment->imageView)) { + const struct VkRenderingAttachmentInfo *common_ds_info = + (info->pDepthAttachment && + info->pDepthAttachment->imageView != VK_NULL_HANDLE) ? + info->pDepthAttachment : + info->pStencilAttachment; + + fb->attachments[a++] = + v3dv_image_view_from_handle(common_ds_info->imageView); + + if (common_ds_info->resolveMode != VK_RESOLVE_MODE_NONE) { + fb->attachments[a++] = + v3dv_image_view_from_handle(common_ds_info->resolveImageView); + } + } + + assert(a == pass->attachment_count); + fb->attachment_count = a; + + /* Dynamic rendering doesn't provide the size of the underlying framebuffer + * so we estimate its size from the render area. This means it is possible + * the underlying attachments are larger and thus we cannot assume we have + * edge padding. + */ + fb->has_edge_padding = false; +} + +void +v3dv_destroy_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer) +{ + if (!cmd_buffer->state.dynamic_framebuffer) + return; + + VkDevice vk_device = v3dv_device_to_handle(cmd_buffer->device); + VkFramebuffer vk_dynamic_fb = + v3dv_framebuffer_to_handle(cmd_buffer->state.dynamic_framebuffer); + v3dv_DestroyFramebuffer(vk_device, vk_dynamic_fb, NULL); + cmd_buffer->state.dynamic_framebuffer = NULL; +} + VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, @@ -2494,7 +2834,7 @@ v3dv_GetMemoryFdPropertiesKHR(VkDevice _device, VkMemoryFdPropertiesKHR *pMemoryFdProperties) { V3DV_FROM_HANDLE(v3dv_device, device, _device); - struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; + struct v3dv_physical_device *pdevice = device->pdevice; switch (handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: @@ -2502,7 +2842,7 @@ v3dv_GetMemoryFdPropertiesKHR(VkDevice _device, (1 << pdevice->memory.memoryTypeCount) - 1; return VK_SUCCESS; default: - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } @@ -2523,7 +2863,7 @@ v3dv_GetMemoryFdKHR(VkDevice _device, mem->bo->handle, DRM_CLOEXEC, &fd); if (ret) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); *pFd = fd; @@ -2531,63 +2871,6 @@ v3dv_GetMemoryFdKHR(VkDevice _device, } VKAPI_ATTR VkResult VKAPI_CALL -v3dv_CreateEvent(VkDevice _device, - const VkEventCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkEvent *pEvent) -{ - V3DV_FROM_HANDLE(v3dv_device, device, _device); - struct v3dv_event *event = - vk_object_zalloc(&device->vk, pAllocator, sizeof(*event), - VK_OBJECT_TYPE_EVENT); - if (!event) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Events are created in the unsignaled state */ - event->state = false; - *pEvent = v3dv_event_to_handle(event); - - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -v3dv_DestroyEvent(VkDevice _device, - VkEvent _event, - const VkAllocationCallbacks *pAllocator) -{ - V3DV_FROM_HANDLE(v3dv_device, device, _device); - V3DV_FROM_HANDLE(v3dv_event, event, _event); - - if (!event) - return; - - vk_object_free(&device->vk, pAllocator, event); -} - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_GetEventStatus(VkDevice _device, VkEvent _event) -{ - V3DV_FROM_HANDLE(v3dv_event, event, _event); - return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET; -} - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_SetEvent(VkDevice _device, VkEvent _event) -{ - V3DV_FROM_HANDLE(v3dv_event, event, _event); - p_atomic_set(&event->state, 1); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_ResetEvent(VkDevice _device, VkEvent _event) -{ - V3DV_FROM_HANDLE(v3dv_event, event, _event); - p_atomic_set(&event->state, 0); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -2601,7 +2884,9 @@ v3dv_CreateSampler(VkDevice _device, sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler), VK_OBJECT_TYPE_SAMPLER); if (!sampler) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + sampler->plane_count = 1; sampler->compare_enable = pCreateInfo->compareEnable; sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates; @@ -2610,7 +2895,21 @@ v3dv_CreateSampler(VkDevice _device, vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); - v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info); + const VkSamplerYcbcrConversionInfo *ycbcr_conv_info = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO); + + const struct vk_format_ycbcr_info *ycbcr_info = NULL; + + if (ycbcr_conv_info) { + VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, ycbcr_conv_info->conversion); + ycbcr_info = vk_format_get_ycbcr_info(conversion->state.format); + if (ycbcr_info) { + sampler->plane_count = ycbcr_info->n_planes; + sampler->conversion = conversion; + } + } + + v3dv_X(device, pack_sampler_state)(device, sampler, pCreateInfo, bc_info); *pSampler = v3dv_sampler_to_handle(sampler); @@ -2659,49 +2958,65 @@ v3dv_GetImageSparseMemoryRequirements2( *pSparseMemoryRequirementCount = 0; } -/* vk_icd.h does not declare this function, so we declare it here to - * suppress Wmissing-prototypes. - */ -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion); +VKAPI_ATTR void VKAPI_CALL +v3dv_GetDeviceImageSparseMemoryRequirements( + VkDevice device, + const VkDeviceImageMemoryRequirements *pInfo, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) +{ + *pSparseMemoryRequirementCount = 0; +} -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion) +VkDeviceAddress +v3dv_GetBufferDeviceAddress(VkDevice device, + const VkBufferDeviceAddressInfo *pInfo) { - /* For the full details on loader interface versioning, see - * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. - * What follows is a condensed summary, to help you navigate the large and - * confusing official doc. - * - * - Loader interface v0 is incompatible with later versions. We don't - * support it. - * - * - In loader interface v1: - * - The first ICD entrypoint called by the loader is - * vk_icdGetInstanceProcAddr(). The ICD must statically expose this - * entrypoint. - * - The ICD must statically expose no other Vulkan symbol unless it is - * linked with -Bsymbolic. - * - Each dispatchable Vulkan handle created by the ICD must be - * a pointer to a struct whose first member is VK_LOADER_DATA. The - * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. - * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and - * vkDestroySurfaceKHR(). The ICD must be capable of working with - * such loader-managed surfaces. - * - * - Loader interface v2 differs from v1 in: - * - The first ICD entrypoint called by the loader is - * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must - * statically expose this entrypoint. - * - * - Loader interface v3 differs from v2 in: - * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), - * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, - * because the loader no longer does so. - * - * - Loader interface v4 differs from v3 in: - * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr(). - */ - *pSupportedVersion = MIN2(*pSupportedVersion, 3u); - return VK_SUCCESS; + V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer); + return buffer->mem_offset + buffer->mem->bo->offset; +} + +uint64_t +v3dv_GetBufferOpaqueCaptureAddress(VkDevice device, + const VkBufferDeviceAddressInfo *pInfo) +{ + /* Not implemented */ + return 0; +} + +uint64_t +v3dv_GetDeviceMemoryOpaqueCaptureAddress( + VkDevice device, + const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo) +{ + /* Not implemented */ + return 0; +} + +VkResult +v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device, + nir_shader *nir, + VkPipelineLayout pipeline_layout, + VkPipeline *pipeline) +{ + struct vk_shader_module cs_m = vk_shader_module_from_nir(nir); + + VkPipelineShaderStageCreateInfo set_event_cs_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = vk_shader_module_to_handle(&cs_m), + .pName = "main", + }; + + VkComputePipelineCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = set_event_cs_stage, + .layout = pipeline_layout, + }; + + VkResult result = + v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE, + 1, &info, &device->vk.alloc, pipeline); + + return result; } |