diff options
Diffstat (limited to 'src/gallium/drivers/zink')
116 files changed, 40080 insertions, 12787 deletions
diff --git a/src/gallium/drivers/zink/VP_ZINK_requirements.json b/src/gallium/drivers/zink/VP_ZINK_requirements.json new file mode 100644 index 00000000000..6f6860048e2 --- /dev/null +++ b/src/gallium/drivers/zink/VP_ZINK_requirements.json @@ -0,0 +1,947 @@ +{ + "$schema": "https://schema.khronos.org/vulkan/profiles-0.8.2-271.json", + "capabilities": { + "vulkan10requirements": { + "features": { + "VkPhysicalDeviceFeatures": { + "robustBufferAccess": true + } + } + }, + "gl21_baseline": { + "extensions": { + "VK_KHR_maintenance1": 1, + "VK_KHR_create_renderpass2": 1, + "VK_KHR_imageless_framebuffer": 1, + "VK_KHR_timeline_semaphore": 1, + "VK_EXT_custom_border_color": 1, + "VK_EXT_line_rasterization": 1, + "VK_KHR_swapchain_mutable_format": 1, + "VK_KHR_incremental_present": 1, + "VK_EXT_border_color_swizzle": 1, + "VK_KHR_descriptor_update_template": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "logicOp": true, + "fillModeNonSolid": true, + "alphaToOne": true, + "shaderClipDistance": true + }, + "VkPhysicalDeviceCustomBorderColorFeaturesEXT": { + "customBorderColorWithoutFormat": true + }, + "VkPhysicalDeviceBorderColorSwizzleFeaturesEXT": { + "borderColorSwizzleFromImage": true + }, + "VkPhysicalDeviceLineRasterizationFeaturesEXT": { + "rectangularLines": true + } + } + }, + "gl21_baseline_vk10": { + "extensions": { + "VK_EXT_scalar_block_layout": 1 + }, + "features": { + "VkPhysicalDeviceScalarBlockLayoutFeatures": { + "scalarBlockLayout": true + }, + "VkPhysicalDeviceTimelineSemaphoreFeatures": { + "timelineSemaphore": true + }, + "VkPhysicalDeviceImagelessFramebufferFeatures": { + "imagelessFramebuffer": true + } + } + }, + "gl21_baseline_vk12": { + "features": { + "VkPhysicalDeviceVulkan12Features": { + "scalarBlockLayout": true, + "drawIndirectCount": true, + "imagelessFramebuffer": true, + "timelineSemaphore": true + } + } + }, + "gl21_baseline_line_bresenham": { + "features": { + "VkPhysicalDeviceLineRasterizationFeaturesEXT": { + 
"bresenhamLines": true + } + } + }, + "gl21_baseline_line_non_strict": { + "properties": { + "VkPhysicalDeviceProperties": { + "limits": { + "strictLines": false + } + } + } + }, + "gl21_optional": { + "extensions": { + "VK_KHR_external_memory": 1 + } + }, + "gl30_baseline": { + "extensions": { + "VK_EXT_transform_feedback": 1, + "VK_EXT_conditional_rendering": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "independentBlend": true + }, + "VkPhysicalDeviceTransformFeedbackFeaturesEXT": { + "transformFeedback": true + }, + "VkPhysicalDeviceConditionalRenderingFeaturesEXT": { + "conditionalRendering": true + } + }, + "formats": { + "VK_FORMAT_D32_SFLOAT_S8_UINT": { + "VkFormatProperties": { + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT" + ] + } + } + } + }, + "gl31_baseline": { + "properties": { + "VkPhysicalDeviceProperties": { + "limits": { + "maxPerStageDescriptorSamplers": 16 + } + } + } + }, + "gl32_baseline": { + "extensions": { + "VK_EXT_depth_clip_enable": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "depthClamp": true, + "geometryShader": true, + "shaderTessellationAndGeometryPointSize": true + }, + "VkPhysicalDeviceDepthClipEnableFeaturesEXT": { + "depthClipEnable": true + } + } + }, + "gl33_baseline": { + "extensions": { + "VK_EXT_vertex_attribute_divisor": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "dualSrcBlend": true + }, + "VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT": { + "vertexAttributeInstanceRateDivisor": true + } + } + }, + "gl40_baseline": { + "extensions": { + "VK_KHR_maintenance2": 1, + "VK_KHR_maintenance3": 1, + "VK_KHR_maintenance4": 1, + "VK_KHR_maintenance5": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "sampleRateShading": true, + "tessellationShader": true, + "imageCubeArray": true + }, + "VkPhysicalDeviceMaintenance4Features": { + "maintenance4": true + }, + "VkPhysicalDeviceMaintenance5FeaturesKHR": { + "maintenance5": true + } + }, + "formats": { + 
"VK_FORMAT_R32G32B32_SFLOAT": { + "VkFormatProperties": { + "bufferFeatures": [ + "VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT" + ] + } + }, + "VK_FORMAT_R32G32B32_SINT": { + "VkFormatProperties": { + "bufferFeatures": [ + "VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT" + ] + } + }, + "VK_FORMAT_R32G32B32_UINT": { + "VkFormatProperties": { + "bufferFeatures": [ + "VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT" + ] + } + } + } + }, + "gl41_baseline": { + "features": { + "VkPhysicalDeviceFeatures": { + "multiViewport": true + } + }, + "properties": { + "VkPhysicalDeviceProperties": { + "limits": { + "maxImageDimension1D": 16384, + "maxImageDimension2D": 16384, + "maxImageDimension3D": 2048, + "maxImageDimensionCube": 16384, + "maxImageArrayLayers": 2048, + "maxViewports": 16 + } + } + } + }, + "gl42_baseline": { + "extensions": { + "VK_EXT_image_2d_view_of_3d": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "shaderStorageImageExtendedFormats": true, + "shaderStorageImageWriteWithoutFormat": true, + "vertexPipelineStoresAndAtomics": true, + "fragmentStoresAndAtomics": true + }, + "VkPhysicalDeviceImage2DViewOf3DFeaturesEXT": { + "image2DViewOf3D": true + } + } + }, + "gl42_baseline_vk10": { + "extensions": { + "VK_KHR_shader_draw_parameters": 1 + }, + "features": { + "VkPhysicalDeviceShaderDrawParametersFeatures": { + "shaderDrawParameters": true + } + } + }, + "gl42_baseline_vk12": { + "features": { + "VkPhysicalDeviceVulkan11Features": { + "shaderDrawParameters": true + } + } + }, + "gl43_baseline_rb2": { + "extensions": { + "VK_EXT_robustness2": 1 + }, + "features": { + "VkPhysicalDeviceRobustness2FeaturesEXT": { + "robustImageAccess2": true + } + } + }, + "gl43_baseline_rb_image_vk13": { + "features": { + "VkPhysicalDeviceVulkan13Features": { + "robustImageAccess": true + } + } + }, + "gl43_baseline_rb_image_ext": { + "extensions": { + "VK_EXT_image_robustness": 1 + }, + "features": { + "VkPhysicalDeviceImageRobustnessFeatures": { + "robustImageAccess": true + 
} + } + }, + "gl43_baseline": { + "features": { + "VkPhysicalDeviceFeatures": { + "robustBufferAccess": true, + "multiDrawIndirect": true + } + }, + "formats": { + "VK_FORMAT_R8G8B8A8_UNORM": { + "VkFormatProperties": { + "linearTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ], + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + }, + "VK_FORMAT_R8G8B8A8_SRGB": { + "VkFormatProperties": { + "linearTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ], + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + }, + "VK_FORMAT_R16_UNORM": { + "VkFormatProperties": { + "linearTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ], + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + }, + "VK_FORMAT_R16G16_UNORM": { + "VkFormatProperties": { + "linearTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ], + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + }, + "VK_FORMAT_R16_SNORM": { + "VkFormatProperties": { + "linearTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ], + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + }, + "VK_FORMAT_R16G16_SNORM": { + "VkFormatProperties": { + "linearTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ], + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + }, + "VK_FORMAT_D32_SFLOAT_S8_UINT": { + "VkFormatProperties": { + "optimalTilingFeatures": [ + "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT" + ] + } + } + } + }, + "gl44_baseline": { + "formats": { + "VK_FORMAT_B10G11R11_UFLOAT_PACK32": { + "VkFormatProperties": { + "bufferFeatures": [ + "VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT" + ] + } + } + } + }, + "gl44_baseline_ext": { + "extensions": { + "VK_KHR_sampler_mirror_clamp_to_edge": 1 + } + }, + "gl44_baseline_vk12": { + "features": { + "VkPhysicalDeviceVulkan12Features": { + "samplerMirrorClampToEdge": true + } + } + }, + 
"gl45_baseline": { + "features": { + "VkPhysicalDeviceFeatures": { + "shaderCullDistance": true + } + } + }, + "gl46_baseline": { + "extensions": { + "VK_KHR_draw_indirect_count": 1 + }, + "features": { + "VkPhysicalDeviceFeatures": { + "samplerAnisotropy": true, + "depthBiasClamp": true + } + } + }, + "gl46_optimal": { + "extensions": { + "VK_EXT_extended_dynamic_state": 1, + "VK_EXT_extended_dynamic_state2": 1, + "VK_EXT_extended_dynamic_state3": 1, + "VK_EXT_graphics_pipeline_library": 1, + "VK_EXT_non_seamless_cube_map": 1, + "VK_KHR_pipeline_library": 1, + "VK_EXT_attachment_feedback_loop_layout": 1, + "VK_EXT_attachment_feedback_loop_dynamic_state": 1 + }, + "features": { + "VkPhysicalDeviceExtendedDynamicStateFeaturesEXT": { + "extendedDynamicState": true + }, + "VkPhysicalDeviceExtendedDynamicState2FeaturesEXT": { + "extendedDynamicState2": true, + "extendedDynamicState2LogicOp": true, + "extendedDynamicState2PatchControlPoints": true + }, + "VkPhysicalDeviceExtendedDynamicState3FeaturesEXT": { + "extendedDynamicState3PolygonMode": true, + "extendedDynamicState3DepthClampEnable": true, + "extendedDynamicState3DepthClipEnable": true, + "extendedDynamicState3ProvokingVertexMode": true, + "extendedDynamicState3LineRasterizationMode": true, + "extendedDynamicState3DepthClipNegativeOneToOne": true + }, + "VkPhysicalDeviceFeatures": { + "textureCompressionBC": true + }, + "VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT": { + "graphicsPipelineLibrary": true + }, + "VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT": { + "nonSeamlessCubeMap": true + }, + "VkPhysicalDeviceProvokingVertexFeaturesEXT": { + "provokingVertexLast": true + }, + "VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT": { + "attachmentFeedbackLoopLayout": true + }, + "VkPhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT": { + "attachmentFeedbackLoopDynamicState": true + } + }, + "properties": { + "VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT": { + + } + } + }, + 
"gl46_optimal_ext": { + "extensions": { + "VK_EXT_provoking_vertex": 1, + "VK_KHR_dynamic_rendering": 1, + "VK_EXT_dynamic_rendering_unused_attachments": 1 + }, + "features": { + "VkPhysicalDeviceDynamicRenderingFeatures": { + "dynamicRendering": true + }, + "VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT": { + "dynamicRenderingUnusedAttachments": true + } + } + }, + "gl46_optimal_vk13": { + "features": { + "VkPhysicalDeviceVulkan13Features": { + "dynamicRendering": true + } + } + }, + "gl46_optional": { + "extensions": { + "VK_EXT_primitives_generated_query": 1, + "VK_EXT_color_write_enable": 1, + "VK_EXT_extended_dynamic_state3": 1, + "VK_EXT_descriptor_buffer": 1 + }, + "features": { + "VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT": { + "primitivesGeneratedQueryWithRasterizerDiscard": true + }, + "VkPhysicalDeviceColorWriteEnableFeaturesEXT": { + "colorWriteEnable": true + }, + "VkPhysicalDeviceExtendedDynamicState3FeaturesEXT": { + "extendedDynamicState3AlphaToOneEnable": true, + "extendedDynamicState3SampleMask": true, + "extendedDynamicState3AlphaToCoverageEnable": true, + "extendedDynamicState3ColorBlendEnable": true, + "extendedDynamicState3RasterizationSamples": true, + "extendedDynamicState3ColorWriteMask": true, + "extendedDynamicState3LogicOpEnable": true + }, + "VkPhysicalDeviceDescriptorBufferFeaturesEXT": { + "descriptorBuffer": true + } + } + }, + "gl46_optional_ext": { + "extensions": { + "VK_EXT_pipeline_creation_cache_control": 1 + }, + "features": { + "VkPhysicalDevicePipelineCreationCacheControlFeatures": { + "pipelineCreationCacheControl": true + } + } + }, + "gl46_optional_vk13": { + "features": { + "VkPhysicalDeviceVulkan13Features": { + "pipelineCreationCacheControl": true + } + } + }, + "GL_ARB_bindless_texture": { + "extensions": { + "VK_EXT_descriptor_indexing": 1 + }, + "properties": { + "VkPhysicalDeviceDescriptorIndexingProperties": { + "robustBufferAccessUpdateAfterBind": true + } + } + }, + 
"GL_ARB_sparse_texture": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseResidencyImage2D": true, + "sparseResidencyImage3D": true + } + } + }, + "GL_ARB_sparse_texture2": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseResidency2Samples": true + } + } + }, + "GL_ARB_sparse_texture_clamp_2s": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseResidency2Samples": true + } + } + }, + "GL_ARB_sparse_texture_clamp_4s": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseResidency4Samples": true + } + } + }, + "GL_ARB_sparse_texture_clamp_8s": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseResidency8Samples": true + } + } + }, + "GL_ARB_sparse_texture_clamp_16s": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseResidency16Samples": true + } + } + }, + "GL_ARB_sparse_buffer": { + "features": { + "VkPhysicalDeviceFeatures": { + "sparseBinding": true, + "sparseResidencyBuffer": true + } + } + }, + "GL_ARB_shader_viewport_layer_array_ext": { + "extensions": { + "VK_EXT_shader_viewport_index_layer": 1 + } + }, + "GL_ARB_shader_viewport_layer_array_vk12": { + "features": { + "VkPhysicalDeviceVulkan12Features": { + "shaderOutputViewportIndex": true, + "shaderOutputLayer": true + } + } + }, + "GL_ARB_fragment_shader_interlock": { + "extensions": { + "VK_EXT_fragment_shader_interlock": 1 + } + }, + "GL_ARB_shader_clock": { + "extensions": { + "VK_KHR_shader_clock": 1 + } + }, + "GL_ARB_shader_ballot_ext": { + "extensions": { + "VK_EXT_shader_subgroup_ballot": 1 + } + }, + "GL_ARB_shader_ballot_vk11": { + "properties": { + "VkPhysicalDeviceSubgroupProperties": { + "subgroupSize": 64, + "supportedOperations": [ "VK_SUBGROUP_FEATURE_BALLOT_BIT" ] + } + } + }, + "GL_ARB_sample_locations": { + "extensions": { + "VK_EXT_extended_dynamic_state": 1, + "VK_EXT_sample_locations": 1 + }, + "features": { + "VkPhysicalDeviceExtendedDynamicStateFeaturesEXT": { + "extendedDynamicState": true + } + } + }, + 
"GL_ARB_shader_stencil_export": { + "extensions": { + "VK_EXT_shader_stencil_export": 1 + } + }, + "GL_EXT_depth_bounds_test": { + "features": { + "VkPhysicalDeviceFeatures": { + "depthBounds": true + } + } + }, + "GL_EXT_texture_filter_minmax_ext": { + "extensions": { + "VK_EXT_sampler_filter_minmax": 1 + }, + "properties": { + "VkPhysicalDeviceSamplerFilterMinmaxProperties": { + "filterMinmaxSingleComponentFormats": true, + "filterMinmaxImageComponentMapping": true + } + } + }, + "GL_EXT_texture_filter_minmax_vk12": { + "properties": { + "VkPhysicalDeviceVulkan12Properties": { + "filterMinmaxSingleComponentFormats": true, + "filterMinmaxImageComponentMapping": true + } + } + } + }, + "profiles": { + "VP_ZINK_gl21_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 2.1 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 2.1 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ] + ] + }, + "VP_ZINK_gl30_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 3.0 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 3.0 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline" + ] + }, + "VP_ZINK_gl31_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 3.1 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 3.1 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline" + ] + }, + "VP_ZINK_gl32_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink 
OpenGL 3.2 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 3.2 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline" + ] + }, + "VP_ZINK_gl33_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 3.3 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 3.3 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline" + ] + }, + "VP_ZINK_gl40_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 4.0 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.0 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline" + ] + }, + "VP_ZINK_gl41_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 4.1 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.1 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline" + ] + }, + "VP_ZINK_gl42_baseline": { + "version": 1, + "api-version": "1.2.0", + "label": "Zink OpenGL 4.2 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.2 support.", + "capabilities": [ + 
"vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline", + "gl42_baseline", + [ "gl42_baseline_vk10", "gl42_baseline_vk12" ] + ] + }, + "VP_ZINK_gl43_baseline": { + "version": 1, + "api-version": "1.3.0", + "label": "Zink OpenGL 4.3 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.3 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline", + "gl42_baseline", + [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], + "gl43_baseline", + [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ] + ] + }, + "VP_ZINK_gl44_baseline": { + "version": 1, + "api-version": "1.3.0", + "label": "Zink OpenGL 4.4 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.4 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline", + "gl42_baseline", + [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], + "gl43_baseline", + [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ], + "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ] + ] + }, + "VP_ZINK_gl45_baseline": { + "version": 1, + "api-version": "1.3.0", + "label": "Zink OpenGL 4.5 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.5 support.", + "capabilities": [ + 
"vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline", + "gl42_baseline", + [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], + "gl43_baseline", + [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ], + "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ], + "gl45_baseline" + ] + }, + "VP_ZINK_gl46_baseline": { + "version": 1, + "api-version": "1.3.0", + "label": "Zink OpenGL 4.6 Baseline profile", + "description": "Minimum requirements for Zink OpenGL 4.6 support.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline", + "gl42_baseline", + [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], + "gl43_baseline", + [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ], + "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ], + "gl45_baseline", + "gl46_baseline" + ] + }, + "VP_ZINK_gl46_optimal": { + "version": 1, + "api-version": "1.3.0", + "label": "Zink OpenGL 4.6 Optimal profile", + "description": "Requirements for Zink OpenGL 4.6 support with best performances.", + "capabilities": [ + "vulkan10requirements", + "gl21_baseline", + [ "gl21_baseline_vk10", "gl21_baseline_vk12" ], + [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ], + "gl30_baseline", + "gl31_baseline", + "gl32_baseline", + "gl33_baseline", + "gl40_baseline", + "gl41_baseline", + "gl42_baseline", + [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], + "gl43_baseline", + [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", 
"gl43_baseline_rb_image_ext" ], + "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ], + "gl45_baseline", + "gl46_baseline", + "gl46_optimal", + [ "gl46_optimal_ext", "gl46_optimal_vk13" ] + ], + "optionals": [ + "gl46_optional", + [ "gl46_optional_ext", "gl46_optional_vk13" ], + "GL_ARB_bindless_texture", + "GL_ARB_sparse_texture", + "GL_ARB_sparse_texture2", + [ "GL_ARB_sparse_texture_clamp_2s", "GL_ARB_sparse_texture_clamp_4s", "GL_ARB_sparse_texture_clamp_8s", "GL_ARB_sparse_texture_clamp_16s" ], + "GL_ARB_sparse_buffer", + [ "GL_ARB_shader_viewport_layer_array_ext", "GL_ARB_shader_viewport_layer_array_vk12" ], + "GL_ARB_fragment_shader_interlock", + "GL_ARB_shader_clock", + [ "GL_ARB_shader_ballot_ext", "GL_ARB_shader_ballot_vk11" ], + "GL_ARB_sample_locations", + "GL_ARB_shader_stencil_export", + "GL_EXT_depth_bounds_test", + [ "GL_EXT_texture_filter_minmax_ext", "GL_EXT_texture_filter_minmax_vk12" ] + ] + } + }, + "contributors": { + "Mike Blumenkrantz": { + "company": "Valve" + }, + "Christophe Riccio": { + "company": "LunarG" + }, + "Erik Faye-Lund": { + "company": "Collabora" + }, + "Soroush Faghihi": { + "company": "Imagination Technologies" + }, + "Connor Abbott": { + "company": "Valve" + } + }, + "history": [ + { + "revision": 1, + "date": "2022-10-18", + "author": "Christophe Riccio", + "comment": "Initial revision" + } + ] +} diff --git a/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml new file mode 100644 index 00000000000..fae5431fde2 --- /dev/null +++ b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml @@ -0,0 +1,135 @@ +[[deqp]] +deqp = "/deqp/modules/gles2/deqp-gles2" +caselists = ["/deqp/mustpass/gles2-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 +version_check = "GL ES 3.2.*git" +renderer_check 
= "zink.*Intel.*" + +[[deqp]] +deqp = "/deqp/modules/gles3/deqp-gles3" +caselists = ["/deqp/mustpass/gles3-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 + +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gles2-khr-main.txt", + "/deqp/mustpass/gles3-khr-main.txt", + "/deqp/mustpass/gles31-khr-main.txt", +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 + +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gl46-main.txt", + "/deqp/mustpass/gl46-khr-single.txt", +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 + +# 565-nozs +[[deqp]] +deqp = "/deqp/modules/gles3/deqp-gles3" +caselists = ["/deqp/mustpass/gles3-565-no-depth-no-stencil.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgb565d0s0ms0", +] +prefix = "565-nozs-" + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-565-no-depth-no-stencil.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + 
"--deqp-gl-config-name=rgb565d0s0ms0", +] +prefix = "565-nozs-" + +# multisample +[[deqp]] +deqp = "/deqp/modules/gles3/deqp-gles3" +caselists = ["/deqp/mustpass/gles3-multisample.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms4", +] +prefix = "multisample-" + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-multisample.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms4", +] +prefix = "multisample-" + +[[deqp]] +deqp = "/deqp/modules/egl/deqp-egl-x11" +caselists = ["/deqp/mustpass/egl-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] + +[[deqp]] +deqp = "/deqp/modules/egl/deqp-egl-wayland" +caselists = ["/deqp/mustpass/egl-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] +prefix = "wayland-" + +[[piglit]] +piglit_folder = "/piglit" +profile = "gpu" +process_isolation = true +timeout = 180.0 diff --git a/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml new file mode 100644 index 00000000000..0554b105692 --- /dev/null +++ b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml @@ -0,0 +1,103 @@ +#[[deqp]] +#deqp = "/deqp/modules/gles2/deqp-gles2" +#caselists = ["/deqp/mustpass/gles2-main.txt"] +#deqp_args = [ +# "--deqp-surface-width=256", +# "--deqp-surface-height=256", +# "--deqp-surface-type=pbuffer", +# "--deqp-gl-config-name=rgba8888d24s8ms0", +# "--deqp-visibility=hidden" +#] +#timeout = 180.0 +#version_check = "GL ES 3.2.*git" 
+#renderer_check = "zink.*Intel.*" +# +#[[deqp]] +#deqp = "/deqp/modules/gles3/deqp-gles3" +#caselists = ["/deqp/mustpass/gles3-main.txt"] +#deqp_args = [ +# "--deqp-surface-width=256", +# "--deqp-surface-height=256", +# "--deqp-surface-type=pbuffer", +# "--deqp-gl-config-name=rgba8888d24s8ms0", +# "--deqp-visibility=hidden" +#] +#timeout = 180.0 +# +#[[deqp]] +#deqp = "/deqp/modules/gles31/deqp-gles31" +#caselists = ["/deqp/mustpass/gles31-main.txt"] +#deqp_args = [ +# "--deqp-surface-width=256", +# "--deqp-surface-height=256", +# "--deqp-surface-type=pbuffer", +# "--deqp-gl-config-name=rgba8888d24s8ms0", +# "--deqp-visibility=hidden" +#] +#timeout = 180.0 + +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gl46-main.txt", + "/deqp/mustpass/gl46-khr-single.txt", +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +skips = ["/install/zink-anv-tgl-premerge-skips.txt"] +timeout = 180.0 +[deqp.env] + ZINK_DEBUG = "validation" + +# Regression testing for graphics pipelines where fragment shaders +# don't know about multisampling etc... at compile time +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gl46-main.txt", + +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +skips = ["/install/zink-anv-tgl-premerge-skips.txt"] +timeout = 180.0 +include = ["KHR-GL46.sample_variables.mask.rgba8.*.samples.*.mask.*"] +prefix = "noopt-" +[deqp.env] + ZINK_DEBUG = "nobgc,noopt,validation" + +# Regression testing for graphics pipelines where fragment shaders +# don't know about multisampling etc... 
at compile time +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 +include = ["dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_sample.*"] +prefix = "noopt-" +[deqp.env] + ZINK_DEBUG = "nobgc,noopt,validation" + +[[piglit]] +piglit_folder = "/piglit" +profile = "quick_gl" +process_isolation = true +skips = ["/install/zink-anv-tgl-premerge-skips.txt"] +timeout = 180.0 +fraction = 2 diff --git a/src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml b/src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml new file mode 100644 index 00000000000..742c0155360 --- /dev/null +++ b/src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml @@ -0,0 +1,87 @@ +# Basic test set +[[deqp]] +deqp = "/deqp/modules/gles2/deqp-gles2" +caselists = ["/deqp/mustpass/gles2-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] +version_check = "GL ES 3.2.*git" +renderer_check = "zink.*Adreno.*618" + +[[deqp]] +deqp = "/deqp/modules/gles3/deqp-gles3" +caselists = ["/deqp/mustpass/gles3-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] + +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gles2-khr-main.txt", + 
"/deqp/mustpass/gles3-khr-main.txt", + "/deqp/mustpass/gles31-khr-main.txt", +] +# We want to test desktop GL eventually, but fp64 is slow and we've got enough work +# to do just getting GLES sorted out. +# "/deqp/mustpass/gl46-main.txt", +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] + +# 565-nozs +[[deqp]] +deqp = "/deqp/modules/gles3/deqp-gles3" +caselists = ["/deqp/mustpass/gles3-565-no-depth-no-stencil.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgb565d0s0ms0", +] +prefix = "565-nozs-" + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-565-no-depth-no-stencil.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgb565d0s0ms0", +] +prefix = "565-nozs-" + +# multisample +[[deqp]] +deqp = "/deqp/modules/gles3/deqp-gles3" +caselists = ["/deqp/mustpass/gles3-multisample.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms4", +] +prefix = "multisample-" + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-multisample.txt"] +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms4", +] +prefix = "multisample-" diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt b/src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt deleted file mode 100644 index 60c8b845e2e..00000000000 --- a/src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt +++ /dev/null @@ -1,49 +0,0 @@ 
-dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail -dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail -dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail -dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail -dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail -dEQP-GLES3.functional.clipping.point.wide_point_clip,Fail -dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_center,Fail -dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_corner,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_x,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_x,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_y,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_x,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail -dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail -dEQP-GLES3.functional.multisample.fbo_4_samples.proportionality_sample_coverage,Fail -dEQP-GLES3.functional.multisample.fbo_4_samples.sample_coverage_invert,Fail -dEQP-GLES3.functional.multisample.fbo_max_samples.proportionality_sample_coverage,Fail -dEQP-GLES3.functional.multisample.fbo_max_samples.sample_coverage_invert,Fail -KHR-GL32.transform_feedback.capture_geometry_separate_test,Fail -KHR-GL32.transform_feedback.capture_vertex_interleaved_test,Fail -KHR-GL32.transform_feedback.capture_vertex_separate_test,Fail -KHR-GL32.transform_feedback.discard_vertex_test,Fail -KHR-GL32.transform_feedback.draw_xfb_instanced_test,Crash 
-KHR-GL32.transform_feedback.draw_xfb_stream_instanced_test,Crash -KHR-GL32.transform_feedback.query_geometry_separate_test,Fail -KHR-GL32.transform_feedback.query_vertex_interleaved_test,Fail -KHR-GL32.transform_feedback.query_vertex_separate_test,Fail -dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail -dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail -KHR-GL32.packed_pixels.pbo_rectangle.r16i,Fail -KHR-GL32.packed_pixels.pbo_rectangle.r16ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.r32i,Fail -KHR-GL32.packed_pixels.pbo_rectangle.r32ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.r8ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rg16i,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rg16ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rg32i,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rg32ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rg8ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rgb10_a2ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rgba16i,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rgba16ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rgba32i,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rgba32ui,Fail -KHR-GL32.packed_pixels.pbo_rectangle.rgba8ui,Fail diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt b/src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt deleted file mode 100644 index 693fee240cd..00000000000 --- a/src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt +++ /dev/null @@ -1 +0,0 @@ -dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_l8_pot diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt b/src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt deleted file mode 100644 index 39aa35934b6..00000000000 --- a/src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt +++ /dev/null @@ -1 +0,0 @@ -KHR-GL32.texture_size_promotion.functional diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml b/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml index 8c902ef0738..549b9f026a4 100644 --- 
a/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml +++ b/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml @@ -1,6 +1,6 @@ [[deqp]] deqp = "/deqp/modules/gles2/deqp-gles2" -caselists = ["/deqp/mustpass/gles2-master.txt"] +caselists = ["/deqp/mustpass/gles2-main.txt"] deqp_args = [ "--deqp-surface-width=256", "--deqp-surface-height=256", @@ -9,10 +9,24 @@ deqp_args = [ "--deqp-visibility=hidden" ] timeout = 180.0 +version_check = "GL ES 3.2.*git" +renderer_check = "zink.*llvmpipe" [[deqp]] deqp = "/deqp/modules/gles3/deqp-gles3" -caselists = ["/deqp/mustpass/gles3-master.txt"] +caselists = ["/deqp/mustpass/gles3-main.txt"] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 + +[[deqp]] +deqp = "/deqp/modules/gles31/deqp-gles31" +caselists = ["/deqp/mustpass/gles31-main.txt"] deqp_args = [ "--deqp-surface-width=256", "--deqp-surface-height=256", @@ -25,7 +39,8 @@ timeout = 180.0 [[deqp]] deqp = "/deqp/external/openglcts/modules/glcts" caselists = [ - "/deqp/mustpass/gl32-master.txt", + "/deqp/mustpass/gl46-main.txt", + "/deqp/mustpass/gl46-khr-single.txt", ] deqp_args = [ "--deqp-surface-width=256", @@ -35,3 +50,16 @@ deqp_args = [ "--deqp-visibility=hidden" ] timeout = 180.0 + +[[piglit]] +piglit_folder = "/piglit" +profile = "gpu" +process_isolation = true +timeout = 180.0 + [piglit.env] + # Disable validation on piglit. We end up with use-after-frees from + # piglit_report() -> exit() having freed validation layer state, with a + # st_glFlush() -> tc_batch_execute() -> zink_set_vertex_buffers -> + # vulkan_layer_chassis::CmdPipelineBarrier2() (etc.) happening after that + # somehow. 
+ ZINK_DEBUG = "" diff --git a/src/gallium/drivers/zink/ci/deqp-zink-radv.toml b/src/gallium/drivers/zink/ci/deqp-zink-radv.toml new file mode 100644 index 00000000000..b05b6c4f599 --- /dev/null +++ b/src/gallium/drivers/zink/ci/deqp-zink-radv.toml @@ -0,0 +1,41 @@ +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gles2-main.txt", + "/deqp/mustpass/gles3-main.txt", + "/deqp/mustpass/gles31-main.txt", +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 +renderer_check = "zink.*RADV" + +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gl46-main.txt", +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 +renderer_check = "zink.*RADV" + +[[piglit]] +piglit_folder = "/piglit" +profile = "gpu" +process_isolation = true +timeout = 180.0 + [piglit.env] + PIGLIT_NO_WINDOW = "1" + PIGLIT_PLATFORM = "gbm" + WAFFLE_PLATFORM = "gbm" diff --git a/src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml b/src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml new file mode 100644 index 00000000000..6b6a029b5bd --- /dev/null +++ b/src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml @@ -0,0 +1,15 @@ +[[deqp]] +deqp = "/deqp/external/openglcts/modules/glcts" +caselists = [ + "/deqp/mustpass/gl46-main.txt", + "/deqp/mustpass/gl46-khr-single.txt", +] +deqp_args = [ + "--deqp-surface-width=256", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-visibility=hidden" +] +timeout = 180.0 +renderer_check = "zink.*llvmpipe" diff --git a/src/gallium/drivers/zink/ci/gitlab-ci-inc.yml b/src/gallium/drivers/zink/ci/gitlab-ci-inc.yml new file mode 100644 
index 00000000000..dd4fc5166ca --- /dev/null +++ b/src/gallium/drivers/zink/ci/gitlab-ci-inc.yml @@ -0,0 +1,172 @@ +.zink-common-rules: + rules: + - !reference [.test, rules] + - !reference [.gl-rules, rules] + - changes: &zink_files_list + - src/gallium/drivers/zink/* + - src/gallium/drivers/zink/nir_to_spirv/* + - src/gallium/drivers/zink/ci/gitlab-ci.yml + - src/gallium/drivers/zink/ci/gitlab-ci-inc.yml + - src/gallium/drivers/zink/ci/deqp-$DEQP_SUITE.toml + - src/gallium/drivers/zink/ci/$GPU_VERSION-fails.txt + - src/gallium/drivers/zink/ci/$GPU_VERSION-flakes.txt + - src/gallium/drivers/zink/ci/$GPU_VERSION-skips.txt + - src/gallium/drivers/zink/ci/$GPU_VERSION-validation-settings.txt + - src/gallium/drivers/zink/ci/$PIGLIT_TRACES_FILE + when: on_success + +.zink-common-manual-rules: + retry: !reference [.scheduled_pipeline-rules, retry] + rules: + - !reference [.test, rules] + - !reference [.gl-manual-rules, rules] + - changes: + *zink_files_list + when: manual + +.zink-lvp-rules: + stage: layered-backends + rules: + - !reference [.test, rules] + - !reference [.lavapipe-rules, rules] + - !reference [.zink-common-rules, rules] + +.zink-lvp-venus-rules: + stage: layered-backends + rules: + - !reference [.test, rules] + - !reference [.lavapipe-rules, rules] + - !reference [.venus-rules, rules] + - !reference [.zink-common-rules, rules] + +.zink-anv-rules: + stage: layered-backends + rules: + - !reference [.test, rules] + - !reference [.anv-rules, rules] + - !reference [.zink-common-rules, rules] + +.zink-anv-manual-rules: + stage: layered-backends + retry: !reference [.scheduled_pipeline-rules, retry] + rules: + - !reference [.test, rules] + - !reference [.anv-manual-rules, rules] + - !reference [.zink-common-manual-rules, rules] + +.zink-anv-rules-restricted: + stage: layered-backends + rules: + - !reference [.test, rules] + - !reference [.restricted-rules, rules] + - !reference [.anv-rules, rules] + - !reference [.zink-common-rules, rules] + 
+.zink-turnip-rules: + stage: layered-backends + rules: + - !reference [.test, rules] + - !reference [.collabora-turnip-rules, rules] + - !reference [.zink-common-rules, rules] + variables: + ZINK_DEBUG: optimal_keys + +.zink-turnip-manual-rules: + stage: layered-backends + retry: !reference [.scheduled_pipeline-rules, retry] + rules: + - !reference [.test, rules] + - !reference [.collabora-turnip-manual-rules, rules] + - !reference [.zink-common-manual-rules, rules] + +.zink-radv-rules: + stage: layered-backends + rules: + - !reference [.test, rules] + - !reference [.radv-valve-rules, rules] + - !reference [.zink-common-rules, rules] + +.zink-radv-manual-rules: + stage: layered-backends + retry: !reference [.scheduled_pipeline-rules, retry] + rules: + - !reference [.test, rules] + - !reference [.radv-valve-manual-rules, rules] + - !reference [.zink-common-manual-rules, rules] + - changes: + - .gitlab-ci/container/build-piglit.sh + when: manual + +.zink-test: + timeout: 30m + variables: + MESA_LOADER_DRIVER_OVERRIDE: "zink" + FLAKES_CHANNEL: "#zink-ci" + MESA_VK_ABORT_ON_DEVICE_LOSS: 0 + +.zink-trace-test: + extends: + - .zink-test + variables: + # The libX11 in the debian we use doesn't XInitThreads() by default (need + # 1.8.1 for that), and eglretrace's waffle GLX path doesn't call it either, + # which ends up causing trouble with kopper's X usage. Use gbm for our + # trace replay, instead. 
+ # https://gitlab.freedesktop.org/mesa/mesa/-/issues/6753 + HWCI_START_XORG: "" + WAFFLE_PLATFORM: gbm + PIGLIT_PLATFORM: gbm + +.zink-lvp-test: + extends: + - .zink-lvp-rules + - .zink-test + variables: + LIBGL_ALWAYS_SOFTWARE: "true" + LVP_POISON_MEMORY: "1" + GPU_VERSION: zink-lvp + # Fix non-conformant llvmpipe filter defaults + GALLIVM_PERF: "no_quad_lod" + +.zink-venus-lvp-test: + extends: + - .zink-lvp-venus-rules + - .zink-test + variables: + LIBGL_ALWAYS_SOFTWARE: "true" + LVP_POISON_MEMORY: "1" + GPU_VERSION: zink-venus-lvp + # Fix non-conformant llvmpipe filter defaults + GALLIVM_PERF: "no_quad_lod" + VK_DRIVER: virtio + GALLIUM_DRIVER: "zink" + CROSVM_GALLIUM_DRIVER: "llvmpipe" + CROSVM_VK_DRIVER: "lvp" + CROSVM_GPU_ARGS: "vulkan=true,gles=false,backend=virglrenderer,egl=true,surfaceless=true,fixed-blob-mapping=false" + +.zink-anv-test: + extends: + - .lava-acer-cp514-2h-1160g7-volteer:x86_64 + - .anv-test + - .zink-anv-rules + - .zink-test + variables: + VK_DRIVER: intel + GPU_VERSION: zink-anv-tgl + +.radv-zink-test-valve: + timeout: 30m + extends: + - .zink-test + - .test-radv + - .b2c-x86_64-test-gl + variables: + DEQP_SUITE: zink-radv + HWCI_TEST_SCRIPT: ./install/deqp-runner.sh + B2C_KERNEL_URL: https://fs.mupuf.org/linux-6.6-b2c-radv-ci # 6.6 + B2C_JOB_SUCCESS_REGEX: 'Execution is over, pipeline status: 0' + B2C_TIMEOUT_OVERALL_MINUTES: 20 + + # Disable reporting, since DUTs don't have internet access + FLAKES_CHANNEL: "" + diff --git a/src/gallium/drivers/zink/ci/gitlab-ci.yml b/src/gallium/drivers/zink/ci/gitlab-ci.yml index 9608a048b24..e775e71b758 100644 --- a/src/gallium/drivers/zink/ci/gitlab-ci.yml +++ b/src/gallium/drivers/zink/ci/gitlab-ci.yml @@ -1,50 +1,177 @@ -.zink-lvp-test: - extends: - - .zink-rules - variables: - ZINK_USE_LAVAPIPE: "true" - LIBGL_ALWAYS_SOFTWARE: "1" - GPU_VERSION: zink-lvp - # Fix non-conformant llvmpipe filter defaults - GALLIVM_PERF: "no_quad_lod" - LP_NUM_THREADS: 0 +include: + - local: 
'src/gallium/drivers/zink/ci/gitlab-ci-inc.yml' -.zink-piglit-quick_gl: +zink-lvp: extends: - .test-gl + - .deqp-test - .zink-lvp-test variables: - PIGLIT_PROFILES: quick_gl + DEQP_SUITE: zink-lvp + DEQP_FRACTION: 8 PIGLIT_NO_WINDOW: 1 - PIGLIT_RUNNER_OPTIONS: "--timeout 180" + # Enable validation (except for on piglit, see deqp-zink-lvp.toml), logging + # to stdout and aborting on unknown failures. + ZINK_DEBUG: validation + XVFB_SCRIPT: "VK_DRIVER=lvp install/deqp-runner.sh" + script: | + xvfb-run --server-args='-noreset' bash -c ". $SCRIPTS_DIR/setup-test-env.sh && ${XVFB_SCRIPT}" -zink-piglit-timelines: +zink-venus-lvp: extends: - - .zink-piglit-quick_gl + - .test-gl + - .deqp-test + - .zink-venus-lvp-test + variables: + DEQP_SUITE: zink-venus-lvp + DEQP_FRACTION: 8 + PIGLIT_NO_WINDOW: 1 + # Enable validation (except for on piglit, see deqp-zink-venus-lvp.toml), logging + # to stdout and aborting on unknown failures. + ZINK_DEBUG: validation + LP_NUM_THREADS: 2 + CROSVM_MEMORY: 12288 + CROSVM_CPU: $FDO_CI_CONCURRENT script: - - xvfb-run --server-args='-noreset' sh -c "GALLIUM_DRIVER=zink VK_DRIVER=lvp install/piglit/piglit-runner.sh" + - xvfb-run --server-args='-noreset' bash -c "./install/crosvm-runner.sh ./install/deqp-runner.sh" -zink-piglit-no_timelines: +zink-anv-tgl: extends: - - .zink-piglit-quick_gl - script: - - xvfb-run --server-args='-noreset' sh -c "ZINK_NO_TIMELINES=1 GALLIUM_DRIVER=zink VK_DRIVER=lvp install/piglit/piglit-runner.sh" + - .zink-anv-test + timeout: 1h + variables: + DEQP_SUITE: zink-anv-tgl + PIGLIT_NO_WINDOW: 1 + HWCI_START_WESTON: 1 + # We use gbm because X can die: + # MESA: error: ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST) + # Xorg: ../src/gallium/drivers/zink/zink_batch.c:599: zink_end_batch: Assertion `!ctx->batch_states' failed. + PIGLIT_PLATFORM: gbm -zink-piglit-lazy: +# Manual full run when you want to double-check the full status. 
+zink-anv-tgl-full: extends: - - .zink-piglit-quick_gl - script: - - xvfb-run --server-args='-noreset' sh -c "ZINK_DESCRIPTORS=lazy GALLIUM_DRIVER=zink VK_DRIVER=lvp install/piglit/piglit-runner.sh" + - zink-anv-tgl + - .zink-anv-manual-rules + variables: + DEQP_SUITE: zink-anv-tgl-full + JOB_TIMEOUT: 105 + HWCI_START_WESTON: 1 + timeout: 1h 45m + parallel: 3 -zink-lvp-deqp: +zink-anv-tgl-traces: extends: - - .test-gl - - .deqp-test - - .zink-lvp-test + - .lava-piglit-traces:x86_64 + - .zink-anv-test + - .zink-trace-test + # Add .lava-traces-base again to override .zink-anv-test setting the + # HWCI_TEST_SCRIPT, but .lava-piglit-traces having to come first to get + # dependencies right. + - .lava-traces-base variables: - GALLIUM_DRIVER: "zink" # move here due to bad xvfb-run interactions - VK_DRIVER: lvp # Don't move to the top level, piglit runs do funny stuff with VK_DRIVER set - DEQP_EXPECTED_RENDERER: "zink.*llvmpipe" - DEQP_VER: gles2 - DEQP_SUITE: zink-lvp - parallel: 2 + PIGLIT_TRACES_FILE: traces-zink.yml + +zink-anv-tgl-traces-restricted: + extends: + - zink-anv-tgl-traces + - .zink-anv-rules-restricted + variables: + PIGLIT_TRACES_FILE: traces-zink-restricted.yml + PIGLIT_REPLAY_EXTRA_ARGS: --db-path ${CI_PROJECT_DIR}/replayer-db/ --minio_bucket=mesa-tracie-private --jwt-file=${S3_JWT_FILE} + allow_failure: true + +zink-tu-a618: + extends: + - .lava-test-deqp:arm64 + - .zink-turnip-rules + - .zink-test + - .lava-sc7180-trogdor-lazor-limozeen:arm64 + variables: + DEQP_FRACTION: 2 + DEQP_SUITE: zink-freedreno-a618 + FLAKES_CHANNEL: "#freedreno-ci" + HWCI_START_WESTON: 1 + GPU_VERSION: zink-freedreno-a618 + +zink-tu-a618-full: + extends: + - zink-tu-a618 + - .collabora-turnip-manual-rules + variables: + DEQP_FRACTION: 1 + +zink-tu-a618-traces: + extends: + - a618-traces + - .zink-turnip-rules + - .zink-trace-test + parallel: null + variables: + PIGLIT_REPLAY_DEVICE_NAME: "zink-a618" + +zink-tu-a618-traces-performance: + extends: + - zink-tu-a618-traces + - 
.zink-turnip-manual-rules + - .piglit-performance:arm64 + rules: + - !reference [.piglit-performance:arm64, rules] + - !reference [.zink-turnip-manual-rules, rules] + variables: + # Always use the same device + # a618 tag starts with cbg-1 (not cbg-0) for some reason + LAVA_TAGS: "cbg-1" + needs: + - !reference [zink-tu-a618-traces, needs] + - !reference [.piglit-performance:arm64, needs] + +############### Combined testing (GL, GLES, Piglit) on RADV +zink-radv-polaris10-valve: + extends: + - .radv-zink-test-valve + - .polaris10-test-valve-kws + - .zink-radv-manual-rules + variables: + GPU_VERSION: zink-radv-polaris10 + ZINK_DEBUG: quiet + +zink-radv-navi10-valve: + extends: + - .radv-zink-test-valve + - .navi10-test-valve-mupuf + - .zink-radv-manual-rules + timeout: 40m + variables: + B2C_TIMEOUT_OVERALL_MINUTES: 30 + GPU_VERSION: zink-radv-navi10 + +zink-radv-vangogh-valve: + timeout: 35m + parallel: 3 + extends: + - .radv-zink-test-valve + - .vangogh-test-valve + - .zink-radv-rules + variables: + GPU_VERSION: zink-radv-vangogh + B2C_SESSION_REBOOT_REGEX: 'BUG: kernel NULL pointer dereference, address' + B2C_TIMEOUT_BOOT_RETRIES: 1 + B2C_TIMEOUT_BOOT_MINUTES: 30 + B2C_TIMEOUT_OVERALL_MINUTES: 30 + FDO_CI_CONCURRENT: 6 + # Override the list of tags to drop `priority:low` + tags: + - farm:$RUNNER_FARM_LOCATION + - amdgpu:codename:VANGOGH + +zink-radv-navi31-valve: + extends: + - .radv-zink-test-valve + - .navi31-test-valve + - .zink-radv-manual-rules + timeout: 1h 20m + variables: + GPU_VERSION: zink-radv-navi31 + B2C_TIMEOUT_BOOT_MINUTES: 75 + B2C_TIMEOUT_OVERALL_MINUTES: 75 diff --git a/src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt b/src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt deleted file mode 100644 index 95844f80a56..00000000000 --- a/src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt +++ /dev/null @@ -1,826 +0,0 @@ -glx@glx-copy-sub-buffer,Fail -glx@glx-copy-sub-buffer samples=2,Fail -glx@glx-copy-sub-buffer samples=4,Fail 
-glx@glx-multi-window-single-context,Fail -glx@glx-multithread-texture,Fail -glx@glx-swap-copy,Fail -glx@glx-swap-pixmap-bad,Fail -glx@glx-tfp,Crash -glx@glx-visuals-depth,Crash -glx@glx-visuals-depth -pixmap,Crash -glx@glx-visuals-stencil,Crash -glx@glx-visuals-stencil -pixmap,Crash -glx@glx-query-drawable-glx_fbconfig_id-window,Fail -glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail -glx@glx_arb_create_context_no_error@no error,Fail -glx@glx_ext_import_context@free context,Fail -glx@glx_ext_import_context@get context id,Fail -glx@glx_ext_import_context@get current display,Fail -glx@glx_ext_import_context@import context- multi process,Fail -glx@glx_ext_import_context@import context- single process,Fail -glx@glx_ext_import_context@imported context has same context id,Fail -glx@glx_ext_import_context@make current- multi process,Fail -glx@glx_ext_import_context@make current- single process,Fail -glx@glx_ext_import_context@query context info,Fail -shaders@glsl-fs-pointcoord,Fail -shaders@point-vertex-id divisor,Fail -shaders@point-vertex-id gl_instanceid,Fail -shaders@point-vertex-id gl_instanceid divisor,Fail -shaders@point-vertex-id gl_vertexid,Fail -shaders@point-vertex-id gl_vertexid divisor,Fail -shaders@point-vertex-id gl_vertexid gl_instanceid,Fail -shaders@point-vertex-id gl_vertexid gl_instanceid divisor,Fail -spec@!opengl 1.0@gl-1.0-edgeflag,Fail -spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail -spec@!opengl 1.0@gl-1.0-no-op-paths,Fail -spec@!opengl 1.0@gl-1.0-swapbuffers-behavior,Fail -spec@!opengl 1.1@linestipple,Fail -spec@!opengl 1.1@linestipple@Factor 2x,Fail -spec@!opengl 1.1@linestipple@Factor 3x,Fail -spec@!opengl 1.1@linestipple@Line loop,Fail -spec@!opengl 1.1@linestipple@Line strip,Fail -spec@!opengl 1.1@polygon-offset,Fail -spec@!opengl 1.1@polygon-mode,Fail -spec@!opengl 1.1@polygon-mode-facing,Fail -spec@!opengl 1.1@polygon-mode-offset,Fail -spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail 
-spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail -spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail -spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail -spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail -spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail -spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail -spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail -spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail -spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail -spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail -spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail -spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail -spec@!opengl 1.1@read-front,Fail -spec@!opengl 1.1@read-front clear-front-first,Fail -spec@!opengl 1.1@read-front clear-front-first samples=2,Fail -spec@!opengl 1.1@read-front clear-front-first samples=4,Fail -spec@!opengl 1.1@read-front samples=2,Fail -spec@!opengl 1.1@read-front samples=4,Fail -spec@!opengl 1.2@copyteximage 3d,Fail -spec@!opengl 2.0@depth-tex-modes-glsl,Fail -spec@!opengl 2.0@gl-2.0-edgeflag,Fail -spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail -spec@!opengl 2.1@pbo,Fail -spec@!opengl 2.1@pbo@test_polygon_stip,Fail -spec@!opengl 2.1@polygon-stipple-fs,Fail -spec@!opengl 3.0@sampler-cube-shadow,Fail -spec@!opengl 3.2@gl-3.2-adj-prims cull-back pv-first,Fail -spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-first,Fail -spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-first,Fail -spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-first,Fail -spec@!opengl 3.2@gl-3.2-adj-prims pv-first,Fail -spec@!opengl es 2.0@glsl-fs-pointcoord,Fail 
-spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail -spec@arb_depth_texture@depth-tex-modes,Fail -spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail -spec@arb_framebuffer_object@fbo-gl_pointcoord,Fail -spec@arb_get_program_binary@restore-sso-program,Fail -spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail -spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail -spec@arb_point_parameters@arb_point_parameters-point-attenuation,Fail -spec@arb_point_parameters@arb_point_parameters-point-attenuation@Aliased combinations,Fail -spec@arb_point_parameters@arb_point_parameters-point-attenuation@Antialiased combinations,Fail -spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail -spec@arb_point_sprite@arb_point_sprite-mipmap,Fail -spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail -spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail -spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail -spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail -spec@arb_sample_shading@interpolate-at-sample-position 2,Fail -spec@arb_sample_shading@interpolate-at-sample-position 4,Fail -spec@arb_sample_shading@samplemask 2,Fail -spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail -spec@arb_sample_shading@samplemask 2@noms partition,Fail -spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail -spec@arb_sample_shading@samplemask 2 all,Fail -spec@arb_sample_shading@samplemask 2 all@0.250000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail 
-spec@arb_sample_shading@samplemask 2 all@noms partition,Fail -spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail -spec@arb_sample_shading@samplemask 4,Fail -spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail -spec@arb_sample_shading@samplemask 4@noms partition,Fail -spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail -spec@arb_sample_shading@samplemask 4 all,Fail -spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail -spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail -spec@arb_sample_shading@samplemask 4 all@noms partition,Fail -spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail -spec@arb_seamless_cube_map@arb_seamless_cubemap,Fail -spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail -spec@arb_texture_cube_map_array@arb_texture_cube_map_array-sampler-cube-array-shadow,Fail -spec@arb_texture_float@fbo-blending-formats,Fail -spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY16F_ARB,Fail -spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY32F_ARB,Fail -spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE16F_ARB,Fail -spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE32F_ARB,Fail -spec@arb_texture_float@fbo-blending-formats@GL_RGB16F,Fail -spec@arb_texture_float@fbo-blending-formats@GL_RGB32F,Fail -spec@arb_texture_rg@multisample-fast-clear gl_arb_texture_rg-int,Fail -spec@arb_texture_view@rendering-formats,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16UI,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_RG8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R32UI as 
GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_R32UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGB10_A2UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RGB10_A2,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_RG8I,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_RG8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as 
GL_R32UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGB10_A2UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32UI,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16F as GL_RG32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16I,Fail 
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGB10_A2UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear 
GL_RGBA8UI as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16F,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16_SNORM,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2UI,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8I,Fail -spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8UI,Fail -spec@egl 1.4@egl-copy-buffers,Fail -spec@egl 1.4@eglterminate then unbind context,Fail -spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail -spec@egl_khr_surfaceless_context@viewport,Fail -spec@egl_mesa_configless_context@basic,Fail -spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail -spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail -spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail -spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail 
-spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail -spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail -spec@ext_framebuffer_multisample@enable-flag,Fail -spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail -spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail -spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail -spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail -spec@ext_framebuffer_multisample@sample-coverage 2 non-inverted,Fail -spec@ext_framebuffer_multisample@sample-coverage 4 non-inverted,Fail -spec@ext_framebuffer_object@fbo-blending-formats,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY12,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY16,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY4,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY8,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_LUMINANCE12,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_LUMINANCE16,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB10,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB12,Fail -spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB16,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export-tex,Fail 
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-intel_external_sampler_only,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_attributes,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_hints,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-missing_attributes,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-reimport-bug,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail 
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-unsupported_format,Fail -spec@ext_packed_float@query-rgba-signed-components,Fail -spec@ext_texture_integer@multisample-fast-clear gl_ext_texture_integer,Fail -spec@ext_texture_snorm@fbo-blending-formats,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY16_SNORM,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY8_SNORM,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY_SNORM,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_LUMINANCE16_SNORM,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_LUMINANCE8_SNORM,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_LUMINANCE_SNORM,Fail -spec@ext_texture_snorm@fbo-blending-formats@GL_RGB16_SNORM,Fail -spec@ext_texture_swizzle@depth_texture_mode_and_swizzle,Fail -spec@ext_transform_feedback2@counting with pause,Fail -spec@ext_transform_feedback@generatemipmap prims_generated,Fail -spec@intel_performance_query@intel_performance_query-issue_2235,Fail -spec@khr_texture_compression_astc@array-gl,Fail -spec@khr_texture_compression_astc@array-gl@12x12 Block Dim,Fail -spec@khr_texture_compression_astc@array-gl@5x5 Block Dim,Fail -spec@khr_texture_compression_astc@miptree-gl ldr,Fail -spec@khr_texture_compression_astc@miptree-gl ldr@LDR Profile,Fail -spec@khr_texture_compression_astc@miptree-gl srgb,Fail -spec@khr_texture_compression_astc@miptree-gl srgb@sRGB decode,Fail -spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail -spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail -spec@khr_texture_compression_astc@miptree-gl srgb-sd,Fail -spec@khr_texture_compression_astc@miptree-gl srgb-sd@sRGB skip decode,Fail -spec@khr_texture_compression_astc@miptree-gles ldr,Fail -spec@khr_texture_compression_astc@miptree-gles ldr@LDR Profile,Fail -spec@khr_texture_compression_astc@miptree-gles srgb,Fail -spec@khr_texture_compression_astc@miptree-gles srgb@sRGB decode,Fail 
-spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail -spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail -spec@khr_texture_compression_astc@miptree-gles srgb-sd,Fail -spec@khr_texture_compression_astc@miptree-gles srgb-sd@sRGB skip decode,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl ldr,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl ldr@LDR Profile,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb@sRGB decode,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gles ldr,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gles ldr@LDR Profile,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb@sRGB decode,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail -spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail -spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail -spec@arb_shader_image_load_store@early-z,Fail -spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail -spec@arb_shader_image_load_store@indexing,Fail -spec@arb_shader_image_load_store@indexing@Compute shader/dynamically uniform indexing test,Fail -spec@arb_shader_image_load_store@indexing@Fragment shader/dynamically uniform indexing test,Fail -spec@arb_shader_image_load_store@indexing@Geometry shader/dynamically uniform indexing test,Fail -spec@arb_shader_image_load_store@indexing@Tessellation control shader/dynamically uniform indexing test,Fail -spec@arb_shader_image_load_store@indexing@Tessellation evaluation shader/dynamically uniform 
indexing test,Fail -spec@arb_shader_image_load_store@indexing@Vertex shader/dynamically uniform indexing test,Fail -spec@arb_shader_image_load_store@invalid,Fail -spec@arb_shader_image_load_store@invalid@imageLoad/incompatible format test/imageBuffer,Fail -spec@khr_texture_compression_astc@array-gles,Fail -spec@khr_texture_compression_astc@array-gles@12x12 Block Dim,Fail -spec@khr_texture_compression_astc@array-gles@5x5 Block Dim,Fail -spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3,Fail -spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3@oes_egl_image_external_essl3_imageLoad,Fail -spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3@oes_egl_image_external_essl3_imageStore,Fail -spec@oes_texture_view@rendering-formats,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_RG8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail 
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as 
GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_R32I,Fail 
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_R32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGB10_A2UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as 
GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear 
GL_RG32I as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_RG8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as 
GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGB10_A2UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear 
GL_RGB10_A2UI as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16F as GL_RG32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16_SNORM,Fail 
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as 
GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGB10_A2UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32I,Fail 
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16F,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16_SNORM,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2UI,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8I,Fail -spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8UI,Fail - - -#literally no driver can pass these -spec@!opengl 1.0@rasterpos,Fail -spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail -spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail -spec@arb_direct_state_access@gettextureimage-formats,Crash -spec@ext_texture_integer@fbo-integer,Fail - -#these need format conversions that gallium doesn't implement yet -spec@arb_texture_buffer_object@formats (fs- arb),Crash -spec@arb_texture_buffer_object@formats (vs- arb),Crash diff --git a/src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt b/src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt deleted file mode 100644 index e2b5fddf8fd..00000000000 --- a/src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt +++ /dev/null @@ -1 +0,0 @@ -spec@khr_debug@push-pop-group_gl.* diff --git a/src/gallium/drivers/zink/ci/traces-zink-restricted.yml b/src/gallium/drivers/zink/ci/traces-zink-restricted.yml new file mode 100644 index 00000000000..7ee2062e624 --- /dev/null +++ b/src/gallium/drivers/zink/ci/traces-zink-restricted.yml @@ -0,0 +1,111 @@ +%YAML 1.2 +--- +traces-db: + download-url: 
"http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-private/" + +traces: + AmnesiaTDD/Amnesia-f700-v2.trace: + gl-zink-anv-tgl: + checksum: c0a3a735ce4dcc394af8bef0289ba8b1 + Anna/Anna-f692-v2.trace: + gl-zink-anv-tgl: + label: [skip] + text: "trace contains no swapbuffers, so no frames recorded" + Antichamber/antichamber-f240-v2.trace: + gl-zink-anv-tgl: + checksum: 55e105b22656add7d16efac2bcad82f5 + text: "line widths look wrong compared to freedreno" + Cradle/cradle-f3000-v2.trace: + gl-zink-anv-tgl: + label: [skip, slow] + ICEDarkness/ICEDarkness-f230-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: 64ac14b2a04d510e470fb2e06b039b42 + text: "note that this trace is stable on freedreno" + LifelessPlanet/LifelessPlanet-f420-v2.trace: + gl-zink-anv-tgl: + checksum: defec084a01f13e5cf01147bcfd235e6 + MetroLLRedux/metro-ll-redux-kf480-v2.trace: + gl-zink-anv-tgl: + label: [skip, slow] + OilRush/OilRush-f14000-v2.trace: + gl-zink-anv-tgl: + label: [skip] + text: "requires allow_glsl_extension_directive_midshader" + Osmos/Osmos-f2660-v2.trace: + gl-zink-anv-tgl: + checksum: 17daf7dc20dd74186d04eb54497a1690 + PenumbraOverture/penumbra-ov-s0-2-864-f1500-v2.trace: + gl-zink-anv-tgl: + checksum: fff50b2eb306641d90b4249542d437d1 + ShadowWarrior/ShadowWarrior-f3952-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: 826f966a52bc956644bf41562aa9c686 + text: |- + This trace is flaky on freedreno too. 
+ SirYouAreBeingHunted/sir-f750-v2.trace: + gl-zink-anv-tgl: + checksum: 68611fd9f51e5ef5d2eb4417031a379f + SpecOps/specops-s0-1088-f1300-v2.trace: + gl-zink-anv-tgl: + label: [skip, broken, flakes] + checksum: e4037172a61efe23a67b5cc9ea9960bb + text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986 + Superhot/superhot-f8100-v2.trace: + gl-zink-anv-tgl: + label: [skip, broken, flakes] + checksum: 3f4ce060d0306b639565f8705abdea26 + text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986 + TheRavenRemastered/Raven-f10900-v2.trace: + gl-zink-anv-tgl: + checksum: 64243ccd048cbbfc0649e8f3c5b5cdb3 + TombRaider2013/TombRaider-f1430-v2.trace: + gl-zink-anv-tgl: + label: [crash] + text: |- + ../src/intel/isl/isl_storage_image.c:196: isl_lower_storage_image_format: Assertion `!Unknown image format' failed. + where the format is ISL_FORMAT_B8G8R8A8_UNORM. + Witcher2/witcher2-s0-1970-f2038-v2.trace: + gl-zink-anv-tgl: + label: [skip] + text: "many errors like 'GL_INVALID_VALUE in glBindBufferRange(offset misaligned 144/32)'" + alien-isolation/AlienIsolation.bin.1-trim--k-f2000-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip, slow] + text: "Slow, just seems to display a bit of text and no gameplay, anyway (radeonsi or iris)" + civilization-v/CivilizationV-trim--s705-761-f762-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip, broken, flakes] + checksum: 82e6d93321ab146d758f2d60a48f265d + text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986 + golf-with-your-friends/GolfWithYourFriends-trim--f1070-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: cd01820de77063f0397f26d6cd747d1c + text: "has caused ../src/gallium/drivers/zink/zink_context.c:3773: zink_wait_on_batch: Assertion `batch_id' failed." 
+ hollow-knight/HollowKnight-trim--f2020-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: 78f9122c5dcd89826efe421fa626971c + text: "note that this trace is stable on freedreno" + ksp/KSP-trim--f4800-v20201203-v2.trace: + gl-zink-anv-tgl: + checksum: 28b08ee598387fdc58b6e0e92261e1d3 + overcooked2/Overcooked2-trim--f3301-v20201203-v2.trace: + gl-zink-anv-tgl: + checksum: 2d342febd76eb2b33e0496e5ed57e124 + plague-inc-evolved/PlagueIncEvolved-trim--f1200-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip] + text: "renders black in CI, freedreno CI has unspecified trouble with it too" + slime-rancher/SlimeRancher-trim--f970-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip] + text: "renders black in CI, freedreno CI has unspecified trouble with it too" + stellaris/Stellaris-trim--f722-v20201203-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: 1c263695e5bdfcd622f26292a3b2a10e + text: "looks good but checksum keeps changing" diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml new file mode 100644 index 00000000000..0bd060ef98a --- /dev/null +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -0,0 +1,144 @@ +%YAML 1.2 +--- +traces-db: + download-url: "http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public/" + +traces: + 0ad/0ad-v2.trace: + gl-zink-anv-tgl: + label: [skip, broken, flakes] + checksum: 1da0ecf4034a81aa16e7984b75368aec + text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986 + behdad-glyphy/glyphy-v2.trace: + gl-zink-anv-tgl: + checksum: b743608724c13bc4105e95385fc2b810 + blender/blender-demo-cube_diorama.trace: + gl-zink-anv-tgl: + label: [skip, flakes, broken] + checksum: 7b3ebdb5a4a8282ff564a4f14e7791a4 + text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986 + blender/blender-demo-ellie_pose.trace: + gl-zink-anv-tgl: + label: [skip, broken, flakes] + checksum: 9b5090a236350f04cb2a61c5f0c0fe0f + text: 
https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986 + glxgears/glxgears-2-v2.trace: + gl-zink-anv-tgl: + checksum: f53ac20e17da91c0359c31f2fa3f401e + gputest/furmark-v2.trace: + gl-zink-anv-tgl: + checksum: 7f513bceca18b6f44049bc5a690df235 + gputest/triangle-v2.trace: + gl-zink-anv-tgl: + checksum: 5f694874b15bcd7a3689b387c143590b + godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc: + gl-zink-anv-tgl: + label: [skip] + checksum: dbe1de4e2e812413f173ea6c423117ff + text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" + gputest/pixmark-julia-fp32-v2.trace: + gl-zink-anv-tgl: + checksum: fbf5e44a6f46684b84e5bb5ad6d36c67 + gputest/pixmark-julia-fp64-v2.trace: + gl-zink-anv-tgl: + checksum: 1760aea00af985b8cd902128235b08f6 + gputest/pixmark-volplosion-v2.trace: + gl-zink-anv-tgl: + checksum: 7e49248ad9dc4c052c04b11246c4bd33 + text: Different rendering from iris, but still looks correct (common result with this trace) + gputest/plot3d-v2.trace: + gl-zink-anv-tgl: + checksum: 56f11d133f72712a6df13855ec00cdb0 + gputest/tessmark-v2.trace: + gl-zink-anv-tgl: + checksum: 46e05521eca33c2720ba14c0ea6c9066 + humus/AmbientAperture-v2.trace: + gl-zink-anv-tgl: + checksum: 0f3b7351a84e1e6f15430f8766af4b4c + humus/Portals-v2.trace: + gl-zink-anv-tgl: + checksum: a37699d09e61a842fc909f0c4fb72cf1 + humus/CelShading-v2.trace: + gl-zink-anv-tgl: + checksum: 68f20f660b9d812083066342398fe1b0 + humus/DynamicBranching3-v2.trace: + gl-zink-anv-tgl: + checksum: 0eb6c37cb88b48513e217012edf1ad32 + humus/HDR-v2.trace: + gl-zink-anv-tgl: + checksum: b09d83a5887b19ceaaaf0ac69c6a0af4 + humus/RaytracedShadows-v2.trace: + gl-zink-anv-tgl: + checksum: 8c053a06021678e28bfffd68705c6293 + humus/VolumetricFogging2-v2.trace: + gl-zink-anv-tgl: + checksum: 354a0046d81981a5227691fd8401d8ef + neverball/neverball-v2.trace: + gl-zink-anv-tgl: + checksum: 476a3e154a2564c9b136705cfdcf36de + paraview/pv-manyspheres-v2.trace: + 
gl-zink-anv-tgl: + checksum: 190153b6437f7063a6853ca94e5914f2 + paraview/pv-waveletcontour-v2.trace: + gl-zink-anv-tgl: + checksum: be4aba8a24e2bd2beb068d0c9c89dfcb + paraview/pv-waveletvolume-v2.trace: + gl-zink-anv-tgl: + checksum: b36c25e52624cbf8dab73b6acecb8e84 + pathfinder/demo-v2.trace: + gl-zink-anv-tgl: + checksum: 558c3d2b1b7acc782b3908c579ce0ce8 + pathfinder/canvas_moire-v2.trace: + gl-zink-anv-tgl: + checksum: 1706229fca06c1d7946ebc94e3b0a66d + pathfinder/canvas_text_v2-v2.trace: + gl-zink-anv-tgl: + checksum: c824fcadd028eba50d9911ebe1a3f823 + ror/ror-default.trace: + gl-zink-anv-tgl: + checksum: d7b07cb1f6fdc6949bdaf84d2173e24b + supertuxkart/supertuxkart-antediluvian-abyss.rdc: + gl-zink-anv-tgl: + label: [skip] + checksum: 0af2faa0d9183c1bc4dc7612befe1f0a + text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" + supertuxkart/supertuxkart-menu.rdc: + gl-zink-anv-tgl: + label: [skip] + checksum: 0a4095dc7b441643a3336975b61c9e6a + text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" + supertuxkart/supertuxkart-ravenbridge-mansion.rdc: + gl-zink-anv-tgl: + label: [skip] + checksum: ca0b64f1a62e01765146be8391eae636 + text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" 
+ unvanquished/unvanquished-lowest.trace: + gl-zink-anv-tgl: + checksum: 7789205e8b4d160dc81e3684f0627a38 + unvanquished/unvanquished-ultra.trace: + gl-zink-anv-tgl: + checksum: 026dde18e934e7ce3e36eb13ea8e975c + valve/counterstrike-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: 148ec1105f5f14c90cb756a178cef264 + text: "note that this trace is stable on freedreno" + valve/counterstrike-source-v2.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: d5eb7d064ca31cb316e853a082a3950d + text: occasional segfaults + valve/half-life-2-v2.trace: + gl-zink-anv-tgl: + checksum: 8deac48993e4515589a7165e8bd14f25 + valve/portal-2-v2.trace: + gl-zink-anv-tgl: + checksum: 3683fd5bb2224d6f4a1c47c1eab277d9 + warzone2100/warzone2100-default.trace: + gl-zink-anv-tgl: + label: [skip, flakes] + checksum: 56f1f06bdca3d5352b5e6c0c3d572f43 + text: occasional checksum change + xonotic/xonotic-keybench-high-v2.trace: + gl-zink-anv-tgl: + checksum: 659ef8c91d9eeccd0dc603b196c2577c diff --git a/src/gallium/drivers/zink/ci/zink-amdpro-fails.txt b/src/gallium/drivers/zink/ci/zink-amdpro-fails.txt new file mode 100644 index 00000000000..6a3165ccfac --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-amdpro-fails.txt @@ -0,0 +1,159 @@ +# broken vertex buffer robustness +#GTF-GL46.gtf21.GL2FixedTests.vertex_order.vertex_order,Fail +#GTF-GL46.gtf31.GL3Tests.draw_instanced.draw_instanced_vertex_attrib_stride,Fail +#GTF-GL46.gtf33.GL3Tests.instanced_arrays.instanced_arrays_stride,Fail +#KHR-GL46.tessellation_shader.tessellation_shader_triangles_tessellation.inner_tessellation_level_rounding +#KHR-GL46.vertex_attrib_64bit.limits_test,Fail +#KHR-GL46.vertex_attrib_64bit.vao,Fail +#KHR-GL46.vertex_attrib_binding.advanced-bindingUpdate,Fail +#KHR-GL46.vertex_attrib_binding.basic-inputI-case2,Fail +#KHR-Single-GL46.enhanced_layouts.varying_array_locations,Fail +#KHR-Single-GL46.enhanced_layouts.varying_locations,Fail 
+#KHR-Single-GL46.enhanced_layouts.varying_structure_locations,Fail + +# uncategorized +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_query_objects,Fail +GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_stride_pointer,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_pause_resume,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_states,Fail +KHR-GL46.direct_state_access.queries_functional,Fail +KHR-GL46.direct_state_access.vertex_arrays_attribute_binding,Fail +KHR-GL46.direct_state_access.vertex_arrays_enable_disable_attributes,Fail +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_tessLevel,Fail +KHR-GL46.tessellation_shader.tessellation_shader_point_mode.points_verification,Fail +KHR-GL46.tessellation_shader.tessellation_shader_quads_tessellation.inner_tessellation_level_rounding,Fail +KHR-GL46.tessellation_shader.tessellation_shader_triangles_tessellation.identical_triangles,Fail +KHR-GL46.tessellation_shader.vertex.vertex_spacing,Fail +KHR-GL46.texture_cube_map_array.texture_size_tesselation_con_sh,Fail +KHR-GL46.texture_cube_map_array.texture_size_tesselation_ev_sh,Fail +KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api,Fail + +# These tests mysteriously fail to allocate descriptor sets with input attachments +KHR-GL46.blend_equation_advanced.blend_all.GL_COLORBURN_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_COLORDODGE_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_DARKEN_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_DIFFERENCE_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_EXCLUSION_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_HARDLIGHT_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_COLOR_KHR_all_qualifier,Fail 
+KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_HUE_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_LUMINOSITY_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_SATURATION_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_LIGHTEN_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_MULTIPLY_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_OVERLAY_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_SCREEN_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_all.GL_SOFTLIGHT_KHR_all_qualifier,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_COLORBURN_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_COLORDODGE_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_DARKEN_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_DIFFERENCE_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_EXCLUSION_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_HARDLIGHT_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_COLOR_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_HUE_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_LUMINOSITY_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_SATURATION_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_LIGHTEN_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_MULTIPLY_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_OVERLAY_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_SCREEN_KHR,Fail +KHR-GL46.blend_equation_advanced.blend_specific.GL_SOFTLIGHT_KHR,Fail +KHR-GL46.blend_equation_advanced.test_coherency.multiplySequence,Fail + +# This test fails after mysteriously failing to create a pipeline +KHR-Single-GL46.enhanced_layouts.xfb_global_buffer,Crash + +# These pass on AMDVLK 
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_tricky_dataflow_2_vertex,Fail +KHR-GL46.shaders.loops.do_while_constant_iterations.nested_sequence_fragment,Fail +KHR-GL46.shaders.loops.do_while_constant_iterations.nested_fragment,Fail +KHR-GL46.geometry_shader.primitive_queries.primitive_queries_lines,Fail +KHR-GL46.geometry_shader.primitive_queries.primitive_queries_points,Fail +KHR-GL46.geometry_shader.primitive_queries.primitive_queries_triangles,Fail +KHR-GL46.geometry_shader.qualifiers.flat_interpolation,Fail +KHR-GL46.tessellation_shader.tessellation_shader_triangles_tessellation.degenerate_triangle,Fail +KHR-GL46.compute_shader.pipeline-post-xfb,Fail +KHR-GL46.transform_feedback.draw_xfb_stream_test,Fail +KHR-Single-GL46.enhanced_layouts.xfb_all_stages,Fail +KHR-Single-GL46.enhanced_layouts.xfb_block_member_stride,Fail +KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_block_member,Fail +KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail +KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_variable,Fail +KHR-Single-GL46.enhanced_layouts.xfb_capture_struct,Fail +KHR-Single-GL46.enhanced_layouts.xfb_explicit_location,Fail +KHR-Single-GL46.enhanced_layouts.xfb_stride,Fail +KHR-Single-GL46.enhanced_layouts.xfb_stride_of_empty_list,Fail +KHR-Single-GL46.enhanced_layouts.xfb_stride_of_empty_list_and_api,Fail +KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location,Fail +KHR-Single-GL46.enhanced_layouts.xfb_vertex_streams,Fail +KHR-GL46.transform_feedback.capture_geometry_interleaved_test,Fail +KHR-GL46.transform_feedback.capture_geometry_separate_test,Fail +KHR-GL46.transform_feedback.capture_vertex_interleaved_test,Fail +KHR-GL46.transform_feedback.capture_vertex_separate_test,Fail +KHR-GL46.transform_feedback.query_geometry_interleaved_test,Fail +KHR-GL46.transform_feedback.query_geometry_separate_test,Fail +KHR-GL46.transform_feedback.query_vertex_interleaved_test,Fail 
+KHR-GL46.transform_feedback.query_vertex_separate_test,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_basic_outline,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_double_precision,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_geometry,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_geometry_primitive_types,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_interleaved,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_max_interleaved,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_max_separate,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_misc,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_overflow,Fail +GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_vertex_id,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_basic,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_framebuffer,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_two_buffers,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_basic_outline,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_geometry_instanced,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_multiple_streams,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_overflow,Fail +GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_queried,Fail +GTF-GL46.gtf42.GL3Tests.transform_feedback_instanced.transform_feedback_instanced_basic,Fail +GTF-GL46.gtf42.GL3Tests.transform_feedback_instanced.transform_feedback_instanced_conditional_render,Fail +GTF-GL46.gtf42.GL3Tests.transform_feedback_instanced.transform_feedback_instanced_streams,Fail +KHR-GL46.texture_buffer.texture_buffer_operations_transform_feedback,Fail 
+GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_attrib,Fail +GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_bgra,Fail +GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_conversion,Fail +GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_divisor,Fail +KHR-GL46.direct_state_access.buffers_functional,Fail +KHR-GL46.direct_state_access.vertex_arrays_attribute_format,Fail +KHR-GL46.direct_state_access.vertex_arrays_vertex_buffers,Fail +KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components,Fail +KHR-GL46.gpu_shader_fp64.fp64.named_uniform_blocks,Fail +KHR-GL46.shader_atomic_counters.advanced-usage-multi-stage,Fail +KHR-GL46.shaders.loops.do_while_constant_iterations.nested_sequence_vertex,Fail +KHR-GL46.shaders.loops.do_while_constant_iterations.nested_tricky_dataflow_2_fragment,Fail +KHR-GL46.shaders.loops.do_while_constant_iterations.nested_vertex,Fail +KHR-GL46.texture_buffer.texture_buffer_texture_buffer_range,Fail +KHR-GL46.texture_cube_map_array.texture_size_geometry_sh,Fail +KHR-GL46.texture_cube_map_array.texture_size_vertex_sh,Fail + +# VKCTS coverage gap: fails on all non-mesa drivers + +# Passes on AMDVLK, fails on PRO if not in a specific caselist order +KHR-GL46.direct_state_access.vertex_arrays_attribute_binding_divisor,Fail + +# No VKCTS coverage, no pass +KHR-GL46.cull_distance.coverage,Fail +KHR-GL46.cull_distance.functional,Fail + +# VK_EXT_image_2d_view_of_3d +KHR-GL46.shader_image_load_store.non-layered_binding,Fail + +# Stencil sampling is apparently broken +KHR-GL46.texture_view.view_sampling,Fail + +GTF-GL46.gtf31.GL3Tests.uniform_buffer_object.uniform_buffer_object_max_uniform_block_size,Fail + +KHR-GL46.direct_state_access.vertex_arrays_element_buffer,Fail + +# since 22.40-1577631 +GTF-GL46.gtf30.GL3Tests.half_float.half_float_rendering,Fail +GTF-GL46.gtf30.GL3Tests.half_float.half_float_varying_data,Fail 
+GTF-GL46.gtf43.GL3Tests.eac_compression_signed_r11.gl_compressed_signed_r11_eac,Fail diff --git a/src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt b/src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt new file mode 100644 index 00000000000..40af4cb9bbc --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt @@ -0,0 +1,64 @@ +GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels_pbo,Fail +KHR-GL46.tessellation_shader.single.isolines_tessellation,Fail +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.data_pass_through,Fail +KHR-GL46.tessellation_shader.tessellation_invariance.invariance_rule3,Fail +KHR-GL46.tessellation_shader.tessellation_shader_point_mode.points_verification,Fail +KHR-GL46.tessellation_shader.tessellation_shader_quads_tessellation.degenerate_case,Fail +KHR-GL46.tessellation_shader.tessellation_shader_quads_tessellation.inner_tessellation_level_rounding,Fail +KHR-GL46.tessellation_shader.tessellation_shader_tessellation.gl_InvocationID_PatchVerticesIn_PrimitiveID,Fail +KHR-GL46.tessellation_shader.vertex.vertex_spacing,Fail +KHR-GL46.texture_view.view_classes,Fail +KHR-GL46.texture_view.view_sampling,Fail + +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rg16ui.renderbuffer_to_renderbuffer,Fail 
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rg16i.renderbuffer_to_renderbuffer,Fail 
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rg16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rg16i.renderbuffer_to_renderbuffer,Fail 
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rg16ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rgba8i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rgba8ui.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_64_bits.rg32i_rgba16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_64_bits.rg32ui_rgba16i.renderbuffer_to_renderbuffer,Fail +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_64_bits.rgba16ui_rgba16i.renderbuffer_to_renderbuffer,Fail + +# Piglit xfb tests +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail diff --git a/src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt b/src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt @@ -0,0 +1 @@ + diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt new file mode 100644 index 00000000000..c5b580c4f18 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt @@ -0,0 +1,593 @@ +dEQP-EGL.functional.query_context.get_current_context.rgba8888_window,Crash + +wayland-dEQP-EGL.functional.resize.surface_size.grow,Fail +wayland-dEQP-EGL.functional.resize.surface_size.shrink,Fail +wayland-dEQP-EGL.functional.resize.surface_size.stretch_width,Fail +wayland-dEQP-EGL.functional.resize.surface_size.stretch_height,Fail + +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail 
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_chromium_sync_control@conformance,Fail + +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail + +glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail +glx@glx_arb_create_context_no_error@no error,Fail +glx@glx_arb_create_context_robustness@invalid reset notification strategy,Fail + +glx@glx-swap-pixmap-bad,Fail + +# ../src/gallium/drivers/zink/zink_kopper.c:859: zink_kopper_update: Assertion `pres->bind & PIPE_BIND_DISPLAY_TARGET' failed. 
+glx@glx-visuals-depth -pixmap,Crash +glx@glx-visuals-stencil -pixmap,Crash + +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail + +spec@!opengl 1.0@gl-1.0-swapbuffers-behavior,Fail + +spec@!opengl 1.1@depthstencil-default_fb-blit samples=16,Fail +spec@!opengl 1.1@depthstencil-default_fb-blit samples=2,Fail +spec@!opengl 1.1@depthstencil-default_fb-blit samples=6,Fail +spec@!opengl 1.1@depthstencil-default_fb-blit samples=8,Fail + +spec@!opengl 1.1@line-smooth-stipple,Fail + +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail + +# Frontend issue across multiple drivers. 
+spec@!opengl 1.0@rasterpos,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail + +spec@!opengl 1.1@linestipple@Line strip,Fail +spec@!opengl 1.1@linestipple@Line loop,Fail +spec@!opengl 1.1@linestipple@Factor 2x,Fail +spec@!opengl 1.1@linestipple@Factor 3x,Fail +spec@!opengl 1.1@linestipple,Fail + +# polygon-mode: glPolygonMode(front=GL_LINE, back=GL_FILL), glCullMode(GL_NONE/GL_FALSE/GL_NO_ERROR) failed +# At position 0, found prim GL_FILL instead of GL_LINE +# polygon-mode: glPolygonMode(front=GL_POINT, back=GL_FILL), glCullMode(GL_NONE/GL_FALSE/GL_NO_ERROR) failed +# At position 1, found prim GL_POINT instead of GL_FILL +# (and more) +spec@!opengl 1.1@polygon-mode,Fail + +spec@!opengl 2.0@vs-point_size-zero,Fail + +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail + +spec@!opengl 2.1@polygon-stipple-fs,Fail + +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail + +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + + +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64,Crash +spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.3,Fail + +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail + +spec@arb_sample_locations@test,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 3- Grid: true,Fail 
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 3- Grid: true,Fail 
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail 
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- 
Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 
8- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_shading@samplemask 16 all,Fail +spec@arb_sample_shading@samplemask 16 all@0.062500 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 16 all@noms partition,Fail +spec@arb_sample_shading@samplemask 16 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 16,Fail +spec@arb_sample_shading@samplemask 16@0.062500 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16@0.250000 
mask_in_one,Fail +spec@arb_sample_shading@samplemask 16@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 16@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 16@noms partition,Fail +spec@arb_sample_shading@samplemask 16@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all,Fail +spec@arb_sample_shading@samplemask 6 
all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms partition,Fail +spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6,Fail +spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms partition,Fail +spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms partition,Fail +spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8,Fail +spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms partition,Fail +spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail + +spec@arb_shader_image_load_store@early-z,Fail +spec@arb_shader_image_load_store@early-z@occlusion 
query test/early-z pass,Fail + +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail + +# "../src/gallium/drivers/zink/zink_compiler.c:2071: assign_producer_var_io: Assertion `*reserved < MAX_VARYING' failed." +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + +# "arb_texture_buffer_object-formats: ../src/gallium/drivers/zink/zink_context.c:807: create_bvci: Assertion `bvci.format' failed." +spec@arb_texture_buffer_object@formats (vs- arb),Crash + +spec@arb_texture_buffer_object@formats (fs- arb),Crash +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- 
arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8_ALPHA8,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail 
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8_ALPHA8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8UI_EXT,Fail + +spec@egl 1.4@eglterminate then unbind context,Fail + +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail + +spec@egl_khr_surfaceless_context@viewport,Fail + +spec@egl_mesa_configless_context@basic,Fail + +spec@ext_external_objects@vk-image-overwrite@RGB 10 A2 UINT optimal: Failed to initialize OpenGL FBO/RBO,Fail + +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 16 msaa,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 16 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 16 
centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 16 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 16 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail + +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@ext_transform_feedback@tessellation triangle_fan flat_first,Fail +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail + +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail 
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail + +# since Debian 12 (bookworm) uprev +spec@arb_viewport_array@display-list,Fail +glx@glx-multi-window-single-context,Fail +spec@arb_timer_query@timestamp-get,Fail + +# SIGKILL +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-fs-getuniformdv,Crash +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-gs-getuniformdv,Crash + +spec@arb_fragment_layer_viewport@layer-no-gs,Fail + +# sparse_backing_alloc: Assertion `bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE)' failed +KHR-GL46.sparse_texture_tests.SparseTextureCommitment,Crash + +# uprev Piglit in Mesa +spec@arb_shader_storage_buffer_object@max-ssbo-size@fs,Crash +spec@arb_shader_storage_buffer_object@max-ssbo-size@vs,Crash + diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt new file mode 100644 index 00000000000..9067c2b505a --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt @@ -0,0 +1,139 @@ +glx@glx-tfp +# ??? +spec@arb_query_buffer_object@qbo +spec@arb_query_buffer_object@qbo@query-GL_PRIMITIVES_SUBMITTED-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT64_ARB + +dEQP-GLES31.functional.image_load_store.buffer.atomic.comp_swap_r32i_return_value + +# "../src/gallium/drivers/zink/zink_fence.c:130: fence_wait: Assertion `fence->batch_id' failed." 
+.*dEQP-EGL.functional.sharing.gles2.multithread.random_egl_sync.textures.copytexsubimage2d.4 +.*dEQP-EGL.functional.sharing.gles2.multithread.random_egl_sync.textures.texsubimage2d.4 + +# deqp-egl: ../src/intel/vulkan/anv_batch_chain.c:520: cmd_buffer_chain_to_batch_bo: Assertion `batch->end == current_bbo->bo->map + current_bbo->bo->size' failed. +dEQP-EGL.functional.swap_buffers_with_damage.resize.* + +# around the time of kernel warnings about dma_resv +# "ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST) +# ../src/vulkan/runtime/vk_object.h:101: vk_object_base_assert_valid: Assertion `base == NULL || base->type == obj_type' failed." +dEQP-EGL.functional.query_context.get_current_display.* +dEQP-EGL.functional.query_context.get_current_.*_window +dEQP-EGL.functional.query_context.query_context.*_window + +# Probably more of the above. +dEQP-EGL.functional.color_clears.*_window + +# MESA: error: ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST) +dEQP-EGL.functional.render.*_window + +# ../../src/xcb_in.c:746: xcb_request_check: Assertion `!reply' failed. +glx@glx-make-current + +# Expected 15 15 15, observed 0 0 0 +glx@glx-multithread-texture + +# Timed out after piglit success result? +glx@glx-visuals-depth + +# XIO: fatal IO error 11 (Resource temporarily unavailable) on X server ":0" +glx@glx-visuals-stencil + +# "../src/vulkan/wsi/wsi_common_x11.c:1387: Swapchain status changed to VK_ERROR_SURFACE_LOST_KHR +# XIO: fatal IO error 11 (Resource temporarily unavailable) on X server ":0" +# after 130 requests (128 known processed) with 0 events remaining." 
+glx@glx-multi-context-single-window + +spec@!opengl 1.1@depthstencil-default_fb-blit samples=.* + +# PIGLIT: {"subtest": {"Tessellation control-Fragment shader/'coherent' qualifier coherency test/256x256" : "pass"}} +# Probe value at (166, 0, 0, 0) +# Expected: 33.000000 33.000000 33.000000 33.000000 +# Observed: 77.000000 77.000000 77.000000 77.000000 +# PIGLIT: {"subtest": {"Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/256x256" : "fail"}} +# PIGLIT: {"subtest": {"Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/256x256" : "pass"}} +# PIGLIT: {"subtest": {"Geometry-Fragment shader/'coherent' qualifier coherency test/256x256" : "pass"}} +# Probe value at (56, 15, 0, 0) +# Expected: 33.000000 33.000000 33.000000 33.000000 +# Observed: 77.000000 77.000000 77.000000 77.000000 +# PIGLIT: {"subtest": {"Vertex-Tessellation control shader/'volatile' qualifier coherency test/256x256" : "fail"}} +spec@arb_shader_image_load_store@coherency + +spec@glsl-4.00@execution@built-in-functions@fs-inverse-dmat4 + +# Around the time of running these tests there are some warnings from the kernel in dma_resv.c, and at least +# some failures look like not waiting for rendering to complete. 
+# Because those tests sometimes pass, keep them here +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2_gles3.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2_gles3.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2_gles3.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles2.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles3.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles3.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.multi_thread.gles3.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles1.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles1.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles1.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles2.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles2.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles2.rgba8888_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles3.rgb565_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles3.rgb888_pbuffer +wayland-dEQP-EGL.functional.color_clears.single_context.gles3.rgba8888_pbuffer 
+wayland-dEQP-EGL.functional.negative_api.create_pixmap_surface +wayland-dEQP-EGL.functional.render.multi_thread.gles2_gles3.rgb565_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles2_gles3.rgb888_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles2_gles3.rgba8888_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgb565_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgb888_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles3.rgb565_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles3.rgb888_pbuffer +wayland-dEQP-EGL.functional.render.multi_thread.gles3.rgba8888_pbuffer +wayland-dEQP-EGL.functional.render.single_context.gles2.rgb565_pbuffer +wayland-dEQP-EGL.functional.render.single_context.gles2.rgb888_pbuffer +wayland-dEQP-EGL.functional.render.single_context.gles2.rgba8888_pbuffer +wayland-dEQP-EGL.functional.render.single_context.gles3.rgb565_pbuffer +wayland-dEQP-EGL.functional.render.single_context.gles3.rgb888_pbuffer +wayland-dEQP-EGL.functional.render.single_context.gles3.rgba8888_pbuffer + +# Everything in wayland EGL is flaking with crashes since at least early 2023-09 +# Possibly https://gitlab.freedesktop.org/mesa/mesa/-/issues/9577 +wayland-dEQP-EGL.functional.* + +spec@arb_tessellation_shader@execution@variable-indexing@tcs-patch-vec4-index-wr + +spec@arb_timer_query@timestamp-get +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49554086 +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49564619 +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49529964 +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49530854 +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49445862 +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49588884 
+glx@glx_ext_no_config_context@no fbconfig + +# See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25861#note_2140498 +spec@ext_timer_query@time-elapsed + +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/53978300 +spec@!opengl 2.0@occlusion-query-discard +spec@arb_shader_storage_buffer_object@max-ssbo-size@fs +spec@arb_shader_storage_buffer_object@max-ssbo-size@fsexceed +spec@arb_shader_storage_buffer_object@max-ssbo-size@vs +spec@arb_shader_storage_buffer_object@max-ssbo-size@vsexceed + +# uprev Piglit in Mesa +spec@!opengl 1.1@depthstencil-default_fb-blit samples=16 +spec@!opengl 1.1@depthstencil-default_fb-blit samples=6 +spec@!opengl 1.1@depthstencil-default_fb-blit samples=8 +spec@arb_query_buffer_object@.*CPU_READ_BEFORE.* +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/256x256 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-modifiers +spec@ext_timer_query@time-elapsed + diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt new file mode 100644 index 00000000000..1d42c0bef49 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt @@ -0,0 +1,21 @@ +# Skip some >1min tests in pre merge. These will be covered in the nightly jobs. 
+KHR-GL46.packed_pixels.varied_rectangle.* +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize +KHR-Single-GL46.arrays_of_arrays_gl.SizedDeclarationsPrimitive +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineArgumentAliasing2 +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineArgumentAliasing3 +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineArgumentAliasing4 +KHR-Single-GL46.enhanced_layouts.ssb_member_invalid_offset_alignment +KHR-Single-GL46.enhanced_layouts.uniform_block_member_invalid_offset_alignment +KHR-Single-GL46.enhanced_layouts.varying_array_components +KHR-Single-GL46.enhanced_layouts.varying_structure_locations +KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api +KHR-Single-GL46.enhanced_layouts.xfb_stride +spec@!opengl 1.1@copypixels-sync +spec@!opengl 1.1@draw-sync +spec@arb_compute_shader@local-id-explosion +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-fs-getuniformdv +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-gs-getuniformdv +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-vs-getuniformdv +spec@ext_texture_array@fbo-depth-array +spec@ext_texture_lod_bias@lodbias diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt new file mode 100644 index 00000000000..241cdee87b3 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt @@ -0,0 +1,48 @@ +#these take too long to finish in ci +KHR-GL46.texture_swizzle.smoke +KHR-GL46.copy_image.functional +KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat4 +KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat3 +KHR-GL46.gpu_shader_fp64.fp64.varyings +KHR-GL46.texture_swizzle.functional +KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage +KHR-Single-GL46.arrays_of_arrays_gl.InteractionFunctionCalls2 +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2 +KHR-Single-GL46.enhanced_layouts.ssb_member_align_non_power_of_2 
+KHR-Single-GL46.enhanced_layouts.uniform_block_member_align_non_power_of_2 +KHR-Single-GL46.enhanced_layouts.xfb_global_buffer +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls1 +KHR-GL46.direct_state_access.framebuffers_texture_layer_attachment +KHR-GL46.sparse_buffer_tests.BufferStorageTest + +# Definitely shouldn't take this long, but times out at 3 minutes. +glx@glx-visuals-depth$ +glx@glx-visuals-stencil$ + +.*built-in-functions@.*dmat[34].* +.*built-in-functions@.*-op-div-.*dmat.* +.*built-in-functions@fs-mod-dvec4-dvec4 +.*fs-isnan-dvec +.*gs-isnan-dvec +.*vs-isnan-dvec +.*conversion-implicit.*dmat.* + +spec@egl_nok_texture_from_pixmap@basic + +# implicit modifier selection is not currently supported +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt new file mode 100644 
index 00000000000..6615ba5181e --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt @@ -0,0 +1,148 @@ +# Please include a comment with the log message and a testcase triggering each +# VUID at the bottom of the file. +khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,UNASSIGNED-CoreValidation-Shader-InconsistentSpirv,VUID-vkDestroyDevice-device-00378,VUID-VkShaderModuleCreateInfo-pCode-01377,VUID-RuntimeSpirv-Location-06272,VUID-VkGraphicsPipelineCreateInfo-renderPass-06590,VUID-VkGraphicsPipelineCreateInfo-Geometry-07725,VUID-vkCmdDrawMultiIndexedEXT-format-07753,UNASSIGNED-CoreValidation-Shader-InterfaceTypeMismatch,VUID-RuntimeSpirv-OpEntryPoint-07754,VUID-VkShaderModuleCreateInfo-pCode-01379,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-VkGraphicsPipelineCreateInfo-topology-00737,VUID-VkGraphicsPipelineCreateInfo-pStages-00736,VUID-vkCmdCopyImage-srcImage-07743,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-vkCmdDrawMultiEXT-pDepthAttachment-06181,VUID-vkCmdDrawMultiEXT-pStencilAttachment-06182,VUID-vkCmdDrawMultiIndexedEXT-pDepthAttachment-06181,VUID-vkCmdDrawMultiIndexedEXT-pStencilAttachment-06182,VUID-vkDestroyDevice-device-05137,VUID-vkCmdDrawMultiEXT-Input-08734 +khronos_validation.report_flags = error +khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK +VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT +khronos_validation.printf_buffer_size = 40960 +khronos_validation.printf_to_stdout = true +khronos_validation.log_filename = stdout + +# VUID-VkPhysicalDeviceProperties2-pNext-pNext +# VUID-VkDeviceCreateInfo-pNext-pNext +# never error due to unrecognized extensions + +# KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location: +# [ UNASSIGNED-CoreValidation-Shader-InconsistentSpirv ] Object 0: handle = +# 0x556fd2b6e190, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0x6bbb14 | SPIR-V +# module not valid: 
The Component Type of Vector 1 must be the same as ResultType. + +# Intermittent, probably the end of a caselist so not tied to a specific known test: +# [ VUID-vkDestroyDevice-device-00378 ] Object 0: handle = 0x55c458362820, type = +# VK_OBJECT_TYPE_DEVICE; Object 1: handle = 0x2a7f70000000053, type = +# VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT; | MessageID = 0x71500fba | OBJ ERROR : For +# VkDevice 0x55c458362820[], VkDescriptorSetLayout 0x2a7f70000000053[] has not +# been destroyed. The Vulkan spec states: All child objects created on device must +# have been destroyed prior to destroying device +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkDestroyDevice-device-00378) + +# KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location: +# [ VUID-VkShaderModuleCreateInfo-pCode-01377 ] Object 0: handle = +# 0x563a677573b0, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0x5821254b | SPIR-V +# module not valid: The Component Type of Vector 1 must be the same as ResultType. 
+# %142 = OpVectorShuffle %v2uint %141 %141 0 1 +# The Vulkan spec states: pCode must point to either valid SPIR-V code, formatted and packed as described by the Khronos SPIR-V Specification or valid GLSL code which must be written to the GL_KHR_vulkan_glsl extension specification (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkShaderModuleCreateInfo-pCode-01377) + +# KHR-GL46.geometry_shader.limits.max_output_components +# [ VUID-RuntimeSpirv-Location-06272 ] Object 0: handle = 0x3a6cbb0000000025, +# type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID = 0xa3614f8b | Invalid Pipeline +# CreateInfo State: Fragment shader exceeds +# VkPhysicalDeviceLimits::maxFragmentInputComponents of 116 components by 4 +# components The Vulkan spec states: The sum of Location and the number of +# locations the variable it decorates consumes must be less than or equal to the +# value for the matching {ExecutionModel} defined in Shader Input and Output +# Locations +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-Location-06272) + +# KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec4: +# [ VUID-VkGraphicsPipelineCreateInfo-renderPass-06590 ] Object 0: handle = +# 0x5647ce5e6ac0, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0xd835f001 | +# vkCreateGraphicsPipelines(): pCreateInfos[0] does contains fragment shader state +# and no fragment output state, pDepthStencilState does not point to a valid +# VkPipelineDepthStencilStateCreateInfo struct. 
The Vulkan spec states: If +# renderPass is VK_NULL_HANDLE and the pipeline is being created with fragment +# shader state but not fragment output interface state, pDepthStencilState must be +# a valid pointer to a valid VkPipelineDepthStencilStateCreateInfo structure +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06590) + +# KHR-GL46.shader_image_size.basic-nonMS-gs-float: +# [ VUID-VkGraphicsPipelineCreateInfo-Geometry-07725 ] Object 0: handle = +# 0xa4ad110000039145, type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID = +# 0x64e29d24 | vkCreateGraphicsPipelines(): shaderTessellationAndGeometryPointSize +# is enabled, but PointSize is not written in the Geometry shader. The Vulkan spec +# states: If the pipeline is being created with a Geometry {ExecutionModel}, uses +# the OutputPoints {ExecutionMode}, and shaderTessellationAndGeometryPointSize is +# enabled, a PointSize decorated variable must be written to +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkGraphicsPipelineCreateInfo-Geometry-07725) + +# KHR-GL46.sample_variables.mask.rgba8i.samples_8.mask_3: +# [ VUID-vkCmdDrawMultiIndexedEXT-format-07753 ] Object 0: handle = +# 0x535b660000000043, type = VK_OBJECT_TYPE_DESCRIPTOR_SET; | MessageID = +# 0xd35852d3 | vkCmdDrawMultiIndexedEXT: Descriptor set VkDescriptorSet +# 0x535b660000000043[] in binding #128 index 0 requires SINT component type, but +# bound descriptor format is VK_FORMAT_R8G8B8A8_UNORM (VkImageView +# 0x9638f80000000036[]). 
The Vulkan spec states: If a VkImageView is accessed as a +# result of this command, then the image view's format must match the numeric +# format from the Sampled Type operand of the OpTypeImage as described in the +# SPIR-V Sampled Type column of the Interpretation of Numeric Format table +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdDrawMultiIndexedEXT-format-07753) + +# KHR-Single-GL46.enhanced_layouts.varying_locations: +# [ UNASSIGNED-CoreValidation-Shader-InterfaceTypeMismatch ] Object 0: handle = +# 0x5eb05e000000003b, type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID = +# 0xb6cf33fe | Type mismatch on location 0.0, between geometry shader and fragment +# shader: 'ptr to Output vec2 of uint64' vs 'ptr to Input struct of (vec4 of +# float32, vec4 of float32)' + +# KHR-Single-GL46.enhanced_layouts.varying_locations +# [ VUID-RuntimeSpirv-OpEntryPoint-07754 ] +# vkCreateGraphicsPipelines(): pCreateInfos[0] Type mismatch on Location 0 Component 0, between +# VK_SHADER_STAGE_GEOMETRY_BIT stage: +# pointer to Output -> +# vec2 of uint64 +# VK_SHADER_STAGE_FRAGMENT_BIT stage: +# pointer to Input -> +# struct of { +# vec4 of float32 +# vec4 of float32 +# } +# The Vulkan spec states: Any user-defined variables between the OpEntryPoint +# of two shader stages must have the same type and width for each Component +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-OpEntryPoint-07754) + +# KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location +# [ VUID-VkShaderModuleCreateInfo-pCode-01379 ] +# SPIR-V module not valid: The Component Type of Vector 1 must be the same as ResultType. 
+# %142 = OpVectorShuffle %v2uint %141 %141 0 1 + +# KHR-Single-GL46.enhanced_layouts.varying_locations +# +# VUID-RuntimeSpirv-OpEntryPoint-08743(ERROR / SPEC): msgNum: -1986897773 - +# Validation Error: [ VUID-RuntimeSpirv-OpEntryPoint-08743 ] +# vkCreateGraphicsPipelines(): pCreateInfos[0] VK_SHADER_STAGE_FRAGMENT_BIT declared input at +# Location 1 Comonent 2 but it is not an Output declared in VK_SHADER_STAGE_VERTEX_BIT +# The Vulkan spec states: Any user-defined variables shared between the OpEntryPoint of two shader stages, +# and declared with Input as its {StorageClass} for the subsequent shader stage, +# must have all Location slots and Component words declared in the preceding shader stage's +# OpEntryPoint with Output as the {StorageClass} +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-OpEntryPoint-08743) +# +# VVL bug https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5735 + + +# VUID-VkGraphicsPipelineCreateInfo-pStages-00736 +# VUID-VkGraphicsPipelineCreateInfo-topology-00737 +# spec bug https://gitlab.khronos.org/vulkan/vulkan/-/merge_requests/5916 + + +# VUID-vkCmdCopyImage-srcImage-07743 +# spec bug + +# VUID-vkCmdDrawMultiIndexedEXT-format-07753 +# KHR-GL46.shader_ballot_tests.ShaderBallotFunctionBallot +# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4488 + + +# VUID-vkCmdDrawMultiEXT-pDepthAttachment-06181 +# VUID-vkCmdDrawMultiEXT-pStencilAttachment-06182 +# VUID-vkCmdDrawMultiIndexedEXT-pDepthAttachment-06181 +# VUID-vkCmdDrawMultiIndexedEXT-pStencilAttachment-06182 +# spec issue + + +# VUID-vkDestroyDevice-device-05137 +# some kind of bug + +# VUID-vkCmdDrawMultiEXT-Input-08734 +# not sure if cts bug... 
+# KHR-GL46.direct_state_access.vertex_arrays_attribute_format diff --git a/src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt b/src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt new file mode 100644 index 00000000000..18c4d4cc59e --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt @@ -0,0 +1,7 @@ +# piglit xfb tests +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.lowp_mat4_float_vertex,Fail +dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.mediump_mat4_float_vertex,Fail +dEQP-GLES31.functional.texture.border_clamp.range_clamp.linear_float_color,Fail diff --git a/src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt b/src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt new file mode 100644 index 00000000000..11898356cb4 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt @@ -0,0 +1,63 @@ +# No GPU hang, looked like cache flushing fail. Happens on basically all format +# families. +dEQP-GLES3.functional.fbo.blit.conversion..* + +# No GPU hang, looks like one 2x2 quad was colored slightly off? +dEQP-GLES3.functional.fbo.msaa.4_samples.depth_component16 + +dEQP-GLES3.functional.transform_feedback.random_full_array_capture.separate.lines.4 + +# Lines were the wrong colors on iteration 4/10. +dEQP-GLES3.functional.transform_feedback.random.separate.lines.4 + +# Looks like maybe the end of level 0 face 5 got overwritten, others were all +# OK. source is 7 levels 65x65x9, dst is 7 levels 65x65x1 cubemap +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16i_r16i.texture3d_to_cubemap + +# level 0 face 5 again, corruption in the middle this time though. destination is 64x63, 1 level. 
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8.cubemap_to_texture2d_array +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8i.cubemap_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_r16ui.cubemap_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16ui_r16ui.cubemap_to_renderbuffer + +# "MESA: error: ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST)" +# Not preceded by a GPU hang report? One case had these faults, though: +# *** gpu fault: ttbr0=000000017505d000 iova=0000000114c89800 dir=WRITE type=TRANSLATION source=CCU (0,0,0,1) +# *** gpu fault: ttbr0=000000017505d000 iova=0000000114c5c8f0 dir=WRITE type=TRANSLATION source=UNKNOWN (0,0,0,1) +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_12x12_khr_rgba32f.cubemap_to_cubemap +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_6x5_khr_rgba32f.cubemap_to_cubemap +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba32f_srgb8_alpha8_astc_12x12_khr.texture3d_to_texture2d +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_10x5_khr_rgba32i.texture2d_array_to_cubemap +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_8x6_khr_rgba32ui.cubemap_to_cubemap + +# Found when expanding coverage on 2022/11/17, or randomly flaking +dEQP-GLES31.functional.separate_shader.random.79 +dEQP-GLES3.functional.transform_feedback.random_full_array_capture.separate.lines.4 +dEQP-GLES3.functional.texture.specification.texsubimage3d_pbo.rgba4_2d_array +dEQP-GLES3.functional.texture.format.sized.cube.rgba4_npot +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.*cubemap.* +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.*texture2d_array.* +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.*astc.*cubemap.* 
+dEQP-GLES3.functional.fbo.msaa.2_samples.depth_component16 + +# ? +dEQP-GLES31.functional.tessellation.invariance.primitive_set.isolines_fractional_odd_spacing_ccw +KHR-GLES31.core.texture_cube_map_array.color_depth_attachments +dEQP-GLES31.functional.texture.specification.texstorage3d.format.rgb5_a1_cube_array +dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_4x4_rgba.rgba_astc_4x4_khr_srgb8_alpha8_astc_4x4_khr.texture2d_array_to_texture3d +dEQP-GLES31.functional.image_load_store.2d.format_reinterpret.r32ui_r32f + +dEQP-GLES3.functional.fbo.color.tex2d.rgb565 + +# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9707 +dEQP-GLES3.functional.texture.specification.teximage2d_align.cube_rgba4_51_2 +# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49638414 +dEQP-GLES31.functional.fbo.color.texcubearray.rgb565 + +# uprev Piglit in Mesa +dEQP-GLES3.functional.fbo.blit.conversion.rgba4_to_rgb10_a2 +dEQP-GLES3.functional.fbo.blit.conversion.rgba8_to_r16f +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8.texture2d_array_to_texture3d +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8ui.texture2d_array_to_texture3d +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16ui_r16i.texture2d_array_to_texture2d_array + diff --git a/src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt b/src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt new file mode 100644 index 00000000000..5ca63552440 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt @@ -0,0 +1,4 @@ +# Can run over 60 seconds +KHR-GLES31.core.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize +KHR-GLES31.core.tessellation_shader.vertex.vertex_ordering +KHR-GLES31.core.tessellation_shader.vertex.vertex_spacing diff --git a/src/gallium/drivers/zink/ci/zink-lvp-fails.txt b/src/gallium/drivers/zink/ci/zink-lvp-fails.txt new file 
mode 100644 index 00000000000..ba7682dc29e --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-lvp-fails.txt @@ -0,0 +1,147 @@ +# #6115 +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + +# #6322 +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + +#kopper regressions/changes +spec@egl_chromium_sync_control@conformance,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail + +# this test tries to be error-compatible with nvidia. spoiler: mesa isn't, and no driver can pass it +glx@glx_arb_create_context@invalid flag,Fail + +glx@glx-swap-pixmap-bad,Fail +glx@glx-visuals-depth,Crash +glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail +glx@glx_arb_create_context_no_error@no error,Fail + +glx@glx_ext_import_context@free context,Fail +glx@glx_ext_import_context@get context id,Fail +glx@glx_ext_import_context@get current display,Fail +glx@glx_ext_import_context@import context- multi process,Fail +glx@glx_ext_import_context@import context- single process,Fail +glx@glx_ext_import_context@imported context has same context id,Fail +glx@glx_ext_import_context@make current- multi process,Fail +glx@glx_ext_import_context@make current- single process,Fail +glx@glx_ext_import_context@query context info,Fail +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 
1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.2@copyteximage 3d,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail +spec@arb_point_sprite@arb_point_sprite-mipmap,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail 
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail + +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_surfaceless_context@viewport,Fail +spec@egl_mesa_configless_context@basic,Fail +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail 
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail +spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail +spec@arb_shader_image_load_store@early-z,Fail +spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail + 
+spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-load,Crash +spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-store,Crash + +#literally no driver can pass these +spec@!opengl 1.0@rasterpos,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail + +spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail + +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +# Debian 12 CI update, see https://gitlab.freedesktop.org/mesa/mesa/-/issues/9072 +spec@ext_packed_float@multisample-formats 4 gl_ext_packed_float,Crash +spec@ext_transform_feedback@tessellation quad_strip wireframe,Crash +spec@!opengl 1.0@gl-1.0-dlist-beginend,Crash +spec@nv_texture_barrier@blending-in-shader,Crash + +spec@arb_viewport_array@display-list,Fail diff --git a/src/gallium/drivers/zink/ci/zink-lvp-flakes.txt b/src/gallium/drivers/zink/ci/zink-lvp-flakes.txt new file mode 100644 index 00000000000..a883379893e --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-lvp-flakes.txt @@ -0,0 +1,40 @@ +dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_l8_pot +spec@khr_debug@push-pop-group_gl.* +glx@glx-multi-window-single-context + +# "free(): invalid next size (fast)" +# since it's heap corruption, it may or may not appear in a particular run +spec@arb_compute_variable_group_size@local-size + +# https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20908454 +# "X connection to :99 broken (explicit kill or server shutdown)." +glx@glx-multi-context-ib-1 + +# depth visuals +glx@glx-visuals-depth +glx@glx-visuals-stencil + +# mysterious +glx@glx-shader-sharing + +spec@arb_fragment_program@no-newline +# glx-destroycontext-1: ../../src/xcb_conn.c:215: write_vec: Assertion `!c->out.queue_len' failed. 
+glx@glx-destroycontext-1 + +glx@glx-multithread-texture + +# does not happen very often, but rarely does +KHR-GL46.limits.max_fragment_interpolation_offset + +# no output timeout, probably stuck in some X11 connection thing +spec@ext_framebuffer_multisample@accuracy all_samples depth_resolve depthstencil + +# segfault in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48719777 and others +spec@ext_texture_array@texsubimage array + +# crash in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48476882 and others +KHR-GL46.layout_location.sampler_2d_shadow + +# uprev Piglit in Mesa +spec@ext_framebuffer_multisample@accuracy all_samples depth_draw small depthstencil + diff --git a/src/gallium/drivers/zink/ci/piglit-zink-lvp-skips.txt b/src/gallium/drivers/zink/ci/zink-lvp-skips.txt index 623645c36db..7226486adfb 100644 --- a/src/gallium/drivers/zink/ci/piglit-zink-lvp-skips.txt +++ b/src/gallium/drivers/zink/ci/zink-lvp-skips.txt @@ -2,16 +2,18 @@ # non-zero-length and not starting with '#', will regex match to # delete lines from the test list. Be careful. +KHR-GL32.texture_size_promotion.functional + +# this is just broken. +KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks +KHR-GL46.shader_ballot_tests.ShaderBallotFunctionRead + # ignores copied from the old runner script spec@arb_map_buffer_alignment@arb_map_buffer_alignment-map-invalidate-range -glx@glx-make-current spec@arb_timer_query.* spec@arb_sample_shading@builtin-gl-sample-mask spec@glsl-1.30@execution@tex-miplevel-selection.* -# only supported if Piglit is using GLUT -spec@!opengl 1.1@windowoverlap - # This test doesn't even seem to exist, but piglit adds it to a group...? 
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match @@ -28,4 +30,18 @@ spec@arb_compute_shader@local-id-explosion # I can't reproduce these crashes locally # even after running them in loops for 4+ hours, so disable for now -spec@arb_shader_texture_lod@execution@tex-miplevel-selection.* +.*tex-miplevel-selection.* + +# these are insanely long +KHR-GL46.copy_image.functional +KHR-GL46.texture_swizzle.smoke +KHR-GL46.texture_swizzle.functional + +# Kopper regression +glx@glx-tfp + +# These tests started hitting timeouts when we upgraded LLVM from v11 to 13 +spec@arb_texture_rg@fbo-blending-formats + +#these need format conversions that gallium doesn't implement yet +spec@arb_texture_buffer_object@formats.*arb.* diff --git a/src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt b/src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt new file mode 100644 index 00000000000..e3e3c5546df --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt @@ -0,0 +1,62 @@ +# Please include a comment with the log message and a testcase triggering each +# VUID at the bottom of the file. 
+khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiEXT-None-02699,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718,VUID-VkGraphicsPipelineCreateInfo-flags-06482,VUID-vkCmdPipelineBarrier2-None-08719 +khronos_validation.report_flags = error +khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK +VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT +khronos_validation.printf_buffer_size = 40960 +khronos_validation.printf_to_stdout = true +khronos_validation.log_filename = stdout + +# VUID-VkPhysicalDeviceProperties2-pNext-pNext +# VUID-VkDeviceCreateInfo-pNext-pNext +# never error due to unrecognized extensions + + +# KHR-GL46.shader_image_load_store.basic-allTargets-atomic: +# [ VUID-vkCmdDrawMultiEXT-viewType-07752 ] Object 0: handle = +# 0x5581c500000000d5, type = VK_OBJECT_TYPE_DESCRIPTOR_SET; | MessageID = +# 0xacde5967 | vkCmdDrawMultiEXT: Descriptor set VkDescriptorSet +# 0x5581c500000000d5[] in binding #129 index 0 requires an image view of type +# VK_IMAGE_VIEW_TYPE_CUBE but got VkImageView 0x359e9300000000cb[] which is of +# type VK_IMAGE_VIEW_TYPE_CUBE_ARRAY. 
The Vulkan spec states: If a VkImageView is +# accessed as a result of this command, then the image view's viewType must match +# the Dim operand of the OpTypeImage as described in Instruction/Sampler/Image +# View Validation +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdDrawMultiEXT-viewType-07752) + +# KHR-Single-GL46.enhanced_layouts.varying_array_locations: +# +# [ VUID-RuntimeSpirv-Location-06272 ] Object 0: handle = +# 0x8f5f070000000095, type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID = +# 0xa3614f8b | Invalid Pipeline CreateInfo State: Geometry shader exceeds +# VkPhysicalDeviceLimits::maxGeometryInputComponents of 64 components by +# 1 components The Vulkan spec states: The sum of Location and the number +# of locations the variable it decorates consumes must be less than or +# equal to the value for the matching {ExecutionModel} defined in Shader +# Input and Output Locations +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-Location-06272) + +# dEQP-GLES31.functional.separate_shader.random.69 +# +# UNASSIGNED-CoreValidation-Shader-MissingOutput(ERROR / SPEC): msgNum: 1086655814 - +# Validation Error: [ UNASSIGNED-CoreValidation-Shader-MissingOutput ] +# vkCreateGraphicsPipelines(): pCreateInfos[0] VK_SHADER_STAGE_FRAGMENT_BIT declared input at Location 0 +# Comonent 2 but it is not an Output declared in VK_SHADER_STAGE_VERTEX_BIT +# +# VUID-RuntimeSpirv-OpEntryPoint-08743(ERROR / SPEC): msgNum: -1986897773 - +# Validation Error: [ VUID-RuntimeSpirv-OpEntryPoint-08743 ] +# vkCreateGraphicsPipelines(): pCreateInfos[0] VK_SHADER_STAGE_FRAGMENT_BIT declared input at +# Location 1 Comonent 2 but it is not an Output declared in VK_SHADER_STAGE_VERTEX_BIT +# The Vulkan spec states: Any user-defined variables shared between the OpEntryPoint of two shader stages, +# and declared with Input as its {StorageClass} for the subsequent shader stage, +# must have all 
Location slots and Component words declared in the preceding shader stage's +# OpEntryPoint with Output as the {StorageClass} +# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-OpEntryPoint-08743) +# +# VVL bug https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5735 + +# VVL missing support for KHR_dynamic_rendering_local_read +# VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718 +# VUID-VkGraphicsPipelineCreateInfo-flags-06482 +# VUID-vkCmdPipelineBarrier2-None-08719 diff --git a/src/gallium/drivers/zink/ci/zink-nv-fails.txt b/src/gallium/drivers/zink/ci/zink-nv-fails.txt new file mode 100644 index 00000000000..ca151922eac --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-nv-fails.txt @@ -0,0 +1,2 @@ +KHR-GL46.sparse_texture2_tests.SparseTexture2Commitment,Fail +KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup,Fail diff --git a/src/gallium/drivers/zink/ci/zink-nv-flakes.txt b/src/gallium/drivers/zink/ci/zink-nv-flakes.txt new file mode 100644 index 00000000000..98e94fdb9df --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-nv-flakes.txt @@ -0,0 +1,2 @@ +# ooms +dEQP-GLES31.functional.draw_indirect.compute_interop.large.drawelements_separate_grid_1200x1200_drawcount_1 diff --git a/src/gallium/drivers/zink/ci/zink-nv-skips.txt b/src/gallium/drivers/zink/ci/zink-nv-skips.txt new file mode 100644 index 00000000000..3ec8c63df51 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-nv-skips.txt @@ -0,0 +1,4 @@ +# these take forever +KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components +KHR-GL46.texture_swizzle.smoke +KHR-GL46.copy_image.functional diff --git a/src/gallium/drivers/zink/ci/zink-nvk-fails.txt b/src/gallium/drivers/zink/ci/zink-nvk-fails.txt new file mode 100644 index 00000000000..6961c59b20f --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-nvk-fails.txt @@ -0,0 +1,902 @@ +# nvk sparse needs more testing +KHR-GL46.sparse_texture2_tests.SparseTexture2Commitment,Fail 
+KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup,Fail +KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail +KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail +KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail + +# regressions ? +spec@arb_bindless_texture@execution@images@ubo-named-block,Fail +spec@arb_get_texture_sub_image@arb_get_texture_sub_image-get,Fail +spec@egl_ext_device_query@conformance,Fail + +dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_rbo_4,Fail +dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_rbo_8,Fail +dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_texture_4,Fail +dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_texture_8,Fail +dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bits_unique_per_two_samples.multisample_texture_4,Fail +dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bits_unique_per_two_samples.multisample_texture_8,Fail + +glx@glx-multi-window-single-context,Fail +glx@glx-visuals-depth,Crash +glx@glx-visuals-stencil,Crash +glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail +glx@glx_arb_create_context_no_error@no error,Fail +glx@glx_arb_create_context_robustness@invalid reset notification strategy,Fail +glx@glx_ext_import_context@free context,Fail +glx@glx_ext_import_context@get context id,Fail +glx@glx_ext_import_context@get current display,Fail +glx@glx_ext_import_context@import context- multi process,Fail +glx@glx_ext_import_context@import context- single process,Fail +glx@glx_ext_import_context@imported context has same context id,Fail +glx@glx_ext_import_context@make current- multi process,Fail +glx@glx_ext_import_context@make current- single process,Fail +glx@glx_ext_import_context@query context info,Fail 
+glx@glx_ext_no_config_context@no fbconfig,Fail +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.0@rasterpos,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail +spec@!opengl 1.1@line-smooth-coverage,Fail +spec@!opengl 1.1@line-smooth-stipple,Fail +spec@!opengl 1.1@max-texture-size,Crash +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.2@copyteximage 3d,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail +spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail +spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index,Fail +spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index2,Fail +spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-non-const-uniform-index,Fail +spec@arb_arrays_of_arrays@execution@ubo@fs-const,Fail +spec@arb_arrays_of_arrays@execution@ubo@fs-const-explicit-binding,Fail 
+spec@arb_arrays_of_arrays@execution@ubo@fs-mixed-const-nonconst,Fail +spec@arb_arrays_of_arrays@execution@ubo@fs-nonconst,Fail +spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash +spec@arb_bindless_texture@execution@samplers@ubo-binding-samplers-conflict,Fail +spec@arb_buffer_storage@bufferstorage-persistent draw,Fail +spec@arb_buffer_storage@bufferstorage-persistent read,Fail +spec@arb_buffer_storage@bufferstorage-persistent_gles3 draw,Fail +spec@arb_buffer_storage@bufferstorage-persistent_gles3 read,Fail +spec@arb_fragment_layer_viewport@layer-gs-writes-out-of-range,Fail +spec@arb_fragment_layer_viewport@viewport-gs-writes-out-of-range,Fail +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail +spec@arb_gl_spirv@execution@ubo@aoa-2,Fail +spec@arb_gl_spirv@execution@ubo@array-complex,Fail +spec@arb_gl_spirv@execution@ubo@array-different-array-stride-ubo,Fail +spec@arb_gl_spirv@execution@ubo@array-inside-ubo,Fail +spec@arb_gl_spirv@execution@ubo@array-inside-ubo-copy,Fail +spec@arb_gl_spirv@execution@ubo@array-of-arrays-inside-ubo,Fail +spec@arb_gl_spirv@execution@ubo@matrix@complex,Fail +spec@arb_gl_spirv@execution@ubo@simple,Fail +spec@arb_gl_spirv@execution@ubo@two-stages,Fail +spec@arb_gl_spirv@linker@uniform@multisampler,Crash +spec@arb_gl_spirv@linker@uniform@multisampler-array,Crash +spec@arb_gpu_shader5@execution@ubo_array_indexing@fs-array-nonconst,Fail +spec@arb_gpu_shader5@execution@ubo_array_indexing@gs-array-nonconst,Fail +spec@arb_gpu_shader5@execution@ubo_array_indexing@vs-array-nonconst,Fail +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-array-copy,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-double-array-const-index,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-double-array-variable-index,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-doubles,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-doubles-float-mixed,Fail 
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-nested-struct,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-direct-1,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.1,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.2,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.3,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@gs-array-copy,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@gs-double-array-const-index,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@gs-double-array-variable-index,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@gs-doubles-float-mixed,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@gs-nested-struct,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@vs-array-copy,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@vs-double-array-const-index,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@vs-double-array-variable-index,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@vs-doubles,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@vs-doubles-float-mixed,Fail +spec@arb_gpu_shader_fp64@uniform_buffers@vs-nested-struct,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_locations@test,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 
2- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 
4- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_shading@arb_sample_shading-builtin-gl-sample-mask-mrt-alpha-to-coverage,Fail +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms 
partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 partition,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 partition,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6,Fail +spec@arb_sample_shading@samplemask 6 all,Fail +spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.125000 partition,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 partition,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 6 all@noms 
mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms partition,Fail +spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.125000 partition,Fail +spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.250000 partition,Fail +spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms partition,Fail +spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8,Fail +spec@arb_sample_shading@samplemask 8 all,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 partition,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 partition,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms partition,Fail +spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.125000 partition,Fail +spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.250000 partition,Fail +spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 
8@0.500000 partition,Fail +spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@1.000000 partition,Fail +spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms partition,Fail +spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail +spec@arb_shader_image_load_store@atomicity,Crash +spec@arb_shader_image_load_store@max-size,Fail +spec@arb_shader_image_load_store@max-size@image3D max size test/8x8x16384x1,Fail +spec@arb_shader_image_load_store@semantics,Crash +spec@arb_shader_image_size@builtin,Crash +spec@arb_shader_image_size@builtin@rgba32f/Compute/image3D max size test/8x8x16384x1,Fail +spec@arb_shader_image_size@builtin@rgba32f/Fragment/image3D max size test/8x8x16384x1,Fail +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail +spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail +spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail +spec@arb_sync@clientwaitsync-timeout,Fail +spec@arb_sync@clientwaitsync-timeout@read,Fail +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec2-index-invalid-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec2-index-rd,Crash 
+spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec4-index-rd,Crash +spec@arb_texture_buffer_object@formats (fs- arb),Crash +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8_ALPHA8,Fail +spec@arb_texture_buffer_object@formats (fs- 
arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb),Crash +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8UI_EXT,Fail 
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8_ALPHA8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail +spec@arb_texture_cube_map_array@texsubimage cube_map_array,Fail +spec@arb_texture_float@multisample-formats 2 gl_arb_texture_float,Fail +spec@arb_texture_float@multisample-formats 4 gl_arb_texture_float,Fail +spec@arb_texture_float@multisample-formats 6 gl_arb_texture_float,Fail +spec@arb_texture_float@multisample-formats 8 gl_arb_texture_float,Fail +spec@arb_texture_rg@multisample-formats 2 gl_arb_texture_rg,Fail +spec@arb_texture_rg@multisample-formats 2 gl_arb_texture_rg-float,Fail +spec@arb_texture_rg@multisample-formats 4 gl_arb_texture_rg,Fail +spec@arb_texture_rg@multisample-formats 4 gl_arb_texture_rg-float,Fail +spec@arb_texture_rg@multisample-formats 6 gl_arb_texture_rg,Fail +spec@arb_texture_rg@multisample-formats 6 gl_arb_texture_rg-float,Fail +spec@arb_texture_rg@multisample-formats 8 gl_arb_texture_rg,Fail +spec@arb_texture_rg@multisample-formats 8 gl_arb_texture_rg-float,Fail +spec@arb_timer_query@query gl_timestamp,Fail +spec@arb_uniform_buffer_object@2-buffers-bug,Fail +spec@arb_uniform_buffer_object@execution@fs-array-of-structs-std140-indirect,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat2x3,Fail 
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat3x4,Fail 
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-array-array-struct-array,Fail +spec@arb_uniform_buffer_object@execution@shared-array-struct-array-struct,Fail +spec@arb_uniform_buffer_object@execution@shared-array-struct-struct,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat4x3,Fail 
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat3x2,Fail 
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat2x2-and-column_major-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat2x3-and-column_major-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat2x4-and-column_major-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat3x2-and-column_major-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat3x3-and-column_major-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat3x4-and-column_major-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat4x2-and-column_major-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat4x3-and-column_major-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat4x4-and-column_major-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat4x2,Fail 
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat2x2-and-column_major-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat2x3-and-column_major-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat2x4-and-column_major-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat3x2-and-column_major-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat3x3-and-column_major-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat3x4-and-column_major-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat4x2-and-column_major-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat4x3-and-column_major-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-mat4x4-and-column_major-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat2x3,Fail 
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@shared-struct-array-array-array,Fail +spec@arb_uniform_buffer_object@execution@shared-struct-array-array-array-struct,Fail +spec@arb_uniform_buffer_object@execution@shared-struct-array-array-struct,Fail +spec@arb_uniform_buffer_object@execution@shared-struct-array-struct,Fail +spec@arb_uniform_buffer_object@execution@shared-struct-struct,Fail +spec@arb_uniform_buffer_object@execution@shared-struct-struct-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat2x3,Fail 
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat3x4,Fail 
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-array-array-struct-array,Fail +spec@arb_uniform_buffer_object@execution@std140-array-struct-array-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-array-struct-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat4x3,Fail 
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat3x2,Fail 
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat2x2-and-column_major-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat2x3-and-column_major-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat2x4-and-column_major-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat3x2-and-column_major-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat3x3-and-column_major-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat3x4-and-column_major-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat4x2-and-column_major-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat4x3-and-column_major-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat4x4-and-column_major-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat4x2,Fail 
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat2x2-and-column_major-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat2x3-and-column_major-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat2x4-and-column_major-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat3x2-and-column_major-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat3x3-and-column_major-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat3x4-and-column_major-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat4x2-and-column_major-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat4x3-and-column_major-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-mat4x4-and-column_major-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat2x3,Fail 
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat2x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat2x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat2x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat3x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat3x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat3x4,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat4x2,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat4x3,Fail +spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat4x4,Fail +spec@arb_uniform_buffer_object@execution@std140-struct-array-array-array,Fail +spec@arb_uniform_buffer_object@execution@std140-struct-array-array-array-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-struct-array-array-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-struct-array-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-struct-struct,Fail +spec@arb_uniform_buffer_object@execution@std140-struct-struct-struct,Fail +spec@egl 1.4@egl-copy-buffers,Crash +spec@egl_chromium_sync_control@conformance,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail 
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail +spec@egl_khr_gl_colorspace@linear,Crash +spec@egl_khr_gl_colorspace@srgb,Crash +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_nok_texture_from_pixmap@basic,Crash +spec@ext_direct_state_access@named-buffers 30,Fail +spec@ext_direct_state_access@named-buffers 30@FlushMappedNamedBufferRangeEXT,Fail +spec@ext_external_objects@vk-image-overwrite,Crash +spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail +spec@ext_framebuffer_multisample@accuracy 2 srgb small depthstencil,Fail +spec@ext_framebuffer_multisample@accuracy 2 srgb small depthstencil linear,Fail +spec@ext_framebuffer_multisample@accuracy 4 srgb small depthstencil,Fail +spec@ext_framebuffer_multisample@accuracy 4 srgb small depthstencil linear,Fail +spec@ext_framebuffer_multisample@accuracy 6 srgb small depthstencil,Fail +spec@ext_framebuffer_multisample@accuracy 6 srgb small depthstencil linear,Fail +spec@ext_framebuffer_multisample@accuracy 8 srgb small depthstencil,Fail +spec@ext_framebuffer_multisample@accuracy 8 srgb small depthstencil linear,Fail +spec@ext_framebuffer_multisample@accuracy all_samples srgb small depthstencil,Fail +spec@ext_framebuffer_multisample@accuracy all_samples srgb small depthstencil linear,Fail +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@formats 2,Fail +spec@ext_framebuffer_multisample@formats 4,Fail +spec@ext_framebuffer_multisample@formats 6,Fail +spec@ext_framebuffer_multisample@formats 8,Fail +spec@ext_framebuffer_multisample@formats all_samples,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 
non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail +spec@ext_texture_array@texsubimage array,Fail +spec@ext_texture_env_combine@texture-env-combine,Timeout +spec@ext_texture_snorm@multisample-formats 2 gl_ext_texture_snorm,Fail +spec@ext_texture_snorm@multisample-formats 4 gl_ext_texture_snorm,Fail +spec@ext_texture_snorm@multisample-formats 6 gl_ext_texture_snorm,Fail +spec@ext_texture_snorm@multisample-formats 8 gl_ext_texture_snorm,Fail +spec@ext_texture_srgb@multisample-formats 2 gl_ext_texture_srgb,Fail +spec@ext_texture_srgb@multisample-formats 4 gl_ext_texture_srgb,Fail +spec@ext_texture_srgb@multisample-formats 6 gl_ext_texture_srgb,Fail +spec@ext_texture_srgb@multisample-formats 8 gl_ext_texture_srgb,Fail +spec@ext_timer_query@time-elapsed,Fail +spec@ext_transform_feedback@immediate-reuse-uniform-buffer,Fail +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail +spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash 
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash +spec@glsl-1.10@execution@samplers@glsl-fs-lots-of-tex,Fail +spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail +spec@glsl-1.40@uniform_buffer@fs-array-copy,Fail +spec@glsl-1.40@uniform_buffer@fs-bools,Fail +spec@glsl-1.40@uniform_buffer@fs-float-array-const-index,Fail +spec@glsl-1.40@uniform_buffer@fs-float-array-variable-index,Fail +spec@glsl-1.40@uniform_buffer@fs-floats,Fail +spec@glsl-1.40@uniform_buffer@fs-struct,Fail +spec@glsl-1.40@uniform_buffer@fs-struct-copy,Fail +spec@glsl-1.40@uniform_buffer@fs-struct-copy-complicated,Fail +spec@glsl-1.40@uniform_buffer@fs-struct-pad,Fail +spec@glsl-1.40@uniform_buffer@fs-two-members,Fail +spec@glsl-1.40@uniform_buffer@vs-array-copy,Fail +spec@glsl-1.40@uniform_buffer@vs-bools,Fail +spec@glsl-1.40@uniform_buffer@vs-float-array-const-index,Fail +spec@glsl-1.40@uniform_buffer@vs-float-array-variable-index,Fail +spec@glsl-1.40@uniform_buffer@vs-floats,Fail +spec@glsl-1.40@uniform_buffer@vs-struct,Fail +spec@glsl-1.40@uniform_buffer@vs-struct-copy,Fail +spec@glsl-1.40@uniform_buffer@vs-struct-copy-complicated,Fail +spec@glsl-1.40@uniform_buffer@vs-struct-pad,Fail +spec@glsl-1.40@uniform_buffer@vs-two-members,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail +spec@glsl-1.50@uniform_buffer@gs-array-copy,Fail +spec@glsl-1.50@uniform_buffer@gs-bools,Fail +spec@glsl-1.50@uniform_buffer@gs-float-array-const-index,Fail +spec@glsl-1.50@uniform_buffer@gs-float-array-variable-index,Fail +spec@glsl-1.50@uniform_buffer@gs-floats,Fail +spec@glsl-1.50@uniform_buffer@gs-struct,Fail +spec@glsl-1.50@uniform_buffer@gs-struct-copy,Fail +spec@glsl-1.50@uniform_buffer@gs-struct-copy-complicated,Fail +spec@glsl-1.50@uniform_buffer@gs-struct-pad,Fail 
+spec@glsl-1.50@uniform_buffer@gs-two-members,Fail +spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail +spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail +spec@oes_viewport_array@viewport-gs-writes-out-of-range,Fail diff --git a/src/gallium/drivers/zink/ci/zink-nvk-flakes.txt b/src/gallium/drivers/zink/ci/zink-nvk-flakes.txt new file mode 100644 index 00000000000..346817ceda7 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-nvk-flakes.txt @@ -0,0 +1,10 @@ +spec@arb_pixel_buffer_object@pbo-drawpixels +spec@arb_shader_image_load_store@semantics@imageLoad/Tessellation control shader/r8/image2D test +spec@arb_vertex_attrib_64bit@execution@vs_in@vs-input-double_dvec4-double_dmat3-position +spec@arb_vertex_attrib_64bit@execution@vs_in@vs-input-position-float_vec4_array3-double_dvec4 +spec@glsl-1.30@linker@interpolation-qualifiers@default-gl_backsecondarycolor-flat-gl_secondarycolor +spec@glsl-4.00@execution@built-in-functions@fs-op-add-dvec2-dvec2 +spec@glsl-4.20@execution@vs_in@vs-input-ubyte_uint-short_int-double_dvec3-position +spec@glsl-4.30@execution@built-in-functions@cs-lessthan-vec4-vec4 +spec@glsl-4.30@execution@built-in-functions@cs-op-assign-sub-mat3x2-mat3x2 +spec@glsl-4.30@execution@built-in-functions@cs-op-uplus-mat2 diff --git 
a/src/gallium/drivers/zink/ci/zink-nvk-skips.txt b/src/gallium/drivers/zink/ci/zink-nvk-skips.txt new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-nvk-skips.txt diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt new file mode 100644 index 00000000000..53c26e072e9 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt @@ -0,0 +1,183 @@ +# kopper +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail + +# #6115 +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + + +glx@extension string sanity,Fail + +# #6322 +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + +spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash + +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail 
+spec@arb_sample_shading@interpolate-at-sample-position 6,Fail +spec@arb_sample_shading@interpolate-at-sample-position 8,Fail + +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail + +spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail +spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail + +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_surfaceless_context@viewport,Fail + +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash +spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash + +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail + +spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail + +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail +spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full 
precision,Fail +spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail + +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.5@depth-tex-compare,Fail + +spec@!opengl 2.0@vs-point_size-zero,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail + + +# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support") +spec@egl 1.4@egl-ext_egl_image_storage,Fail + +# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev") +spec@!opengl 1.1@line-smooth-stipple,Fail + +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +# 
Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3 +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6,Fail +spec@arb_sample_shading@samplemask 6 all,Fail +spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail 
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms partition,Fail +spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms partition,Fail +spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8,Fail +spec@arb_sample_shading@samplemask 8 all,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms partition,Fail +spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms partition,Fail +spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail 
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail + +# Polygon smoothing isn't supported in Vulkan. 
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail + +# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104 +spec@arb_viewport_array@display-list,Fail diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt new file mode 100644 index 00000000000..7642429cce0 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt @@ -0,0 +1,30 @@ +KHR-GL46.packed_pixels.varied_rectangle.rgb16 +dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_clamp_rgba4444_pot +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_10x5_khr_rgba32i.texture2d_array_to_texture2d_array +dEQP-GLES31.functional.texture.filtering.cube_array.combinations.nearest_nearest_repeat_repeat +spec@arb_enhanced_layouts@execution@component-layout@sso-vs-gs-fs-array-interleave +spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store +spec@arb_texture_cube_map@copyteximage cube samples=6 +spec@arb_texture_cube_map@copyteximage cube samples=8 +spec@arb_texture_rectangle@copyteximage rect samples=6 +spec@arb_texture_rectangle@copyteximage rect samples=8 +spec@arb_timer_query@timestamp-get +spec@ext_texture_array@copyteximage 1d_array samples=6 +spec@ext_texture_array@copyteximage 1d_array samples=8 +spec@ext_texture_array@copyteximage 2d_array samples=6 +spec@ext_texture_array@copyteximage 2d_array samples=8 +spec@ext_transform_feedback@max-varyings +spec@ext_transform_feedback@max-varyings@max-varying-arrays-of-arrays +spec@glsl-1.50@execution@geometry@point-size-out +spec@!opengl 1.0@rasterpos +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked + +# Marked as flake because it passes with ESO but crashes with pipelines. 
+spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -scan 1 50 + +# Nightly run expectations update +dEQP-GLES31.functional.copy_image.mixed.viewclass_64_bits_mixed.signed_r11_eac_rgba16ui.cubemap_to_texture3d +KHR-GL46.packed_pixels.varied_rectangle.rgb10_a2ui +spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec3-index-wr-before-barrier + diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt new file mode 100644 index 00000000000..4ca4f66b281 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt @@ -0,0 +1,64 @@ +ext_texture_env.* +spec@arb_shader_image_load_store.invalid +spec@arb_shader_image_load_store.max-size +spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64 +.*@execution@vs_in.* + +# Kopper regression +glx@glx-tfp + +spec@egl_nok_texture_from_pixmap@basic + +# Exclude GLX tests. +glx@glx.* + +# Tests below timeout most of the time. +KHR-GL46.copy_image.functional +KHR-GL46.texture_swizzle.smoke +KHR-GL46.texture_swizzle.functional +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize + +#these need format conversions that gallium doesn't implement yet +spec@arb_texture_buffer_object@formats.*arb.* + +# These randomly hang. +spec@ext_external_objects@.* + +# These are too random. +spec@arb_shader_clock@execution@clock.* + +# These run OOM and might hang? 
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.* +spec@!opengl 1.1@streaming-texture-leak +spec@arb_uniform_buffer_object@maxuniformblocksize.* + +# implicit modifier selection not currently supported +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88 + +# This subset hangs since a077c14f150 ("zink: Fix resizable 
BAR detection logic") +# for very weird reasons, skip it completely until the issue is properly fixed. +spec@arb_shader_image_load_store.* diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt new file mode 100644 index 00000000000..0c6ae03c09d --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt @@ -0,0 +1,217 @@ +# kopper +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail + +# #6115 +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + + +glx@extension string sanity,Fail + +# #6322 +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + +spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash + +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail +spec@arb_sample_shading@interpolate-at-sample-position 6,Fail 
+spec@arb_sample_shading@interpolate-at-sample-position 8,Fail + +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail +spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail +spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail +spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -scan 1 50,Crash + +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_surfaceless_context@viewport,Fail + +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash +spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash + +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail + +spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail + +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail +spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail 
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail + +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.5@depth-tex-compare,Fail + +spec@!opengl 2.0@vs-point_size-zero,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail + + +# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support") +spec@egl 1.4@egl-ext_egl_image_storage,Fail + +# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 
uprev") +spec@!opengl 1.1@line-smooth-stipple,Fail + +# Delta over NAVI10 +spec@!opengl 1.1@depthstencil-default_fb-blit samples=4,Fail +spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=6,Fail + +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +# Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3 +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail 
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6,Fail +spec@arb_sample_shading@samplemask 6 all,Fail +spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms partition,Fail +spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms partition,Fail +spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8,Fail +spec@arb_sample_shading@samplemask 8 all,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms partition,Fail +spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms partition,Fail 
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail + +# Polygon smoothing isn't supported in Vulkan. 
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail + +# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104 +spec@arb_viewport_array@display-list,Fail + +dEQP-GLES3.functional.shaders.precision.uint.highp_div_fragment,Fail +spec@arb_sample_shading@arb_sample_shading-builtin-gl-sample-mask-mrt-alpha-to-coverage,Fail + +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail 
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt new file mode 100644 index 00000000000..aadc1208612 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt @@ -0,0 +1,44 @@ +dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_equal_spacing_ccw_point_mode +dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_equal_spacing_cw_point_mode +dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_even_spacing_ccw_point_mode +dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_even_spacing_cw_point_mode +dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_odd_spacing_ccw_point_mode +dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_odd_spacing_cw_point_mode +KHR-GL46.geometry_shader.primitive_counter.lines_to_line_strip +KHR-GL46.geometry_shader.primitive_counter.lines_to_line_strip_rp +KHR-GL46.geometry_shader.primitive_counter.points_to_line_strip +KHR-GL46.geometry_shader.primitive_counter.points_to_line_strip_rp +KHR-GL46.geometry_shader.primitive_counter.triangles_to_line_strip_rp +spec@arb_depth_texture@fbo-depth-gl_depth_component16-blit +spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store +spec@arb_sample_shading@arb_sample_shading-builtin-gl-sample-mask-mrt-alpha-to-coverage +spec@arb_shader_image_load_store@coherency +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/256x256 +spec@arb_shader_image_load_store@shader-mem-barrier +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=16 
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Vertex 
shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_timer_query@timestamp-get +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv +spec@glsl-1.50@execution@geometry@point-size-out +spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs +spec@!opengl 1.0@rasterpos +spec@!opengl 1.1@depthstencil-default_fb-blit samples=4 +spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=6 diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt new file mode 100644 index 00000000000..47781bd87ae --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt @@ -0,0 +1,34 @@ +ext_texture_env.* +spec@arb_shader_image_load_store.invalid +spec@arb_shader_image_load_store.max-size +spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64 +.*@execution@vs_in.* + +# Kopper regression +glx@glx-tfp + +spec@egl_nok_texture_from_pixmap@basic + + +# Exclude GLX tests. +glx@glx.* + +# Tests below timeout most of the time. +KHR-GL46.copy_image.functional +KHR-GL46.texture_swizzle.smoke +KHR-GL46.texture_swizzle.functional +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize + +#these need format conversions that gallium doesn't implement yet +spec@arb_texture_buffer_object@formats.*arb.* + +# These randomly hang. +spec@ext_external_objects@.* + +# These are too random. +spec@arb_shader_clock@execution@clock.* + +# These run OOM and might hang? 
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.* +spec@!opengl 1.1@streaming-texture-leak +spec@arb_uniform_buffer_object@maxuniformblocksize.* diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt new file mode 100644 index 00000000000..324c7132a0e --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt @@ -0,0 +1,610 @@ +# kopper +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail + +# #6115 +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + + +glx@extension string sanity,Fail + +# #6322 +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + +spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash + +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail +spec@arb_sample_shading@interpolate-at-sample-position 6,Fail 
+spec@arb_sample_shading@interpolate-at-sample-position 8,Fail + +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail + +spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail +spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail + +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_surfaceless_context@viewport,Fail + +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail + +# implicit modifier selection is broken with radeonsi display server +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail + +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash +spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash + +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail + +spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail + +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail 
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail + +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top 
edge,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.5@depth-tex-compare,Fail + +spec@!opengl 2.0@vs-point_size-zero,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail + +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail + +# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support") +spec@egl 1.4@egl-ext_egl_image_storage,Fail + +# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev") +spec@!opengl 1.1@line-smooth-stipple,Fail + +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +# Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3 +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail 
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6,Fail +spec@arb_sample_shading@samplemask 6 all,Fail +spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms partition,Fail +spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms partition,Fail +spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8,Fail +spec@arb_sample_shading@samplemask 8 all,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail 
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms partition,Fail +spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms partition,Fail +spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail + +# Polygon smoothing isn't supported in Vulkan. 
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail + +# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104 +spec@arb_viewport_array@display-list,Fail + +KHR-GL46.geometry_shader.rendering.rendering.triangles_with_adjacency_input_line_strip_output_triangle_strip_adjacency_drawcall,Fail +KHR-GL46.geometry_shader.rendering.rendering.triangles_with_adjacency_input_triangle_strip_output_triangle_strip_adjacency_drawcall,Fail +KHR-GL46.texture_size_promotion.functional,Fail +dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.greater_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.greater_or_equal_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.less_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.less_or_equal_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.greater_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.greater_or_equal_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.less_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.less_or_equal_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.greater_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.greater_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.greater_or_equal_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.less_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.less_or_equal_depth24_stencil8,Fail +dEQP-GLES3.functional.texture.specification.teximage2d_depth.depth24_stencil8,Fail +dEQP-GLES3.functional.texture.specification.teximage2d_depth.depth32f_stencil8,Fail 
+dEQP-GLES3.functional.texture.specification.teximage3d_depth.depth24_stencil8_2d_array,Fail +dEQP-GLES3.functional.texture.specification.teximage3d_depth.depth32f_stencil8_2d_array,Fail +dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth24_stencil8_2d,Fail +dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth24_stencil8_cube,Fail +dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth32f_stencil8_2d,Fail +dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth32f_stencil8_cube,Fail +dEQP-GLES3.functional.texture.specification.texstorage3d.format.depth24_stencil8_2d_array,Fail +dEQP-GLES3.functional.texture.specification.texstorage3d.format.depth32f_stencil8_2d_array,Fail +dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth24_stencil8,Fail +dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth32f_stencil8,Fail +dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_2d_array,Fail +dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_2d_array,Fail +dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15,Fail +dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1,Fail +dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3,Fail +dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array,Fail +dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array,Fail +dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array,Fail + +spec@!opengl 1.1@getteximage-depth,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT16,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT24,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT32,Fail +spec@!opengl 
1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT16,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT24,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT32,Fail +spec@!opengl 1.1@texwrap formats bordercolor,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA12- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA16- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA4- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA8- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_R3_G3_B2- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB10- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB10_A2- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB12- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB16- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB4- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB5- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB5_A1- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB8- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA12- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA16- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA4- swizzled- border color 
only,Fail +spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA8- swizzled- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA12- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA16- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA4- border color only,Fail +spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA8- border color only,Fail +spec@!opengl 3.2@gl-3.2-adj-prims cull-back pv-last,Fail +spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-last,Fail +spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-last,Fail +spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-last,Fail +spec@!opengl 3.2@gl-3.2-adj-prims pv-last,Fail +spec@arb_depth_buffer_float@fbo-clear-formats,Fail +spec@arb_depth_buffer_float@fbo-clear-formats@GL_DEPTH32F_STENCIL8,Fail +spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail +spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail +spec@arb_sample_locations@test,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 
2- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 
4- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: true,Fail +spec@arb_texture_compression@texwrap formats bordercolor-swizzled,Fail +spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB- swizzled- border color only,Fail +spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA- swizzled- border color only,Fail +spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled,Fail +spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_BPTC_UNORM- swizzled- border color only,Fail +spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT- swizzled- border color only,Fail +spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT- swizzled- border color only,Fail +spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM- swizzled- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA16F_ARB- swizzled- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA32F_ARB- swizzled- border color 
only,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB16F- swizzled- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB32F- swizzled- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA16F- swizzled- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA32F- swizzled- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA16F_ARB- border color only,Fail +spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA32F_ARB- border color only,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_R16- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_R8- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG16- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG8- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled,Fail +spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_R16F- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_R32F- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_RG16F- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_RG32F- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R16I- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R16UI- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R32I- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int 
bordercolor-swizzled@GL_R32UI- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R8I- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R8UI- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG16I- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG16UI- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG32I- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG32UI- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8I- swizzled- border color only,Fail +spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8UI- swizzled- border color only,Fail +spec@arb_texture_rgb10_a2ui@texwrap formats bordercolor-swizzled,Fail +spec@arb_texture_rgb10_a2ui@texwrap formats bordercolor-swizzled@GL_RGB10_A2UI- swizzled- border color only,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export-tex,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail +spec@ext_packed_depth_stencil@fbo-clear-formats,Fail +spec@ext_packed_depth_stencil@fbo-clear-formats@GL_DEPTH24_STENCIL8,Fail +spec@ext_packed_depth_stencil@fbo-clear-formats@GL_DEPTH_STENCIL,Fail +spec@ext_packed_float@texwrap formats bordercolor-swizzled,Fail +spec@ext_packed_float@texwrap formats bordercolor-swizzled@GL_R11F_G11F_B10F- swizzled- border color only,Fail +spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RED_RGTC1- 
swizzled- border color only,Fail +spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RG_RGTC2- swizzled- border color only,Fail +spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_SIGNED_RED_RGTC1- swizzled- border color only,Fail +spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_SIGNED_RG_RGTC2- swizzled- border color only,Fail +spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT- swizzled- border color only,Fail +spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT- swizzled- border color only,Fail +spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT- swizzled- border color only,Fail +spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB_S3TC_DXT1_EXT- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB16I- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB16UI- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB32I- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB32UI- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB8I- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB8UI- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA16I- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA16UI- swizzled- border color only,Fail 
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA32I- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA32UI- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA8I- swizzled- border color only,Fail +spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA8UI- swizzled- border color only,Fail +spec@ext_texture_shared_exponent@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_shared_exponent@texwrap formats bordercolor-swizzled@GL_RGB9_E5- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_R16_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_R8_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RG16_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RG8_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGB16_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGB8_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGBA16_SNORM- swizzled- border color only,Fail +spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGBA8_SNORM- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8_ALPHA8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB- 
swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT- swizzled- border color only,Fail +spec@glsl-1.50@execution@geometry@primitive-types gl_triangle_strip_adjacency,Fail +spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip_adjacency ffs,Fail +spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip_adjacency other,Fail + +# Regressions from 1f4662cc4ed0c5b87479eb71e53a1320ab1b414b +spec@ext_texture_array@copyteximage 1d_array,Fail +spec@ext_texture_array@copyteximage 1d_array samples=2,Fail +spec@ext_texture_array@copyteximage 1d_array samples=4,Fail +spec@ext_texture_array@copyteximage 1d_array samples=6,Fail +spec@ext_texture_array@copyteximage 1d_array samples=8,Fail diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt new file mode 100644 index 00000000000..efc6bf8df8f --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt @@ -0,0 +1,33 @@ +dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15 +dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1 +dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3 +dEQP-GLES31.functional.fbo.no_attachments.size.16x16 +dEQP-GLES31.functional.texture.specification.teximage3d_depth.depth24_stencil8_cube_array 
+dEQP-GLES31.functional.texture.specification.teximage3d_depth.depth32f_stencil8_cube_array +dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array +dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array +dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array +dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_cube_array +dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.equal_depth24_stencil8 +dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.greater_depth24_stencil8 +dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.greater_or_equal_depth24_stencil8 +dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.less_depth24_stencil8 +dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.less_or_equal_depth24_stencil8 +dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.not_equal_depth24_stencil8 +dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth24_stencil8_cube +spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glScissor +spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glViewport +spec@glsl-1.50@execution@geometry@point-size-out +spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs +spec@!opengl 1.0@rasterpos + +# Updated by ci-collate, found in this job run: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56164998 +spec@!opengl 1.1@depthstencil-default_fb-blit + +# Updated by ci-collate, found in this job run: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56260518 +dEQP-GLES31.functional.fbo.no_attachments.npot_size.17x17 + +# This test is flaking: +# Fail: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56268639 +# Success: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56260518 +dEQP-GLES31.functional.fbo.no_attachments.npot_size.31x31 diff --git 
a/src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt new file mode 100644 index 00000000000..dafa035046d --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt @@ -0,0 +1,49 @@ +ext_texture_env.* +spec@arb_shader_image_load_store.invalid +spec@arb_shader_image_load_store.max-size +spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64 +.*@execution@vs_in.* + +# Kopper regression +glx@glx-tfp + +spec@egl_nok_texture_from_pixmap@basic + +# Exclude GLX tests. +glx@glx.* + +# Tests below timeout most of the time. +KHR-GL46.copy_image.functional +KHR-GL46.texture_swizzle.smoke +KHR-GL46.texture_swizzle.functional +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize + +#these need format conversions that gallium doesn't implement yet +spec@arb_texture_buffer_object@formats.*arb.* + +# These randomly hang. +spec@ext_external_objects@.* + +# These are too random. +spec@arb_shader_clock@execution@clock.* + +# These run OOM and might hang? 
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.* +spec@!opengl 1.1@streaming-texture-leak +spec@arb_uniform_buffer_object@maxuniformblocksize.* + +# implicit modifier selection not currently supported +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88 + +# This subset hangs since a077c14f150 ("zink: Fix resizable BAR detection logic") +# for very weird reasons, skip it completely until the issue is properly fixed. 
+spec@arb_shader_image_load_store.* diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt new file mode 100644 index 00000000000..230fc86df47 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt @@ -0,0 +1,188 @@ +# kopper +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail + +# #6115 +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + + +glx@extension string sanity,Fail + +# #6322 +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + +spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash + +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail +spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail +spec@arb_sample_shading@interpolate-at-sample-position 6,Fail +spec@arb_sample_shading@interpolate-at-sample-position 8,Fail + +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail 
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail +spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail + +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_surfaceless_context@viewport,Fail + +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash +spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash + +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail + +spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail + +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail +spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail + +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode,Fail 
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.5@depth-tex-compare,Fail + +spec@!opengl 2.0@vs-point_size-zero,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail + + +# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support") +spec@egl 1.4@egl-ext_egl_image_storage,Fail + +# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev") +spec@!opengl 1.1@line-smooth-stipple,Fail + +# Delta over NAVI10 +spec@!opengl 1.1@depthstencil-default_fb-blit samples=4,Fail +spec@!opengl 1.1@depthstencil-default_fb-blit samples=6,Fail +spec@!opengl 1.1@depthstencil-default_fb-blit samples=8,Fail + +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +# 
Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3 +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6,Fail +spec@arb_sample_shading@samplemask 6 all,Fail +spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail 
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6 all@noms partition,Fail +spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 6@noms partition,Fail +spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8,Fail +spec@arb_sample_shading@samplemask 8 all,Fail +spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8 all@noms partition,Fail +spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 8@noms partition,Fail +spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail 
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail + +# Polygon smoothing isn't supported in Vulkan. 
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail + +# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104 +spec@arb_viewport_array@display-list,Fail + diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt new file mode 100644 index 00000000000..e5576267400 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt @@ -0,0 +1,41 @@ +dEQP-GLES2.functional.shaders.random.swizzle.vertex.43 +dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_12x10_rgba.srgb8_alpha8_astc_12x10_khr_rgba_astc_12x10_khr.texture3d_to_texture3d +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_4x4_khr_rgba32ui.texture2d_array_to_texture2d_array +dEQP-GLES31.functional.texture.filtering.cube_array.sizes.8x8x6_nearest +dEQP-GLES3.functional.texture.filtering.cube.combinations.linear_linear_mirror_mirror +dEQP-GLES3.functional.texture.shadow.cube.linear_mipmap_nearest.greater_depth_component32f +object namespace pollution@framebuffer with glgetteximage +spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store +spec@arb_instanced_arrays@arb_instanced_arrays-instanced_arrays-vbo +spec@arb_shader_image_load_store@shader-mem-barrier +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'volatile' qualifier memory 
barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=16 +spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'coherent' qualifier memory barrier test/modulus=64 +spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'volatile' qualifier memory barrier test/modulus=64 +spec@arb_tessellation_shader@execution@built-in-functions@tcs-sign-vec3 +spec@arb_texture_multisample@arb_texture_multisample-dsa-texelfetch +spec@arb_texture_multisample@arb_texture_multisample-dsa-texelfetch@Texture type: GL_RGB9_E5 +spec@arb_timer_query@timestamp-get +spec@glsl-1.10@execution@built-in-functions@vs-equal-vec2-vec2 +spec@glsl-1.50@execution@built-in-functions@gs-greaterthan-uvec3-uvec3 +spec@glsl-1.50@execution@geometry@point-size-out +spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs +spec@!opengl 1.0@rasterpos +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked +spec@!opengl 
1.1@depthstencil-default_fb-blit samples=4 + +# Marked as flake because it passes with ESO but crashes with pipelines. +spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -scan 1 50 diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt new file mode 100644 index 00000000000..4d37b3041b0 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt @@ -0,0 +1,61 @@ +ext_texture_env.* +spec@arb_shader_image_load_store.invalid +spec@arb_shader_image_load_store.max-size +spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64 +.*@execution@vs_in.* + +# Kopper regression +glx@glx-tfp + +spec@egl_nok_texture_from_pixmap@basic + + +# Exclude GLX tests. +glx@glx.* + +# Tests below timeout most of the time. +KHR-GL46.copy_image.functional +KHR-GL46.texture_swizzle.smoke +KHR-GL46.texture_swizzle.functional +KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize + +#these need format conversions that gallium doesn't implement yet +spec@arb_texture_buffer_object@formats.*arb.* + +# These randomly hang. +spec@ext_external_objects@.* + +# These are too random. +spec@arb_shader_clock@execution@clock.* + +# These run OOM and might hang? 
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.* +spec@!opengl 1.1@streaming-texture-leak +spec@arb_uniform_buffer_object@maxuniformblocksize.* + +# implicit modifier selection not currently supported +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216 +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88 diff --git 
a/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt new file mode 100644 index 00000000000..834e02589d1 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt @@ -0,0 +1,20 @@ +GTF-GL46.gtf30.GL3Tests.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit,Fail +GTF-GL46.gtf30.GL3Tests.sgis_texture_lod.sgis_texture_lod_basic_lod_selection,Fail +GTF-GL46.gtf32.GL3Tests.draw_elements_base_vertex.draw_elements_base_vertex_invalid_mode,Fail + +KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage,Fail + +# Turnip has maxFragmentInputComponents = 124, while GL requires +# gl_MaxFragmentInputComponents >= 128 +KHR-GL46.limits.max_fragment_input_components,Fail + +# https://gerrit.khronos.org/c/vk-gl-cts/+/9672 +KHR-GL46.buffer_storage.map_persistent_draw,Fail + +# https://gitlab.freedesktop.org/mesa/mesa/-/issues/6723 +KHR-GL46.copy_image.functional,Fail +KHR-GL46.texture_view.view_classes,Fail + +# Piglit xfb tests +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt new file mode 100644 index 00000000000..a9107058df7 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt @@ -0,0 +1,16 @@ +GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels_pixelstore +KHR-Single-GL46.arrays_of_arrays_gl.ConstructorsAndUnsizedDeclConstructorSizing1 +dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_write_dynamic_read_vertex +dEQP-GLES3.functional.texture.wrap.astc_12x10_srgb.repeat_mirror_linear_divisible +dEQP-GLES3.functional.texture.wrap.astc_6x6.repeat_mirror_nearest_divisible +dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_10x6_khr_rgba32i.texture3d_to_texture3d 
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_10x5_khr_rgba32f.texture3d_to_texture2d_array +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8_rg8.texture2d_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8i_rg8i.cubemap_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8i_rg8i.texture2d_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8i_rg8i.texture3d_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8ui_rg8ui.texture2d_to_renderbuffer +dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgb10_a2.cubemap_to_renderbuffer +dEQP-GLES3.functional.texture.specification.texstorage3d.format.depth_component16_2d_array +dEQP-GLES3.functional.texture.specification.texstorage2d.format.rgb565_cube +dEQP-GLES31.functional.fbo.color.texcubearray.r16f diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt new file mode 100644 index 00000000000..fb03e671d01 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt @@ -0,0 +1,2 @@ +# takes forever, but passes +KHR-GL46.texture_swizzle.smoke diff --git a/src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt b/src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt new file mode 100644 index 00000000000..f2921348eba --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt @@ -0,0 +1,524 @@ +GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_two_buffers,Fail +dEQP-GLES3.functional.fbo.depth.depth_test_clamp.depth32f_stencil8,Fail +dEQP-GLES3.functional.fbo.depth.depth_test_clamp.depth_component32f,Fail +dEQP-GLES3.functional.fbo.depth.depth_write_clamp.depth32f_stencil8,Fail +dEQP-GLES3.functional.fbo.depth.depth_write_clamp.depth_component32f,Fail + 
+GTF-GL46.gtf30.GL3Tests.sgis_texture_lod.sgis_texture_lod_basic_lod_selection,Fail + +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2,Crash + +# Turnip has maxFragmentInputComponents = 124, while GL requires +# gl_MaxFragmentInputComponents >= 128 +KHR-GL46.limits.max_fragment_input_components,Fail + +# https://gitlab.freedesktop.org/mesa/mesa/-/issues/6723 +KHR-GL46.copy_image.functional,Fail +KHR-GL46.texture_view.view_classes,Fail + + +dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.lowp_mat4_float_vertex,Fail +dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.mediump_mat4_float_vertex,Fail + +glx@glx-multi-window-single-context,Fail +glx@glx-multithread-texture,Fail +glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail +glx@glx_arb_create_context_no_error@no error,Fail +glx@glx_arb_create_context_robustness@invalid reset notification strategy,Fail +glx@glx_ext_no_config_context@no fbconfig,Fail +spec@arb_texture_rectangle@fbo-blit rect,Fail +spec@egl_chromium_sync_control@conformance,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail +spec@ext_framebuffer_blit@fbo-blit,Fail +spec@ext_framebuffer_blit@fbo-copypix,Fail +spec@ext_framebuffer_blit@fbo-readdrawpix,Fail +spec@!opengl 1.0@depth-clear-precision-check,Fail +spec@!opengl 1.0@depth-clear-precision-check@depth24,Fail +spec@!opengl 1.0@depth-clear-precision-check@depth24_stencil8,Fail +spec@!opengl 1.0@depth-clear-precision-check@depth32,Fail +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.0@rasterpos,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail +spec@!opengl 1.1@line-aa-width,Fail +spec@!opengl 1.1@line-smooth-stipple,Crash +spec@!opengl 1.1@linestipple,Crash +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 
1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-offset,Fail +spec@!opengl 2.0@vs-point_size-zero,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail +spec@!opengl 3.2@gl-3.2-adj-prims cull-back pv-first,Fail +spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-first,Fail +spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-first,Fail +spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-first,Fail +spec@!opengl 3.2@gl-3.2-adj-prims pv-first,Fail +spec@!opengl 3.2@minmax,Fail +spec@!opengl 3.3@minmax,Fail +spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail +spec@arb_gpu_shader5@arb_gpu_shader5-interpolateatsample-dynamically-nonuniform,Fail +spec@arb_gpu_shader5@execution@built-in-functions@fs-interpolateatcentroid-array-of-structs,Crash +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64,Crash +spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.3,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size 
pname checks,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_BLUE_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_GREEN_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_RED_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_type pname checks,Fail +spec@arb_internalformat_query2@all internalformat_<x>_type pname checks@GL_INTERNALFORMAT_BLUE_TYPE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_type pname checks@GL_INTERNALFORMAT_GREEN_TYPE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_type pname checks@GL_INTERNALFORMAT_RED_TYPE,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2' on GL_PROGRAM_INPUT,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_query_buffer_object@coherency,Fail +spec@arb_query_buffer_object@coherency@index-buffer-GL_TESS_CONTROL_SHADER_PATCHES,Fail +spec@arb_query_buffer_object@coherency@indirect-dispatch-GL_TESS_CONTROL_SHADER_PATCHES,Fail +spec@arb_query_buffer_object@coherency@indirect-draw-GL_TESS_CONTROL_SHADER_PATCHES,Fail +spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_TESS_CONTROL_SHADER_PATCHES,Fail +spec@arb_query_buffer_object@qbo,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC-GL_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC-GL_UNSIGNED_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC-GL_UNSIGNED_INT64_ARB,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_AFTER-GL_INT,Fail 
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT64_ARB,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_BEFORE-GL_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT64_ARB,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC-GL_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC-GL_UNSIGNED_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC-GL_UNSIGNED_INT64_ARB,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT,Fail +spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT64_ARB,Fail +spec@arb_sample_locations@test,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- 
X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 
2- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail 
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- 
Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail +spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail 
+spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_shader_image_load_store@coherency,Fail +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/512x512,Fail +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'volatile' qualifier coherency test/1024x1024,Fail +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/1024x1024,Fail +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/512x512,Fail +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'volatile' qualifier coherency test/1024x1024,Fail +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'volatile' qualifier coherency test/512x512,Fail +spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'coherent' qualifier coherency test/1024x1024,Fail +spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'coherent' qualifier coherency test/256x256,Fail +spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'coherent' qualifier coherency 
test/512x512,Fail +spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'volatile' qualifier coherency test/1024x1024,Fail +spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'volatile' qualifier coherency test/256x256,Fail +spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'volatile' qualifier coherency test/512x512,Fail +spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail +spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail +spec@arb_tessellation_shader@execution@tcs-input-read-mat,Fail +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash +spec@arb_texture_buffer_object@formats (fs- arb),Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16UI_EXT,Fail 
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY8,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8_ALPHA8,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail 
+spec@arb_texture_buffer_object@formats (vs- arb),Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16_ALPHA16,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- 
arb)@GL_LUMINANCE32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8_ALPHA8,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail +spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail +spec@arb_texture_rectangle@1-1-linear-texture,Fail +spec@arb_timer_query@timestamp-get,Fail +spec@ext_external_objects@vk-depth-display,Fail +spec@ext_external_objects@vk-depth-display@D16,Fail +spec@ext_external_objects@vk-depth-display@D24S8,Fail +spec@ext_external_objects@vk-depth-display@D32S8,Fail +spec@ext_external_objects@vk-image-display,Fail +spec@ext_external_objects@vk-image-display-muliple-textures,Fail +spec@ext_external_objects@vk-image-display-overwrite,Fail +spec@ext_external_objects@vk-image-overwrite,Fail +spec@ext_external_objects@vk-image-overwrite@RGB 10 A2 UINT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGB 10 A2 UNORM optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGB 5 A1 UNORM optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 16 INT optimal: Failed to create texture from GL memory object.,Fail 
+spec@ext_external_objects@vk-image-overwrite@RGBA 16 SFLOAT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 16 UINT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 16 UNORM optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 32 INT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 32 SFLOAT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 32 UINT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 4 UNORM optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 8 INT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 8 SRGB optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 8 UINT optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-image-overwrite@RGBA 8 UNORM optimal: Failed to create texture from GL memory object.,Fail +spec@ext_external_objects@vk-semaphores,Fail +spec@ext_external_objects@vk-semaphores-2,Fail +spec@ext_external_objects@vk-stencil-display,Fail +spec@ext_external_objects@vk-stencil-display@D24S8,Fail +spec@ext_external_objects@vk-stencil-display@D32S8,Fail +spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail +spec@ext_framebuffer_multisample@alpha-to-coverage-dual-src-blend 2,Fail +spec@ext_framebuffer_multisample@alpha-to-coverage-dual-src-blend 4,Fail +spec@ext_framebuffer_multisample@alpha-to-coverage-no-draw-buffer-zero 2,Fail +spec@ext_framebuffer_multisample@alpha-to-coverage-no-draw-buffer-zero 4,Fail 
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 2,Fail +spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 4,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@sample-alpha-to-coverage 2 color,Fail +spec@ext_framebuffer_multisample@sample-alpha-to-coverage 4 color,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail +spec@ext_transform_feedback@structs struct-array-elem run,Fail +spec@ext_transform_feedback@structs struct-array-elem run interface,Fail +spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail +spec@ext_transform_feedback@tessellation quads wireframe,Fail +spec@ext_transform_feedback@tessellation triangle_fan 
flat_first,Fail +spec@ext_transform_feedback@tessellation triangle_strip flat_first,Fail +spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash +spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash +spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash +spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail +spec@glsl-1.30@execution@texelfetch fs sampler3d 1x129x9-98x129x9,Fail +spec@glsl-1.30@execution@texelfetch fs sampler3d 98x1x9-98x129x9,Fail +spec@glsl-1.50@built-in constants,Fail +spec@glsl-1.50@built-in constants@gl_MaxFragmentInputComponents,Fail +spec@glsl-1.50@execution@geometry@point-size-out,Fail +spec@glsl-1.50@execution@geometry@primitive-id-restart gl_line_strip_adjacency other,Crash +spec@glsl-1.50@execution@interface-blocks-complex-vs-fs,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail +spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs,Fail +spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec4-index-wr,Fail +spec@glsl-3.30@built-in constants,Fail +spec@glsl-3.30@built-in constants@gl_MaxFragmentInputComponents,Fail +spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail +spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail +spec@khr_texture_compression_astc@array-gl,Fail +spec@khr_texture_compression_astc@array-gl@12x12 Block Dim,Fail +spec@khr_texture_compression_astc@array-gl@5x5 Block Dim,Fail +spec@khr_texture_compression_astc@array-gles,Fail +spec@khr_texture_compression_astc@array-gles@12x12 Block Dim,Fail +spec@khr_texture_compression_astc@array-gles@5x5 Block Dim,Fail +spec@khr_texture_compression_astc@miptree-gl hdr,Fail +spec@khr_texture_compression_astc@miptree-gl hdr@HDR Profile,Fail +spec@khr_texture_compression_astc@miptree-gl ldr,Fail +spec@khr_texture_compression_astc@miptree-gl ldr@LDR Profile,Fail 
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-sd,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-sd@sRGB skip decode,Fail +spec@khr_texture_compression_astc@miptree-gles hdr,Fail +spec@khr_texture_compression_astc@miptree-gles hdr@HDR Profile,Fail +spec@khr_texture_compression_astc@miptree-gles ldr,Fail +spec@khr_texture_compression_astc@miptree-gles ldr@LDR Profile,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-sd,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-sd@sRGB skip decode,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl hdr,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl hdr@HDR Profile,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles hdr,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles hdr@HDR Profile,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail diff --git a/src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt b/src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt new file mode 100644 index 00000000000..698e5b6a711 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt @@ -0,0 +1,4 @@ +KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS +spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/512x512 +glx@glx-multithread-texture 
+glx@glx-visuals-depth diff --git a/src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt b/src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt new file mode 100644 index 00000000000..0628fe02c29 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt @@ -0,0 +1,23 @@ +GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels_pixelstore +KHR-GL46.texture_swizzle.smoke +KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2 + +# crashes +KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component + +spec@.*dvec.* +spec@.*dmat.* +spec@.*int64.* +spec@.*64bit.* +spec@arb_texture_buffer_object@texture-buffer-size-clamp.* + +# hangs +spec@arb_texture_barrier@arb_texture_barrier-blending-in-shader.* +spec@glsl-1.50@execution@geometry@primitive-id-restart gl_line_strip_adjacency ffs + +# timeout +glx@glx-visuals-stencil +spec@!opengl 1.0@gl-1.0-drawbuffer-modes +spec@arb_texture_cube_map@cubemap npot +spec@arb_texture_cube_map_array@arb_texture_cube_map_array-sampler-cube-array-shadow +spec@egl_nok_texture_from_pixmap@basic diff --git a/src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt b/src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt new file mode 100644 index 00000000000..0d05bbda4d2 --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt @@ -0,0 +1,164 @@ +# #6115 +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash +spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash + +# #6322 +spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail + +#kopper regressions/changes +spec@egl_chromium_sync_control@conformance,Fail +spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail 
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail + + +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_x,Fail +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_y,Fail +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min,Fail +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x,Fail +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail +dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail + +# this test tries to be error-compatible with nvidia. spoiler: mesa isn't, and no driver can pass it +glx@glx_arb_create_context@invalid flag,Fail + +glx@glx-swap-pixmap-bad,Fail +glx@glx-visuals-depth,Crash +glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail +glx@glx_arb_create_context_no_error@no error,Fail + +glx@glx_ext_import_context@free context,Fail +glx@glx_ext_import_context@get context id,Fail +glx@glx_ext_import_context@get current display,Fail +glx@glx_ext_import_context@import context- multi process,Fail +glx@glx_ext_import_context@import context- single process,Fail +glx@glx_ext_import_context@imported context has same context id,Fail +glx@glx_ext_import_context@make current- multi process,Fail +glx@glx_ext_import_context@make current- single process,Fail +glx@glx_ext_import_context@query context info,Fail +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-facing,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top 
edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.2@copyteximage 3d,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail +spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail +spec@arb_point_sprite@arb_point_sprite-mipmap,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail +spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail +spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail +spec@arb_sample_shading@interpolate-at-sample-position 2,Fail +spec@arb_sample_shading@interpolate-at-sample-position 4,Fail +spec@arb_sample_shading@samplemask 2,Fail +spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2@noms partition,Fail +spec@arb_sample_shading@samplemask 2@sample 
mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all,Fail +spec@arb_sample_shading@samplemask 2 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 2 all@noms partition,Fail +spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4,Fail +spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4@noms partition,Fail +spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all,Fail +spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail +spec@arb_sample_shading@samplemask 4 all@noms partition,Fail +spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail + +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_surfaceless_context@viewport,Fail +spec@egl_mesa_configless_context@basic,Fail +spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail 
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail +spec@ext_framebuffer_multisample@enable-flag,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail +spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail +spec@ext_packed_float@query-rgba-signed-components,Fail + +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail +spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail +spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail +spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail +spec@arb_shader_image_load_store@early-z,Fail +spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail + +spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-load,Crash 
+spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-store,Crash + +#literally no driver can pass these +spec@!opengl 1.0@rasterpos,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail +spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail + + +spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index,Fail +spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index2,Fail +spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-non-const-uniform-index,Fail +spec@arb_gpu_shader_fp64@execution@conversion,Fail +spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail +spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail +spec@glsl-4.00@execution@conversion,Fail + +spec@ext_transform_feedback@tessellation quads wireframe,Fail + +# Debian 12 CI update, see https://gitlab.freedesktop.org/mesa/mesa/-/issues/9072 +spec@ext_packed_float@multisample-formats 4 gl_ext_packed_float,Crash +spec@ext_transform_feedback@tessellation quad_strip wireframe,Crash +spec@!opengl 1.0@gl-1.0-dlist-beginend,Crash +spec@arb_clip_control@arb_clip_control-depth-precision,Crash +spec@nv_texture_barrier@blending-in-shader,Crash + +spec@arb_viewport_array@display-list,Fail diff --git a/src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt b/src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt new file mode 100644 index 00000000000..a883379893e --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt @@ -0,0 +1,40 @@ +dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_l8_pot +spec@khr_debug@push-pop-group_gl.* +glx@glx-multi-window-single-context + +# "free(): invalid next size (fast)" +# since it's heap corruption, it may or may not appear in a particular run +spec@arb_compute_variable_group_size@local-size + +# https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20908454 +# "X 
connection to :99 broken (explicit kill or server shutdown)." +glx@glx-multi-context-ib-1 + +# depth visuals +glx@glx-visuals-depth +glx@glx-visuals-stencil + +# mysterious +glx@glx-shader-sharing + +spec@arb_fragment_program@no-newline +# glx-destroycontext-1: ../../src/xcb_conn.c:215: write_vec: Assertion `!c->out.queue_len' failed. +glx@glx-destroycontext-1 + +glx@glx-multithread-texture + +# does not happen very often, but rarely does +KHR-GL46.limits.max_fragment_interpolation_offset + +# no output timeout, probably stuck in some X11 connection thing +spec@ext_framebuffer_multisample@accuracy all_samples depth_resolve depthstencil + +# segfault in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48719777 and others +spec@ext_texture_array@texsubimage array + +# crash in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48476882 and others +KHR-GL46.layout_location.sampler_2d_shadow + +# uprev Piglit in Mesa +spec@ext_framebuffer_multisample@accuracy all_samples depth_draw small depthstencil + diff --git a/src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt b/src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt new file mode 100644 index 00000000000..7226486adfb --- /dev/null +++ b/src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt @@ -0,0 +1,47 @@ +# Note: skips lists for CI are just a list of lines that, when +# non-zero-length and not starting with '#', will regex match to +# delete lines from the test list. Be careful. + +KHR-GL32.texture_size_promotion.functional + +# this is just broken. +KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks +KHR-GL46.shader_ballot_tests.ShaderBallotFunctionRead + +# ignores copied from the old runner script +spec@arb_map_buffer_alignment@arb_map_buffer_alignment-map-invalidate-range +spec@arb_timer_query.* +spec@arb_sample_shading@builtin-gl-sample-mask +spec@glsl-1.30@execution@tex-miplevel-selection.* + +# This test doesn't even seem to exist, but piglit adds it to a group...? 
+spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match + +# This one seems to have a typo in the name, and doesn't really ever run? +spec@egl_ext_protected_content@conformance + +# has a race where probes periodically return black +# cf. https://gitlab.freedesktop.org/mesa/mesa/-/jobs/10624521 +glx@glx-multi-context-single-window + +# This one takes too long, but passes. There's other tests that don't +# try all the combinations, so that's probably enough. +spec@arb_compute_shader@local-id-explosion + +# I can't reproduce these crashes locally +# even after running them in loops for 4+ hours, so disable for now +.*tex-miplevel-selection.* + +# these are insanely long +KHR-GL46.copy_image.functional +KHR-GL46.texture_swizzle.smoke +KHR-GL46.texture_swizzle.functional + +# Kopper regression +glx@glx-tfp + +# These tests started hitting timeouts when we upgraded LLVM from v11 to 13 +spec@arb_texture_rg@fbo-blending-formats + +#these need format conversions that gallium doesn't implement yet +spec@arb_texture_buffer_object@formats.*arb.* diff --git a/src/gallium/drivers/zink/driinfo_zink.h b/src/gallium/drivers/zink/driinfo_zink.h index e1cf6d7d559..cdf1596cce0 100644 --- a/src/gallium/drivers/zink/driinfo_zink.h +++ b/src/gallium/drivers/zink/driinfo_zink.h @@ -6,5 +6,10 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_SECTION_END DRI_CONF_SECTION_PERFORMANCE +DRI_CONF_MESA_GLTHREAD_DRIVER(true) +DRI_CONF_OPT_B(zink_shader_object_enable, false, "Enable support for EXT_shader_object") +DRI_CONF_SECTION_END +DRI_CONF_SECTION_QUALITY + DRI_CONF_OPT_B(zink_emulate_point_smooth, false, "Enable support for emulated GL_POINT_SMOOTH") DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/zink/meson.build b/src/gallium/drivers/zink/meson.build index 8da0092bbce..db68907f256 100644 --- a/src/gallium/drivers/zink/meson.build +++ b/src/gallium/drivers/zink/meson.build @@ -19,7 +19,7 @@ # SOFTWARE. 
files_libzink = files( - 'nir_lower_dynamic_bo_access.c', + 'zink_lower_cubemap_to_array.c', 'nir_to_spirv/nir_to_spirv.c', 'nir_to_spirv/spirv_builder.c', 'zink_batch.c', @@ -28,8 +28,8 @@ files_libzink = files( 'zink_clear.c', 'zink_compiler.c', 'zink_context.c', + 'zink_kopper.c', 'zink_descriptors.c', - 'zink_descriptors_lazy.c', 'zink_draw.cpp', 'zink_fence.c', 'zink_format.c', @@ -42,6 +42,7 @@ files_libzink = files( 'zink_screen.c', 'zink_state.c', 'zink_surface.c', + 'zink_synchronization.cpp', ) zink_device_info = custom_target( @@ -49,7 +50,7 @@ zink_device_info = custom_target( input : ['zink_device_info.py'], output : ['zink_device_info.h', 'zink_device_info.c'], command : [ - prog_python, '@INPUT@', '@OUTPUT@', join_paths(meson.source_root(), 'src/vulkan/registry/vk.xml') + prog_python, '@INPUT@', '@OUTPUT@', vk_api_xml ] ) @@ -58,7 +59,7 @@ zink_instance = custom_target( input : ['zink_instance.py'], output : ['zink_instance.h', 'zink_instance.c'], command : [ - prog_python, '@INPUT@', '@OUTPUT@', join_paths(meson.source_root(), 'src/vulkan/registry/vk.xml') + prog_python, '@INPUT@', '@OUTPUT@', vk_api_xml ] ) @@ -67,33 +68,53 @@ zink_nir_algebraic_c = custom_target( input : 'nir_to_spirv/zink_nir_algebraic.py', output : 'zink_nir_algebraic.c', command : [ - prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), + prog_python, '@INPUT@', '-p', dir_compiler_nir, ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) zink_c_args = [] inc_zink_vk = [] -if with_swrast_vk - zink_c_args += '-DZINK_WITH_SWRAST_VK' -endif +if host_machine.system() == 'darwin' + + # MoltenVK options + if with_moltenvk_dir != '' + fs = import('fs') + # Vulkan SDK 1.3.250 to 1.3.268 support + moltenvk_includes = join_paths(with_moltenvk_dir, 'MoltenVK', 'include') + if not fs.is_dir(moltenvk_includes) + # Vulkan SDK 1.3.275 onwards support + moltenvk_includes = join_paths(with_moltenvk_dir, 'macos', 
'include') + if not fs.is_dir(moltenvk_includes) + # MoltenVK from brew support + moltenvk_includes = join_paths(with_moltenvk_dir, 'include') + if not fs.is_dir(moltenvk_includes) + error(f'moltenvk includes cannot be found in moltenvk-dir="@with_moltenvk_dir@"') + endif + endif + endif + inc_zink_vk += include_directories(moltenvk_includes) + else + error('moltenvk-dir is required but not set.') + endif -# MoltenVK options -if with_moltenvk_dir != '' - inc_zink_vk = [inc_zink_vk, include_directories( join_paths(with_moltenvk_dir, 'include') )] zink_c_args += ['-x','objective-c'] # Put compiler into objective-C mode to allow for MacOS types, like IOSurface and CAMetalLayer zink_c_args += ['-iframework' , 'Foundation'] + endif libzink = static_library( 'zink', [files_libzink, zink_device_info, zink_instance, zink_nir_algebraic_c, vk_dispatch_table], gnu_symbol_visibility : 'hidden', - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_vulkan_wsi, inc_vulkan_util, inc_zink_vk], - dependencies: [dep_vulkan, idep_nir_headers, idep_mesautil, idep_vulkan_util_headers, dep_libdrm], + include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_vulkan_util, inc_zink_vk], + link_args : [ld_args_build_id], + dependencies: [ + idep_nir_headers, idep_mesautil, idep_vulkan_util_headers, + idep_vulkan_wsi_defines, idep_vulkan_util, dep_libdrm, vulkan_wsi_deps + ], c_args: zink_c_args, ) diff --git a/src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c b/src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c deleted file mode 100644 index cc38565b155..00000000000 --- a/src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright © 2020 Mike Blumenkrantz - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> - */ - -#include "nir.h" -#include "nir_builder.h" - -bool nir_lower_dynamic_bo_access(nir_shader *shader); -/** - * This pass converts dynamic UBO/SSBO block indices to constant indices by generating - * conditional chains which reduce to single values. 
- * - * This is needed by anything which intends to convert GLSL-like shaders to SPIRV, - * as SPIRV requires explicit load points for UBO/SSBO variables and has no instruction for - * loading based on an offset in the underlying driver's binding table - */ - - -/* generate a single ssa value which conditionally selects the right value that - * was previously loaded by the load_ubo conditional chain - */ -static nir_ssa_def * -recursive_generate_bo_ssa_def(nir_builder *b, nir_intrinsic_instr *instr, nir_ssa_def *index, unsigned start, unsigned end) -{ - if (start == end - 1) { - nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, instr->intrinsic); - new_instr->src[0] = nir_src_for_ssa(nir_imm_int(b, start)); - for (unsigned i = 0; i < nir_intrinsic_infos[instr->intrinsic].num_srcs; i++) { - if (i) - nir_src_copy(&new_instr->src[i], &instr->src[i]); - } - if (instr->intrinsic != nir_intrinsic_load_ubo_vec4) { - nir_intrinsic_set_align(new_instr, nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr)); - if (instr->intrinsic != nir_intrinsic_load_ssbo) - nir_intrinsic_set_range(new_instr, nir_intrinsic_range(instr)); - } - new_instr->num_components = instr->num_components; - nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, - nir_dest_num_components(instr->dest), - nir_dest_bit_size(instr->dest), NULL); - nir_builder_instr_insert(b, &new_instr->instr); - return &new_instr->dest.ssa; - } - - unsigned mid = start + (end - start) / 2; - return nir_build_alu(b, nir_op_bcsel, nir_build_alu(b, nir_op_ilt, index, nir_imm_int(b, mid), NULL, NULL), - recursive_generate_bo_ssa_def(b, instr, index, start, mid), - recursive_generate_bo_ssa_def(b, instr, index, mid, end), - NULL - ); -} - -static void -generate_store_ssbo_ssa_def(nir_builder *b, nir_intrinsic_instr *instr, nir_ssa_def *index, unsigned start, unsigned end) -{ - if (start == end - 1) { - nir_intrinsic_instr *new_instr = nir_instr_as_intrinsic(nir_instr_clone(b->shader, 
&instr->instr)); - new_instr->src[1] = nir_src_for_ssa(nir_imm_int(b, start)); - nir_builder_instr_insert(b, &new_instr->instr); - } else { - int mid = start + (end - start) / 2; - nir_ssa_def *mid_idx = nir_imm_int(b, mid); - nir_push_if(b, nir_ilt(b, index, mid_idx)); - generate_store_ssbo_ssa_def(b, instr, index, start, mid); - nir_push_else(b, NULL); - generate_store_ssbo_ssa_def(b, instr, index, mid, end); - nir_pop_if(b, NULL); - } -} - -static bool -lower_dynamic_bo_access_instr(nir_builder *b, - nir_instr *instr_, - UNUSED void *cb_data) -{ - if (instr_->type != nir_instr_type_intrinsic) - return false; - - nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_); - - if (instr->intrinsic != nir_intrinsic_load_ubo && - instr->intrinsic != nir_intrinsic_load_ubo_vec4 && - instr->intrinsic != nir_intrinsic_get_ssbo_size && - instr->intrinsic != nir_intrinsic_load_ssbo && - instr->intrinsic != nir_intrinsic_store_ssbo) - return false; - /* block index src is 1 for this op */ - unsigned block_idx = instr->intrinsic == nir_intrinsic_store_ssbo; - if (nir_src_is_const(instr->src[block_idx])) - return false; - b->cursor = nir_after_instr(&instr->instr); - bool ssbo_mode = instr->intrinsic != nir_intrinsic_load_ubo && instr->intrinsic != nir_intrinsic_load_ubo_vec4; - unsigned first_idx = UINT_MAX, last_idx; - if (ssbo_mode) { - nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_ssbo) - first_idx = MIN2(first_idx, var->data.driver_location); - last_idx = first_idx + b->shader->info.num_ssbos; - } else { - /* skip 0 index if uniform_0 is one we created previously */ - first_idx = !b->shader->info.first_ubo_is_default_ubo; - last_idx = first_idx + b->shader->info.num_ubos; - } - - if (instr->intrinsic != nir_intrinsic_store_ssbo) { - /* now create the composite dest with a bcsel chain based on the original value */ - nir_ssa_def *new_dest = recursive_generate_bo_ssa_def(b, instr, - instr->src[block_idx].ssa, - first_idx, last_idx); - - /* now use the 
composite dest in all cases where the original dest (from the dynamic index) - * was used and remove the dynamically-indexed load_*bo instruction - */ - nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, new_dest, - &instr->instr); - } else - generate_store_ssbo_ssa_def(b, instr, instr->src[block_idx].ssa, first_idx, last_idx); - nir_instr_remove(&instr->instr); - - return true; -} - -bool -nir_lower_dynamic_bo_access(nir_shader *shader) -{ - return nir_shader_instructions_pass(shader, - lower_dynamic_bo_access_instr, - nir_metadata_dominance, - NULL); -} diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index f62aad28eb3..88ced74699f 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -40,59 +40,69 @@ struct ntv_context { */ bool spirv_1_4_interfaces; + bool explicit_lod; //whether to set lod=0 for texture() + struct spirv_builder builder; + nir_shader *nir; struct hash_table *glsl_types; + struct hash_table *bo_struct_types; + struct hash_table *bo_array_types; SpvId GLSL_std_450; gl_shader_stage stage; - const struct zink_so_info *so_info; + const struct zink_shader_info *sinfo; - SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32 + SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64 nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS]; - SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32 - nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS]; - SpvId image_types[PIPE_MAX_SAMPLERS]; - SpvId images[PIPE_MAX_SAMPLERS]; - SpvId sampler_types[PIPE_MAX_SAMPLERS]; - SpvId samplers[PIPE_MAX_SAMPLERS]; - unsigned char sampler_array_sizes[PIPE_MAX_SAMPLERS]; - unsigned samplers_used : PIPE_MAX_SAMPLERS; + SpvId ssbos[5]; //8, 16, 32, unused, 64 + nir_variable *ssbo_vars; + + SpvId images[PIPE_MAX_SHADER_IMAGES]; + struct hash_table image_types; + SpvId samplers[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + SpvId 
bindless_samplers[2]; + SpvId cl_samplers[PIPE_MAX_SAMPLERS]; + nir_variable *sampler_var[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /* driver_location -> variable */ + nir_variable *bindless_sampler_var[2]; + unsigned last_sampler; + unsigned bindless_set_idx; + nir_variable *image_var[PIPE_MAX_SHADER_IMAGES]; /* driver_location -> variable */ + SpvId entry_ifaces[PIPE_MAX_SHADER_INPUTS * 4 + PIPE_MAX_SHADER_OUTPUTS * 4]; size_t num_entry_ifaces; SpvId *defs; + nir_alu_type *def_types; + SpvId *resident_defs; size_t num_defs; - SpvId *regs; - size_t num_regs; - struct hash_table *vars; /* nir_variable -> SpvId */ - struct hash_table *image_vars; /* SpvId -> nir_variable */ - struct hash_table *so_outputs; /* pipe_stream_output -> SpvId */ - unsigned outputs[VARYING_SLOT_MAX * 4]; - const struct glsl_type *so_output_gl_types[VARYING_SLOT_MAX * 4]; - SpvId so_output_types[VARYING_SLOT_MAX * 4]; const SpvId *block_ids; size_t num_blocks; bool block_started; SpvId loop_break, loop_cont; + SpvId shared_block_var[5]; //8, 16, 32, unused, 64 + SpvId shared_block_arr_type[5]; //8, 16, 32, unused, 64 + SpvId scratch_block_var[5]; //8, 16, 32, unused, 64 + SpvId front_face_var, instance_id_var, vertex_id_var, primitive_id_var, invocation_id_var, // geometry sample_mask_type, sample_id_var, sample_pos_var, sample_mask_in_var, tess_patch_vertices_in, tess_coord_var, // tess - push_const_var, + push_const_var, point_coord_var, workgroup_id_var, num_workgroups_var, local_invocation_id_var, global_invocation_id_var, local_invocation_index_var, helper_invocation_var, local_group_size_var, - shared_block_var, base_vertex_var, base_instance_var, draw_id_var; + SpvId shared_mem_size; + SpvId subgroup_eq_mask_var, subgroup_ge_mask_var, subgroup_gt_mask_var, @@ -101,6 +111,9 @@ struct ntv_context { subgroup_le_mask_var, subgroup_lt_mask_var, subgroup_size_var; + + SpvId discard_func; + SpvId float_array_type[2]; }; static SpvId @@ -108,10 +121,6 @@ get_fvec_constant(struct ntv_context *ctx, 
unsigned bit_size, unsigned num_components, double value); static SpvId -get_uvec_constant(struct ntv_context *ctx, unsigned bit_size, - unsigned num_components, uint64_t value); - -static SpvId get_ivec_constant(struct ntv_context *ctx, unsigned bit_size, unsigned num_components, int64_t value); @@ -126,6 +135,128 @@ static SpvId emit_triop(struct ntv_context *ctx, SpvOp op, SpvId type, SpvId src0, SpvId src1, SpvId src2); +static bool +alu_op_is_typeless(nir_op op) +{ + switch (op) { + case nir_op_mov: + case nir_op_vec16: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec5: + case nir_op_vec8: + case nir_op_bcsel: + return true; + default: + break; + } + return false; +} + +static nir_alu_type +get_nir_alu_type(const struct glsl_type *type) +{ + return nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_without_array_or_matrix(type)))); +} + +static nir_alu_type +infer_nir_alu_type_from_uses_ssa(nir_def *ssa); + +static nir_alu_type +infer_nir_alu_type_from_use(nir_src *src) +{ + nir_instr *instr = nir_src_parent_instr(src); + nir_alu_type atype = nir_type_invalid; + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op == nir_op_bcsel) { + if (nir_srcs_equal(alu->src[0].src, *src)) { + /* special case: the first src in bcsel is always bool */ + return nir_type_bool; + } + } + /* ignore typeless ops */ + if (alu_op_is_typeless(alu->op)) { + atype = infer_nir_alu_type_from_uses_ssa(&alu->def); + break; + } + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + if (!nir_srcs_equal(alu->src[i].src, *src)) + continue; + atype = nir_op_infos[alu->op].input_types[i]; + break; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (!nir_srcs_equal(tex->src[i].src, *src)) + continue; + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + 
case nir_tex_src_lod: + if (tex->op == nir_texop_txf || + tex->op == nir_texop_txf_ms || + tex->op == nir_texop_txs) + atype = nir_type_int; + else + atype = nir_type_float; + break; + case nir_tex_src_projector: + case nir_tex_src_bias: + case nir_tex_src_min_lod: + case nir_tex_src_comparator: + case nir_tex_src_ddx: + case nir_tex_src_ddy: + atype = nir_type_float; + break; + case nir_tex_src_offset: + case nir_tex_src_ms_index: + case nir_tex_src_texture_offset: + case nir_tex_src_sampler_offset: + case nir_tex_src_sampler_handle: + case nir_tex_src_texture_handle: + atype = nir_type_int; + break; + default: + break; + } + break; + } + break; + } + case nir_instr_type_intrinsic: { + if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_deref) { + atype = get_nir_alu_type(nir_instr_as_deref(instr)->type); + } else if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_store_deref) { + atype = get_nir_alu_type(nir_src_as_deref(nir_instr_as_intrinsic(instr)->src[0])->type); + } + break; + } + default: + break; + } + return nir_alu_type_get_base_type(atype); +} + +static nir_alu_type +infer_nir_alu_type_from_uses_ssa(nir_def *ssa) +{ + nir_alu_type atype = nir_type_invalid; + /* try to infer a type: if it's wrong then whatever, but at least we tried */ + nir_foreach_use_including_if(src, ssa) { + if (nir_src_is_if(src)) + return nir_type_bool; + atype = infer_nir_alu_type_from_use(src); + if (atype) + break; + } + return atype ? atype : nir_type_uint; +} + static SpvId get_bvec_type(struct ntv_context *ctx, int num_components) { @@ -138,17 +269,24 @@ get_bvec_type(struct ntv_context *ctx, int num_components) return bool_type; } +static SpvId +find_image_type(struct ntv_context *ctx, nir_variable *var) +{ + struct hash_entry *he = _mesa_hash_table_search(&ctx->image_types, var); + return he ? 
(intptr_t)he->data : 0; +} + static SpvScope -get_scope(nir_scope scope) +get_scope(mesa_scope scope) { SpvScope conv[] = { - [NIR_SCOPE_NONE] = 0, - [NIR_SCOPE_INVOCATION] = SpvScopeInvocation, - [NIR_SCOPE_SUBGROUP] = SpvScopeSubgroup, - [NIR_SCOPE_SHADER_CALL] = SpvScopeShaderCallKHR, - [NIR_SCOPE_WORKGROUP] = SpvScopeWorkgroup, - [NIR_SCOPE_QUEUE_FAMILY] = SpvScopeQueueFamily, - [NIR_SCOPE_DEVICE] = SpvScopeDevice, + [SCOPE_NONE] = 0, + [SCOPE_INVOCATION] = SpvScopeInvocation, + [SCOPE_SUBGROUP] = SpvScopeSubgroup, + [SCOPE_SHADER_CALL] = SpvScopeShaderCallKHR, + [SCOPE_WORKGROUP] = SpvScopeWorkgroup, + [SCOPE_QUEUE_FAMILY] = SpvScopeQueueFamily, + [SCOPE_DEVICE] = SpvScopeDevice, }; return conv[scope]; } @@ -163,9 +301,7 @@ block_label(struct ntv_context *ctx, nir_block *block) static void emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id) { - unsigned access = var->data.access; - while (access) { - unsigned bit = u_bit_scan(&access); + u_foreach_bit(bit, var->data.access) { switch (1 << bit) { case ACCESS_COHERENT: /* SpvDecorationCoherent can't be used with vulkan memory model */ @@ -186,43 +322,79 @@ emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform); break; case ACCESS_CAN_REORDER: - case ACCESS_STREAM_CACHE_POLICY: + case ACCESS_NON_TEMPORAL: /* no equivalent */ break; default: unreachable("unknown access bit"); } } + /* The Simple, GLSL, and Vulkan memory models can assume that aliasing is generally + * not present between the memory object declarations. Specifically, the consumer + * is free to assume aliasing is not present between memory object declarations, + * unless the memory object declarations explicitly indicate they alias. + * ... + * Applying Restrict is allowed, but has no effect. + * ... + * Only those memory object declarations decorated with Aliased or AliasedPointer may alias each other. 
+ * + * - SPIRV 2.18.2 Aliasing + * + * thus if the variable isn't marked restrict, assume it may alias + */ + if (!(var->data.access & ACCESS_RESTRICT)) + spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationAliased); } static SpvOp -get_atomic_op(nir_intrinsic_op op) +get_atomic_op(struct ntv_context *ctx, unsigned bit_size, nir_atomic_op op) { switch (op) { -#define CASE_ATOMIC_OP(type) \ - case nir_intrinsic_ssbo_atomic_##type: \ - case nir_intrinsic_image_deref_atomic_##type: \ - case nir_intrinsic_shared_atomic_##type - - CASE_ATOMIC_OP(add): +#define ATOMIC_FCAP(NAME) \ + do {\ + if (bit_size == 16) \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat16##NAME##EXT); \ + if (bit_size == 32) \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat32##NAME##EXT); \ + if (bit_size == 64) \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat64##NAME##EXT); \ + } while (0) + + case nir_atomic_op_fadd: + ATOMIC_FCAP(Add); + if (bit_size == 16) + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float16_add"); + else + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_add"); + return SpvOpAtomicFAddEXT; + case nir_atomic_op_fmax: + ATOMIC_FCAP(MinMax); + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_min_max"); + return SpvOpAtomicFMaxEXT; + case nir_atomic_op_fmin: + ATOMIC_FCAP(MinMax); + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_min_max"); + return SpvOpAtomicFMinEXT; + + case nir_atomic_op_iadd: return SpvOpAtomicIAdd; - CASE_ATOMIC_OP(umin): + case nir_atomic_op_umin: return SpvOpAtomicUMin; - CASE_ATOMIC_OP(imin): + case nir_atomic_op_imin: return SpvOpAtomicSMin; - CASE_ATOMIC_OP(umax): + case nir_atomic_op_umax: return SpvOpAtomicUMax; - CASE_ATOMIC_OP(imax): + case nir_atomic_op_imax: return SpvOpAtomicSMax; - CASE_ATOMIC_OP(and): + case nir_atomic_op_iand: return SpvOpAtomicAnd; - CASE_ATOMIC_OP(or): + case 
nir_atomic_op_ior: return SpvOpAtomicOr; - CASE_ATOMIC_OP(xor): + case nir_atomic_op_ixor: return SpvOpAtomicXor; - CASE_ATOMIC_OP(exchange): + case nir_atomic_op_xchg: return SpvOpAtomicExchange; - CASE_ATOMIC_OP(comp_swap): + case nir_atomic_op_cmpxchg: return SpvOpAtomicCompareExchange; default: debug_printf("%s - ", nir_intrinsic_infos[op].name); @@ -230,7 +402,7 @@ get_atomic_op(nir_intrinsic_op op) } return 0; } -#undef CASE_ATOMIC_OP + static SpvId emit_float_const(struct ntv_context *ctx, int bit_size, double value) { @@ -294,10 +466,37 @@ get_uvec_type(struct ntv_context *ctx, unsigned bit_size, unsigned num_component return uint_type; } +static SpvId +get_alu_type(struct ntv_context *ctx, nir_alu_type type, unsigned num_components, unsigned bit_size) +{ + if (bit_size == 1) + return get_bvec_type(ctx, num_components); + + type = nir_alu_type_get_base_type(type); + switch (nir_alu_type_get_base_type(type)) { + case nir_type_bool: + return get_bvec_type(ctx, num_components); + + case nir_type_int: + return get_ivec_type(ctx, bit_size, num_components); + + case nir_type_uint: + return get_uvec_type(ctx, bit_size, num_components); + + case nir_type_float: + return get_fvec_type(ctx, bit_size, num_components); + + default: + unreachable("unsupported nir_alu_type"); + } +} + static SpvStorageClass get_storage_class(struct nir_variable *var) { switch (var->data.mode) { + case nir_var_function_temp: + return SpvStorageClassFunction; case nir_var_mem_push_const: return SpvStorageClassPushConstant; case nir_var_shader_in: @@ -305,7 +504,12 @@ get_storage_class(struct nir_variable *var) case nir_var_shader_out: return SpvStorageClassOutput; case nir_var_uniform: + case nir_var_image: return SpvStorageClassUniformConstant; + case nir_var_mem_ubo: + return SpvStorageClassUniform; + case nir_var_mem_ssbo: + return SpvStorageClassStorageBuffer; default: unreachable("Unsupported nir_variable_mode"); } @@ -313,10 +517,10 @@ get_storage_class(struct nir_variable *var) } 
static SpvId -get_dest_uvec_type(struct ntv_context *ctx, nir_dest *dest) +get_def_uvec_type(struct ntv_context *ctx, nir_def *def) { - unsigned bit_size = nir_dest_bit_size(*dest); - return get_uvec_type(ctx, bit_size, nir_dest_num_components(*dest)); + unsigned bit_size = def->bit_size; + return get_uvec_type(ctx, bit_size, def->num_components); } static SpvId @@ -346,7 +550,15 @@ get_glsl_basetype(struct ntv_context *ctx, enum glsl_base_type type) case GLSL_TYPE_UINT64: return spirv_builder_type_uint(&ctx->builder, 64); - /* TODO: handle more types */ + + case GLSL_TYPE_UINT16: + return spirv_builder_type_uint(&ctx->builder, 16); + case GLSL_TYPE_INT16: + return spirv_builder_type_int(&ctx->builder, 16); + case GLSL_TYPE_INT8: + return spirv_builder_type_int(&ctx->builder, 8); + case GLSL_TYPE_UINT8: + return spirv_builder_type_uint(&ctx->builder, 8); default: unreachable("unknown GLSL type"); @@ -413,8 +625,11 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type) types[i] = get_glsl_type(ctx, glsl_get_struct_field(type, i)); ret = spirv_builder_type_struct(&ctx->builder, types, glsl_get_length(type)); - for (unsigned i = 0; i < glsl_get_length(type); i++) - spirv_builder_emit_member_offset(&ctx->builder, ret, i, glsl_get_struct_field_offset(type, i)); + for (unsigned i = 0; i < glsl_get_length(type); i++) { + int32_t offset = glsl_get_struct_field_offset(type, i); + if (offset >= 0) + spirv_builder_emit_member_offset(&ctx->builder, ret, i, offset); + } } else unreachable("Unhandled GLSL type"); @@ -423,21 +638,99 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type) } static void -create_shared_block(struct ntv_context *ctx, unsigned shared_size) +create_scratch_block(struct ntv_context *ctx, unsigned scratch_size, unsigned bit_size) { - SpvId type = spirv_builder_type_uint(&ctx->builder, 32); - SpvId array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, shared_size / 4)); - 
spirv_builder_emit_array_stride(&ctx->builder, array, 4); + unsigned idx = bit_size >> 4; + SpvId type = spirv_builder_type_uint(&ctx->builder, bit_size); + unsigned block_size = scratch_size / (bit_size / 8); + assert(block_size); + SpvId array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, block_size)); + spirv_builder_emit_array_stride(&ctx->builder, array, bit_size / 8); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassWorkgroup, + SpvStorageClassPrivate, array); - ctx->shared_block_var = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassWorkgroup); + ctx->scratch_block_var[idx] = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassPrivate); + if (ctx->spirv_1_4_interfaces) { + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->scratch_block_var[idx]; + } +} + +static SpvId +get_scratch_block(struct ntv_context *ctx, unsigned bit_size) +{ + unsigned idx = bit_size >> 4; + if (!ctx->scratch_block_var[idx]) + create_scratch_block(ctx, ctx->nir->scratch_size, bit_size); + return ctx->scratch_block_var[idx]; +} + +static void +create_shared_block(struct ntv_context *ctx, unsigned bit_size) +{ + unsigned idx = bit_size >> 4; + SpvId type = spirv_builder_type_uint(&ctx->builder, bit_size); + SpvId array; + + assert(gl_shader_stage_is_compute(ctx->nir->info.stage)); + if (ctx->nir->info.cs.has_variable_shared_mem) { + assert(ctx->shared_mem_size); + SpvId const_shared_size = emit_uint_const(ctx, 32, ctx->nir->info.shared_size); + SpvId shared_mem_size = spirv_builder_emit_triop(&ctx->builder, SpvOpSpecConstantOp, spirv_builder_type_uint(&ctx->builder, 32), SpvOpIAdd, const_shared_size, ctx->shared_mem_size); + shared_mem_size = spirv_builder_emit_triop(&ctx->builder, SpvOpSpecConstantOp, spirv_builder_type_uint(&ctx->builder, 32), SpvOpUDiv, shared_mem_size, emit_uint_const(ctx, 32, bit_size / 8)); + array = 
spirv_builder_type_array(&ctx->builder, type, shared_mem_size); + } else { + unsigned block_size = ctx->nir->info.shared_size / (bit_size / 8); + assert(block_size); + array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, block_size)); + } + + ctx->shared_block_arr_type[idx] = array; + spirv_builder_emit_array_stride(&ctx->builder, array, bit_size / 8); + + /* Create wrapper struct for Block, Offset and Aliased decorations. */ + SpvId block = spirv_builder_type_struct(&ctx->builder, &array, 1); + + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassWorkgroup, + block); + ctx->shared_block_var[idx] = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassWorkgroup); if (ctx->spirv_1_4_interfaces) { assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); - ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->shared_block_var; + ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->shared_block_var[idx]; + } + /* Alias our shared memory blocks */ + if (ctx->sinfo->have_workgroup_memory_explicit_layout) { + spirv_builder_emit_member_offset(&ctx->builder, block, 0, 0); + spirv_builder_emit_decoration(&ctx->builder, block, SpvDecorationBlock); + spirv_builder_emit_decoration(&ctx->builder, ctx->shared_block_var[idx], SpvDecorationAliased); } } +static SpvId +get_shared_block(struct ntv_context *ctx, unsigned bit_size) +{ + unsigned idx = bit_size >> 4; + if (!ctx->shared_block_var[idx]) + create_shared_block(ctx, bit_size); + if (ctx->sinfo->have_workgroup_memory_explicit_layout) { + spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_workgroup_memory_explicit_layout"); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayoutKHR); + if (ctx->shared_block_var[0]) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR); + if (ctx->shared_block_var[1]) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR); 
+ } + + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassWorkgroup, + ctx->shared_block_arr_type[idx]); + SpvId zero = emit_uint_const(ctx, 32, 0); + + return spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + ctx->shared_block_var[idx], &zero, 1); +} + #define HANDLE_EMIT_BUILTIN(SLOT, BUILTIN) \ case VARYING_SLOT_##SLOT: \ spirv_builder_emit_builtin(&ctx->builder, var_id, SpvBuiltIn##BUILTIN); \ @@ -505,7 +798,6 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) else if (ctx->stage == MESA_SHADER_FRAGMENT) { switch (var->data.location) { HANDLE_EMIT_BUILTIN(POS, FragCoord); - HANDLE_EMIT_BUILTIN(PNTC, PointCoord); HANDLE_EMIT_BUILTIN(LAYER, Layer); HANDLE_EMIT_BUILTIN(PRIMITIVE_ID, PrimitiveId); HANDLE_EMIT_BUILTIN(CLIP_DIST0, ClipDistance); @@ -521,6 +813,7 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationCentroid); else if (var->data.sample) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationSample); + emit_interpolation(ctx, var_id, var->data.interpolation); } else if (ctx->stage < MESA_SHADER_FRAGMENT) { switch (var->data.location) { HANDLE_EMIT_BUILTIN(POS, Position); @@ -550,8 +843,6 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); - emit_interpolation(ctx, var_id, var->data.interpolation); - _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); @@ -574,6 +865,11 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); + if (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW) { + spirv_builder_emit_decoration(&ctx->builder, var_id, + SpvDecorationRelaxedPrecision); + } + if (ctx->stage != MESA_SHADER_FRAGMENT) 
{ switch (var->data.location) { HANDLE_EMIT_BUILTIN(POS, Position); @@ -587,16 +883,12 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) HANDLE_EMIT_BUILTIN(TESS_LEVEL_INNER, TessLevelInner); default: - spirv_builder_emit_location(&ctx->builder, var_id, - var->data.driver_location); - } - /* tcs can't do xfb */ - if (ctx->stage != MESA_SHADER_TESS_CTRL) { - unsigned idx = var->data.location << 2 | var->data.location_frac; - ctx->outputs[idx] = var_id; - ctx->so_output_gl_types[idx] = var->type; - ctx->so_output_types[idx] = var_type; + /* non-xfb psiz output will have location -1 */ + if (var->data.location >= 0) + spirv_builder_emit_location(&ctx->builder, var_id, + var->data.driver_location); } + emit_interpolation(ctx, var_id, var->data.interpolation); } else { if (var->data.location >= FRAG_RESULT_DATA0) { spirv_builder_emit_location(&ctx->builder, var_id, @@ -633,12 +925,10 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_component(&ctx->builder, var_id, var->data.location_frac); - emit_interpolation(ctx, var_id, var->data.interpolation); - if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); - if (var->data.explicit_xfb_buffer) { + if (var->data.explicit_xfb_buffer && ctx->nir->xfb_info) { spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset); spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer); spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride); @@ -652,6 +942,41 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; } +static void +emit_shader_temp(struct ntv_context *ctx, struct nir_variable *var) +{ + SpvId var_type = get_glsl_type(ctx, var->type); + + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPrivate, + var_type); + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + 
SpvStorageClassPrivate); + if (var->name) + spirv_builder_emit_name(&ctx->builder, var_id, var->name); + + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); + + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; +} + +static void +emit_temp(struct ntv_context *ctx, struct nir_variable *var) +{ + SpvId var_type = get_glsl_type(ctx, var->type); + + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassFunction, + var_type); + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassFunction); + if (var->name) + spirv_builder_emit_name(&ctx->builder, var_id, var->name); + + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); +} + static SpvDim type_to_dim(enum glsl_sampler_dim gdim, bool *is_ms) { @@ -674,6 +999,9 @@ type_to_dim(enum glsl_sampler_dim gdim, bool *is_ms) case GLSL_SAMPLER_DIM_MS: *is_ms = true; return SpvDim2D; + case GLSL_SAMPLER_DIM_SUBPASS_MS: + *is_ms = true; + return SpvDimSubpassData; case GLSL_SAMPLER_DIM_SUBPASS: return SpvDimSubpassData; default: @@ -800,13 +1128,12 @@ get_image_format(struct ntv_context *ctx, enum pipe_format format) return ret; } -static void -emit_image(struct ntv_context *ctx, struct nir_variable *var) +static SpvId +get_bare_image_type(struct ntv_context *ctx, struct nir_variable *var, bool is_sampler) { const struct glsl_type *type = glsl_without_array(var->type); bool is_ms; - bool is_sampler = glsl_type_is_sampler(type); if (var->data.fb_fetch_output) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInputAttachment); @@ -818,29 +1145,64 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var) } SpvDim dimension = type_to_dim(glsl_get_sampler_dim(type), &is_ms); + if (dimension == SpvDim1D) { + if (is_sampler) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampled1D); + else + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImage1D); + } + if 
(dimension == SpvDimBuffer) { + if (is_sampler) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampledBuffer); + else + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageBuffer); + } + bool arrayed = glsl_sampler_type_is_array(type); if (dimension == SpvDimCube && arrayed) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageCubeArray); + if (arrayed && !is_sampler && is_ms) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageMSArray); SpvId result_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); - SpvId image_type = spirv_builder_type_image(&ctx->builder, result_type, + return spirv_builder_type_image(&ctx->builder, result_type, dimension, false, arrayed, is_ms, is_sampler ? 1 : 2, get_image_format(ctx, var->data.image.format)); +} + +static SpvId +get_image_type(struct ntv_context *ctx, struct nir_variable *var, + bool is_sampler, bool is_buffer) +{ + SpvId image_type = get_bare_image_type(ctx, var, is_sampler); + return is_sampler && ctx->stage != MESA_SHADER_KERNEL && !is_buffer ? + spirv_builder_type_sampled_image(&ctx->builder, image_type) : + image_type; +} - SpvId var_type = is_sampler ? spirv_builder_type_sampled_image(&ctx->builder, image_type) : image_type; +static SpvId +emit_image(struct ntv_context *ctx, struct nir_variable *var, SpvId image_type) +{ + if (var->data.bindless) + return 0; + const struct glsl_type *type = glsl_without_array(var->type); + + bool is_sampler = glsl_type_is_sampler(type); + bool is_buffer = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF; + SpvId var_type = is_sampler && ctx->stage != MESA_SHADER_KERNEL && !is_buffer ? 
+ spirv_builder_type_sampled_image(&ctx->builder, image_type) : image_type; + + bool mediump = (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW); int index = var->data.driver_location; - assert(!is_sampler || (!(ctx->samplers_used & (1 << index)))); - assert(!is_sampler || !ctx->sampler_types[index]); - assert(is_sampler || !ctx->image_types[index]); + assert(!find_image_type(ctx, var)); if (glsl_type_is_array(var->type)) { var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, glsl_get_aoa_size(var->type))); spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(void*)); - ctx->sampler_array_sizes[index] = glsl_get_aoa_size(var->type); } SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, @@ -849,25 +1211,32 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var) SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, SpvStorageClassUniformConstant); + if (mediump) { + spirv_builder_emit_decoration(&ctx->builder, var_id, + SpvDecorationRelaxedPrecision); + } + if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); if (var->data.fb_fetch_output) spirv_builder_emit_input_attachment_index(&ctx->builder, var_id, var->data.index); + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); if (is_sampler) { - ctx->sampler_types[index] = image_type; - ctx->samplers[index] = var_id; - ctx->samplers_used |= 1 << index; + if (var->data.descriptor_set == ctx->bindless_set_idx) { + assert(!ctx->bindless_samplers[index]); + ctx->bindless_samplers[index] = var_id; + } else { + assert(!ctx->samplers[index]); + ctx->samplers[index] = var_id; + } } else { - ctx->image_types[index] = image_type; + assert(!ctx->images[index]); ctx->images[index] = var_id; - _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); - uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); - *key = var_id; - 
_mesa_hash_table_insert(ctx->image_vars, key, var); emit_access_decorations(ctx, var, var_id); } + _mesa_hash_table_insert(&ctx->image_types, var, (void *)(intptr_t)image_type); if (ctx->spirv_1_4_interfaces) { assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; @@ -875,6 +1244,30 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_descriptor_set(&ctx->builder, var_id, var->data.descriptor_set); spirv_builder_emit_binding(&ctx->builder, var_id, var->data.binding); + return var_id; +} + +static void +emit_sampler(struct ntv_context *ctx, unsigned sampler_index, unsigned desc_set) +{ + SpvId type = spirv_builder_type_sampler(&ctx->builder); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassUniformConstant, + type); + + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassUniformConstant); + char buf[128]; + snprintf(buf, sizeof(buf), "sampler_%u", sampler_index); + spirv_builder_emit_name(&ctx->builder, var_id, buf); + spirv_builder_emit_descriptor_set(&ctx->builder, var_id, desc_set); + spirv_builder_emit_binding(&ctx->builder, var_id, sampler_index); + ctx->cl_samplers[sampler_index] = var_id; + if (ctx->spirv_1_4_interfaces) { + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; + } + } static SpvId @@ -887,19 +1280,22 @@ get_sized_uint_array_type(struct ntv_context *ctx, unsigned array_size, unsigned return array_type; } +/* get array<struct(array_type <--this one)> */ static SpvId -get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bitsize) +get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var) { + struct hash_entry *he = _mesa_hash_table_search(ctx->bo_array_types, var); + if (he) + return (SpvId)(uintptr_t)he->data; + unsigned bitsize = 
glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0))); assert(bitsize); SpvId array_type; - const struct glsl_type *type = var->type; - if (!glsl_type_is_unsized_array(type)) { - type = glsl_get_struct_field(var->interface_type, 0); - if (!glsl_type_is_unsized_array(type)) { - uint32_t array_size = glsl_get_length(type) * (bitsize / 4); - assert(array_size); - return get_sized_uint_array_type(ctx, array_size, bitsize); - } + const struct glsl_type *type = glsl_without_array(var->type); + const struct glsl_type *first_type = glsl_get_struct_field(type, 0); + if (!glsl_type_is_unsized_array(first_type)) { + uint32_t array_size = glsl_get_length(first_type); + assert(array_size); + return get_sized_uint_array_type(ctx, array_size, bitsize); } SpvId uint_type = spirv_builder_type_uint(&ctx->builder, bitsize); array_type = spirv_builder_type_runtime_array(&ctx->builder, uint_type); @@ -907,19 +1303,25 @@ get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bi return array_type; } +/* get array<struct(array_type) <--this one> */ static SpvId -get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bitsize) +get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var) { - SpvId array_type = get_bo_array_type(ctx, var, bitsize); + struct hash_entry *he = _mesa_hash_table_search(ctx->bo_struct_types, var); + if (he) + return (SpvId)(uintptr_t)he->data; + const struct glsl_type *bare_type = glsl_without_array(var->type); + unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(bare_type, 0))); + SpvId array_type = get_bo_array_type(ctx, var); + _mesa_hash_table_insert(ctx->bo_array_types, var, (void *)(uintptr_t)array_type); bool ssbo = var->data.mode == nir_var_mem_ssbo; // wrap UBO-array in a struct SpvId runtime_array = 0; - if (ssbo && glsl_get_length(var->interface_type) > 1) { - const struct glsl_type *last_member = 
glsl_get_struct_field(var->interface_type, glsl_get_length(var->interface_type) - 1); + if (ssbo && glsl_get_length(bare_type) > 1) { + const struct glsl_type *last_member = glsl_get_struct_field(bare_type, glsl_get_length(bare_type) - 1); if (glsl_type_is_unsized_array(last_member)) { - bool is_64bit = glsl_type_is_64bit(glsl_without_array(last_member)); - runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, is_64bit ? 64 : bitsize, 1)); + runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, bitsize, 1)); spirv_builder_emit_array_stride(&ctx->builder, runtime_array, glsl_get_explicit_stride(last_member)); } } @@ -934,36 +1336,39 @@ get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var, unsigned b spirv_builder_emit_decoration(&ctx->builder, struct_type, SpvDecorationBlock); spirv_builder_emit_member_offset(&ctx->builder, struct_type, 0, 0); - if (runtime_array) { - spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1, - glsl_get_struct_field_offset(var->interface_type, - glsl_get_length(var->interface_type) - 1)); - } + if (runtime_array) + spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1, 0); - return spirv_builder_type_pointer(&ctx->builder, - ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform, - struct_type); + return struct_type; } static void -emit_bo(struct ntv_context *ctx, struct nir_variable *var, unsigned force_bitsize) +emit_bo(struct ntv_context *ctx, struct nir_variable *var, bool aliased) { + unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0))); bool ssbo = var->data.mode == nir_var_mem_ssbo; - unsigned bitsize = force_bitsize ? 
force_bitsize : 32; - unsigned idx = bitsize >> 4; - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - - SpvId pointer_type = get_bo_struct_type(ctx, var, bitsize); - + SpvId struct_type = get_bo_struct_type(ctx, var); + _mesa_hash_table_insert(ctx->bo_struct_types, var, (void *)(uintptr_t)struct_type); + SpvId array_length = emit_uint_const(ctx, 32, glsl_get_length(var->type)); + SpvId array_type = spirv_builder_type_array(&ctx->builder, struct_type, array_length); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform, + array_type); SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform); if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); + if (aliased) + spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationAliased); + + unsigned idx = bitsize >> 4; + assert(idx < ARRAY_SIZE(ctx->ssbos)); if (ssbo) { - assert(!ctx->ssbos[var->data.driver_location][idx]); - ctx->ssbos[var->data.driver_location][idx] = var_id; - ctx->ssbo_vars[var->data.driver_location] = var; + assert(!ctx->ssbos[idx]); + ctx->ssbos[idx] = var_id; + if (bitsize == 32) + ctx->ssbo_vars = var; } else { assert(!ctx->ubos[var->data.driver_location][idx]); ctx->ubos[var->data.driver_location][idx] = var_id; @@ -973,79 +1378,60 @@ emit_bo(struct ntv_context *ctx, struct nir_variable *var, unsigned force_bitsiz assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; } + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); spirv_builder_emit_descriptor_set(&ctx->builder, var_id, var->data.descriptor_set); spirv_builder_emit_binding(&ctx->builder, var_id, var->data.binding); } -static void -emit_uniform(struct ntv_context *ctx, struct nir_variable *var) -{ - if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo) - emit_bo(ctx, var, 0); - 
else { - assert(var->data.mode == nir_var_uniform); - const struct glsl_type *type = glsl_without_array(var->type); - if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) - emit_image(ctx, var); - } -} - static SpvId get_vec_from_bit_size(struct ntv_context *ctx, uint32_t bit_size, uint32_t num_components) { if (bit_size == 1) return get_bvec_type(ctx, num_components); - if (bit_size == 8 || bit_size == 16 || bit_size == 32 || bit_size == 64) - return get_uvec_type(ctx, bit_size, num_components); - unreachable("unhandled register bit size"); - return 0; + return get_uvec_type(ctx, bit_size, num_components); } static SpvId -get_src_ssa(struct ntv_context *ctx, const nir_ssa_def *ssa) +get_src_ssa(struct ntv_context *ctx, const nir_def *ssa, nir_alu_type *atype) { assert(ssa->index < ctx->num_defs); assert(ctx->defs[ssa->index] != 0); + *atype = ctx->def_types[ssa->index]; return ctx->defs[ssa->index]; } -static SpvId -get_var_from_reg(struct ntv_context *ctx, nir_register *reg) +static void +init_reg(struct ntv_context *ctx, nir_intrinsic_instr *decl, nir_alu_type atype) { - assert(reg->index < ctx->num_regs); - assert(ctx->regs[reg->index] != 0); - return ctx->regs[reg->index]; -} + unsigned index = decl->def.index; + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned bit_size = nir_intrinsic_bit_size(decl); -static SpvId -get_src_reg(struct ntv_context *ctx, const nir_reg_src *reg) -{ - assert(reg->reg); - assert(!reg->indirect); - assert(!reg->base_offset); + if (ctx->defs[index]) + return; + + SpvId type = get_alu_type(ctx, atype, num_components, bit_size); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassFunction, + type); + SpvId var = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassFunction); - SpvId var = get_var_from_reg(ctx, reg->reg); - SpvId type = get_vec_from_bit_size(ctx, reg->reg->bit_size, reg->reg->num_components); - return spirv_builder_emit_load(&ctx->builder, 
type, var); + ctx->defs[index] = var; + ctx->def_types[index] = nir_alu_type_get_base_type(atype); } static SpvId -get_src(struct ntv_context *ctx, nir_src *src) +get_src(struct ntv_context *ctx, nir_src *src, nir_alu_type *atype) { - if (src->is_ssa) - return get_src_ssa(ctx, src->ssa); - else - return get_src_reg(ctx, &src->reg); + return get_src_ssa(ctx, src->ssa, atype); } static SpvId -get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) +get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, nir_alu_type *atype) { - assert(!alu->src[src].negate); - assert(!alu->src[src].abs); - - SpvId def = get_src(ctx, &alu->src[src].src); + SpvId def = get_src(ctx, &alu->src[src].src, atype); unsigned used_channels = 0; bool need_swizzle = false; @@ -1068,10 +1454,7 @@ get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) return def; int bit_size = nir_src_bit_size(alu->src[src].src); - assert(bit_size == 1 || bit_size == 8 || bit_size == 16 || bit_size == 32 || bit_size == 64); - - SpvId raw_type = bit_size == 1 ? 
spirv_builder_type_bool(&ctx->builder) : - spirv_builder_type_uint(&ctx->builder, bit_size); + SpvId raw_type = get_alu_type(ctx, *atype, 1, bit_size); if (used_channels == 1) { uint32_t indices[] = { alu->src[src].swizzle[0] }; @@ -1111,14 +1494,6 @@ get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) } } -static void -store_ssa_def(struct ntv_context *ctx, nir_ssa_def *ssa, SpvId result) -{ - assert(result != 0); - assert(ssa->index < ctx->num_defs); - ctx->defs[ssa->index] = result; -} - static SpvId emit_select(struct ntv_context *ctx, SpvId type, SpvId cond, SpvId if_true, SpvId if_false) @@ -1127,14 +1502,6 @@ emit_select(struct ntv_context *ctx, SpvId type, SpvId cond, } static SpvId -uvec_to_bvec(struct ntv_context *ctx, SpvId value, unsigned num_components) -{ - SpvId type = get_bvec_type(ctx, num_components); - SpvId zero = get_uvec_constant(ctx, 32, num_components, 0); - return emit_binop(ctx, SpvOpINotEqual, type, value, zero); -} - -static SpvId emit_bitcast(struct ntv_context *ctx, SpvId type, SpvId value) { return emit_unop(ctx, SpvOpBitcast, type, value); @@ -1164,50 +1531,22 @@ bitcast_to_fvec(struct ntv_context *ctx, SpvId value, unsigned bit_size, return emit_bitcast(ctx, type, value); } -static void -store_reg_def(struct ntv_context *ctx, nir_reg_dest *reg, SpvId result) +static SpvId +cast_src_to_type(struct ntv_context *ctx, SpvId value, nir_src src, nir_alu_type atype) { - SpvId var = get_var_from_reg(ctx, reg->reg); - assert(var); - spirv_builder_emit_store(&ctx->builder, var, result); + atype = nir_alu_type_get_base_type(atype); + unsigned num_components = nir_src_num_components(src); + unsigned bit_size = nir_src_bit_size(src); + return emit_bitcast(ctx, get_alu_type(ctx, atype, num_components, bit_size), value); } static void -store_dest_raw(struct ntv_context *ctx, nir_dest *dest, SpvId result) +store_def(struct ntv_context *ctx, unsigned def_index, SpvId result, nir_alu_type type) { - if (dest->is_ssa) - 
store_ssa_def(ctx, &dest->ssa, result); - else - store_reg_def(ctx, &dest->reg, result); -} - -static SpvId -store_dest(struct ntv_context *ctx, nir_dest *dest, SpvId result, nir_alu_type type) -{ - unsigned num_components = nir_dest_num_components(*dest); - unsigned bit_size = nir_dest_bit_size(*dest); - - if (bit_size != 1) { - switch (nir_alu_type_get_base_type(type)) { - case nir_type_bool: - assert("bool should have bit-size 1"); - break; - - case nir_type_uint: - break; /* nothing to do! */ - - case nir_type_int: - case nir_type_float: - result = bitcast_to_uvec(ctx, result, bit_size, num_components); - break; - - default: - unreachable("unsupported nir_alu_type"); - } - } - - store_dest_raw(ctx, dest, result); - return result; + assert(result != 0); + assert(def_index < ctx->num_defs); + ctx->def_types[def_index] = nir_alu_type_get_base_type(type); + ctx->defs[def_index] = result; } static SpvId @@ -1216,178 +1555,20 @@ emit_unop(struct ntv_context *ctx, SpvOp op, SpvId type, SpvId src) return spirv_builder_emit_unop(&ctx->builder, op, type, src); } -/* return the intended xfb output vec type based on base type and vector size */ -static SpvId -get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_components) -{ - const struct glsl_type *out_type = NULL; - /* index is based on component, so we might have to go back a few slots to get to the base */ - while (!out_type) - out_type = ctx->so_output_gl_types[register_index--]; - enum glsl_base_type base_type = glsl_get_base_type(out_type); - if (base_type == GLSL_TYPE_ARRAY) - base_type = glsl_get_base_type(glsl_without_array(out_type)); - - switch (base_type) { - case GLSL_TYPE_BOOL: - return get_bvec_type(ctx, num_components); - - case GLSL_TYPE_FLOAT: - return get_fvec_type(ctx, 32, num_components); - - case GLSL_TYPE_INT: - return get_ivec_type(ctx, 32, num_components); - - case GLSL_TYPE_UINT: - return get_uvec_type(ctx, 32, num_components); - - default: - break; - } - 
unreachable("unknown type"); - return 0; -} - -/* for streamout create new outputs, as streamout can be done on individual components, - from complete outputs, so we just can't use the created packed outputs */ -static void -emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info, - unsigned first_so) -{ - unsigned output = 0; - for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { - struct pipe_stream_output so_output = so_info->so_info.output[i]; - unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component; - SpvId out_type = get_output_type(ctx, slot, so_output.num_components); - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassOutput, - out_type); - SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, - SpvStorageClassOutput); - char name[10]; - - snprintf(name, 10, "xfb%d", output); - spirv_builder_emit_name(&ctx->builder, var_id, name); - spirv_builder_emit_offset(&ctx->builder, var_id, (so_output.dst_offset * 4)); - spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, so_output.output_buffer); - spirv_builder_emit_xfb_stride(&ctx->builder, var_id, so_info->so_info.stride[so_output.output_buffer] * 4); - if (so_output.stream) - spirv_builder_emit_stream(&ctx->builder, var_id, so_output.stream); - - /* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn, - * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted - * outputs. 
- */ - uint32_t location = first_so + i; - assert(location < VARYING_SLOT_VAR0); - spirv_builder_emit_location(&ctx->builder, var_id, location); - - /* note: gl_ClipDistance[4] can the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here, - * so this is still the 0 component - */ - if (so_output.start_component) - spirv_builder_emit_component(&ctx->builder, var_id, so_output.start_component); - - uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); - *key = (uint32_t)so_output.register_index << 2 | so_output.start_component; - _mesa_hash_table_insert(ctx->so_outputs, key, (void *)(intptr_t)var_id); - - assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); - ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; - output += align(so_output.num_components, 4) / 4; - } -} - -static void -emit_so_outputs(struct ntv_context *ctx, - const struct zink_so_info *so_info) -{ - for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { - uint32_t components[NIR_MAX_VEC_COMPONENTS]; - unsigned slot = so_info->so_info_slots[i]; - struct pipe_stream_output so_output = so_info->so_info.output[i]; - uint32_t so_key = (uint32_t) so_output.register_index << 2 | so_output.start_component; - uint32_t location = (uint32_t) slot << 2 | so_output.start_component; - struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key); - assert(he); - SpvId so_output_var_id = (SpvId)(intptr_t)he->data; - - SpvId type = get_output_type(ctx, location, so_output.num_components); - SpvId output = 0; - /* index is based on component, so we might have to go back a few slots to get to the base */ - UNUSED uint32_t orig_location = location; - while (!output) - output = ctx->outputs[location--]; - location++; - SpvId output_type = ctx->so_output_types[location]; - const struct glsl_type *out_type = ctx->so_output_gl_types[location]; - - SpvId src = spirv_builder_emit_load(&ctx->builder, output_type, output); - - SpvId result; - - for (unsigned c = 0; c < so_output.num_components; 
c++) { - components[c] = so_output.start_component + c; - /* this is the second half of a 2 * vec4 array */ - if (slot == VARYING_SLOT_CLIP_DIST1) - components[c] += 4; - } - - /* if we're emitting a scalar or the type we're emitting matches the output's original type and we're - * emitting the same number of components, then we can skip any sort of conversion here - */ - if (glsl_type_is_scalar(out_type) || (type == output_type && glsl_get_length(out_type) == so_output.num_components)) - result = src; - else { - /* OpCompositeExtract can only extract scalars for our use here */ - if (so_output.num_components == 1) { - result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, components, so_output.num_components); - } else if (glsl_type_is_vector(out_type)) { - /* OpVectorShuffle can select vector members into a differently-sized vector */ - result = spirv_builder_emit_vector_shuffle(&ctx->builder, type, - src, src, - components, so_output.num_components); - result = emit_bitcast(ctx, type, result); - } else { - /* for arrays, we need to manually extract each desired member - * and re-pack them into the desired output type - */ - for (unsigned c = 0; c < so_output.num_components; c++) { - uint32_t member[2]; - unsigned member_idx = 0; - if (glsl_type_is_matrix(out_type)) { - member_idx = 1; - member[0] = so_output.register_index; - } - member[member_idx] = so_output.start_component + c; - SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type))); - - if (slot == VARYING_SLOT_CLIP_DIST1) - member[member_idx] += 4; - components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1 + member_idx); - } - result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components); - } - } - - spirv_builder_emit_store(&ctx->builder, so_output_var_id, result); - } -} - static SpvId emit_atomic(struct ntv_context *ctx, SpvId op, SpvId type, SpvId src0, SpvId 
src1, SpvId src2) { if (op == SpvOpAtomicLoad) - return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0)); if (op == SpvOpAtomicCompareExchange) - return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0), emit_uint_const(ctx, 32, 0), /* these params are intentionally swapped */ src2, src1); - return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0), src1); } @@ -1453,26 +1634,6 @@ get_fvec_constant(struct ntv_context *ctx, unsigned bit_size, } static SpvId -get_uvec_constant(struct ntv_context *ctx, unsigned bit_size, - unsigned num_components, uint64_t value) -{ - assert(bit_size == 32 || bit_size == 64); - - SpvId result = emit_uint_const(ctx, bit_size, value); - if (num_components == 1) - return result; - - assert(num_components > 1); - SpvId components[NIR_MAX_VEC_COMPONENTS]; - for (int i = 0; i < num_components; i++) - components[i] = result; - - SpvId type = get_uvec_type(ctx, bit_size, num_components); - return spirv_builder_const_composite(&ctx->builder, type, components, - num_components); -} - -static SpvId get_ivec_constant(struct ntv_context *ctx, unsigned bit_size, unsigned num_components, int64_t value) { @@ -1498,36 +1659,36 @@ alu_instr_src_components(const nir_alu_instr *instr, unsigned src) if (nir_op_infos[instr->op].input_sizes[src] > 0) return nir_op_infos[instr->op].input_sizes[src]; - if (instr->dest.dest.is_ssa) - return instr->dest.dest.ssa.num_components; - else - return 
instr->dest.dest.reg.reg->num_components; + return instr->def.num_components; } static SpvId -get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) +get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, SpvId *raw_value, nir_alu_type *atype) { - SpvId raw_value = get_alu_src_raw(ctx, alu, src); + *raw_value = get_alu_src_raw(ctx, alu, src, atype); unsigned num_components = alu_instr_src_components(alu, src); unsigned bit_size = nir_src_bit_size(alu->src[src].src); - nir_alu_type type = nir_op_infos[alu->op].input_types[src]; + nir_alu_type type = alu_op_is_typeless(alu->op) ? *atype : nir_op_infos[alu->op].input_types[src]; + type = nir_alu_type_get_base_type(type); + if (type == *atype) + return *raw_value; if (bit_size == 1) - return raw_value; + return *raw_value; else { switch (nir_alu_type_get_base_type(type)) { case nir_type_bool: unreachable("bool should have bit-size 1"); case nir_type_int: - return bitcast_to_ivec(ctx, raw_value, bit_size, num_components); + return bitcast_to_ivec(ctx, *raw_value, bit_size, num_components); case nir_type_uint: - return raw_value; + return bitcast_to_uvec(ctx, *raw_value, bit_size, num_components); case nir_type_float: - return bitcast_to_fvec(ctx, raw_value, bit_size, num_components); + return bitcast_to_fvec(ctx, *raw_value, bit_size, num_components); default: unreachable("unknown nir_alu_type"); @@ -1535,39 +1696,16 @@ get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) } } -static SpvId -store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result) +static void +store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result, nir_alu_type atype) { - assert(!alu->dest.saturate); - return store_dest(ctx, &alu->dest.dest, result, - nir_op_infos[alu->op].output_type); + store_def(ctx, alu->def.index, result, atype); } static SpvId -get_dest_type(struct ntv_context *ctx, nir_dest *dest, nir_alu_type type) +get_def_type(struct ntv_context *ctx, 
nir_def *def, nir_alu_type type) { - unsigned num_components = nir_dest_num_components(*dest); - unsigned bit_size = nir_dest_bit_size(*dest); - - if (bit_size == 1) - return get_bvec_type(ctx, num_components); - - switch (nir_alu_type_get_base_type(type)) { - case nir_type_bool: - unreachable("bool should have bit-size 1"); - - case nir_type_int: - return get_ivec_type(ctx, bit_size, num_components); - - case nir_type_uint: - return get_uvec_type(ctx, bit_size, num_components); - - case nir_type_float: - return get_fvec_type(ctx, bit_size, num_components); - - default: - unreachable("unsupported nir_alu_type"); - } + return get_alu_type(ctx, type, def->num_components, def->bit_size); } static bool @@ -1588,14 +1726,66 @@ needs_derivative_control(nir_alu_instr *alu) static void emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) { + bool is_bcsel = alu->op == nir_op_bcsel; + nir_alu_type stype[NIR_MAX_VEC_COMPONENTS] = {0}; SpvId src[NIR_MAX_VEC_COMPONENTS]; + SpvId raw_src[NIR_MAX_VEC_COMPONENTS]; for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) - src[i] = get_alu_src(ctx, alu, i); + src[i] = get_alu_src(ctx, alu, i, &raw_src[i], &stype[i]); + + nir_alu_type typeless_type = stype[is_bcsel]; + if (nir_op_infos[alu->op].num_inputs > 1 && + alu_op_is_typeless(alu->op) && + nir_src_bit_size(alu->src[is_bcsel].src) != 1) { + unsigned uint_count = 0; + unsigned int_count = 0; + unsigned float_count = 0; + for (unsigned i = is_bcsel; i < nir_op_infos[alu->op].num_inputs; i++) { + if (stype[i] == nir_type_bool) + break; + switch (stype[i]) { + case nir_type_uint: + uint_count++; + break; + case nir_type_int: + int_count++; + break; + case nir_type_float: + float_count++; + break; + default: + unreachable("this shouldn't happen"); + } + } + if (uint_count > int_count && uint_count > float_count) + typeless_type = nir_type_uint; + else if (int_count > uint_count && int_count > float_count) + typeless_type = nir_type_int; + else if (float_count > uint_count 
&& float_count > int_count) + typeless_type = nir_type_float; + else if (float_count == uint_count || uint_count == int_count) + typeless_type = nir_type_uint; + else if (float_count == int_count) + typeless_type = nir_type_float; + else + typeless_type = nir_type_uint; + assert(typeless_type != nir_type_bool); + for (unsigned i = is_bcsel; i < nir_op_infos[alu->op].num_inputs; i++) { + unsigned num_components = alu_instr_src_components(alu, i); + unsigned bit_size = nir_src_bit_size(alu->src[i].src); + SpvId type = get_alu_type(ctx, typeless_type, num_components, bit_size); + if (stype[i] != typeless_type) { + src[i] = emit_bitcast(ctx, type, src[i]); + } + } + } - SpvId dest_type = get_dest_type(ctx, &alu->dest.dest, - nir_op_infos[alu->op].output_type); - unsigned bit_size = nir_dest_bit_size(alu->dest.dest); - unsigned num_components = nir_dest_num_components(alu->dest.dest); + unsigned bit_size = alu->def.bit_size; + unsigned num_components = alu->def.num_components; + nir_alu_type atype = bit_size == 1 ? + nir_type_bool : + (alu_op_is_typeless(alu->op) ? 
typeless_type : nir_op_infos[alu->op].output_type); + SpvId dest_type = get_def_type(ctx, &alu->def, atype); if (needs_derivative_control(alu)) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityDerivativeControl); @@ -1621,6 +1811,8 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) UNOP(nir_op_fddy, SpvOpDPdy) UNOP(nir_op_fddy_coarse, SpvOpDPdyCoarse) UNOP(nir_op_fddy_fine, SpvOpDPdyFine) + UNOP(nir_op_f2i8, SpvOpConvertFToS) + UNOP(nir_op_f2u8, SpvOpConvertFToU) UNOP(nir_op_f2i16, SpvOpConvertFToS) UNOP(nir_op_f2u16, SpvOpConvertFToU) UNOP(nir_op_f2i32, SpvOpConvertFToS) @@ -1629,6 +1821,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) UNOP(nir_op_i2f32, SpvOpConvertSToF) UNOP(nir_op_u2f16, SpvOpConvertUToF) UNOP(nir_op_u2f32, SpvOpConvertUToF) + UNOP(nir_op_i2i8, SpvOpSConvert) UNOP(nir_op_i2i16, SpvOpSConvert) UNOP(nir_op_i2i32, SpvOpSConvert) UNOP(nir_op_u2u8, SpvOpUConvert) @@ -1647,6 +1840,12 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) UNOP(nir_op_bit_count, SpvOpBitCount) #undef UNOP + case nir_op_f2f16_rtz: + assert(nir_op_infos[alu->op].num_inputs == 1); + result = emit_unop(ctx, SpvOpFConvert, dest_type, src[0]); + spirv_builder_emit_rounding_mode(&ctx->builder, result, SpvFPRoundingModeRTZ); + break; + case nir_op_inot: if (bit_size == 1) result = emit_unop(ctx, SpvOpLogicalNot, dest_type, src[0]); @@ -1654,6 +1853,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) result = emit_unop(ctx, SpvOpNot, dest_type, src[0]); break; + case nir_op_b2i8: case nir_op_b2i16: case nir_op_b2i32: case nir_op_b2i64: @@ -1672,12 +1872,25 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) get_fvec_constant(ctx, bit_size, num_components, 0)); break; + case nir_op_uclz: + assert(nir_op_infos[alu->op].num_inputs == 1); + result = emit_unop(ctx, SpvOpUCountLeadingZerosINTEL, dest_type, src[0]); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityIntegerFunctions2INTEL); + spirv_builder_emit_extension(&ctx->builder, 
"SPV_INTEL_shader_integer_functions2"); + break; #define BUILTIN_UNOP(nir_op, spirv_op) \ case nir_op: \ assert(nir_op_infos[alu->op].num_inputs == 1); \ result = emit_builtin_unop(ctx, spirv_op, dest_type, src[0]); \ break; +#define BUILTIN_UNOPF(nir_op, spirv_op) \ + case nir_op: \ + assert(nir_op_infos[alu->op].num_inputs == 1); \ + result = emit_builtin_unop(ctx, spirv_op, get_def_type(ctx, &alu->def, nir_type_float), src[0]); \ + atype = nir_type_float; \ + break; + BUILTIN_UNOP(nir_op_iabs, GLSLstd450SAbs) BUILTIN_UNOP(nir_op_fabs, GLSLstd450FAbs) BUILTIN_UNOP(nir_op_fsqrt, GLSLstd450Sqrt) @@ -1696,31 +1909,27 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BUILTIN_UNOP(nir_op_ufind_msb, GLSLstd450FindUMsb) BUILTIN_UNOP(nir_op_find_lsb, GLSLstd450FindILsb) BUILTIN_UNOP(nir_op_ifind_msb, GLSLstd450FindSMsb) - BUILTIN_UNOP(nir_op_pack_half_2x16, GLSLstd450PackHalf2x16) - BUILTIN_UNOP(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16) - BUILTIN_UNOP(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32) -#undef BUILTIN_UNOP - case nir_op_frcp: + case nir_op_pack_half_2x16: assert(nir_op_infos[alu->op].num_inputs == 1); - result = emit_binop(ctx, SpvOpFDiv, dest_type, - get_fvec_constant(ctx, bit_size, num_components, 1), - src[0]); + result = emit_builtin_unop(ctx, GLSLstd450PackHalf2x16, get_def_type(ctx, &alu->def, nir_type_uint), src[0]); break; - case nir_op_f2b1: + case nir_op_unpack_64_2x32: assert(nir_op_infos[alu->op].num_inputs == 1); - result = emit_binop(ctx, SpvOpFOrdNotEqual, dest_type, src[0], - get_fvec_constant(ctx, - nir_src_bit_size(alu->src[0].src), - num_components, 0)); + result = emit_builtin_unop(ctx, GLSLstd450UnpackDouble2x32, get_def_type(ctx, &alu->def, nir_type_uint), src[0]); break; - case nir_op_i2b1: + + BUILTIN_UNOPF(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16) + BUILTIN_UNOPF(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32) +#undef BUILTIN_UNOP +#undef BUILTIN_UNOPF + + case nir_op_frcp: 
assert(nir_op_infos[alu->op].num_inputs == 1); - result = emit_binop(ctx, SpvOpINotEqual, dest_type, src[0], - get_ivec_constant(ctx, - nir_src_bit_size(alu->src[0].src), - num_components, 0)); + result = emit_binop(ctx, SpvOpFDiv, dest_type, + get_fvec_constant(ctx, bit_size, num_components, 1), + src[0]); break; @@ -1736,6 +1945,8 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BINOP(nir_op_idiv, SpvOpSDiv) BINOP(nir_op_udiv, SpvOpUDiv) BINOP(nir_op_umod, SpvOpUMod) + BINOP(nir_op_imod, SpvOpSMod) + BINOP(nir_op_irem, SpvOpSRem) BINOP(nir_op_fadd, SpvOpFAdd) BINOP(nir_op_fsub, SpvOpFSub) BINOP(nir_op_fmul, SpvOpFMul) @@ -1747,12 +1958,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BINOP(nir_op_uge, SpvOpUGreaterThanEqual) BINOP(nir_op_flt, SpvOpFOrdLessThan) BINOP(nir_op_fge, SpvOpFOrdGreaterThanEqual) - BINOP(nir_op_feq, SpvOpFOrdEqual) - BINOP(nir_op_fneu, SpvOpFUnordNotEqual) - BINOP(nir_op_ishl, SpvOpShiftLeftLogical) - BINOP(nir_op_ishr, SpvOpShiftRightArithmetic) - BINOP(nir_op_ushr, SpvOpShiftRightLogical) - BINOP(nir_op_ixor, SpvOpBitwiseXor) BINOP(nir_op_frem, SpvOpFRem) #undef BINOP @@ -1769,8 +1974,26 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BINOP_LOG(nir_op_ior, SpvOpBitwiseOr, SpvOpLogicalOr) BINOP_LOG(nir_op_ieq, SpvOpIEqual, SpvOpLogicalEqual) BINOP_LOG(nir_op_ine, SpvOpINotEqual, SpvOpLogicalNotEqual) + BINOP_LOG(nir_op_ixor, SpvOpBitwiseXor, SpvOpLogicalNotEqual) #undef BINOP_LOG +#define BINOP_SHIFT(nir_op, spirv_op) \ + case nir_op: { \ + assert(nir_op_infos[alu->op].num_inputs == 2); \ + int shift_bit_size = nir_src_bit_size(alu->src[1].src); \ + nir_alu_type shift_nir_type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[1]); \ + SpvId shift_type = get_alu_type(ctx, shift_nir_type, num_components, shift_bit_size); \ + SpvId shift_mask = get_ivec_constant(ctx, shift_bit_size, num_components, bit_size - 1); \ + SpvId shift_count = emit_binop(ctx, SpvOpBitwiseAnd, shift_type, src[1], 
shift_mask); \ + result = emit_binop(ctx, spirv_op, dest_type, src[0], shift_count); \ + break; \ + } + + BINOP_SHIFT(nir_op_ishl, SpvOpShiftLeftLogical) + BINOP_SHIFT(nir_op_ishr, SpvOpShiftRightArithmetic) + BINOP_SHIFT(nir_op_ushr, SpvOpShiftRightLogical) +#undef BINOP_SHIFT + #define BUILTIN_BINOP(nir_op, spirv_op) \ case nir_op: \ assert(nir_op_infos[alu->op].num_inputs == 2); \ @@ -1783,8 +2006,31 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BUILTIN_BINOP(nir_op_imax, GLSLstd450SMax) BUILTIN_BINOP(nir_op_umin, GLSLstd450UMin) BUILTIN_BINOP(nir_op_umax, GLSLstd450UMax) + BUILTIN_BINOP(nir_op_ldexp, GLSLstd450Ldexp) #undef BUILTIN_BINOP +#define INTEL_BINOP(nir_op, spirv_op) \ + case nir_op: \ + assert(nir_op_infos[alu->op].num_inputs == 2); \ + result = emit_binop(ctx, spirv_op, dest_type, src[0], src[1]); \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityIntegerFunctions2INTEL); \ + spirv_builder_emit_extension(&ctx->builder, "SPV_INTEL_shader_integer_functions2"); \ + break; + + INTEL_BINOP(nir_op_uabs_isub, SpvOpAbsISubINTEL) + INTEL_BINOP(nir_op_uabs_usub, SpvOpAbsUSubINTEL) + INTEL_BINOP(nir_op_iadd_sat, SpvOpIAddSatINTEL) + INTEL_BINOP(nir_op_uadd_sat, SpvOpUAddSatINTEL) + INTEL_BINOP(nir_op_ihadd, SpvOpIAverageINTEL) + INTEL_BINOP(nir_op_uhadd, SpvOpUAverageINTEL) + INTEL_BINOP(nir_op_irhadd, SpvOpIAverageRoundedINTEL) + INTEL_BINOP(nir_op_urhadd, SpvOpUAverageRoundedINTEL) + INTEL_BINOP(nir_op_isub_sat, SpvOpISubSatINTEL) + INTEL_BINOP(nir_op_usub_sat, SpvOpUSubSatINTEL) + INTEL_BINOP(nir_op_imul_32x16, SpvOpIMul32x16INTEL) + INTEL_BINOP(nir_op_umul_32x16, SpvOpUMul32x16INTEL) +#undef INTEL_BINOP + case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: @@ -1799,6 +2045,23 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) case nir_op_sge: unreachable("should already be lowered away"); + case nir_op_fneu: + assert(nir_op_infos[alu->op].num_inputs == 2); + if (raw_src[0] == raw_src[1]) + result = emit_unop(ctx, SpvOpIsNan, 
dest_type, src[0]); + else + result = emit_binop(ctx, SpvOpFUnordNotEqual, dest_type, src[0], src[1]); + break; + + case nir_op_feq: + assert(nir_op_infos[alu->op].num_inputs == 2); + if (raw_src[0] == raw_src[1]) + result = emit_unop(ctx, SpvOpLogicalNot, dest_type, + emit_unop(ctx, SpvOpIsNan, dest_type, src[0])); + else + result = emit_binop(ctx, SpvOpFOrdEqual, dest_type, src[0], src[1]); + break; + case nir_op_flrp: assert(nir_op_infos[alu->op].num_inputs == 3); result = emit_builtin_triop(ctx, GLSLstd450FMix, dest_type, @@ -1841,6 +2104,84 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) result = spirv_builder_emit_quadop(&ctx->builder, SpvOpBitFieldInsert, dest_type, src[0], src[1], src[2], src[3]); break; + /* those are all simple bitcasts, we could do better, but it doesn't matter */ + case nir_op_pack_32_4x8: + case nir_op_pack_32_2x16: + case nir_op_pack_64_4x16: + case nir_op_unpack_32_4x8: + case nir_op_unpack_32_2x16: + case nir_op_unpack_64_4x16: { + result = emit_bitcast(ctx, dest_type, src[0]); + break; + } + + case nir_op_pack_32_2x16_split: + case nir_op_pack_64_2x32_split: { + nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]); + if (num_components <= 2) { + SpvId components[] = {src[0], src[1]}; + SpvId vec_type = get_alu_type(ctx, type, num_components * 2, nir_src_bit_size(alu->src[0].src)); + result = spirv_builder_emit_composite_construct(&ctx->builder, vec_type, components, 2); + result = emit_bitcast(ctx, dest_type, result); + } else { + SpvId components[NIR_MAX_VEC_COMPONENTS]; + SpvId conv_type = get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)); + SpvId vec_type = get_alu_type(ctx, type, 2, nir_src_bit_size(alu->src[0].src)); + SpvId dest_scalar_type = get_alu_type(ctx, nir_op_infos[alu->op].output_type, 1, bit_size); + for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) { + SpvId conv[2]; + conv[0] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, 
src[0], &i, 1); + conv[1] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, src[1], &i, 1); + SpvId vec = spirv_builder_emit_composite_construct(&ctx->builder, vec_type, conv, 2); + components[i] = emit_bitcast(ctx, dest_scalar_type, vec); + } + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components); + } + break; + } + + case nir_op_unpack_32_2x16_split_x: + case nir_op_unpack_64_2x32_split_x: { + nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]); + SpvId vec_type = get_alu_type(ctx, type, 2, bit_size); + unsigned idx = 0; + if (num_components == 1) { + SpvId vec = emit_bitcast(ctx, vec_type, src[0]); + result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, vec, &idx, 1); + } else { + SpvId components[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) { + SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)), src[0], &i, 1); + conv = emit_bitcast(ctx, vec_type, conv); + SpvId conv_type = get_alu_type(ctx, type, 1, bit_size); + components[i] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, conv, &idx, 1); + } + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components); + } + break; + } + + case nir_op_unpack_32_2x16_split_y: + case nir_op_unpack_64_2x32_split_y: { + nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]); + SpvId vec_type = get_alu_type(ctx, type, 2, bit_size); + unsigned idx = 1; + if (num_components == 1) { + SpvId vec = emit_bitcast(ctx, vec_type, src[0]); + result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, vec, &idx, 1); + } else { + SpvId components[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) { + SpvId conv = 
spirv_builder_emit_composite_extract(&ctx->builder, get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)), src[0], &i, 1); + conv = emit_bitcast(ctx, vec_type, conv); + SpvId conv_type = get_alu_type(ctx, type, 1, bit_size); + components[i] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, conv, &idx, 1); + } + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components); + } + break; + } + default: fprintf(stderr, "emit_alu: not implemented (%s)\n", nir_op_infos[alu->op].name); @@ -1851,7 +2192,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) if (alu->exact) spirv_builder_emit_decoration(&ctx->builder, result, SpvDecorationNoContraction); - store_alu_result(ctx, alu, result); + store_alu_result(ctx, alu, result, atype); } static void @@ -1861,273 +2202,99 @@ emit_load_const(struct ntv_context *ctx, nir_load_const_instr *load_const) unsigned num_components = load_const->def.num_components; SpvId components[NIR_MAX_VEC_COMPONENTS]; + nir_alu_type atype; if (bit_size == 1) { + atype = nir_type_bool; for (int i = 0; i < num_components; i++) components[i] = spirv_builder_const_bool(&ctx->builder, load_const->value[i].b); } else { + atype = infer_nir_alu_type_from_uses_ssa(&load_const->def); for (int i = 0; i < num_components; i++) { - uint64_t tmp = nir_const_value_as_uint(load_const->value[i], - bit_size); - components[i] = emit_uint_const(ctx, bit_size, tmp); + switch (atype) { + case nir_type_uint: { + uint64_t tmp = nir_const_value_as_uint(load_const->value[i], bit_size); + components[i] = emit_uint_const(ctx, bit_size, tmp); + break; + } + case nir_type_int: { + int64_t tmp = nir_const_value_as_int(load_const->value[i], bit_size); + components[i] = emit_int_const(ctx, bit_size, tmp); + break; + } + case nir_type_float: { + double tmp = nir_const_value_as_float(load_const->value[i], bit_size); + components[i] = emit_float_const(ctx, bit_size, tmp); + break; + } + default: + unreachable("this 
shouldn't happen!"); + } } } if (num_components > 1) { - SpvId type = get_vec_from_bit_size(ctx, bit_size, - num_components); + SpvId type = get_alu_type(ctx, atype, num_components, bit_size); SpvId value = spirv_builder_const_composite(&ctx->builder, type, components, num_components); - store_ssa_def(ctx, &load_const->def, value); + store_def(ctx, load_const->def.index, value, atype); } else { assert(num_components == 1); - store_ssa_def(ctx, &load_const->def, components[0]); - } -} - -static void -emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr) -{ - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); - bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo; - assert(const_block_index); // no dynamic indexing for now - - unsigned idx = 0; - unsigned bit_size = nir_dest_bit_size(intr->dest); - idx = MIN2(bit_size, 32) >> 4; - if (ssbo) { - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - if (!ctx->ssbos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest)); - } else { - assert(idx < ARRAY_SIZE(ctx->ubos[0])); - if (!ctx->ubos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest)); - } - SpvId bo = ssbo ? 
ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx]; - SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1); - SpvId one = emit_uint_const(ctx, 32, 1); - - /* number of components being loaded */ - unsigned num_components = nir_dest_num_components(intr->dest); - /* we need to grab 2x32 to fill the 64bit value */ - if (bit_size == 64) - num_components *= 2; - SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2]; - SpvId result; - - /* destination type for the load */ - SpvId type = get_dest_uvec_type(ctx, &intr->dest); - /* an id of an array member in bytes */ - SpvId uint_size = emit_uint_const(ctx, 32, MIN2(bit_size, 32) / 8); - - /* we grab a single array member at a time, so it's a pointer to a uint */ - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform, - uint_type); - - /* our generated uniform has a memory layout like - * - * struct { - * uint base[array_size]; - * }; - * - * where 'array_size' is set as though every member of the ubo takes up a vec4, - * even if it's only a vec2 or a float. 
- * - * first, access 'base' - */ - SpvId member = emit_uint_const(ctx, 32, 0); - /* this is the offset (in bytes) that we're accessing: - * it may be a const value or it may be dynamic in the shader - */ - SpvId offset = get_src(ctx, &intr->src[1]); - /* calculate the byte offset in the array */ - SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); - /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: - * index 0 is accessing 'base' - * index 1 is accessing 'base[index 1]' - * - * we must perform the access this way in case src[1] is dynamic because there's - * no other spirv method for using an id to access a member of a composite, as - * (composite|vector)_extract both take literals - */ - for (unsigned i = 0; i < num_components; i++) { - SpvId indices[2] = { member, vec_offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - bo, indices, - ARRAY_SIZE(indices)); - /* load a single value into the constituents array */ - if (ssbo && nir_intrinsic_access(intr) & ACCESS_COHERENT) - constituents[i] = emit_atomic(ctx, SpvOpAtomicLoad, uint_type, ptr, 0, 0); - else - constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, ptr); - /* increment to the next member index for the next load */ - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); - } - - /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values - * by creating uvec2 composites and bitcasting them to u64 values - */ - if (bit_size == 64) { - num_components /= 2; - type = get_uvec_type(ctx, 64, num_components); - SpvId u64_type = get_uvec_type(ctx, 64, 1); - for (unsigned i = 0; i < num_components; i++) { - constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2); - constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]); - } - } - /* if loading more than 1 value, 
reassemble the results into the desired type, - * otherwise just use the loaded result - */ - if (num_components > 1) { - result = spirv_builder_emit_composite_construct(&ctx->builder, - type, - constituents, - num_components); - } else - result = constituents[0]; - - /* explicitly convert to a bool vector if the destination type is a bool */ - if (nir_dest_bit_size(intr->dest) == 1) - result = uvec_to_bvec(ctx, result, num_components); - - store_dest(ctx, &intr->dest, result, nir_type_uint); -} - -static void -emit_store_ssbo(struct ntv_context *ctx, nir_intrinsic_instr *intr) -{ - /* TODO: would be great to refactor this in with emit_load_bo() */ - - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[1]); - assert(const_block_index); - - unsigned idx = MIN2(nir_src_bit_size(intr->src[0]), 32) >> 4; - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - if (!ctx->ssbos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_src_bit_size(intr->src[0])); - SpvId bo = ctx->ssbos[const_block_index->u32][idx]; - - unsigned bit_size = nir_src_bit_size(intr->src[0]); - SpvId uint_type = get_uvec_type(ctx, 32, 1); - SpvId one = emit_uint_const(ctx, 32, 1); - - /* number of components being stored */ - unsigned wrmask = nir_intrinsic_write_mask(intr); - unsigned num_components = util_bitcount(wrmask); - - /* we need to grab 2x32 to fill the 64bit value */ - bool is_64bit = bit_size == 64; - - /* an id of an array member in bytes */ - SpvId uint_size = emit_uint_const(ctx, 32, MIN2(bit_size, 32) / 8); - /* we grab a single array member at a time, so it's a pointer to a uint */ - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassStorageBuffer, - get_uvec_type(ctx, MIN2(bit_size, 32), 1)); - - /* our generated uniform has a memory layout like - * - * struct { - * uint base[array_size]; - * }; - * - * where 'array_size' is set as though every member of the ubo takes up a vec4, - * even if it's only a vec2 
or a float. - * - * first, access 'base' - */ - SpvId member = emit_uint_const(ctx, 32, 0); - /* this is the offset (in bytes) that we're accessing: - * it may be a const value or it may be dynamic in the shader - */ - SpvId offset = get_src(ctx, &intr->src[2]); - /* calculate byte offset */ - SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); - - SpvId value = get_src(ctx, &intr->src[0]); - /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: - * index 0 is accessing 'base' - * index 1 is accessing 'base[index 1]' - * index 2 is accessing 'base[index 1][index 2]' - * - * we must perform the access this way in case src[1] is dynamic because there's - * no other spirv method for using an id to access a member of a composite, as - * (composite|vector)_extract both take literals - */ - unsigned write_count = 0; - SpvId src_base_type = get_uvec_type(ctx, bit_size, 1); - for (unsigned i = 0; write_count < num_components; i++) { - if (wrmask & (1 << i)) { - SpvId component = nir_src_num_components(intr->src[0]) > 1 ? 
- spirv_builder_emit_composite_extract(&ctx->builder, src_base_type, value, &i, 1) : - value; - SpvId component_split; - if (is_64bit) - component_split = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), component); - for (unsigned j = 0; j < 1 + !!is_64bit; j++) { - if (j) - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); - SpvId indices[] = { member, vec_offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - bo, indices, - ARRAY_SIZE(indices)); - if (is_64bit) - component = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, component_split, &j, 1); - if (nir_intrinsic_access(intr) & ACCESS_COHERENT) - spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeWorkgroup, 0, component); - else - spirv_builder_emit_store(&ctx->builder, ptr, component); - } - write_count++; - } else if (is_64bit) - /* we're doing 32bit stores here, so we need to increment correctly here */ - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); - - /* increment to the next vec4 member index for the next store */ - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); + store_def(ctx, load_const->def.index, components[0], atype); } } static void emit_discard(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - assert(ctx->block_started); - spirv_builder_emit_kill(&ctx->builder); - /* discard is weird in NIR, so let's just create an unreachable block after - it and hope that the vulkan driver will DCE any instructinos in it. 
*/ - spirv_builder_label(&ctx->builder, spirv_builder_new_id(&ctx->builder)); + assert(ctx->discard_func); + SpvId type_void = spirv_builder_type_void(&ctx->builder); + spirv_builder_function_call(&ctx->builder, type_void, + ctx->discard_func, NULL, 0); } static void emit_load_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId ptr = get_src(ctx, intr->src); + nir_alu_type atype; + SpvId ptr = get_src(ctx, intr->src, &atype); + + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + SpvId type; + if (glsl_type_is_image(deref->type)) { + nir_variable *var = nir_deref_instr_get_variable(deref); + const struct glsl_type *gtype = glsl_without_array(var->type); + type = get_image_type(ctx, var, + glsl_type_is_sampler(gtype), + glsl_get_sampler_dim(gtype) == GLSL_SAMPLER_DIM_BUF); + atype = nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(gtype)); + } else { + type = get_glsl_type(ctx, deref->type); + atype = get_nir_alu_type(deref->type); + } + SpvId result; - SpvId result = spirv_builder_emit_load(&ctx->builder, - get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), - ptr); - unsigned num_components = nir_dest_num_components(intr->dest); - unsigned bit_size = nir_dest_bit_size(intr->dest); - result = bitcast_to_uvec(ctx, result, bit_size, num_components); - store_dest(ctx, &intr->dest, result, nir_type_uint); + if (nir_intrinsic_access(intr) & ACCESS_COHERENT) + result = emit_atomic(ctx, SpvOpAtomicLoad, type, ptr, 0, 0); + else + result = spirv_builder_emit_load(&ctx->builder, type, ptr); + store_def(ctx, intr->def.index, result, atype); } static void emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId ptr = get_src(ctx, &intr->src[0]); - SpvId src = get_src(ctx, &intr->src[1]); + nir_alu_type ptype, stype; + SpvId ptr = get_src(ctx, &intr->src[0], &ptype); + SpvId src = get_src(ctx, &intr->src[1], &stype); const struct glsl_type *gtype = nir_src_as_deref(intr->src[0])->type; SpvId type = 
get_glsl_type(ctx, gtype); - nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); - unsigned num_writes = util_bitcount(nir_intrinsic_write_mask(intr)); + nir_variable *var = nir_intrinsic_get_var(intr, 0); unsigned wrmask = nir_intrinsic_write_mask(intr); - if (num_writes && num_writes != intr->num_components) { + if (!glsl_type_is_scalar(gtype) && + wrmask != BITFIELD_MASK(glsl_type_is_array(gtype) ? glsl_get_aoa_size(gtype) : glsl_get_vector_elements(gtype))) { /* no idea what we do if this fails */ assert(glsl_type_is_array(gtype) || glsl_type_is_vector(gtype)); @@ -2136,17 +2303,18 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvId member_type; if (glsl_type_is_vector(gtype)) { result_type = get_glsl_basetype(ctx, glsl_get_base_type(gtype)); - member_type = get_uvec_type(ctx, 32, 1); + member_type = get_alu_type(ctx, stype, 1, glsl_get_bit_size(gtype)); } else member_type = result_type = get_glsl_type(ctx, glsl_get_array_element(gtype)); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassOutput, + get_storage_class(var), result_type); for (unsigned i = 0; i < 4; i++) - if ((wrmask >> i) & 1) { + if (wrmask & BITFIELD_BIT(i)) { SpvId idx = emit_uint_const(ctx, 32, i); SpvId val = spirv_builder_emit_composite_extract(&ctx->builder, member_type, src, &i, 1); - val = emit_bitcast(ctx, result_type, val); + if (stype != ptype) + val = emit_bitcast(ctx, result_type, val); SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, ptr, &idx, 1); spirv_builder_emit_store(&ctx->builder, member, val); @@ -2155,99 +2323,161 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) } SpvId result; - if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.location == FRAG_RESULT_SAMPLE_MASK) { + if (ctx->stage == MESA_SHADER_FRAGMENT && + var->data.mode == nir_var_shader_out && + var->data.location == FRAG_RESULT_SAMPLE_MASK) { src = emit_bitcast(ctx, type, src); /* 
SampleMask is always an array in spirv, so we need to construct it into one */ result = spirv_builder_emit_composite_construct(&ctx->builder, ctx->sample_mask_type, &src, 1); - } else - result = emit_bitcast(ctx, type, src); - spirv_builder_emit_store(&ctx->builder, ptr, result); + } else { + if (ptype == stype) + result = src; + else + result = emit_bitcast(ctx, type, src); + } + if (nir_intrinsic_access(intr) & ACCESS_COHERENT) + spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeDevice, 0, result); + else + spirv_builder_emit_store(&ctx->builder, ptr, result); } static void emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint); - unsigned num_components = nir_dest_num_components(intr->dest); - unsigned bit_size = nir_dest_bit_size(intr->dest); - bool qword = bit_size == 64; - SpvId uint_type = get_uvec_type(ctx, 32, 1); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + unsigned num_components = intr->def.num_components; + unsigned bit_size = intr->def.bit_size; + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassWorkgroup, uint_type); - SpvId offset = emit_binop(ctx, SpvOpUDiv, uint_type, get_src(ctx, &intr->src[0]), emit_uint_const(ctx, 32, 4)); + nir_alu_type atype; + SpvId offset = get_src(ctx, &intr->src[0], &atype); + if (atype == nir_type_float) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1); SpvId constituents[NIR_MAX_VEC_COMPONENTS]; + SpvId shared_block = get_shared_block(ctx, bit_size); /* need to convert array -> vec */ for (unsigned i = 0; i < num_components; i++) { - SpvId parts[2]; - for (unsigned j = 0; j < 1 + !!qword; j++) { - SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, - ctx->shared_block_var, &offset, 1); - parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member); - offset = emit_binop(ctx, 
SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1)); - } - if (qword) - constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2); - else - constituents[i] = parts[0]; + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + shared_block, &offset, 1); + constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member); + offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, 1)); } SpvId result; if (num_components > 1) result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, constituents, num_components); else - result = bitcast_to_uvec(ctx, constituents[0], bit_size, num_components); - store_dest(ctx, &intr->dest, result, nir_type_uint); + result = constituents[0]; + store_def(ctx, intr->def.index, result, nir_type_uint); } static void emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId src = get_src(ctx, &intr->src[0]); - bool qword = nir_src_bit_size(intr->src[0]) == 64; + nir_alu_type atype; + SpvId src = get_src(ctx, &intr->src[0], &atype); - unsigned num_writes = util_bitcount(nir_intrinsic_write_mask(intr)); unsigned wrmask = nir_intrinsic_write_mask(intr); - /* this is a partial write, so we have to loop and do a per-component write */ - SpvId uint_type = get_uvec_type(ctx, 32, 1); + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassWorkgroup, uint_type); - SpvId offset = emit_binop(ctx, SpvOpUDiv, uint_type, get_src(ctx, &intr->src[1]), emit_uint_const(ctx, 32, 4)); - - for (unsigned i = 0; num_writes; i++) { - if ((wrmask >> i) & 1) { - for (unsigned j = 0; j < 1 + !!qword; j++) { - unsigned comp = ((1 + !!qword) * i) + j; - SpvId shared_offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, comp)); - SpvId val = src; 
- if (nir_src_num_components(intr->src[0]) != 1 || qword) - val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &comp, 1); - SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, - ctx->shared_block_var, &shared_offset, 1); - spirv_builder_emit_store(&ctx->builder, member, val); - } - num_writes--; - } + nir_alu_type otype; + SpvId offset = get_src(ctx, &intr->src[1], &otype); + if (otype == nir_type_float) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1); + SpvId shared_block = get_shared_block(ctx, bit_size); + /* this is a partial write, so we have to loop and do a per-component write */ + u_foreach_bit(i, wrmask) { + SpvId shared_offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, i)); + SpvId val = src; + if (nir_src_num_components(intr->src[0]) != 1) + val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &i, 1); + if (atype != nir_type_uint) + val = emit_bitcast(ctx, get_alu_type(ctx, nir_type_uint, 1, bit_size), val); + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + shared_block, &shared_offset, 1); + spirv_builder_emit_store(&ctx->builder, member, val); + } +} + +static void +emit_load_scratch(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + unsigned num_components = intr->def.num_components; + unsigned bit_size = intr->def.bit_size; + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPrivate, + uint_type); + nir_alu_type atype; + SpvId offset = get_src(ctx, &intr->src[0], &atype); + if (atype != nir_type_uint) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1); + SpvId constituents[NIR_MAX_VEC_COMPONENTS]; + SpvId scratch_block = get_scratch_block(ctx, bit_size); + /* need to convert array -> vec */ + for 
(unsigned i = 0; i < num_components; i++) { + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + scratch_block, &offset, 1); + constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member); + offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, 1)); + } + SpvId result; + if (num_components > 1) + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, constituents, num_components); + else + result = constituents[0]; + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_store_scratch(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + nir_alu_type atype; + SpvId src = get_src(ctx, &intr->src[0], &atype); + + unsigned wrmask = nir_intrinsic_write_mask(intr); + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPrivate, + uint_type); + nir_alu_type otype; + SpvId offset = get_src(ctx, &intr->src[1], &otype); + if (otype != nir_type_uint) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[1]), 1); + SpvId scratch_block = get_scratch_block(ctx, bit_size); + /* this is a partial write, so we have to loop and do a per-component write */ + u_foreach_bit(i, wrmask) { + SpvId scratch_offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, i)); + SpvId val = src; + if (nir_src_num_components(intr->src[0]) != 1) + val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &i, 1); + if (atype != nir_type_uint) + val = emit_bitcast(ctx, get_alu_type(ctx, nir_type_uint, 1, bit_size), val); + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + scratch_block, &scratch_offset, 1); + spirv_builder_emit_store(&ctx->builder, member, val); } } static void emit_load_push_const(struct ntv_context *ctx, 
nir_intrinsic_instr *intr) { - unsigned bit_size = nir_dest_bit_size(intr->dest); SpvId uint_type = get_uvec_type(ctx, 32, 1); SpvId load_type = get_uvec_type(ctx, 32, 1); /* number of components being loaded */ - unsigned num_components = nir_dest_num_components(intr->dest); - /* we need to grab 2x32 to fill the 64bit value */ - if (bit_size == 64) - num_components *= 2; + unsigned num_components = intr->def.num_components; SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2]; SpvId result; /* destination type for the load */ - SpvId type = get_dest_uvec_type(ctx, &intr->dest); + SpvId type = get_def_uvec_type(ctx, &intr->def); SpvId one = emit_uint_const(ctx, 32, 1); /* we grab a single array member at a time, so it's a pointer to a uint */ @@ -2255,9 +2485,12 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvStorageClassPushConstant, load_type); - SpvId member = get_src(ctx, &intr->src[0]); + nir_alu_type atype; + SpvId member = get_src(ctx, &intr->src[0], &atype); + if (atype == nir_type_float) + member = bitcast_to_uvec(ctx, member, nir_src_bit_size(intr->src[0]), 1); /* reuse the offset from ZINK_PUSH_CONST_OFFSET */ - SpvId offset = emit_uint_const(ctx, 32, 0); + SpvId offset = emit_uint_const(ctx, 32, nir_intrinsic_component(intr)); /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: * index 0 is accessing 'base' * index 1 is accessing 'base[index 1]' @@ -2274,18 +2507,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one); } - /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values - * by creating uvec2 composites and bitcasting them to u64 values - */ - if (bit_size == 64) { - num_components /= 2; - type = get_uvec_type(ctx, 64, num_components); - SpvId u64_type = get_uvec_type(ctx, 64, 1); - for (unsigned i = 0; i < num_components; i++) { - constituents[i] = 
spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2); - constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]); - } - } /* if loading more than 1 value, reassemble the results into the desired type, * otherwise just use the loaded result */ @@ -2297,7 +2518,84 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) } else result = constituents[0]; - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_load_global(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + bool coherent = ctx->sinfo->have_vulkan_memory_model && nir_intrinsic_access(intr) & ACCESS_COHERENT; + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPhysicalStorageBuffer, + dest_type); + nir_alu_type atype; + SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[0], &atype)); + SpvId result = spirv_builder_emit_load_aligned(&ctx->builder, dest_type, ptr, intr->def.bit_size / 8, coherent); + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_store_global(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + bool coherent = ctx->sinfo->have_vulkan_memory_model && nir_intrinsic_access(intr) & ACCESS_COHERENT; + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses); + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId dest_type = get_uvec_type(ctx, bit_size, 1); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPhysicalStorageBuffer, + dest_type); + nir_alu_type atype; + SpvId param = get_src(ctx, &intr->src[0], &atype); + if (atype != nir_type_uint) + param = emit_bitcast(ctx, dest_type, param); + SpvId ptr = emit_bitcast(ctx, pointer_type, 
get_src(ctx, &intr->src[1], &atype)); + spirv_builder_emit_store_aligned(&ctx->builder, ptr, param, bit_size / 8, coherent); +} + +static void +emit_load_reg(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + assert(nir_intrinsic_base(intr) == 0 && "no array registers"); + + nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[0].ssa); + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned bit_size = nir_intrinsic_bit_size(decl); + unsigned index = decl->def.index; + assert(index < ctx->num_defs); + + init_reg(ctx, decl, nir_type_uint); + assert(ctx->defs[index] != 0); + + nir_alu_type atype = ctx->def_types[index]; + SpvId var = ctx->defs[index]; + SpvId type = get_alu_type(ctx, atype, num_components, bit_size); + SpvId result = spirv_builder_emit_load(&ctx->builder, type, var); + store_def(ctx, intr->def.index, result, atype); +} + +static void +emit_store_reg(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + nir_alu_type atype; + SpvId param = get_src(ctx, &intr->src[0], &atype); + + nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[1].ssa); + unsigned index = decl->def.index; + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned bit_size = nir_intrinsic_bit_size(decl); + + atype = nir_alu_type_get_base_type(atype); + init_reg(ctx, decl, atype); + SpvId var = ctx->defs[index]; + nir_alu_type vtype = ctx->def_types[index]; + if (atype != vtype) { + assert(vtype != nir_type_bool); + param = emit_bitcast(ctx, get_alu_type(ctx, vtype, num_components, bit_size), param); + } + assert(var); + spirv_builder_emit_store(&ctx->builder, var, param); } static SpvId @@ -2313,6 +2611,17 @@ create_builtin_var(struct ntv_context *ctx, SpvId var_type, spirv_builder_emit_name(&ctx->builder, var, name); spirv_builder_emit_builtin(&ctx->builder, var, builtin); + if (ctx->stage == MESA_SHADER_FRAGMENT) { + switch (builtin) { + case SpvBuiltInSampleId: + case SpvBuiltInSubgroupLocalInvocationId: + 
spirv_builder_emit_decoration(&ctx->builder, var, SpvDecorationFlat); + break; + default: + break; + } + } + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var; return var; @@ -2330,37 +2639,39 @@ emit_load_front_face(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, ctx->front_face_var); - assert(1 == nir_dest_num_components(intr->dest)); - store_dest(ctx, &intr->dest, result, nir_type_bool); + assert(1 == intr->def.num_components); + store_def(ctx, intr->def.index, result, nir_type_bool); } static void emit_load_uint_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *var_id, const char *var_name, SpvBuiltIn builtin) { SpvId var_type = spirv_builder_type_uint(&ctx->builder, 32); - if (builtin == SpvBuiltInSampleMask) { - /* gl_SampleMaskIn is an array[1] in spirv... */ - var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, 1)); - spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(uint32_t)); - } if (!*var_id) { + if (builtin == SpvBuiltInSampleMask) { + /* gl_SampleMaskIn is an array[1] in spirv... 
*/ + var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, 1)); + spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(uint32_t)); + } *var_id = create_builtin_var(ctx, var_type, SpvStorageClassInput, var_name, builtin); - if (builtin == SpvBuiltInSampleMask) { - SpvId zero = emit_uint_const(ctx, 32, 0); - var_type = spirv_builder_type_uint(&ctx->builder, 32); - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassInput, - var_type); - *var_id = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, *var_id, &zero, 1); - } } - SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, *var_id); - assert(1 == nir_dest_num_components(intr->dest)); - store_dest(ctx, &intr->dest, result, nir_type_uint); + SpvId load_var = *var_id; + if (builtin == SpvBuiltInSampleMask) { + SpvId zero = emit_uint_const(ctx, 32, 0); + var_type = spirv_builder_type_uint(&ctx->builder, 32); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassInput, + var_type); + load_var = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, load_var, &zero, 1); + } + + SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, load_var); + assert(1 == intr->def.num_components); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void @@ -2370,16 +2681,19 @@ emit_load_vec_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *v switch (type) { case nir_type_bool: - var_type = get_bvec_type(ctx, nir_dest_num_components(intr->dest)); + var_type = get_bvec_type(ctx, intr->def.num_components); break; case nir_type_int: - var_type = get_ivec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest)); + var_type = get_ivec_type(ctx, intr->def.bit_size, + intr->def.num_components); break; case nir_type_uint: - var_type = get_uvec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest)); + var_type = get_uvec_type(ctx, 
intr->def.bit_size, + intr->def.num_components); break; case nir_type_float: - var_type = get_fvec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest)); + var_type = get_fvec_type(ctx, intr->def.bit_size, + intr->def.num_components); break; default: unreachable("unknown type passed"); @@ -2391,7 +2705,7 @@ emit_load_vec_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *v builtin); SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, *var_id); - store_dest(ctx, &intr->dest, result, type); + store_def(ctx, intr->def.index, result, type); } static void @@ -2399,133 +2713,182 @@ emit_interpolate(struct ntv_context *ctx, nir_intrinsic_instr *intr) { SpvId op; spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInterpolationFunction); + SpvId src1 = 0; + nir_alu_type atype; switch (intr->intrinsic) { case nir_intrinsic_interp_deref_at_centroid: op = GLSLstd450InterpolateAtCentroid; break; case nir_intrinsic_interp_deref_at_sample: op = GLSLstd450InterpolateAtSample; + src1 = get_src(ctx, &intr->src[1], &atype); + if (atype != nir_type_int) + src1 = emit_bitcast(ctx, get_ivec_type(ctx, 32, 1), src1); break; case nir_intrinsic_interp_deref_at_offset: op = GLSLstd450InterpolateAtOffset; + src1 = get_src(ctx, &intr->src[1], &atype); + /* + The offset operand must be a vector of 2 components of 32-bit floating-point type. 
+ - InterpolateAtOffset spec + */ + if (atype != nir_type_float) + src1 = emit_bitcast(ctx, get_fvec_type(ctx, 32, 2), src1); break; default: unreachable("unknown interp op"); } - SpvId ptr = get_src(ctx, &intr->src[0]); + nir_alu_type ptype; + SpvId ptr = get_src(ctx, &intr->src[0], &ptype); SpvId result; + const struct glsl_type *gtype = nir_src_as_deref(intr->src[0])->type; + assert(glsl_get_vector_elements(gtype) == intr->num_components); + assert(ptype == get_nir_alu_type(gtype)); if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid) - result = emit_builtin_unop(ctx, op, get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), ptr); + result = emit_builtin_unop(ctx, op, get_glsl_type(ctx, gtype), ptr); else - result = emit_builtin_binop(ctx, op, get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), - ptr, get_src(ctx, &intr->src[1])); - unsigned num_components = nir_dest_num_components(intr->dest); - unsigned bit_size = nir_dest_bit_size(intr->dest); - result = bitcast_to_uvec(ctx, result, bit_size, num_components); - store_dest(ctx, &intr->dest, result, nir_type_uint); + result = emit_builtin_binop(ctx, op, get_glsl_type(ctx, gtype), ptr, src1); + store_def(ctx, intr->def.index, result, ptype); } static void -handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2) +handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2, nir_alu_type type) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); - SpvId result = emit_atomic(ctx, get_atomic_op(intr->intrinsic), dest_type, ptr, param, param2); + SpvId dest_type = get_def_type(ctx, &intr->def, type); + SpvId result = emit_atomic(ctx, + get_atomic_op(ctx, intr->def.bit_size, nir_intrinsic_atomic_op(intr)), + dest_type, ptr, param, param2); assert(result); - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_def(ctx, intr->def.index, result, type); } static void 
-emit_ssbo_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) +emit_deref_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId ssbo; - SpvId param; - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); + nir_alu_type atype; + nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint; + SpvId ptr = get_src(ctx, &intr->src[0], &atype); + if (atype != ret_type && ret_type == nir_type_float) { + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId *float_array_type = &ctx->float_array_type[bit_size == 32 ? 0 : 1]; + if (!*float_array_type) { + *float_array_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassStorageBuffer, + spirv_builder_type_float(&ctx->builder, bit_size)); + } + ptr = emit_unop(ctx, SpvOpBitcast, *float_array_type, ptr); + } - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); - assert(const_block_index); // no dynamic indexing for now - unsigned bit_size = MIN2(nir_src_bit_size(intr->src[0]), 32); - unsigned idx = bit_size >> 4; - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - if (!ctx->ssbos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest)); - ssbo = ctx->ssbos[const_block_index->u32][idx]; - param = get_src(ctx, &intr->src[2]); - - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassStorageBuffer, - dest_type); - SpvId uint_type = get_uvec_type(ctx, 32, 1); - /* an id of the array stride in bytes */ - SpvId uint_size = emit_uint_const(ctx, 32, bit_size / 8); - SpvId member = emit_uint_const(ctx, 32, 0); - SpvId offset = get_src(ctx, &intr->src[1]); - SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); - SpvId indices[] = { member, vec_offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - ssbo, indices, - ARRAY_SIZE(indices)); + SpvId 
param = get_src(ctx, &intr->src[1], &atype); + if (atype != ret_type) + param = cast_src_to_type(ctx, param, intr->src[1], ret_type); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) - param2 = get_src(ctx, &intr->src[3]); + if (nir_src_bit_size(intr->src[1]) == 64) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics); - handle_atomic_op(ctx, intr, ptr, param, param2); + if (intr->intrinsic == nir_intrinsic_deref_atomic_swap) { + param2 = get_src(ctx, &intr->src[2], &atype); + if (atype != ret_type) + param2 = cast_src_to_type(ctx, param2, intr->src[2], ret_type); + } + + handle_atomic_op(ctx, intr, ptr, param, param2, ret_type); } static void emit_shared_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); - SpvId param = get_src(ctx, &intr->src[1]); + unsigned bit_size = nir_src_bit_size(intr->src[1]); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + nir_alu_type atype; + nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? 
nir_type_float : nir_type_uint; + SpvId param = get_src(ctx, &intr->src[1], &atype); + if (atype != ret_type) + param = cast_src_to_type(ctx, param, intr->src[1], ret_type); SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassWorkgroup, dest_type); - SpvId offset = emit_binop(ctx, SpvOpUDiv, get_uvec_type(ctx, 32, 1), get_src(ctx, &intr->src[0]), emit_uint_const(ctx, 32, 4)); + SpvId offset = get_src(ctx, &intr->src[0], &atype); + if (atype != nir_type_uint) + offset = cast_src_to_type(ctx, offset, intr->src[0], nir_type_uint); + offset = emit_binop(ctx, SpvOpUDiv, get_uvec_type(ctx, 32, 1), offset, emit_uint_const(ctx, 32, bit_size / 8)); + SpvId shared_block = get_shared_block(ctx, bit_size); SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - ctx->shared_block_var, &offset, 1); + shared_block, &offset, 1); + if (nir_src_bit_size(intr->src[1]) == 64) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics); + SpvId param2 = 0; + + if (intr->intrinsic == nir_intrinsic_shared_atomic_swap) { + param2 = get_src(ctx, &intr->src[2], &atype); + if (atype != ret_type) + param2 = cast_src_to_type(ctx, param2, intr->src[2], ret_type); + } + + handle_atomic_op(ctx, intr, ptr, param, param2, ret_type); +} + +static void +emit_global_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + unsigned bit_size = nir_src_bit_size(intr->src[1]); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + nir_alu_type atype; + nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? 
nir_type_float : nir_type_uint; + SpvId param = get_src(ctx, &intr->src[1], &atype); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPhysicalStorageBuffer, + dest_type); + SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[0], &atype)); + + if (bit_size == 64) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) - param2 = get_src(ctx, &intr->src[2]); + if (intr->intrinsic == nir_intrinsic_global_atomic_swap) + param2 = get_src(ctx, &intr->src[2], &atype); - handle_atomic_op(ctx, intr, ptr, param, param2); + handle_atomic_op(ctx, intr, ptr, param, param2, ret_type); } static void emit_get_ssbo_size(struct ntv_context *ctx, nir_intrinsic_instr *intr) { SpvId uint_type = get_uvec_type(ctx, 32, 1); - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); - assert(const_block_index); // no dynamic indexing for now - nir_variable *var = ctx->ssbo_vars[const_block_index->u32]; + nir_variable *var = ctx->ssbo_vars; + const struct glsl_type *bare_type = glsl_without_array(var->type); + unsigned last_member_idx = glsl_get_length(bare_type) - 1; + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassStorageBuffer, + get_bo_struct_type(ctx, var)); + nir_alu_type atype; + SpvId bo = get_src(ctx, &intr->src[0], &atype); + if (atype == nir_type_float) + bo = bitcast_to_uvec(ctx, bo, nir_src_bit_size(intr->src[0]), 1); + SpvId indices[] = { bo }; + SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, + ctx->ssbos[2], indices, + ARRAY_SIZE(indices)); SpvId result = spirv_builder_emit_binop(&ctx->builder, SpvOpArrayLength, uint_type, - ctx->ssbos[const_block_index->u32][2], 1); + ptr, last_member_idx); /* this is going to be converted by nir to: length = (buffer_size - offset) / stride 
* so we need to un-convert it to avoid having the calculation performed twice */ - unsigned last_member_idx = glsl_get_length(var->interface_type) - 1; - const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, last_member_idx); + const struct glsl_type *last_member = glsl_get_struct_field(bare_type, last_member_idx); /* multiply by stride */ result = emit_binop(ctx, SpvOpIMul, uint_type, result, emit_uint_const(ctx, 32, glsl_get_explicit_stride(last_member))); /* get total ssbo size by adding offset */ result = emit_binop(ctx, SpvOpIAdd, uint_type, result, emit_uint_const(ctx, 32, - glsl_get_struct_field_offset(var->interface_type, last_member_idx))); - store_dest(ctx, &intr->dest, result, nir_type_uint); -} - -static inline nir_variable * -get_var_from_image(struct ntv_context *ctx, SpvId var_id) -{ - struct hash_entry *he = _mesa_hash_table_search(ctx->image_vars, &var_id); - assert(he); - return he->data; + glsl_get_struct_field_offset(bare_type, last_member_idx))); + store_def(ctx, intr->def.index, result, nir_type_uint); } static SpvId @@ -2534,16 +2897,17 @@ get_image_coords(struct ntv_context *ctx, const struct glsl_type *type, nir_src uint32_t num_coords = glsl_get_sampler_coordinate_components(type); uint32_t src_components = nir_src_num_components(*src); - SpvId spv = get_src(ctx, src); + nir_alu_type atype; + SpvId spv = get_src(ctx, src, &atype); if (num_coords == src_components) return spv; /* need to extract the coord dimensions that the image can use */ - SpvId vec_type = get_uvec_type(ctx, 32, num_coords); + SpvId vec_type = get_alu_type(ctx, atype, num_coords, 32); if (num_coords == 1) return spirv_builder_emit_vector_extract(&ctx->builder, vec_type, spv, 0); uint32_t constituents[4]; - SpvId zero = emit_uint_const(ctx, nir_src_bit_size(*src), 0); + SpvId zero = atype == nir_type_uint ? 
emit_uint_const(ctx, nir_src_bit_size(*src), 0) : emit_float_const(ctx, nir_src_bit_size(*src), 0); assert(num_coords < ARRAY_SIZE(constituents)); for (unsigned i = 0; i < num_coords; i++) constituents[i] = i < src_components ? i : zero; @@ -2553,81 +2917,165 @@ get_image_coords(struct ntv_context *ctx, const struct glsl_type *type, nir_src static void emit_image_deref_store(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + SpvId img_type = find_image_type(ctx, var); const struct glsl_type *type = glsl_without_array(var->type); SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId texel = get_src(ctx, &intr->src[3]); - SpvId sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ? get_src(ctx, &intr->src[2]) : 0; - assert(nir_src_bit_size(intr->src[3]) == glsl_base_type_bit_size(glsl_get_sampler_result_type(type))); + SpvId texel = get_src(ctx, &intr->src[3], &atype); /* texel type must match image type */ - texel = emit_bitcast(ctx, - spirv_builder_type_vector(&ctx->builder, base_type, 4), - texel); + if (atype != nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(type))) + texel = emit_bitcast(ctx, + spirv_builder_type_vector(&ctx->builder, base_type, 4), + texel); + bool use_sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS; + SpvId sample = use_sample ? 
get_src(ctx, &intr->src[2], &atype) : 0; + assert(nir_src_bit_size(intr->src[3]) == glsl_base_type_bit_size(glsl_get_sampler_result_type(type))); spirv_builder_emit_image_write(&ctx->builder, img, coord, texel, 0, sample, 0); } +static SpvId +extract_sparse_load(struct ntv_context *ctx, SpvId result, SpvId dest_type, nir_def *def) +{ + /* Result Type must be an OpTypeStruct with two members. + * The first member’s type must be an integer type scalar. + * It holds a Residency Code that can be passed to OpImageSparseTexelsResident + * - OpImageSparseRead spec + */ + uint32_t idx = 0; + SpvId resident = spirv_builder_emit_composite_extract(&ctx->builder, spirv_builder_type_uint(&ctx->builder, 32), result, &idx, 1); + idx = 1; + /* normal vec4 return */ + if (def->num_components == 4) + result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, result, &idx, 1); + else { + /* shadow */ + assert(def->num_components == 1); + SpvId type = spirv_builder_type_float(&ctx->builder, def->bit_size); + SpvId val[2]; + /* pad to 2 components: the upcoming is_sparse_texels_resident instr will always use the + * separate residency value, but the shader still expects this return to be a vec2, + * so give it a vec2 + */ + val[0] = spirv_builder_emit_composite_extract(&ctx->builder, type, result, &idx, 1); + val[1] = emit_float_const(ctx, def->bit_size, 0); + result = spirv_builder_emit_composite_construct(&ctx->builder, get_fvec_type(ctx, def->bit_size, 2), val, 2); + } + assert(resident != 0); + assert(def->index < ctx->num_defs); + ctx->resident_defs[def->index] = resident; + return result; +} + static void emit_image_deref_load(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + bool sparse = intr->intrinsic == nir_intrinsic_image_deref_sparse_load; + nir_alu_type atype; + SpvId img_var = 
get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + bool mediump = (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW); + SpvId img_type = find_image_type(ctx, var); const struct glsl_type *type = glsl_without_array(var->type); SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ? get_src(ctx, &intr->src[2]) : 0; + bool use_sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS; + SpvId sample = use_sample ? get_src(ctx, &intr->src[2], &atype) : 0; + SpvId dest_type = spirv_builder_type_vector(&ctx->builder, base_type, + intr->def.num_components); SpvId result = spirv_builder_emit_image_read(&ctx->builder, - spirv_builder_type_vector(&ctx->builder, base_type, nir_dest_num_components(intr->dest)), - img, coord, 0, sample, 0); - store_dest(ctx, &intr->dest, result, nir_type_float); + dest_type, + img, coord, 0, sample, 0, sparse); + if (sparse) + result = extract_sparse_load(ctx, result, dest_type, &intr->def); + + if (!sparse && mediump) { + spirv_builder_emit_decoration(&ctx->builder, result, + SpvDecorationRelaxedPrecision); + } + + store_def(ctx, intr->def.index, result, nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(type))); } static void emit_image_deref_size(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + 
nir_variable *var = nir_deref_instr_get_variable(deref); + SpvId img_type = find_image_type(ctx, var); const struct glsl_type *type = glsl_without_array(var->type); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); - SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, get_uvec_type(ctx, 32, glsl_get_sampler_coordinate_components(type)), img, 0); - store_dest(ctx, &intr->dest, result, nir_type_uint); + unsigned num_components = glsl_get_sampler_coordinate_components(type); + /* SPIRV requires 2 components for non-array cube size */ + if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && !glsl_sampler_type_is_array(type)) + num_components = 2; + + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery); + SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, get_uvec_type(ctx, 32, num_components), img, 0); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void emit_image_deref_samples(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + SpvId img_type = find_image_type(ctx, var); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); - SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, get_dest_type(ctx, &intr->dest, nir_type_uint), img); - store_dest(ctx, &intr->dest, result, nir_type_uint); + + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery); + SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, get_def_type(ctx, &intr->def, nir_type_uint), img); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void emit_image_intrinsic(struct 
ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - SpvId param = get_src(ctx, &intr->src[3]); - nir_variable *var = get_var_from_image(ctx, img_var); + nir_alu_type atype, ptype; + SpvId param = get_src(ctx, &intr->src[3], &ptype); + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); const struct glsl_type *type = glsl_without_array(var->type); bool is_ms; type_to_dim(glsl_get_sampler_dim(type), &is_ms); - SpvId sample = is_ms ? get_src(ctx, &intr->src[2]) : emit_uint_const(ctx, 32, 0); + SpvId sample = is_ms ? get_src(ctx, &intr->src[2], &atype) : emit_uint_const(ctx, 32, 0); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); + enum glsl_base_type glsl_result_type = glsl_get_sampler_result_type(type); + SpvId base_type = get_glsl_basetype(ctx, glsl_result_type); SpvId texel = spirv_builder_emit_image_texel_pointer(&ctx->builder, base_type, img_var, coord, sample); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) - param2 = get_src(ctx, &intr->src[4]); - handle_atomic_op(ctx, intr, texel, param, param2); + /* The type of Value must be the same as Result Type. + * The type of the value pointed to by Pointer must be the same as Result Type. 
+ */ + nir_alu_type ntype = nir_get_nir_type_for_glsl_base_type(glsl_result_type); + if (ptype != ntype) { + SpvId cast_type = get_def_type(ctx, &intr->def, ntype); + param = emit_bitcast(ctx, cast_type, param); + } + + if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) { + param2 = get_src(ctx, &intr->src[4], &ptype); + if (ptype != ntype) { + SpvId cast_type = get_def_type(ctx, &intr->def, ntype); + param2 = emit_bitcast(ctx, cast_type, param2); + } + } + + handle_atomic_op(ctx, intr, texel, param, param2, ntype); } static void @@ -2635,9 +3083,10 @@ emit_ballot(struct ntv_context *ctx, nir_intrinsic_instr *intr) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot"); - SpvId type = get_dest_uvec_type(ctx, &intr->dest); - SpvId result = emit_unop(ctx, SpvOpSubgroupBallotKHR, type, get_src(ctx, &intr->src[0])); - store_dest(ctx, &intr->dest, result, nir_type_uint); + SpvId type = get_def_uvec_type(ctx, &intr->def); + nir_alu_type atype; + SpvId result = emit_unop(ctx, SpvOpSubgroupBallotKHR, type, get_src(ctx, &intr->src[0], &atype)); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void @@ -2645,9 +3094,11 @@ emit_read_first_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot"); - SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint); - SpvId result = emit_unop(ctx, SpvOpSubgroupFirstInvocationKHR, type, get_src(ctx, &intr->src[0])); - store_dest(ctx, &intr->dest, result, nir_type_uint); + nir_alu_type atype; + SpvId src = get_src(ctx, &intr->src[0], &atype); + SpvId type = get_def_type(ctx, &intr->def, atype); + SpvId result = emit_unop(ctx, SpvOpSubgroupFirstInvocationKHR, type, src); + store_def(ctx, intr->def.index, result, atype); } static void @@ -2655,11 +3106,13 @@ 
emit_read_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot"); - SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint); + nir_alu_type atype, itype; + SpvId src = get_src(ctx, &intr->src[0], &atype); + SpvId type = get_def_type(ctx, &intr->def, atype); SpvId result = emit_binop(ctx, SpvOpSubgroupReadInvocationKHR, type, - get_src(ctx, &intr->src[0]), - get_src(ctx, &intr->src[1])); - store_dest(ctx, &intr->dest, result, nir_type_uint); + src, + get_src(ctx, &intr->src[1], &itype)); + store_def(ctx, intr->def.index, result, atype); } static void @@ -2669,9 +3122,25 @@ emit_shader_clock(struct ntv_context *ctx, nir_intrinsic_instr *intr) spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_clock"); SpvScope scope = get_scope(nir_intrinsic_memory_scope(intr)); - SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint); + SpvId type = get_def_type(ctx, &intr->def, nir_type_uint); SpvId result = spirv_builder_emit_unop_const(&ctx->builder, SpvOpReadClockKHR, type, scope); - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_is_sparse_texels_resident(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySparseResidency); + + SpvId type = get_def_type(ctx, &intr->def, nir_type_uint); + + unsigned index = intr->src[0].ssa->index; + assert(index < ctx->num_defs); + assert(ctx->resident_defs[index] != 0); + SpvId resident = ctx->resident_defs[index]; + + SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageSparseTexelsResident, type, resident); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void @@ -2693,27 +3162,85 @@ emit_vote(struct ntv_context *ctx, nir_intrinsic_instr *intr) default: unreachable("unknown vote intrinsic"); } - SpvId result 
= spirv_builder_emit_vote(&ctx->builder, op, get_src(ctx, &intr->src[0])); - store_dest_raw(ctx, &intr->dest, result); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityGroupNonUniformVote); + nir_alu_type atype; + SpvId result = spirv_builder_emit_vote(&ctx->builder, op, get_src(ctx, &intr->src[0], &atype)); + store_def(ctx, intr->def.index, result, nir_type_bool); +} + +static void +emit_is_helper_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + spirv_builder_emit_extension(&ctx->builder, + "SPV_EXT_demote_to_helper_invocation"); + SpvId result = spirv_is_helper_invocation(&ctx->builder); + store_def(ctx, intr->def.index, result, nir_type_bool); +} + +static void +emit_barrier(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + SpvScope scope = get_scope(nir_intrinsic_execution_scope(intr)); + SpvScope mem_scope = get_scope(nir_intrinsic_memory_scope(intr)); + SpvMemorySemanticsMask semantics = 0; + + if (nir_intrinsic_memory_scope(intr) != SCOPE_NONE) { + nir_variable_mode modes = nir_intrinsic_memory_modes(intr); + + if (modes & nir_var_image) + semantics |= SpvMemorySemanticsImageMemoryMask; + + if (modes & nir_var_mem_shared) + semantics |= SpvMemorySemanticsWorkgroupMemoryMask; + + if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) + semantics |= SpvMemorySemanticsUniformMemoryMask; + + if (modes & nir_var_mem_global) + semantics |= SpvMemorySemanticsCrossWorkgroupMemoryMask; + + if (modes & (nir_var_shader_out | nir_var_mem_task_payload)) + semantics |= SpvMemorySemanticsOutputMemoryMask; + + if (!modes) + semantics = SpvMemorySemanticsWorkgroupMemoryMask | + SpvMemorySemanticsUniformMemoryMask | + SpvMemorySemanticsImageMemoryMask | + SpvMemorySemanticsCrossWorkgroupMemoryMask; + semantics |= SpvMemorySemanticsAcquireReleaseMask; + } + + if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE) + spirv_builder_emit_control_barrier(&ctx->builder, scope, mem_scope, semantics); + else + 
spirv_builder_emit_memory_barrier(&ctx->builder, mem_scope, semantics); } static void emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { switch (intr->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - emit_load_bo(ctx, intr); + case nir_intrinsic_decl_reg: + /* Nothing to do */ + break; + + case nir_intrinsic_load_reg: + emit_load_reg(ctx, intr); break; - case nir_intrinsic_store_ssbo: - emit_store_ssbo(ctx, intr); + case nir_intrinsic_store_reg: + emit_store_reg(ctx, intr); break; case nir_intrinsic_discard: emit_discard(ctx, intr); break; + case nir_intrinsic_demote: + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityDemoteToHelperInvocation); + spirv_builder_emit_demote(&ctx->builder); + break; + case nir_intrinsic_load_deref: emit_load_deref(ctx, intr); break; @@ -2722,10 +3249,19 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_store_deref(ctx, intr); break; - case nir_intrinsic_load_push_constant: + case nir_intrinsic_load_push_constant_zink: emit_load_push_const(ctx, intr); break; + case nir_intrinsic_load_global: + case nir_intrinsic_load_global_constant: + emit_load_global(ctx, intr); + break; + + case nir_intrinsic_store_global: + emit_store_global(ctx, intr); + break; + case nir_intrinsic_load_front_face: emit_load_front_face(ctx, intr); break; @@ -2759,9 +3295,15 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_sample_id: + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampleRateShading); emit_load_uint_input(ctx, intr, &ctx->sample_id_var, "gl_SampleId", SpvBuiltInSampleId); break; + case nir_intrinsic_load_point_coord_maybe_flipped: + case nir_intrinsic_load_point_coord: + emit_load_vec_input(ctx, intr, &ctx->point_coord_var, "gl_PointCoord", SpvBuiltInPointCoord, nir_type_float); + break; + case nir_intrinsic_load_sample_pos: emit_load_vec_input(ctx, intr, &ctx->sample_pos_var, "gl_SamplePosition", 
SpvBuiltInSamplePosition, nir_type_float); break; @@ -2770,21 +3312,15 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_load_uint_input(ctx, intr, &ctx->sample_mask_in_var, "gl_SampleMaskIn", SpvBuiltInSampleMask); break; - case nir_intrinsic_emit_vertex_with_counter: - /* geometry shader emits copied xfb outputs just prior to EmitVertex(), - * since that's the end of the shader - */ - if (ctx->so_info) - emit_so_outputs(ctx, ctx->so_info); - spirv_builder_emit_vertex(&ctx->builder, nir_intrinsic_stream_id(intr)); - break; - - case nir_intrinsic_set_vertex_and_primitive_count: - /* do nothing */ + case nir_intrinsic_emit_vertex: + if (ctx->nir->info.gs.vertices_out) //skip vertex emission if !vertices_out + spirv_builder_emit_vertex(&ctx->builder, nir_intrinsic_stream_id(intr), + ctx->nir->info.stage == MESA_SHADER_GEOMETRY && util_bitcount(ctx->nir->info.gs.active_stream_mask) > 1); break; - case nir_intrinsic_end_primitive_with_counter: - spirv_builder_end_primitive(&ctx->builder, nir_intrinsic_stream_id(intr)); + case nir_intrinsic_end_primitive: + spirv_builder_end_primitive(&ctx->builder, nir_intrinsic_stream_id(intr), + ctx->nir->info.stage == MESA_SHADER_GEOMETRY && util_bitcount(ctx->nir->info.gs.active_stream_mask) > 1); break; case nir_intrinsic_load_helper_invocation: @@ -2801,39 +3337,8 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvBuiltInTessCoord, nir_type_float); break; - case nir_intrinsic_memory_barrier_tcs_patch: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsOutputMemoryMask | SpvMemorySemanticsReleaseMask); - break; - - case nir_intrinsic_memory_barrier: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsImageMemoryMask | SpvMemorySemanticsUniformMemoryMask | - SpvMemorySemanticsMakeVisibleMask | SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_memory_barrier_image: - 
spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeDevice, - SpvMemorySemanticsImageMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_group_memory_barrier: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsWorkgroupMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_memory_barrier_shared: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsWorkgroupMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_control_barrier: - spirv_builder_emit_control_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvScopeWorkgroup, - SpvMemorySemanticsWorkgroupMemoryMask | SpvMemorySemanticsAcquireMask); + case nir_intrinsic_barrier: + emit_barrier(ctx, intr); break; case nir_intrinsic_interp_deref_at_centroid: @@ -2842,38 +3347,21 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_interpolate(ctx, intr); break; - case nir_intrinsic_memory_barrier_buffer: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeDevice, - SpvMemorySemanticsUniformMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - emit_ssbo_atomic_intrinsic(ctx, intr); - break; - - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case 
nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_deref_atomic: + case nir_intrinsic_deref_atomic_swap: + emit_deref_atomic_intrinsic(ctx, intr); + break; + + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: emit_shared_atomic_intrinsic(ctx, intr); break; + case nir_intrinsic_global_atomic: + case nir_intrinsic_global_atomic_swap: + emit_global_atomic_intrinsic(ctx, intr); + break; + case nir_intrinsic_begin_invocation_interlock: case nir_intrinsic_end_invocation_interlock: spirv_builder_emit_interlock(&ctx->builder, intr->intrinsic == nir_intrinsic_end_invocation_interlock); @@ -2887,6 +3375,7 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_image_deref_store(ctx, intr); break; + case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_image_deref_load: emit_image_deref_load(ctx, intr); break; @@ -2899,16 +3388,8 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_image_deref_samples(ctx, intr); break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_deref_atomic_swap: emit_image_intrinsic(ctx, intr); break; @@ -2959,7 +3440,7 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_workgroup_size: assert(ctx->local_group_size_var); - store_dest(ctx, &intr->dest, ctx->local_group_size_var, nir_type_uint); + store_def(ctx, intr->def.index, ctx->local_group_size_var, nir_type_uint); break; case nir_intrinsic_load_shared: 
@@ -2970,6 +3451,14 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_store_shared(ctx, intr); break; + case nir_intrinsic_load_scratch: + emit_load_scratch(ctx, intr); + break; + + case nir_intrinsic_store_scratch: + emit_store_scratch(ctx, intr); + break; + case nir_intrinsic_shader_clock: emit_shader_clock(ctx, intr); break; @@ -2981,6 +3470,14 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_vote(ctx, intr); break; + case nir_intrinsic_is_sparse_resident_zink: + emit_is_sparse_texels_resident(ctx, intr); + break; + + case nir_intrinsic_is_helper_invocation: + emit_is_helper_invocation(ctx, intr); + break; + default: fprintf(stderr, "emit_intrinsic: not implemented (%s)\n", nir_intrinsic_infos[intr->intrinsic].name); @@ -2989,20 +3486,24 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) } static void -emit_undef(struct ntv_context *ctx, nir_ssa_undef_instr *undef) +emit_undef(struct ntv_context *ctx, nir_undef_instr *undef) { SpvId type = undef->def.bit_size == 1 ? get_bvec_type(ctx, undef->def.num_components) : get_uvec_type(ctx, undef->def.bit_size, undef->def.num_components); - store_ssa_def(ctx, &undef->def, - spirv_builder_emit_undef(&ctx->builder, type)); + store_def(ctx, undef->def.index, + spirv_builder_emit_undef(&ctx->builder, type), + undef->def.bit_size == 1 ? 
nir_type_bool : nir_type_uint); } static SpvId get_src_float(struct ntv_context *ctx, nir_src *src) { - SpvId def = get_src(ctx, src); + nir_alu_type atype; + SpvId def = get_src(ctx, src, &atype); + if (atype == nir_type_float) + return def; unsigned num_components = nir_src_num_components(*src); unsigned bit_size = nir_src_bit_size(*src); return bitcast_to_fvec(ctx, def, bit_size, num_components); @@ -3011,7 +3512,10 @@ get_src_float(struct ntv_context *ctx, nir_src *src) static SpvId get_src_int(struct ntv_context *ctx, nir_src *src) { - SpvId def = get_src(ctx, src); + nir_alu_type atype; + SpvId def = get_src(ctx, src, &atype); + if (atype == nir_type_int) + return def; unsigned num_components = nir_src_num_components(*src); unsigned bit_size = nir_src_bit_size(*src); return bitcast_to_ivec(ctx, def, bit_size, num_components); @@ -3027,44 +3531,37 @@ tex_instr_is_lod_allowed(nir_tex_instr *tex) return (tex->sampler_dim == GLSL_SAMPLER_DIM_1D || tex->sampler_dim == GLSL_SAMPLER_DIM_2D || tex->sampler_dim == GLSL_SAMPLER_DIM_3D || - tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); + tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE || + /* External images are interpreted as 2D in type_to_dim, + * so LOD is allowed */ + tex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL || + /* RECT will always become 2D, so this is fine */ + tex->sampler_dim == GLSL_SAMPLER_DIM_RECT); } static void -emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) +get_tex_srcs(struct ntv_context *ctx, nir_tex_instr *tex, + nir_variable **bindless_var, unsigned *coord_components, + struct spriv_tex_src *tex_src) { - assert(tex->op == nir_texop_tex || - tex->op == nir_texop_txb || - tex->op == nir_texop_txl || - tex->op == nir_texop_txd || - tex->op == nir_texop_txf || - tex->op == nir_texop_txf_ms || - tex->op == nir_texop_txs || - tex->op == nir_texop_lod || - tex->op == nir_texop_tg4 || - tex->op == nir_texop_texture_samples || - tex->op == nir_texop_query_levels); - assert(tex->texture_index == 
tex->sampler_index); - - SpvId coord = 0, proj = 0, bias = 0, lod = 0, dref = 0, dx = 0, dy = 0, - const_offset = 0, offset = 0, sample = 0, tex_offset = 0; - unsigned coord_components = 0; + tex_src->sparse = tex->is_sparse; + nir_alu_type atype; for (unsigned i = 0; i < tex->num_srcs; i++) { nir_const_value *cv; switch (tex->src[i].src_type) { case nir_tex_src_coord: if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms) - coord = get_src_int(ctx, &tex->src[i].src); + tex_src->coord = get_src_int(ctx, &tex->src[i].src); else - coord = get_src_float(ctx, &tex->src[i].src); - coord_components = nir_src_num_components(tex->src[i].src); + tex_src->coord = get_src_float(ctx, &tex->src[i].src); + *coord_components = nir_src_num_components(tex->src[i].src); break; case nir_tex_src_projector: assert(nir_src_num_components(tex->src[i].src) == 1); - proj = get_src_float(ctx, &tex->src[i].src); - assert(proj != 0); + tex_src->proj = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->proj != 0); break; case nir_tex_src_offset: @@ -3074,27 +3571,33 @@ emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) unsigned num_components = nir_src_num_components(tex->src[i].src); SpvId components[NIR_MAX_VEC_COMPONENTS]; - for (int i = 0; i < num_components; ++i) { - int64_t tmp = nir_const_value_as_int(cv[i], bit_size); - components[i] = emit_int_const(ctx, bit_size, tmp); + for (int j = 0; j < num_components; ++j) { + int64_t tmp = nir_const_value_as_int(cv[j], bit_size); + components[j] = emit_int_const(ctx, bit_size, tmp); } if (num_components > 1) { SpvId type = get_ivec_type(ctx, bit_size, num_components); - const_offset = spirv_builder_const_composite(&ctx->builder, - type, - components, - num_components); + tex_src->const_offset = spirv_builder_const_composite(&ctx->builder, + type, + components, + num_components); } else - const_offset = components[0]; + tex_src->const_offset = components[0]; } else - offset = get_src_int(ctx, &tex->src[i].src); + tex_src->offset = 
get_src_int(ctx, &tex->src[i].src); break; case nir_tex_src_bias: assert(tex->op == nir_texop_txb); - bias = get_src_float(ctx, &tex->src[i].src); - assert(bias != 0); + tex_src->bias = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->bias != 0); + break; + + case nir_tex_src_min_lod: + assert(nir_src_num_components(tex->src[i].src) == 1); + tex_src->min_lod = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->min_lod != 0); break; case nir_tex_src_lod: @@ -3102,201 +3605,320 @@ emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms || tex->op == nir_texop_txs) - lod = get_src_int(ctx, &tex->src[i].src); + tex_src->lod = get_src_int(ctx, &tex->src[i].src); else - lod = get_src_float(ctx, &tex->src[i].src); - assert(lod != 0); + tex_src->lod = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->lod != 0); break; case nir_tex_src_ms_index: assert(nir_src_num_components(tex->src[i].src) == 1); - sample = get_src_int(ctx, &tex->src[i].src); + tex_src->sample = get_src_int(ctx, &tex->src[i].src); break; case nir_tex_src_comparator: assert(nir_src_num_components(tex->src[i].src) == 1); - dref = get_src_float(ctx, &tex->src[i].src); - assert(dref != 0); + tex_src->dref = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->dref != 0); break; case nir_tex_src_ddx: - dx = get_src_float(ctx, &tex->src[i].src); - assert(dx != 0); + tex_src->dx = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->dx != 0); break; case nir_tex_src_ddy: - dy = get_src_float(ctx, &tex->src[i].src); - assert(dy != 0); + tex_src->dy = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->dy != 0); break; case nir_tex_src_texture_offset: - tex_offset = get_src_int(ctx, &tex->src[i].src); + tex_src->tex_offset = get_src_int(ctx, &tex->src[i].src); break; case nir_tex_src_sampler_offset: + case nir_tex_src_sampler_handle: /* don't care */ break; + case nir_tex_src_texture_handle: + tex_src->bindless = get_src(ctx, 
&tex->src[i].src, &atype); + *bindless_var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[i].src)); + break; + default: fprintf(stderr, "texture source: %d\n", tex->src[i].src_type); unreachable("unknown texture source"); } } +} - unsigned texture_index = tex->texture_index; - if (!tex_offset) { - /* convert constant index back to base + offset */ - unsigned last_sampler = util_last_bit(ctx->samplers_used); - for (unsigned i = 0; i < last_sampler; i++) { - if (!ctx->sampler_array_sizes[i]) { - if (i == texture_index) - /* this is a non-array sampler, so we don't need an access chain */ +static void +find_sampler_and_texture_index(struct ntv_context *ctx, struct spriv_tex_src *tex_src, + nir_variable *bindless_var, + nir_variable **var, uint32_t *texture_index) +{ + *var = bindless_var ? bindless_var : ctx->sampler_var[*texture_index]; + nir_variable **sampler_var = tex_src->bindless ? ctx->bindless_sampler_var : ctx->sampler_var; + if (!bindless_var && (!tex_src->tex_offset || !var)) { + if (sampler_var[*texture_index]) { + if (glsl_type_is_array(sampler_var[*texture_index]->type)) + tex_src->tex_offset = emit_uint_const(ctx, 32, 0); + } else { + /* convert constant index back to base + offset */ + for (int i = *texture_index; i >= 0; i--) { + if (sampler_var[i]) { + assert(glsl_type_is_array(sampler_var[i]->type)); + if (!tex_src->tex_offset) + tex_src->tex_offset = emit_uint_const(ctx, 32, *texture_index - i); + *var = sampler_var[i]; + *texture_index = i; break; - } else if (texture_index <= i + ctx->sampler_array_sizes[i] - 1) { - /* this is the first member of a sampler array */ - tex_offset = emit_uint_const(ctx, 32, texture_index - i); - texture_index = i; - break; + } } } } - SpvId image_type = ctx->sampler_types[texture_index]; - assert(image_type); - SpvId sampled_type = spirv_builder_type_sampled_image(&ctx->builder, - image_type); - assert(sampled_type); - assert(ctx->samplers_used & (1u << texture_index)); - SpvId sampler_id = 
ctx->samplers[texture_index]; - if (tex_offset) { - SpvId ptr = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, sampled_type); - sampler_id = spirv_builder_emit_access_chain(&ctx->builder, ptr, sampler_id, &tex_offset, 1); - } - SpvId load = spirv_builder_emit_load(&ctx->builder, sampled_type, sampler_id); - - SpvId dest_type = get_dest_type(ctx, &tex->dest, tex->dest_type); +} - if (!tex_instr_is_lod_allowed(tex)) - lod = 0; - if (tex->op == nir_texop_txs) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, - dest_type, image, - lod); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; - } - if (tex->op == nir_texop_query_levels) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - SpvId result = spirv_builder_emit_image_query_levels(&ctx->builder, - dest_type, image); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; - } - if (tex->op == nir_texop_texture_samples) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, - dest_type, image); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; +static SpvId +get_texture_load(struct ntv_context *ctx, SpvId sampler_id, nir_tex_instr *tex, + SpvId image_type, SpvId sampled_type) +{ + if (ctx->stage == MESA_SHADER_KERNEL) { + SpvId image_load = spirv_builder_emit_load(&ctx->builder, image_type, sampler_id); + if (nir_tex_instr_need_sampler(tex)) { + SpvId sampler_load = spirv_builder_emit_load(&ctx->builder, spirv_builder_type_sampler(&ctx->builder), + ctx->cl_samplers[tex->sampler_index]); + return spirv_builder_emit_sampled_image(&ctx->builder, sampled_type, image_load, sampler_load); + } else { + return image_load; + } + } else { + return spirv_builder_emit_load(&ctx->builder, sampled_type, sampler_id); } +} - if (proj && 
coord_components > 0) { - SpvId constituents[NIR_MAX_VEC_COMPONENTS + 1]; - if (coord_components == 1) - constituents[0] = coord; - else { - assert(coord_components > 1); - SpvId float_type = spirv_builder_type_float(&ctx->builder, 32); - for (uint32_t i = 0; i < coord_components; ++i) - constituents[i] = spirv_builder_emit_composite_extract(&ctx->builder, - float_type, - coord, - &i, 1); - } +static SpvId +get_texop_dest_type(struct ntv_context *ctx, const nir_tex_instr *tex) +{ + SpvId actual_dest_type; + unsigned num_components = tex->def.num_components; + switch (nir_alu_type_get_base_type(tex->dest_type)) { + case nir_type_int: + actual_dest_type = get_ivec_type(ctx, 32, num_components); + break; - constituents[coord_components++] = proj; + case nir_type_uint: + actual_dest_type = get_uvec_type(ctx, 32, num_components); + break; - SpvId vec_type = get_fvec_type(ctx, 32, coord_components); - coord = spirv_builder_emit_composite_construct(&ctx->builder, - vec_type, - constituents, - coord_components); - } - if (tex->op == nir_texop_lod) { - SpvId result = spirv_builder_emit_image_query_lod(&ctx->builder, - dest_type, load, - coord); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; + case nir_type_float: + actual_dest_type = get_fvec_type(ctx, 32, num_components); + break; + + default: + unreachable("unexpected nir_alu_type"); } - SpvId actual_dest_type; - if (dref) - actual_dest_type = - spirv_builder_type_float(&ctx->builder, - nir_dest_bit_size(tex->dest)); + + return actual_dest_type; +} + +static void +move_tex_proj_to_coord(struct ntv_context *ctx, unsigned coord_components, struct spriv_tex_src *tex_src) +{ + SpvId constituents[NIR_MAX_VEC_COMPONENTS + 1]; + if (coord_components == 1) + constituents[0] = tex_src->coord; else { - unsigned num_components = nir_dest_num_components(tex->dest); - switch (nir_alu_type_get_base_type(tex->dest_type)) { - case nir_type_int: - actual_dest_type = get_ivec_type(ctx, 32, num_components); - break; + 
assert(coord_components > 1); + SpvId float_type = spirv_builder_type_float(&ctx->builder, 32); + for (uint32_t i = 0; i < coord_components; ++i) + constituents[i] = spirv_builder_emit_composite_extract(&ctx->builder, + float_type, + tex_src->coord, + &i, 1); + } - case nir_type_uint: - actual_dest_type = get_uvec_type(ctx, 32, num_components); - break; + constituents[coord_components++] = tex_src->proj; - case nir_type_float: - actual_dest_type = get_fvec_type(ctx, 32, num_components); - break; + SpvId vec_type = get_fvec_type(ctx, 32, coord_components); + tex_src->coord = spirv_builder_emit_composite_construct(&ctx->builder, + vec_type, + constituents, + coord_components); +} - default: - unreachable("unexpected nir_alu_type"); - } - } +static SpvId +get_tex_image_to_load( struct ntv_context *ctx, SpvId image_type, bool is_buffer, SpvId load) +{ + return is_buffer || ctx->stage == MESA_SHADER_KERNEL ? + load : + spirv_builder_emit_image(&ctx->builder, image_type, load); +} + +static SpvId +emit_tex_readop(struct ntv_context *ctx, nir_variable *bindless_var, SpvId load, + struct spriv_tex_src *tex_src, SpvId dest_type, bool is_buffer, + nir_variable *var, SpvId image_type, nir_tex_instr *tex) +{ + SpvId actual_dest_type = get_texop_dest_type(ctx, tex); SpvId result; - if (offset) + if (tex_src->offset) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageGatherExtended); + if (tex_src->min_lod) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityMinLod); if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms || tex->op == nir_texop_tg4) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); if (tex->op == nir_texop_tg4) { - if (const_offset) + if (tex_src->const_offset) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageGatherExtended); - result = spirv_builder_emit_image_gather(&ctx->builder, dest_type, - load, coord, emit_uint_const(ctx, 32, 
tex->component), - lod, sample, const_offset, offset, dref); - } else + result = spirv_builder_emit_image_gather(&ctx->builder, actual_dest_type, + load, tex_src, emit_uint_const(ctx, 32, tex->component)); + actual_dest_type = dest_type; + } else { + assert(tex->op == nir_texop_txf_ms || !tex_src->sample); + bool is_ms; + type_to_dim(glsl_get_sampler_dim(glsl_without_array(var->type)), &is_ms); + assert(is_ms || !tex_src->sample); result = spirv_builder_emit_image_fetch(&ctx->builder, actual_dest_type, - image, coord, lod, sample, const_offset, offset); + image, tex_src); + } } else { + if (tex->op == nir_texop_txl) + tex_src->min_lod = 0; result = spirv_builder_emit_image_sample(&ctx->builder, actual_dest_type, load, - coord, - proj != 0, - lod, bias, dref, dx, dy, - const_offset, offset); + tex_src); } - spirv_builder_emit_decoration(&ctx->builder, result, - SpvDecorationRelaxedPrecision); - - if (dref && nir_dest_num_components(tex->dest) > 1 && tex->op != nir_texop_tg4) { - SpvId components[4] = { result, result, result, result }; - result = spirv_builder_emit_composite_construct(&ctx->builder, - dest_type, - components, - 4); + if (!bindless_var && (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW)) { + spirv_builder_emit_decoration(&ctx->builder, result, + SpvDecorationRelaxedPrecision); } - if (nir_dest_bit_size(tex->dest) != 32) { + if (tex->is_sparse) + result = extract_sparse_load(ctx, result, actual_dest_type, &tex->def); + + if (tex->def.bit_size != 32) { /* convert FP32 to FP16 */ result = emit_unop(ctx, SpvOpFConvert, dest_type, result); } - store_dest(ctx, &tex->dest, result, tex->dest_type); + return result; +} + +static void +emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_tex || + tex->op == nir_texop_txb || + tex->op == nir_texop_txl || + tex->op == nir_texop_txd || + tex->op == nir_texop_txf || + tex->op == nir_texop_txf_ms || + tex->op == nir_texop_txs || + tex->op 
== nir_texop_lod || + tex->op == nir_texop_tg4 || + tex->op == nir_texop_texture_samples || + tex->op == nir_texop_query_levels); + assert(tex->texture_index == tex->sampler_index || ctx->stage == MESA_SHADER_KERNEL); + + struct spriv_tex_src tex_src = {0}; + unsigned coord_components = 0; + nir_variable *bindless_var = NULL; + nir_variable *var = NULL; + uint32_t texture_index = tex->texture_index; + + get_tex_srcs(ctx, tex, &bindless_var, &coord_components, &tex_src); + find_sampler_and_texture_index(ctx, &tex_src, bindless_var, &var, &texture_index); + + assert(var); + SpvId image_type = find_image_type(ctx, var); + assert(image_type); + + bool is_buffer = glsl_get_sampler_dim(glsl_without_array(var->type)) == + GLSL_SAMPLER_DIM_BUF; + SpvId sampled_type = is_buffer ? image_type : + spirv_builder_type_sampled_image(&ctx->builder, image_type); + assert(sampled_type); + + SpvId sampler_id = tex_src.bindless ? tex_src.bindless : ctx->samplers[texture_index]; + if (tex_src.tex_offset) { + SpvId ptr = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, sampled_type); + sampler_id = spirv_builder_emit_access_chain(&ctx->builder, ptr, sampler_id, &tex_src.tex_offset, 1); + } + + SpvId load = get_texture_load(ctx, sampler_id, tex, image_type, sampled_type); + + if (tex->is_sparse) + tex->def.num_components--; + SpvId dest_type = get_def_type(ctx, &tex->def, tex->dest_type); + + if (nir_tex_instr_is_query(tex)) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery); + + if (!tex_instr_is_lod_allowed(tex)) + tex_src.lod = 0; + else if (ctx->stage != MESA_SHADER_FRAGMENT && + tex->op == nir_texop_tex && ctx->explicit_lod && !tex_src.lod) + tex_src.lod = emit_float_const(ctx, 32, 0.0); + + if (tex_src.proj && coord_components > 0) + move_tex_proj_to_coord(ctx, coord_components, &tex_src); + + SpvId result = 0; + + switch (tex->op) { + case nir_texop_txs: { + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); + /* Its Dim 
operand must be one of 1D, 2D, 3D, or Cube + * - OpImageQuerySizeLod specification + * + * Additionally, if its Dim is 1D, 2D, 3D, or Cube, + * it must also have either an MS of 1 or a Sampled of 0 or 2. + * - OpImageQuerySize specification + * + * all spirv samplers use these types + */ + if (!tex_src.lod && tex_instr_is_lod_allowed(tex)) + tex_src.lod = emit_uint_const(ctx, 32, 0); + result = spirv_builder_emit_image_query_size(&ctx->builder, + dest_type, image, + tex_src.lod); + break; + } + case nir_texop_query_levels: { + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); + result = spirv_builder_emit_image_query_levels(&ctx->builder, + dest_type, image); + break; + } + case nir_texop_texture_samples: { + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); + result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, + dest_type, image); + break; + } + case nir_texop_lod: { + result = spirv_builder_emit_image_query_lod(&ctx->builder, + dest_type, load, + tex_src.coord); + break; + } + default: + result = emit_tex_readop(ctx, bindless_var, load, &tex_src, + dest_type, is_buffer, var, image_type, tex); + break; + } + + store_def(ctx, tex->def.index, result, tex->dest_type); + + if (tex->is_sparse) + tex->def.num_components++; } static void @@ -3356,7 +3978,7 @@ emit_deref_var(struct ntv_context *ctx, nir_deref_instr *deref) struct hash_entry *he = _mesa_hash_table_search(ctx->vars, deref->var); assert(he); SpvId result = (SpvId)(intptr_t)he->data; - store_dest_raw(ctx, &deref->dest, result); + store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type)); } static void @@ -3365,21 +3987,63 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref) assert(deref->deref_type == nir_deref_type_array); nir_variable *var = nir_deref_instr_get_variable(deref); + if (!nir_src_is_always_uniform(deref->arr.index)) { + if (deref->modes & nir_var_mem_ubo) + spirv_builder_emit_cap(&ctx->builder, + 
SpvCapabilityUniformBufferArrayDynamicIndexing); + + if (deref->modes & nir_var_mem_ssbo) + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilityStorageBufferArrayDynamicIndexing); + + if (deref->modes & (nir_var_uniform | nir_var_image)) { + const struct glsl_type *type = glsl_without_array(var->type); + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); + + if (glsl_type_is_sampler(type)) + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilitySampledImageArrayDynamicIndexing); + else + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilityStorageImageArrayDynamicIndexing); + } + } + SpvStorageClass storage_class = get_storage_class(var); - SpvId base, type; + SpvId type; + nir_alu_type atype = nir_type_uint; + + SpvId base = get_src(ctx, &deref->parent, &atype); + switch (var->data.mode) { + + case nir_var_mem_ubo: + case nir_var_mem_ssbo: + base = get_src(ctx, &deref->parent, &atype); + /* this is either the array<buffers> deref or the array<uint> deref */ + if (glsl_type_is_struct_or_ifc(deref->type)) { + /* array<buffers> */ + type = get_bo_struct_type(ctx, var); + break; + } + /* array<uint> */ + FALLTHROUGH; + case nir_var_function_temp: case nir_var_shader_in: case nir_var_shader_out: - base = get_src(ctx, &deref->parent); + base = get_src(ctx, &deref->parent, &atype); type = get_glsl_type(ctx, deref->type); break; - case nir_var_uniform: { - assert(glsl_type_is_image(glsl_without_array(var->type))); + case nir_var_uniform: + case nir_var_image: { struct hash_entry *he = _mesa_hash_table_search(ctx->vars, var); assert(he); base = (SpvId)(intptr_t)he->data; - type = ctx->image_types[var->data.driver_location]; + const struct glsl_type *gtype = glsl_without_array(var->type); + type = get_image_type(ctx, var, + glsl_type_is_sampler(gtype), + glsl_get_sampler_dim(gtype) == GLSL_SAMPLER_DIM_BUF); break; } @@ -3387,7 +4051,30 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref) unreachable("Unsupported nir_variable_mode\n"); } - SpvId 
index = get_src(ctx, &deref->arr.index); + nir_alu_type itype; + SpvId index = get_src(ctx, &deref->arr.index, &itype); + if (itype == nir_type_float) + index = emit_bitcast(ctx, get_uvec_type(ctx, 32, 1), index); + + if (var->data.mode == nir_var_uniform || var->data.mode == nir_var_image) { + nir_deref_instr *aoa_deref = nir_src_as_deref(deref->parent); + uint32_t inner_stride = glsl_array_size(aoa_deref->type); + + while (aoa_deref->deref_type != nir_deref_type_var) { + assert(aoa_deref->deref_type == nir_deref_type_array); + + SpvId aoa_index = get_src(ctx, &aoa_deref->arr.index, &itype); + if (itype == nir_type_float) + aoa_index = emit_bitcast(ctx, get_uvec_type(ctx, 32, 1), aoa_index); + + aoa_deref = nir_src_as_deref(aoa_deref->parent); + + uint32_t stride = glsl_get_aoa_size(aoa_deref->type) / inner_stride; + aoa_index = emit_binop(ctx, SpvOpIMul, get_uvec_type(ctx, 32, 1), aoa_index, + emit_uint_const(ctx, 32, stride)); + index = emit_binop(ctx, SpvOpIAdd, get_uvec_type(ctx, 32, 1), index, aoa_index); + } + } SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, storage_class, @@ -3398,14 +4085,7 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref) base, &index, 1); /* uint is a bit of a lie here, it's really just an opaque type */ - store_dest(ctx, &deref->dest, result, nir_type_uint); - - /* image ops always need to be able to get the variable to check out sampler types and such */ - if (glsl_type_is_image(glsl_without_array(var->type))) { - uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); - *key = result; - _mesa_hash_table_insert(ctx->image_vars, key, var); - } + store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type)); } static void @@ -3417,17 +4097,21 @@ emit_deref_struct(struct ntv_context *ctx, nir_deref_instr *deref) SpvStorageClass storage_class = get_storage_class(var); SpvId index = emit_uint_const(ctx, 32, deref->strct.index); + SpvId type = (var->data.mode & (nir_var_mem_ubo | 
nir_var_mem_ssbo)) ? + get_bo_array_type(ctx, var) : + get_glsl_type(ctx, deref->type); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, storage_class, - get_glsl_type(ctx, deref->type)); + type); + nir_alu_type atype; SpvId result = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, - get_src(ctx, &deref->parent), + get_src(ctx, &deref->parent, &atype), &index, 1); /* uint is a bit of a lie here, it's really just an opaque type */ - store_dest(ctx, &deref->dest, result, nir_type_uint); + store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type)); } static void @@ -3466,8 +4150,8 @@ emit_block(struct ntv_context *ctx, struct nir_block *block) case nir_instr_type_load_const: emit_load_const(ctx, nir_instr_as_load_const(instr)); break; - case nir_instr_type_ssa_undef: - emit_undef(ctx, nir_instr_as_ssa_undef(instr)); + case nir_instr_type_undef: + emit_undef(ctx, nir_instr_as_undef(instr)); break; case nir_instr_type_tex: emit_tex(ctx, nir_instr_as_tex(instr)); @@ -3498,7 +4182,8 @@ static SpvId get_src_bool(struct ntv_context *ctx, nir_src *src) { assert(nir_src_bit_size(*src) == 1); - return get_src(ctx, src); + nir_alu_type atype; + return get_src(ctx, src, &atype); } static void @@ -3538,6 +4223,7 @@ emit_if(struct ntv_context *ctx, nir_if *if_stmt) static void emit_loop(struct ntv_context *ctx, nir_loop *loop) { + assert(!nir_loop_has_continue_construct(loop)); SpvId header_id = spirv_builder_new_id(&ctx->builder); SpvId begin_id = block_label(ctx, nir_loop_first_block(loop)); SpvId break_id = spirv_builder_new_id(&ctx->builder); @@ -3592,35 +4278,33 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list) } static SpvExecutionMode -get_input_prim_type_mode(uint16_t type) +get_input_prim_type_mode(enum mesa_prim type) { switch (type) { - case GL_POINTS: + case MESA_PRIM_POINTS: return SpvExecutionModeInputPoints; - case GL_LINES: - case GL_LINE_LOOP: - case GL_LINE_STRIP: + case MESA_PRIM_LINES: + case MESA_PRIM_LINE_LOOP: + 
case MESA_PRIM_LINE_STRIP: return SpvExecutionModeInputLines; - case GL_TRIANGLE_STRIP: - case GL_TRIANGLES: - case GL_TRIANGLE_FAN: + case MESA_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLE_FAN: return SpvExecutionModeTriangles; - case GL_QUADS: - case GL_QUAD_STRIP: + case MESA_PRIM_QUADS: + case MESA_PRIM_QUAD_STRIP: return SpvExecutionModeQuads; break; - case GL_POLYGON: + case MESA_PRIM_POLYGON: unreachable("handle polygons in gs"); break; - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_LINE_STRIP_ADJACENCY: return SpvExecutionModeInputLinesAdjacency; - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: return SpvExecutionModeInputTrianglesAdjacency; break; - case GL_ISOLINES: - return SpvExecutionModeIsolines; default: debug_printf("unknown geometry shader input mode %u\n", type); unreachable("error!"); @@ -3630,38 +4314,36 @@ get_input_prim_type_mode(uint16_t type) return 0; } static SpvExecutionMode -get_output_prim_type_mode(uint16_t type) +get_output_prim_type_mode(enum mesa_prim type) { switch (type) { - case GL_POINTS: + case MESA_PRIM_POINTS: return SpvExecutionModeOutputPoints; - case GL_LINES: - case GL_LINE_LOOP: - unreachable("GL_LINES/LINE_LOOP passed as gs output"); + case MESA_PRIM_LINES: + case MESA_PRIM_LINE_LOOP: + unreachable("MESA_PRIM_LINES/LINE_LOOP passed as gs output"); break; - case GL_LINE_STRIP: + case MESA_PRIM_LINE_STRIP: return SpvExecutionModeOutputLineStrip; - case GL_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLE_STRIP: return SpvExecutionModeOutputTriangleStrip; - case GL_TRIANGLES: - case GL_TRIANGLE_FAN: //FIXME: not sure if right for output + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLE_FAN: //FIXME: not sure if right for output return SpvExecutionModeTriangles; - case GL_QUADS: - case GL_QUAD_STRIP: + case MESA_PRIM_QUADS: + case 
MESA_PRIM_QUAD_STRIP: return SpvExecutionModeQuads; - case GL_POLYGON: + case MESA_PRIM_POLYGON: unreachable("handle polygons in gs"); break; - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_LINE_STRIP_ADJACENCY: unreachable("handle line adjacency in gs"); break; - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: unreachable("handle triangle adjacency in gs"); break; - case GL_ISOLINES: - return SpvExecutionModeIsolines; default: debug_printf("unknown geometry shader output mode %u\n", type); unreachable("error!"); @@ -3690,12 +4372,12 @@ get_depth_layout_mode(enum gl_frag_depth_layout depth_layout) } static SpvExecutionMode -get_primitive_mode(uint16_t primitive_mode) +get_primitive_mode(enum tess_primitive_mode primitive_mode) { switch (primitive_mode) { - case GL_TRIANGLES: return SpvExecutionModeTriangles; - case GL_QUADS: return SpvExecutionModeQuads; - case GL_ISOLINES: return SpvExecutionModeIsolines; + case TESS_PRIMITIVE_TRIANGLES: return SpvExecutionModeTriangles; + case TESS_PRIMITIVE_QUADS: return SpvExecutionModeQuads; + case TESS_PRIMITIVE_ISOLINES: return SpvExecutionModeIsolines; default: unreachable("unknown tess prim type!"); } @@ -3717,39 +4399,43 @@ get_spacing(enum gl_tess_spacing spacing) } struct spirv_shader * -nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_version) +nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *sinfo, uint32_t spirv_version) { struct spirv_shader *ret = NULL; struct ntv_context ctx = {0}; ctx.mem_ctx = ralloc_context(NULL); + ctx.nir = s; ctx.builder.mem_ctx = ctx.mem_ctx; assert(spirv_version >= SPIRV_VERSION(1, 0)); ctx.spirv_1_4_interfaces = spirv_version >= SPIRV_VERSION(1, 4); + ctx.bindless_set_idx = sinfo->bindless_set_idx; ctx.glsl_types = _mesa_pointer_hash_table_create(ctx.mem_ctx); - if 
(!ctx.glsl_types) + ctx.bo_array_types = _mesa_pointer_hash_table_create(ctx.mem_ctx); + ctx.bo_struct_types = _mesa_pointer_hash_table_create(ctx.mem_ctx); + if (!ctx.glsl_types || !ctx.bo_array_types || !ctx.bo_struct_types || + !_mesa_hash_table_init(&ctx.image_types, ctx.mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal)) goto fail; spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShader); - if (s->info.image_buffers != 0) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageBuffer); - spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampledBuffer); switch (s->info.stage) { case MESA_SHADER_FRAGMENT: - if (s->info.fs.post_depth_coverage && - BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN)) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleMaskPostDepthCoverage); if (s->info.fs.uses_sample_shading) spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleRateShading); + if (s->info.fs.uses_demote && spirv_version < SPIRV_VERSION(1, 6)) + spirv_builder_emit_extension(&ctx.builder, + "SPV_EXT_demote_to_helper_invocation"); break; case MESA_SHADER_VERTEX: if (BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) || + BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_DRAW_ID) || BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) || BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) { - spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_draw_parameters"); + if (spirv_version < SPIRV_VERSION(1, 3)) + spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_draw_parameters"); spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDrawParameters); } break; @@ -3783,9 +4469,16 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShaderViewportIndexLayerEXT); } } + } else if (s->info.stage == MESA_SHADER_FRAGMENT) { + /* incredibly, this is legal and intended. 
+ * https://github.com/KhronosGroup/SPIRV-Registry/issues/95 + */ + if (s->info.inputs_read & (BITFIELD64_BIT(VARYING_SLOT_LAYER) | + BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID))) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityGeometry); } - if (s->info.num_ssbos) + if (s->info.num_ssbos && spirv_version < SPIRV_VERSION(1, 1)) spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_storage_buffer_storage_class"); if (s->info.stage < MESA_SHADER_FRAGMENT && @@ -3796,46 +4489,40 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_cap(&ctx.builder, SpvCapabilityMultiViewport); } - if (s->info.num_textures) { - spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampled1D); - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageQuery); - } - - if (s->info.num_images) { - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImage1D); - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageQuery); + if (s->info.stage > MESA_SHADER_VERTEX && + s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)) { + if (s->info.stage < MESA_SHADER_GEOMETRY) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShaderViewportIndex); + else + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityMultiViewport); } - if (s->info.bit_sizes_int & 8) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt8); - if (s->info.bit_sizes_int & 16) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt16); - if (s->info.bit_sizes_int & 64) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt64); - - if (s->info.bit_sizes_float & 16) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityFloat16); - if (s->info.bit_sizes_float & 64) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityFloat64); - ctx.stage = s->info.stage; - ctx.so_info = so_info; + ctx.sinfo = sinfo; ctx.GLSL_std_450 = spirv_builder_import(&ctx.builder, "GLSL.std.450"); + ctx.explicit_lod = true; spirv_builder_emit_source(&ctx.builder, SpvSourceLanguageUnknown, 0); - if (s->info.stage == 
MESA_SHADER_COMPUTE) { - SpvAddressingModel model; + SpvAddressingModel model = SpvAddressingModelLogical; + if (gl_shader_stage_is_compute(s->info.stage)) { if (s->info.cs.ptr_size == 32) model = SpvAddressingModelPhysical32; - else if (s->info.cs.ptr_size == 64) - model = SpvAddressingModelPhysical64; - else + else if (s->info.cs.ptr_size == 64) { + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityPhysicalStorageBufferAddresses); + model = SpvAddressingModelPhysicalStorageBuffer64; + } else model = SpvAddressingModelLogical; + } + + if (ctx.sinfo->have_vulkan_memory_model) { + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityVulkanMemoryModel); + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityVulkanMemoryModelDeviceScope); + spirv_builder_emit_mem_model(&ctx.builder, model, + SpvMemoryModelVulkan); + } else { spirv_builder_emit_mem_model(&ctx.builder, model, SpvMemoryModelGLSL450); - } else - spirv_builder_emit_mem_model(&ctx.builder, SpvAddressingModelLogical, - SpvMemoryModelGLSL450); + } if (s->info.stage == MESA_SHADER_FRAGMENT && s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { @@ -3861,6 +4548,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t exec_model = SpvExecutionModelFragment; break; case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: exec_model = SpvExecutionModelGLCompute; break; default: @@ -3868,46 +4556,143 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t } SpvId type_void = spirv_builder_type_void(&ctx.builder); - SpvId type_main = spirv_builder_type_function(&ctx.builder, type_void, - NULL, 0); + SpvId type_void_func = spirv_builder_type_function(&ctx.builder, type_void, + NULL, 0); SpvId entry_point = spirv_builder_new_id(&ctx.builder); spirv_builder_emit_name(&ctx.builder, entry_point, "main"); ctx.vars = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); - ctx.image_vars = _mesa_hash_table_create(ctx.mem_ctx, 
_mesa_hash_u32, - _mesa_key_u32_equal); - - ctx.so_outputs = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32, - _mesa_key_u32_equal); - nir_foreach_variable_with_modes(var, s, nir_var_mem_push_const) input_var_init(&ctx, var); nir_foreach_shader_in_variable(var, s) emit_input(&ctx, var); - int max_output = -1; + int max_output = 0; nir_foreach_shader_out_variable(var, s) { /* ignore SPIR-V built-ins, tagged with a sentinel value */ if (var->data.driver_location != UINT_MAX) { assert(var->data.driver_location < INT_MAX); - max_output = MAX2(max_output, (int)var->data.driver_location); + unsigned extent = glsl_count_attribute_slots(var->type, false); + max_output = MAX2(max_output, (int)var->data.driver_location + extent); } emit_output(&ctx, var); } + uint32_t tcs_vertices_out_word = 0; + + unsigned ubo_counter[2] = {0}; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) + ubo_counter[var->data.driver_location != 0]++; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) + emit_bo(&ctx, var, ubo_counter[var->data.driver_location != 0] > 1); + + unsigned ssbo_counter = 0; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo) + ssbo_counter++; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo) + emit_bo(&ctx, var, ssbo_counter > 1); + + nir_foreach_variable_with_modes(var, s, nir_var_image) + ctx.image_var[var->data.driver_location] = var; + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(var->type))) { + if (var->data.descriptor_set == ctx.bindless_set_idx) + ctx.bindless_sampler_var[var->data.driver_location] = var; + else + ctx.sampler_var[var->data.driver_location] = var; + ctx.last_sampler = MAX2(ctx.last_sampler, var->data.driver_location); + } + } + if (sinfo->sampler_mask) { + assert(s->info.stage == MESA_SHADER_KERNEL); + int desc_set = -1; + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(var->type))) { + desc_set 
= var->data.descriptor_set; + break; + } + } + assert(desc_set != -1); + u_foreach_bit(sampler, sinfo->sampler_mask) + emit_sampler(&ctx, sampler, desc_set); + } + nir_foreach_variable_with_modes(var, s, nir_var_image | nir_var_uniform) { + const struct glsl_type *type = glsl_without_array(var->type); + if (glsl_type_is_sampler(type)) + emit_image(&ctx, var, get_bare_image_type(&ctx, var, true)); + else if (glsl_type_is_image(type)) + emit_image(&ctx, var, get_bare_image_type(&ctx, var, false)); + } + + if (sinfo->float_controls.flush_denorms) { + unsigned execution_mode = s->info.float_controls_execution_mode; + bool flush_16_bit = nir_is_denorm_flush_to_zero(execution_mode, 16); + bool flush_32_bit = nir_is_denorm_flush_to_zero(execution_mode, 32); + bool flush_64_bit = nir_is_denorm_flush_to_zero(execution_mode, 64); + bool preserve_16_bit = nir_is_denorm_preserve(execution_mode, 16); + bool preserve_32_bit = nir_is_denorm_preserve(execution_mode, 32); + bool preserve_64_bit = nir_is_denorm_preserve(execution_mode, 64); + bool emit_cap_flush = false; + bool emit_cap_preserve = false; + + if (!sinfo->float_controls.denorms_all_independence) { + bool flush = flush_16_bit && flush_64_bit; + bool preserve = preserve_16_bit && preserve_64_bit; + + if (!sinfo->float_controls.denorms_32_bit_independence) { + flush = flush && flush_32_bit; + preserve = preserve && preserve_32_bit; + + flush_32_bit = flush; + preserve_32_bit = preserve; + } + + flush_16_bit = flush; + flush_64_bit = flush; + preserve_16_bit = preserve; + preserve_64_bit = preserve; + } + + if (flush_16_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(0)) { + emit_cap_flush = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormFlushToZero, 16); + } + if (flush_32_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(1)) { + emit_cap_flush = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormFlushToZero, 
32); + } + if (flush_64_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(2)) { + emit_cap_flush = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormFlushToZero, 64); + } - if (so_info) - emit_so_info(&ctx, so_info, max_output + 1); + if (preserve_16_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(0)) { + emit_cap_preserve = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormPreserve, 16); + } + if (preserve_32_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(1)) { + emit_cap_preserve = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormPreserve, 32); + } + if (preserve_64_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(2)) { + emit_cap_preserve = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormPreserve, 64); + } - /* we have to reverse iterate to match what's done in zink_compiler.c */ - foreach_list_typed_reverse(nir_variable, var, node, &s->variables) - if (_nir_shader_variable_has_mode(var, nir_var_uniform | - nir_var_mem_ubo | - nir_var_mem_ssbo)) - emit_uniform(&ctx, var); + if (emit_cap_flush) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDenormFlushToZero); + if (emit_cap_preserve) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDenormPreserve); + } switch (s->info.stage) { case MESA_SHADER_FRAGMENT: @@ -3924,6 +4709,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t SpvExecutionModeEarlyFragmentTests); if (s->info.fs.post_depth_coverage) { spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_post_depth_coverage"); + spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleMaskPostDepthCoverage); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModePostDepthCoverage); } @@ -3945,13 +4731,13 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t 
spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModeSampleInterlockUnorderedEXT); break; case MESA_SHADER_TESS_CTRL: - spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, - SpvExecutionModeOutputVertices, - s->info.tess.tcs_vertices_out); + tcs_vertices_out_word = spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeOutputVertices, + s->info.tess.tcs_vertices_out); break; case MESA_SHADER_TESS_EVAL: spirv_builder_emit_exec_mode(&ctx.builder, entry_point, - get_primitive_mode(s->info.tess.primitive_mode)); + get_primitive_mode(s->info.tess._primitive_mode)); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, s->info.tess.ccw ? SpvExecutionModeVertexOrderCcw : SpvExecutionModeVertexOrderCw); @@ -3970,12 +4756,10 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t s->info.gs.invocations); spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, SpvExecutionModeOutputVertices, - s->info.gs.vertices_out); + MAX2(s->info.gs.vertices_out, 1)); break; + case MESA_SHADER_KERNEL: case MESA_SHADER_COMPUTE: - if (s->info.shared_size) - create_shared_block(&ctx, s->info.shared_size); - if (s->info.workgroup_size[0] || s->info.workgroup_size[1] || s->info.workgroup_size[2]) spirv_builder_emit_exec_mode_literal3(&ctx.builder, entry_point, SpvExecutionModeLocalSize, (uint32_t[3]){(uint32_t)s->info.workgroup_size[0], (uint32_t)s->info.workgroup_size[1], @@ -3990,9 +4774,31 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_name(&ctx.builder, sizes[i], names[i]); } SpvId var_type = get_uvec_type(&ctx, 32, 3); + // Even when using LocalSizeId this need to be initialized for nir_intrinsic_load_workgroup_size ctx.local_group_size_var = spirv_builder_spec_const_composite(&ctx.builder, var_type, sizes, 3); - spirv_builder_emit_name(&ctx.builder, ctx.local_group_size_var, "gl_LocalGroupSize"); - spirv_builder_emit_builtin(&ctx.builder, 
ctx.local_group_size_var, SpvBuiltInWorkgroupSize); + spirv_builder_emit_name(&ctx.builder, ctx.local_group_size_var, "gl_LocalGroupSizeARB"); + + /* WorkgroupSize is deprecated in SPIR-V 1.6 */ + if (spirv_version >= SPIRV_VERSION(1, 6)) { + spirv_builder_emit_exec_mode_id3(&ctx.builder, entry_point, + SpvExecutionModeLocalSizeId, + sizes); + } else { + spirv_builder_emit_builtin(&ctx.builder, ctx.local_group_size_var, SpvBuiltInWorkgroupSize); + } + } + if (s->info.cs.has_variable_shared_mem) { + ctx.shared_mem_size = spirv_builder_spec_const_uint(&ctx.builder, 32); + spirv_builder_emit_specid(&ctx.builder, ctx.shared_mem_size, ZINK_VARIABLE_SHARED_MEM); + spirv_builder_emit_name(&ctx.builder, ctx.shared_mem_size, "variable_shared_mem"); + } + if (s->info.cs.derivative_group) { + SpvCapability caps[] = { 0, SpvCapabilityComputeDerivativeGroupQuadsNV, SpvCapabilityComputeDerivativeGroupLinearNV }; + SpvExecutionMode modes[] = { 0, SpvExecutionModeDerivativeGroupQuadsNV, SpvExecutionModeDerivativeGroupLinearNV }; + spirv_builder_emit_extension(&ctx.builder, "SPV_NV_compute_shader_derivatives"); + spirv_builder_emit_cap(&ctx.builder, caps[s->info.cs.derivative_group]); + spirv_builder_emit_exec_mode(&ctx.builder, entry_point, modes[s->info.cs.derivative_group]); + ctx.explicit_lod = false; } break; default: @@ -4002,31 +4808,55 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_ballot"); } - if (s->info.has_transform_feedback_varyings) { + if (s->info.has_transform_feedback_varyings && s->info.stage != MESA_SHADER_FRAGMENT) { spirv_builder_emit_cap(&ctx.builder, SpvCapabilityTransformFeedback); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModeXfb); } + + if (s->info.stage == MESA_SHADER_FRAGMENT && s->info.fs.uses_discard) { + ctx.discard_func = spirv_builder_new_id(&ctx.builder); + 
spirv_builder_emit_name(&ctx.builder, ctx.discard_func, "discard"); + spirv_builder_function(&ctx.builder, ctx.discard_func, type_void, + SpvFunctionControlMaskNone, + type_void_func); + SpvId label = spirv_builder_new_id(&ctx.builder); + spirv_builder_label(&ctx.builder, label); + + /* kill is deprecated in SPIR-V 1.6, use terminate instead */ + if (spirv_version >= SPIRV_VERSION(1, 6)) + spirv_builder_emit_terminate(&ctx.builder); + else + spirv_builder_emit_kill(&ctx.builder); + + spirv_builder_function_end(&ctx.builder); + } + spirv_builder_function(&ctx.builder, entry_point, type_void, - SpvFunctionControlMaskNone, - type_main); + SpvFunctionControlMaskNone, + type_void_func); nir_function_impl *entry = nir_shader_get_entrypoint(s); nir_metadata_require(entry, nir_metadata_block_index); - ctx.defs = ralloc_array_size(ctx.mem_ctx, - sizeof(SpvId), entry->ssa_alloc); - if (!ctx.defs) + ctx.defs = rzalloc_array_size(ctx.mem_ctx, + sizeof(SpvId), entry->ssa_alloc); + ctx.def_types = ralloc_array_size(ctx.mem_ctx, + sizeof(nir_alu_type), entry->ssa_alloc); + if (!ctx.defs || !ctx.def_types) goto fail; + if (sinfo->have_sparse) { + spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySparseResidency); + /* this could be huge, so only alloc if needed since it's extremely unlikely to + * ever be used by anything except cts + */ + ctx.resident_defs = rzalloc_array_size(ctx.mem_ctx, + sizeof(SpvId), entry->ssa_alloc); + if (!ctx.resident_defs) + goto fail; + } ctx.num_defs = entry->ssa_alloc; - nir_index_local_regs(entry); - ctx.regs = ralloc_array_size(ctx.mem_ctx, - sizeof(SpvId), entry->reg_alloc); - if (!ctx.regs) - goto fail; - ctx.num_regs = entry->reg_alloc; - SpvId *block_ids = ralloc_array_size(ctx.mem_ctx, sizeof(SpvId), entry->num_blocks); if (!block_ids) @@ -4040,22 +4870,21 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t /* emit a block only for the variable declarations */ start_block(&ctx, 
spirv_builder_new_id(&ctx.builder)); - foreach_list_typed(nir_register, reg, node, &entry->registers) { - SpvId type = get_vec_from_bit_size(&ctx, reg->bit_size, reg->num_components); - SpvId pointer_type = spirv_builder_type_pointer(&ctx.builder, - SpvStorageClassFunction, - type); - SpvId var = spirv_builder_emit_var(&ctx.builder, pointer_type, - SpvStorageClassFunction); + spirv_builder_begin_local_vars(&ctx.builder); - ctx.regs[reg->index] = var; + nir_foreach_reg_decl(reg, entry) { + if (nir_intrinsic_bit_size(reg) == 1) + init_reg(&ctx, reg, nir_type_bool); } - emit_cf_list(&ctx, &entry->body); + nir_foreach_variable_with_modes(var, s, nir_var_shader_temp) + emit_shader_temp(&ctx, var); - /* vertex/tess shader emits copied xfb outputs at the end of the shader */ - if (so_info && (ctx.stage == MESA_SHADER_VERTEX || ctx.stage == MESA_SHADER_TESS_EVAL)) - emit_so_outputs(&ctx, so_info); + nir_foreach_function_temp_variable(var, entry) + emit_temp(&ctx, var); + + + emit_cf_list(&ctx, &entry->body); spirv_builder_return(&ctx.builder); // doesn't belong here, but whatevz spirv_builder_function_end(&ctx.builder); @@ -4074,7 +4903,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t if (!ret->words) goto fail; - ret->num_words = spirv_builder_get_words(&ctx.builder, ret->words, num_words, spirv_version); + ret->num_words = spirv_builder_get_words(&ctx.builder, ret->words, num_words, spirv_version, &tcs_vertices_out_word); + ret->tcs_vertices_out_word = tcs_vertices_out_word; assert(ret->num_words == num_words); ralloc_free(ctx.mem_ctx); diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h index 04b559473e4..67a56464d19 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h @@ -26,7 +26,7 @@ #include <stdlib.h> #include <stdint.h> -#include <vulkan/vulkan.h> +#include <vulkan/vulkan_core.h> #include 
"compiler/nir/nir.h" #include "compiler/shader_enums.h" @@ -39,13 +39,14 @@ struct spirv_shader { uint32_t *words; size_t num_words; + uint32_t tcs_vertices_out_word; }; struct nir_shader; struct pipe_stream_output_info; struct spirv_shader * -nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, +nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *so_info, uint32_t spirv_version); void diff --git a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c index 339af44f4c6..515d659e9af 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c +++ b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c @@ -32,6 +32,7 @@ #include "util/hash_table.h" #define XXH_INLINE_ALL #include "util/xxhash.h" +#include "vk_util.h" #include <stdbool.h> #include <inttypes.h> @@ -62,11 +63,12 @@ spirv_buffer_prepare(struct spirv_buffer *b, void *mem_ctx, size_t needed) return spirv_buffer_grow(b, mem_ctx, needed); } -static inline void +static inline uint32_t spirv_buffer_emit_word(struct spirv_buffer *b, uint32_t word) { assert(b->num_words < b->room); - b->words[b->num_words++] = word; + b->words[b->num_words] = word; + return b->num_words++; } static int @@ -146,10 +148,10 @@ spirv_builder_emit_entry_point(struct spirv_builder *b, b->entry_points.words[pos] |= (3 + len + num_interfaces) << 16; spirv_buffer_prepare(&b->entry_points, b->mem_ctx, num_interfaces); for (int i = 0; i < num_interfaces; ++i) - spirv_buffer_emit_word(&b->entry_points, interfaces[i]); + spirv_buffer_emit_word(&b->entry_points, interfaces[i]); } -void +uint32_t spirv_builder_emit_exec_mode_literal(struct spirv_builder *b, SpvId entry_point, SpvExecutionMode exec_mode, uint32_t param) { @@ -157,7 +159,7 @@ spirv_builder_emit_exec_mode_literal(struct spirv_builder *b, SpvId entry_point, spirv_buffer_emit_word(&b->exec_modes, SpvOpExecutionMode | (4 << 16)); spirv_buffer_emit_word(&b->exec_modes, entry_point); 
spirv_buffer_emit_word(&b->exec_modes, exec_mode); - spirv_buffer_emit_word(&b->exec_modes, param); + return spirv_buffer_emit_word(&b->exec_modes, param); } void @@ -173,6 +175,18 @@ spirv_builder_emit_exec_mode_literal3(struct spirv_builder *b, SpvId entry_point } void +spirv_builder_emit_exec_mode_id3(struct spirv_builder *b, SpvId entry_point, + SpvExecutionMode exec_mode, SpvId param[3]) +{ + spirv_buffer_prepare(&b->exec_modes, b->mem_ctx, 6); + spirv_buffer_emit_word(&b->exec_modes, SpvOpExecutionModeId | (6 << 16)); + spirv_buffer_emit_word(&b->exec_modes, entry_point); + spirv_buffer_emit_word(&b->exec_modes, exec_mode); + for (unsigned i = 0; i < 3; i++) + spirv_buffer_emit_word(&b->exec_modes, param[i]); +} + +void spirv_builder_emit_exec_mode(struct spirv_builder *b, SpvId entry_point, SpvExecutionMode exec_mode) { @@ -216,6 +230,14 @@ spirv_builder_emit_decoration(struct spirv_builder *b, SpvId target, } void +spirv_builder_emit_rounding_mode(struct spirv_builder *b, SpvId target, + SpvFPRoundingMode rounding) +{ + uint32_t args[] = { rounding }; + emit_decoration(b, target, SpvDecorationFPRoundingMode, args, ARRAY_SIZE(args)); +} + +void spirv_builder_emit_input_attachment_index(struct spirv_builder *b, SpvId target, uint32_t id) { uint32_t args[] = { id }; @@ -254,32 +276,32 @@ spirv_builder_emit_builtin(struct spirv_builder *b, SpvId target, } void -spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream) +spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream, bool multistream) { unsigned words = 1; SpvOp op = SpvOpEmitVertex; - if (stream > 0) { + if (multistream) { op = SpvOpEmitStreamVertex; words++; } spirv_buffer_prepare(&b->instructions, b->mem_ctx, words); spirv_buffer_emit_word(&b->instructions, op | (words << 16)); - if (stream) + if (multistream) spirv_buffer_emit_word(&b->instructions, spirv_builder_const_uint(b, 32, stream)); } void -spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream) 
+spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream, bool multistream) { unsigned words = 1; SpvOp op = SpvOpEndPrimitive; - if (stream > 0) { + if (multistream || stream > 0) { op = SpvOpEndStreamPrimitive; words++; } spirv_buffer_prepare(&b->instructions, b->mem_ctx, words); spirv_buffer_emit_word(&b->instructions, op | (words << 16)); - if (stream) + if (multistream || stream > 0) spirv_buffer_emit_word(&b->instructions, spirv_builder_const_uint(b, 32, stream)); } @@ -403,6 +425,28 @@ spirv_builder_function_end(struct spirv_builder *b) spirv_buffer_emit_word(&b->instructions, SpvOpFunctionEnd | (1 << 16)); } +SpvId +spirv_builder_function_call(struct spirv_builder *b, SpvId result_type, + SpvId function, const SpvId arguments[], + size_t num_arguments) +{ + SpvId result = spirv_builder_new_id(b); + + int words = 4 + num_arguments; + spirv_buffer_prepare(&b->instructions, b->mem_ctx, words); + spirv_buffer_emit_word(&b->instructions, + SpvOpFunctionCall | (words << 16)); + spirv_buffer_emit_word(&b->instructions, result_type); + spirv_buffer_emit_word(&b->instructions, result); + spirv_buffer_emit_word(&b->instructions, function); + + for (int i = 0; i < num_arguments; ++i) + spirv_buffer_emit_word(&b->instructions, arguments[i]); + + return result; +} + + void spirv_builder_label(struct spirv_builder *b, SpvId label) { @@ -425,6 +469,17 @@ spirv_builder_emit_load(struct spirv_builder *b, SpvId result_type, return spirv_builder_emit_unop(b, SpvOpLoad, result_type, pointer); } +SpvId +spirv_builder_emit_load_aligned(struct spirv_builder *b, SpvId result_type, SpvId pointer, unsigned alignment, bool coherent) +{ + if (coherent) { + SpvId scope = spirv_builder_const_int(b, 32, SpvScopeDevice); + return spirv_builder_emit_quadop(b, SpvOpLoad, result_type, pointer, SpvMemoryAccessAlignedMask | SpvMemoryAccessNonPrivatePointerMask | SpvMemoryAccessMakePointerVisibleMask, alignment, scope); + } else { + return spirv_builder_emit_triop(b, SpvOpLoad, 
result_type, pointer, SpvMemoryAccessAlignedMask, alignment); + } +} + void spirv_builder_emit_store(struct spirv_builder *b, SpvId pointer, SpvId object) { @@ -435,6 +490,30 @@ spirv_builder_emit_store(struct spirv_builder *b, SpvId pointer, SpvId object) } void +spirv_builder_emit_store_aligned(struct spirv_builder *b, SpvId pointer, SpvId object, unsigned alignment, bool coherent) +{ + unsigned size = 5; + SpvMemoryAccessMask mask = SpvMemoryAccessAlignedMask; + + if (coherent) { + mask |= SpvMemoryAccessNonPrivatePointerMask | SpvMemoryAccessMakePointerAvailableMask; + size++; + } + + spirv_buffer_prepare(&b->instructions, b->mem_ctx, size); + spirv_buffer_emit_word(&b->instructions, SpvOpStore | (size << 16)); + spirv_buffer_emit_word(&b->instructions, pointer); + spirv_buffer_emit_word(&b->instructions, object); + spirv_buffer_emit_word(&b->instructions, mask); + spirv_buffer_emit_word(&b->instructions, alignment); + + if (coherent) { + SpvId scope = spirv_builder_const_int(b, 32, SpvScopeDevice); + spirv_buffer_emit_word(&b->instructions, scope); + } +} + +void spirv_builder_emit_atomic_store(struct spirv_builder *b, SpvId pointer, SpvScope scope, SpvMemorySemanticsMask semantics, SpvId object) { @@ -492,12 +571,13 @@ SpvId spirv_builder_emit_unop(struct spirv_builder *b, SpvOp op, SpvId result_type, SpvId operand) { + struct spirv_buffer *buf = op == SpvOpSpecConstant ? 
&b->types_const_defs : &b->instructions; SpvId result = spirv_builder_new_id(b); - spirv_buffer_prepare(&b->instructions, b->mem_ctx, 4); - spirv_buffer_emit_word(&b->instructions, op | (4 << 16)); - spirv_buffer_emit_word(&b->instructions, result_type); - spirv_buffer_emit_word(&b->instructions, result); - spirv_buffer_emit_word(&b->instructions, operand); + spirv_buffer_prepare(buf, b->mem_ctx, 4); + spirv_buffer_emit_word(buf, op | (4 << 16)); + spirv_buffer_emit_word(buf, result_type); + spirv_buffer_emit_word(buf, result); + spirv_buffer_emit_word(buf, operand); return result; } @@ -519,14 +599,16 @@ SpvId spirv_builder_emit_triop(struct spirv_builder *b, SpvOp op, SpvId result_type, SpvId operand0, SpvId operand1, SpvId operand2) { + struct spirv_buffer *buf = op == SpvOpSpecConstantOp ? &b->types_const_defs : &b->instructions; + SpvId result = spirv_builder_new_id(b); - spirv_buffer_prepare(&b->instructions, b->mem_ctx, 6); - spirv_buffer_emit_word(&b->instructions, op | (6 << 16)); - spirv_buffer_emit_word(&b->instructions, result_type); - spirv_buffer_emit_word(&b->instructions, result); - spirv_buffer_emit_word(&b->instructions, operand0); - spirv_buffer_emit_word(&b->instructions, operand1); - spirv_buffer_emit_word(&b->instructions, operand2); + spirv_buffer_prepare(buf, b->mem_ctx, 6); + spirv_buffer_emit_word(buf, op | (6 << 16)); + spirv_buffer_emit_word(buf, result_type); + spirv_buffer_emit_word(buf, result); + spirv_buffer_emit_word(buf, operand0); + spirv_buffer_emit_word(buf, operand1); + spirv_buffer_emit_word(buf, operand2); return result; } @@ -534,15 +616,17 @@ SpvId spirv_builder_emit_quadop(struct spirv_builder *b, SpvOp op, SpvId result_type, SpvId operand0, SpvId operand1, SpvId operand2, SpvId operand3) { + struct spirv_buffer *buf = op == SpvOpSpecConstantOp ? 
&b->types_const_defs : &b->instructions; + SpvId result = spirv_builder_new_id(b); - spirv_buffer_prepare(&b->instructions, b->mem_ctx, 7); - spirv_buffer_emit_word(&b->instructions, op | (7 << 16)); - spirv_buffer_emit_word(&b->instructions, result_type); - spirv_buffer_emit_word(&b->instructions, result); - spirv_buffer_emit_word(&b->instructions, operand0); - spirv_buffer_emit_word(&b->instructions, operand1); - spirv_buffer_emit_word(&b->instructions, operand2); - spirv_buffer_emit_word(&b->instructions, operand3); + spirv_buffer_prepare(buf, b->mem_ctx, 7); + spirv_buffer_emit_word(buf, op | (7 << 16)); + spirv_buffer_emit_word(buf, result_type); + spirv_buffer_emit_word(buf, result); + spirv_buffer_emit_word(buf, operand0); + spirv_buffer_emit_word(buf, operand1); + spirv_buffer_emit_word(buf, operand2); + spirv_buffer_emit_word(buf, operand3); return result; } @@ -551,17 +635,19 @@ spirv_builder_emit_hexop(struct spirv_builder *b, SpvOp op, SpvId result_type, SpvId operand0, SpvId operand1, SpvId operand2, SpvId operand3, SpvId operand4, SpvId operand5) { + struct spirv_buffer *buf = op == SpvOpSpecConstantOp ? 
&b->types_const_defs : &b->instructions; + SpvId result = spirv_builder_new_id(b); - spirv_buffer_prepare(&b->instructions, b->mem_ctx, 9); - spirv_buffer_emit_word(&b->instructions, op | (9 << 16)); - spirv_buffer_emit_word(&b->instructions, result_type); - spirv_buffer_emit_word(&b->instructions, result); - spirv_buffer_emit_word(&b->instructions, operand0); - spirv_buffer_emit_word(&b->instructions, operand1); - spirv_buffer_emit_word(&b->instructions, operand2); - spirv_buffer_emit_word(&b->instructions, operand3); - spirv_buffer_emit_word(&b->instructions, operand4); - spirv_buffer_emit_word(&b->instructions, operand5); + spirv_buffer_prepare(buf, b->mem_ctx, 9); + spirv_buffer_emit_word(buf, op | (9 << 16)); + spirv_buffer_emit_word(buf, result_type); + spirv_buffer_emit_word(buf, result); + spirv_buffer_emit_word(buf, operand0); + spirv_buffer_emit_word(buf, operand1); + spirv_buffer_emit_word(buf, operand2); + spirv_buffer_emit_word(buf, operand3); + spirv_buffer_emit_word(buf, operand4); + spirv_buffer_emit_word(buf, operand5); return result; } @@ -735,63 +821,112 @@ spirv_builder_emit_kill(struct spirv_builder *b) spirv_buffer_emit_word(&b->instructions, SpvOpKill | (1 << 16)); } +void +spirv_builder_emit_terminate(struct spirv_builder *b) +{ + spirv_buffer_prepare(&b->instructions, b->mem_ctx, 1); + spirv_buffer_emit_word(&b->instructions, SpvOpTerminateInvocation | (1 << 16)); +} + +void +spirv_builder_emit_demote(struct spirv_builder *b) +{ + spirv_buffer_prepare(&b->instructions, b->mem_ctx, 1); + spirv_buffer_emit_word(&b->instructions, SpvOpDemoteToHelperInvocation | (1 << 16)); +} + +SpvId +spirv_is_helper_invocation(struct spirv_builder *b) +{ + SpvId result = spirv_builder_new_id(b); + SpvId result_type = spirv_builder_type_bool(b); + + int words = 3; + spirv_buffer_prepare(&b->instructions, b->mem_ctx, words); + spirv_buffer_emit_word(&b->instructions, SpvOpIsHelperInvocationEXT | (words << 16)); + spirv_buffer_emit_word(&b->instructions, 
result_type); + spirv_buffer_emit_word(&b->instructions, result); + return result; +} + SpvId spirv_builder_emit_vote(struct spirv_builder *b, SpvOp op, SpvId src) { return spirv_builder_emit_binop(b, op, spirv_builder_type_bool(b), - spirv_builder_const_uint(b, 32, SpvScopeWorkgroup), src); + spirv_builder_const_uint(b, 32, SpvScopeSubgroup), src); +} + +static SpvId +sparse_wrap_result_type(struct spirv_builder *b, SpvId result_type) +{ + SpvId types[2]; + types[0] = spirv_builder_type_uint(b, 32); + types[1] = result_type; + return spirv_builder_type_struct(b, types, 2); } SpvId spirv_builder_emit_image_sample(struct spirv_builder *b, SpvId result_type, SpvId sampled_image, - SpvId coordinate, - bool proj, - SpvId lod, - SpvId bias, - SpvId dref, - SpvId dx, - SpvId dy, - SpvId const_offset, - SpvId offset) + const struct spriv_tex_src *src) { SpvId result = spirv_builder_new_id(b); - int opcode = SpvOpImageSampleImplicitLod; + bool proj = src->proj != 0; + int operands = 5; - if (proj) - opcode += SpvOpImageSampleProjImplicitLod - SpvOpImageSampleImplicitLod; - if (lod || (dx && dy)) - opcode += SpvOpImageSampleExplicitLod - SpvOpImageSampleImplicitLod; - if (dref) { - opcode += SpvOpImageSampleDrefImplicitLod - SpvOpImageSampleImplicitLod; - operands++; + int opcode; + if (src->sparse) { + opcode = SpvOpImageSparseSampleImplicitLod; + if (proj) + opcode += SpvOpImageSparseSampleProjImplicitLod - SpvOpImageSparseSampleImplicitLod; + if (src->lod || (src->dx && src->dy)) + opcode += SpvOpImageSparseSampleExplicitLod - SpvOpImageSparseSampleImplicitLod; + if (src->dref) { + opcode += SpvOpImageSparseSampleDrefImplicitLod - SpvOpImageSparseSampleImplicitLod; + operands++; + } + result_type = sparse_wrap_result_type(b, result_type); + } else { + opcode = SpvOpImageSampleImplicitLod; + if (proj) + opcode += SpvOpImageSampleProjImplicitLod - SpvOpImageSampleImplicitLod; + if (src->lod || (src->dx && src->dy)) + opcode += SpvOpImageSampleExplicitLod - 
SpvOpImageSampleImplicitLod; + if (src->dref) { + opcode += SpvOpImageSampleDrefImplicitLod - SpvOpImageSampleImplicitLod; + operands++; + } } SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone; - SpvId extra_operands[5]; + SpvId extra_operands[6]; int num_extra_operands = 1; - if (bias) { - extra_operands[num_extra_operands++] = bias; + if (src->bias) { + extra_operands[num_extra_operands++] = src->bias; operand_mask |= SpvImageOperandsBiasMask; } - if (lod) { - extra_operands[num_extra_operands++] = lod; + if (src->lod) { + extra_operands[num_extra_operands++] = src->lod; operand_mask |= SpvImageOperandsLodMask; - } else if (dx && dy) { - extra_operands[num_extra_operands++] = dx; - extra_operands[num_extra_operands++] = dy; + } else if (src->dx && src->dy) { + extra_operands[num_extra_operands++] = src->dx; + extra_operands[num_extra_operands++] = src->dy; operand_mask |= SpvImageOperandsGradMask; } - assert(!(const_offset && offset)); - if (const_offset) { - extra_operands[num_extra_operands++] = const_offset; + assert(!(src->const_offset && src->offset)); + if (src->const_offset) { + extra_operands[num_extra_operands++] = src->const_offset; operand_mask |= SpvImageOperandsConstOffsetMask; - } else if (offset) { - extra_operands[num_extra_operands++] = offset; + } else if (src->offset) { + extra_operands[num_extra_operands++] = src->offset; operand_mask |= SpvImageOperandsOffsetMask; } + if (src->min_lod) { + extra_operands[num_extra_operands++] = src->min_lod; + operand_mask |= SpvImageOperandsMinLodMask; + } /* finalize num_extra_operands / extra_operands */ extra_operands[0] = operand_mask; @@ -801,9 +936,9 @@ spirv_builder_emit_image_sample(struct spirv_builder *b, spirv_buffer_emit_word(&b->instructions, result_type); spirv_buffer_emit_word(&b->instructions, result); spirv_buffer_emit_word(&b->instructions, sampled_image); - spirv_buffer_emit_word(&b->instructions, coordinate); - if (dref) - spirv_buffer_emit_word(&b->instructions, dref); + 
spirv_buffer_emit_word(&b->instructions, src->coord); + if (src->dref) + spirv_buffer_emit_word(&b->instructions, src->dref); for (int i = 0; i < num_extra_operands; ++i) spirv_buffer_emit_word(&b->instructions, extra_operands[i]); return result; @@ -842,13 +977,16 @@ spirv_builder_emit_image_read(struct spirv_builder *b, SpvId coordinate, SpvId lod, SpvId sample, - SpvId offset) + SpvId offset, + bool sparse) { SpvId result = spirv_builder_new_id(b); SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone; SpvId extra_operands[5]; int num_extra_operands = 1; + if (sparse) + result_type = sparse_wrap_result_type(b, result_type); if (lod) { extra_operands[num_extra_operands++] = lod; operand_mask |= SpvImageOperandsLodMask; @@ -865,7 +1003,7 @@ spirv_builder_emit_image_read(struct spirv_builder *b, extra_operands[0] = operand_mask; spirv_buffer_prepare(&b->instructions, b->mem_ctx, 5 + num_extra_operands); - spirv_buffer_emit_word(&b->instructions, SpvOpImageRead | + spirv_buffer_emit_word(&b->instructions, (sparse ? SpvOpImageSparseRead : SpvOpImageRead) | ((5 + num_extra_operands) << 16)); spirv_buffer_emit_word(&b->instructions, result_type); spirv_buffer_emit_word(&b->instructions, result); @@ -915,40 +1053,37 @@ spirv_builder_emit_image_write(struct spirv_builder *b, SpvId spirv_builder_emit_image_gather(struct spirv_builder *b, - SpvId result_type, - SpvId image, - SpvId coordinate, - SpvId component, - SpvId lod, - SpvId sample, - SpvId const_offset, - SpvId offset, - SpvId dref) + SpvId result_type, + SpvId image, + const struct spriv_tex_src *src, + SpvId component) { SpvId result = spirv_builder_new_id(b); - SpvId op = SpvOpImageGather; + SpvId op = src->sparse ? 
SpvOpImageSparseGather : SpvOpImageGather; SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone; SpvId extra_operands[4]; int num_extra_operands = 1; - if (lod) { - extra_operands[num_extra_operands++] = lod; + if (src->lod) { + extra_operands[num_extra_operands++] = src->lod; operand_mask |= SpvImageOperandsLodMask; } - if (sample) { - extra_operands[num_extra_operands++] = sample; + if (src->sample) { + extra_operands[num_extra_operands++] = src->sample; operand_mask |= SpvImageOperandsSampleMask; } - assert(!(const_offset && offset)); - if (const_offset) { - extra_operands[num_extra_operands++] = const_offset; + assert(!(src->const_offset && src->offset)); + if (src->const_offset) { + extra_operands[num_extra_operands++] = src->const_offset; operand_mask |= SpvImageOperandsConstOffsetMask; - } else if (offset) { - extra_operands[num_extra_operands++] = offset; + } else if (src->offset) { + extra_operands[num_extra_operands++] = src->offset; operand_mask |= SpvImageOperandsOffsetMask; } - if (dref) - op = SpvOpImageDrefGather; + if (src->dref) + op = src->sparse ? 
SpvOpImageSparseDrefGather : SpvOpImageDrefGather; + if (src->sparse) + result_type = sparse_wrap_result_type(b, result_type); /* finalize num_extra_operands / extra_operands */ extra_operands[0] = operand_mask; @@ -958,9 +1093,9 @@ spirv_builder_emit_image_gather(struct spirv_builder *b, spirv_buffer_emit_word(&b->instructions, result_type); spirv_buffer_emit_word(&b->instructions, result); spirv_buffer_emit_word(&b->instructions, image); - spirv_buffer_emit_word(&b->instructions, coordinate); - if (dref) - spirv_buffer_emit_word(&b->instructions, dref); + spirv_buffer_emit_word(&b->instructions, src->coord); + if (src->dref) + spirv_buffer_emit_word(&b->instructions, src->dref); else spirv_buffer_emit_word(&b->instructions, component); for (int i = 0; i < num_extra_operands; ++i) @@ -972,44 +1107,42 @@ SpvId spirv_builder_emit_image_fetch(struct spirv_builder *b, SpvId result_type, SpvId image, - SpvId coordinate, - SpvId lod, - SpvId sample, - SpvId const_offset, - SpvId offset) + const struct spriv_tex_src *src) { SpvId result = spirv_builder_new_id(b); SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone; SpvId extra_operands[4]; int num_extra_operands = 1; - if (lod) { - extra_operands[num_extra_operands++] = lod; + if (src->lod) { + extra_operands[num_extra_operands++] = src->lod; operand_mask |= SpvImageOperandsLodMask; } - if (sample) { - extra_operands[num_extra_operands++] = sample; + if (src->sample) { + extra_operands[num_extra_operands++] = src->sample; operand_mask |= SpvImageOperandsSampleMask; } - assert(!(const_offset && offset)); - if (const_offset) { - extra_operands[num_extra_operands++] = const_offset; + assert(!(src->const_offset && src->offset)); + if (src->const_offset) { + extra_operands[num_extra_operands++] = src->const_offset; operand_mask |= SpvImageOperandsConstOffsetMask; - } else if (offset) { - extra_operands[num_extra_operands++] = offset; + } else if (src->offset) { + extra_operands[num_extra_operands++] = src->offset; 
operand_mask |= SpvImageOperandsOffsetMask; } + if (src->sparse) + result_type = sparse_wrap_result_type(b, result_type); /* finalize num_extra_operands / extra_operands */ extra_operands[0] = operand_mask; spirv_buffer_prepare(&b->instructions, b->mem_ctx, 5 + num_extra_operands); - spirv_buffer_emit_word(&b->instructions, SpvOpImageFetch | + spirv_buffer_emit_word(&b->instructions, (src->sparse ? SpvOpImageSparseFetch : SpvOpImageFetch) | ((5 + num_extra_operands) << 16)); spirv_buffer_emit_word(&b->instructions, result_type); spirv_buffer_emit_word(&b->instructions, result); spirv_buffer_emit_word(&b->instructions, image); - spirv_buffer_emit_word(&b->instructions, coordinate); + spirv_buffer_emit_word(&b->instructions, src->coord); for (int i = 0; i < num_extra_operands; ++i) spirv_buffer_emit_word(&b->instructions, extra_operands[i]); return result; @@ -1197,6 +1330,12 @@ SpvId spirv_builder_type_int(struct spirv_builder *b, unsigned width) { uint32_t args[] = { width, 1 }; + if (width == 8) + spirv_builder_emit_cap(b, SpvCapabilityInt8); + else if (width == 16) + spirv_builder_emit_cap(b, SpvCapabilityInt16); + else if (width == 64) + spirv_builder_emit_cap(b, SpvCapabilityInt64); return get_type_def(b, SpvOpTypeInt, args, ARRAY_SIZE(args)); } @@ -1204,6 +1343,12 @@ SpvId spirv_builder_type_uint(struct spirv_builder *b, unsigned width) { uint32_t args[] = { width, 0 }; + if (width == 8) + spirv_builder_emit_cap(b, SpvCapabilityInt8); + else if (width == 16) + spirv_builder_emit_cap(b, SpvCapabilityInt16); + else if (width == 64) + spirv_builder_emit_cap(b, SpvCapabilityInt64); return get_type_def(b, SpvOpTypeInt, args, ARRAY_SIZE(args)); } @@ -1211,6 +1356,10 @@ SpvId spirv_builder_type_float(struct spirv_builder *b, unsigned width) { uint32_t args[] = { width }; + if (width == 16) + spirv_builder_emit_cap(b, SpvCapabilityFloat16); + else if (width == 64) + spirv_builder_emit_cap(b, SpvCapabilityFloat64); return get_type_def(b, SpvOpTypeFloat, args, 
ARRAY_SIZE(args)); } @@ -1224,10 +1373,18 @@ spirv_builder_type_image(struct spirv_builder *b, SpvId sampled_type, sampled_type, dim, depth ? 1 : 0, arrayed ? 1 : 0, ms ? 1 : 0, sampled, image_format }; + if (sampled == 2 && ms && dim != SpvDimSubpassData) + spirv_builder_emit_cap(b, SpvCapabilityStorageImageMultisample); return get_type_def(b, SpvOpTypeImage, args, ARRAY_SIZE(args)); } SpvId +spirv_builder_emit_sampled_image(struct spirv_builder *b, SpvId result_type, SpvId image, SpvId sampler) +{ + return spirv_builder_emit_binop(b, SpvOpSampledImage, result_type, image, sampler); +} + +SpvId spirv_builder_type_sampled_image(struct spirv_builder *b, SpvId image_type) { uint32_t args[] = { image_type }; @@ -1235,6 +1392,13 @@ spirv_builder_type_sampled_image(struct spirv_builder *b, SpvId image_type) } SpvId +spirv_builder_type_sampler(struct spirv_builder *b) +{ + uint32_t args[1] = {0}; + return get_type_def(b, SpvOpTypeSampler, args, 0); +} + +SpvId spirv_builder_type_pointer(struct spirv_builder *b, SpvStorageClass storage_class, SpvId type) { @@ -1416,7 +1580,7 @@ spirv_builder_const_bool(struct spirv_builder *b, bool val) SpvId spirv_builder_const_int(struct spirv_builder *b, int width, int64_t val) { - assert(width >= 16); + assert(width >= 8); SpvId type = spirv_builder_type_int(b, width); if (width <= 32) return emit_constant_32(b, type, val); @@ -1428,6 +1592,12 @@ SpvId spirv_builder_const_uint(struct spirv_builder *b, int width, uint64_t val) { assert(width >= 8); + if (width == 8) + spirv_builder_emit_cap(b, SpvCapabilityInt8); + else if (width == 16) + spirv_builder_emit_cap(b, SpvCapabilityInt16); + else if (width == 64) + spirv_builder_emit_cap(b, SpvCapabilityInt64); SpvId type = spirv_builder_type_uint(b, width); if (width <= 32) return emit_constant_32(b, type, val); @@ -1439,7 +1609,17 @@ SpvId spirv_builder_spec_const_uint(struct spirv_builder *b, int width) { assert(width <= 32); - return spirv_builder_emit_unop(b, SpvOpSpecConstant, 
spirv_builder_type_uint(b, width), 0); + SpvId const_type = spirv_builder_type_uint(b, width); + SpvId result = spirv_builder_new_id(b); + spirv_buffer_prepare(&b->types_const_defs, b->mem_ctx, 4); + spirv_buffer_emit_word(&b->types_const_defs, SpvOpSpecConstant | (4 << 16)); + spirv_buffer_emit_word(&b->types_const_defs, const_type); + spirv_buffer_emit_word(&b->types_const_defs, result); + /* this is the default value for spec constants; + * if any users need a different default, add a param to pass for it + */ + spirv_buffer_emit_word(&b->types_const_defs, 1); + return result; } SpvId @@ -1447,12 +1627,15 @@ spirv_builder_const_float(struct spirv_builder *b, int width, double val) { assert(width >= 16); SpvId type = spirv_builder_type_float(b, width); - if (width == 16) + if (width == 16) { + spirv_builder_emit_cap(b, SpvCapabilityFloat16); return emit_constant_32(b, type, _mesa_float_to_half(val)); - else if (width == 32) + } else if (width == 32) return emit_constant_32(b, type, u_bitcast_f2u(val)); - else if (width == 64) + else if (width == 64) { + spirv_builder_emit_cap(b, SpvCapabilityFloat64); return emit_constant_64(b, type, u_bitcast_d2u(val)); + } unreachable("unhandled float-width"); } @@ -1492,7 +1675,7 @@ spirv_builder_emit_var(struct spirv_builder *b, SpvId type, { assert(storage_class != SpvStorageClassGeneric); struct spirv_buffer *buf = storage_class != SpvStorageClassFunction ? 
- &b->types_const_defs : &b->instructions; + &b->types_const_defs : &b->local_vars; SpvId ret = spirv_builder_new_id(b); spirv_buffer_prepare(buf, b->mem_ctx, 4); @@ -1549,12 +1732,14 @@ spirv_builder_get_num_words(struct spirv_builder *b) b->debug_names.num_words + b->decorations.num_words + b->types_const_defs.num_words + + b->local_vars.num_words + b->instructions.num_words; } size_t spirv_builder_get_words(struct spirv_builder *b, uint32_t *words, - size_t num_words, uint32_t spirv_version) + size_t num_words, uint32_t spirv_version, + uint32_t *tcs_vertices_out_word) { assert(num_words >= spirv_builder_get_num_words(b)); @@ -1581,15 +1766,31 @@ spirv_builder_get_words(struct spirv_builder *b, uint32_t *words, &b->debug_names, &b->decorations, &b->types_const_defs, - &b->instructions }; for (int i = 0; i < ARRAY_SIZE(buffers); ++i) { const struct spirv_buffer *buffer = buffers[i]; - for (int j = 0; j < buffer->num_words; ++j) - words[written++] = buffer->words[j]; + + if (buffer == &b->exec_modes && *tcs_vertices_out_word > 0) + *tcs_vertices_out_word += written; + + memcpy(words + written, buffer->words, + buffer->num_words * sizeof(uint32_t)); + written += buffer->num_words; } + typed_memcpy(&words[written], b->instructions.words, b->local_vars_begin); + written += b->local_vars_begin; + typed_memcpy(&words[written], b->local_vars.words, b->local_vars.num_words); + written += b->local_vars.num_words; + typed_memcpy(&words[written], &b->instructions.words[b->local_vars_begin], (b->instructions.num_words - b->local_vars_begin)); + written += b->instructions.num_words - b->local_vars_begin; assert(written == spirv_builder_get_num_words(b)); return written; } + +void +spirv_builder_begin_local_vars(struct spirv_builder *b) +{ + b->local_vars_begin = b->instructions.num_words; +} diff --git a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h index d18c101b394..dbdf9d1fba6 100644 --- 
a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h +++ b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h @@ -53,11 +53,30 @@ struct spirv_builder { struct spirv_buffer decorations; struct spirv_buffer types_const_defs; + struct spirv_buffer local_vars; struct hash_table *types; struct hash_table *consts; struct spirv_buffer instructions; SpvId prev_id; + unsigned local_vars_begin; +}; + +struct spriv_tex_src { + SpvId coord; + SpvId proj; + SpvId bias; + SpvId lod; + SpvId dref; + SpvId dx; + SpvId dy; + SpvId const_offset; + SpvId offset; + SpvId sample; + SpvId tex_offset; + SpvId bindless; + SpvId min_lod; + bool sparse; }; static inline SpvId @@ -90,6 +109,10 @@ spirv_builder_emit_decoration(struct spirv_builder *b, SpvId target, SpvDecoration decoration); void +spirv_builder_emit_rounding_mode(struct spirv_builder *b, SpvId target, + SpvFPRoundingMode rounding); + +void spirv_builder_emit_input_attachment_index(struct spirv_builder *b, SpvId target, uint32_t id); void @@ -146,13 +169,16 @@ spirv_builder_emit_entry_point(struct spirv_builder *b, SpvExecutionModel exec_model, SpvId entry_point, const char *name, const SpvId interfaces[], size_t num_interfaces); -void +uint32_t spirv_builder_emit_exec_mode_literal(struct spirv_builder *b, SpvId entry_point, SpvExecutionMode exec_mode, uint32_t param); void spirv_builder_emit_exec_mode_literal3(struct spirv_builder *b, SpvId entry_point, SpvExecutionMode exec_mode, uint32_t param[3]); void +spirv_builder_emit_exec_mode_id3(struct spirv_builder *b, SpvId entry_point, + SpvExecutionMode exec_mode, SpvId param[3]); +void spirv_builder_emit_exec_mode(struct spirv_builder *b, SpvId entry_point, SpvExecutionMode exec_mode); @@ -178,12 +204,16 @@ SpvId spirv_builder_emit_load(struct spirv_builder *b, SpvId result_type, SpvId pointer); +SpvId +spirv_builder_emit_load_aligned(struct spirv_builder *b, SpvId result_type, SpvId pointer, unsigned alignment, bool coherent); void spirv_builder_emit_atomic_store(struct 
spirv_builder *b, SpvId pointer, SpvScope scope, SpvMemorySemanticsMask semantics, SpvId object); void spirv_builder_emit_store(struct spirv_builder *b, SpvId pointer, SpvId object); +void +spirv_builder_emit_store_aligned(struct spirv_builder *b, SpvId pointer, SpvId object, unsigned alignment, bool coherent); SpvId spirv_builder_emit_access_chain(struct spirv_builder *b, SpvId result_type, @@ -268,6 +298,15 @@ spirv_builder_set_phi_operand(struct spirv_builder *b, size_t position, void spirv_builder_emit_kill(struct spirv_builder *b); +void +spirv_builder_emit_terminate(struct spirv_builder *b); + +void +spirv_builder_emit_demote(struct spirv_builder *b); + +SpvId +spirv_is_helper_invocation(struct spirv_builder *b); + SpvId spirv_builder_emit_vote(struct spirv_builder *b, SpvOp op, SpvId src); @@ -275,15 +314,7 @@ SpvId spirv_builder_emit_image_sample(struct spirv_builder *b, SpvId result_type, SpvId sampled_image, - SpvId coordinate, - bool proj, - SpvId lod, - SpvId bias, - SpvId dref, - SpvId dx, - SpvId dy, - SpvId const_offset, - SpvId offset); + const struct spriv_tex_src *src); SpvId spirv_builder_emit_image(struct spirv_builder *b, SpvId result_type, @@ -303,7 +334,8 @@ spirv_builder_emit_image_read(struct spirv_builder *b, SpvId coordinate, SpvId lod, SpvId sample, - SpvId offset); + SpvId offset, + bool sparse); void spirv_builder_emit_image_write(struct spirv_builder *b, @@ -318,22 +350,13 @@ SpvId spirv_builder_emit_image_fetch(struct spirv_builder *b, SpvId result_type, SpvId image, - SpvId coordinate, - SpvId lod, - SpvId sample, - SpvId const_offset, - SpvId offset); + const struct spriv_tex_src *src); SpvId spirv_builder_emit_image_gather(struct spirv_builder *b, SpvId result_type, SpvId image, - SpvId coordinate, - SpvId component, - SpvId lod, - SpvId sample, - SpvId const_offset, - SpvId offset, - SpvId dref); + const struct spriv_tex_src *src, + SpvId component); SpvId spirv_builder_emit_image_query_size(struct spirv_builder *b, @@ -379,6 
+402,10 @@ spirv_builder_type_image(struct spirv_builder *b, SpvId sampled_type, SpvId spirv_builder_type_sampled_image(struct spirv_builder *b, SpvId image_type); +SpvId +spirv_builder_type_sampler(struct spirv_builder *b); +SpvId +spirv_builder_emit_sampled_image(struct spirv_builder *b, SpvId result_type, SpvId image, SpvId sampler); SpvId spirv_builder_type_pointer(struct spirv_builder *b, @@ -409,6 +436,11 @@ spirv_builder_type_function(struct spirv_builder *b, SpvId return_type, size_t num_parameter_types); SpvId +spirv_builder_function_call(struct spirv_builder *b, SpvId result_type, + SpvId function, const SpvId arguments[], + size_t num_arguments); + +SpvId spirv_builder_const_bool(struct spirv_builder *b, bool val); SpvId @@ -451,10 +483,13 @@ spirv_builder_get_num_words(struct spirv_builder *b); size_t spirv_builder_get_words(struct spirv_builder *b, uint32_t *words, - size_t num_words, uint32_t spirv_version); + size_t num_words, uint32_t spirv_version, + uint32_t *tcs_vertices_out_word); void -spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream); +spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream, bool multistream); +void +spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream, bool multistream); void -spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream); +spirv_builder_begin_local_vars(struct spirv_builder *b); #endif diff --git a/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py b/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py index af2419cf9d4..20ed4cfb565 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py +++ b/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py @@ -25,7 +25,7 @@ import sys lower_b2b = [ (('b2b32', 'a'), ('b2i32', 'a')), - (('b2b1', 'a'), ('i2b1', 'a')), + (('b2b1', 'a'), ('ine', 'a', 0)), ] def main(): diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index 
f60590ca25e..6da1d571c74 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -1,23 +1,19 @@ #include "zink_batch.h" - #include "zink_context.h" -#include "zink_fence.h" +#include "zink_descriptors.h" #include "zink_framebuffer.h" -#include "zink_query.h" +#include "zink_kopper.h" #include "zink_program.h" -#include "zink_render_pass.h" +#include "zink_query.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_surface.h" -#include "util/hash_table.h" -#include "util/u_debug.h" -#include "util/set.h" - #ifdef VK_USE_PLATFORM_METAL_EXT #include "QuartzCore/CAMetalLayer.h" #endif -#include "wsi_common.h" + +#define MAX_VIEW_COUNT 500 void debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr) @@ -25,91 +21,221 @@ debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr) sprintf(buf, "zink_batch_state"); } +/* this resets the batch usage and tracking for a resource object */ +static void +reset_obj(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_resource_object *obj) +{ + /* if no batch usage exists after removing the usage from 'bs', this resource is considered fully idle */ + if (!zink_resource_object_usage_unset(obj, bs)) { + /* the resource is idle, so reset all access/reordering info */ + obj->unordered_read = true; + obj->unordered_write = true; + obj->access = 0; + obj->unordered_access = 0; + obj->last_write = 0; + obj->access_stage = 0; + obj->unordered_access_stage = 0; + obj->copies_need_reset = true; + obj->unsync_access = true; + /* also prune dead view objects */ + simple_mtx_lock(&obj->view_lock); + if (obj->is_buffer) { + while (util_dynarray_contains(&obj->views, VkBufferView)) + VKSCR(DestroyBufferView)(screen->dev, util_dynarray_pop(&obj->views, VkBufferView), NULL); + } else { + while (util_dynarray_contains(&obj->views, VkImageView)) + VKSCR(DestroyImageView)(screen->dev, util_dynarray_pop(&obj->views, VkImageView), NULL); 
+ } + obj->view_prune_count = 0; + obj->view_prune_timeline = 0; + simple_mtx_unlock(&obj->view_lock); + if (obj->dt) + zink_kopper_prune_batch_usage(obj->dt, &bs->usage); + } else if (util_dynarray_num_elements(&obj->views, VkBufferView) > MAX_VIEW_COUNT && !zink_bo_has_unflushed_usage(obj->bo)) { + /* avoid ballooning from too many views on always-used resources: */ + simple_mtx_lock(&obj->view_lock); + /* ensure no existing view pruning is queued, double check elements in case pruning just finished */ + if (!obj->view_prune_timeline && util_dynarray_num_elements(&obj->views, VkBufferView) > MAX_VIEW_COUNT) { + /* prune all existing views */ + obj->view_prune_count = util_dynarray_num_elements(&obj->views, VkBufferView); + /* prune them when the views will definitely not be in use */ + obj->view_prune_timeline = MAX2(obj->bo->reads.u ? obj->bo->reads.u->usage : 0, + obj->bo->writes.u ? obj->bo->writes.u->usage : 0); + } + simple_mtx_unlock(&obj->view_lock); + } + /* resource objects are not unrefed here; + * this is typically the last ref on a resource object, and destruction will + * usually trigger an ioctl, so defer deletion to the submit thread to avoid blocking + */ + util_dynarray_append(&bs->unref_resources, struct zink_resource_object*, obj); +} + +/* reset all the resource objects in a given batch object list */ +static void +reset_obj_list(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_batch_obj_list *list) +{ + for (unsigned i = 0; i < list->num_buffers; i++) + reset_obj(screen, bs, list->objs[i]); + list->num_buffers = 0; +} + +/* reset a given batch state */ void zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) { struct zink_screen *screen = zink_screen(ctx->base.screen); - if (VKSCR(ResetCommandPool)(screen->dev, bs->cmdpool, 0) != VK_SUCCESS) - debug_printf("vkResetCommandPool failed\n"); + VkResult result = VKSCR(ResetCommandPool)(screen->dev, bs->cmdpool, 0); + if (result != VK_SUCCESS) + 
mesa_loge("ZINK: vkResetCommandPool failed (%s)", vk_Result_to_str(result)); + result = VKSCR(ResetCommandPool)(screen->dev, bs->unsynchronized_cmdpool, 0); + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkResetCommandPool failed (%s)", vk_Result_to_str(result)); + + /* unref/reset all used resources */ + reset_obj_list(screen, bs, &bs->real_objs); + reset_obj_list(screen, bs, &bs->slab_objs); + reset_obj_list(screen, bs, &bs->sparse_objs); + while (util_dynarray_contains(&bs->swapchain_obj, struct zink_resource_object*)) { + struct zink_resource_object *obj = util_dynarray_pop(&bs->swapchain_obj, struct zink_resource_object*); + reset_obj(screen, bs, obj); + } - /* unref all used resources */ - set_foreach_remove(bs->resources, entry) { - struct zink_resource_object *obj = (struct zink_resource_object *)entry->key; - if (!zink_resource_object_usage_unset(obj, bs)) { - obj->unordered_barrier = false; - obj->access = 0; - obj->access_stage = 0; + /* this is where bindless texture/buffer ids get recycled */ + for (unsigned i = 0; i < 2; i++) { + while (util_dynarray_contains(&bs->bindless_releases[i], uint32_t)) { + uint32_t handle = util_dynarray_pop(&bs->bindless_releases[i], uint32_t); + bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle); + struct util_idalloc *ids = i ? &ctx->di.bindless[is_buffer].img_slots : &ctx->di.bindless[is_buffer].tex_slots; + util_idalloc_free(ids, is_buffer ? 
handle - ZINK_MAX_BINDLESS_HANDLES : handle); } - util_dynarray_append(&bs->unref_resources, struct zink_resource_object*, obj); } - set_foreach_remove(bs->active_queries, entry) { + /* queries must only be destroyed once they are inactive */ + set_foreach_remove(&bs->active_queries, entry) { struct zink_query *query = (void*)entry->key; - zink_prune_query(screen, bs, query); - } - - set_foreach_remove(bs->surfaces, entry) { - struct zink_surface *surf = (struct zink_surface *)entry->key; - zink_batch_usage_unset(&surf->batch_uses, bs); - zink_surface_reference(screen, &surf, NULL); + zink_prune_query(bs, query); } - set_foreach_remove(bs->bufferviews, entry) { - struct zink_buffer_view *buffer_view = (struct zink_buffer_view *)entry->key; - zink_batch_usage_unset(&buffer_view->batch_uses, bs); - zink_buffer_view_reference(screen, &buffer_view, NULL); - } - - util_dynarray_foreach(&bs->dead_framebuffers, struct zink_framebuffer*, fb) { - zink_framebuffer_reference(screen, fb, NULL); - } - util_dynarray_clear(&bs->dead_framebuffers); + util_dynarray_foreach(&bs->dead_querypools, VkQueryPool, pool) + VKSCR(DestroyQueryPool)(screen->dev, *pool, NULL); + util_dynarray_clear(&bs->dead_querypools); + + util_dynarray_foreach(&bs->dgc.pipelines, VkPipeline, pipeline) + VKSCR(DestroyPipeline)(screen->dev, *pipeline, NULL); + util_dynarray_clear(&bs->dgc.pipelines); + util_dynarray_foreach(&bs->dgc.layouts, VkIndirectCommandsLayoutNV, iclayout) + VKSCR(DestroyIndirectCommandsLayoutNV)(screen->dev, *iclayout, NULL); + util_dynarray_clear(&bs->dgc.layouts); + + /* samplers are appended to the batch state in which they are destroyed + * to ensure deferred deletion without destroying in-use objects + */ util_dynarray_foreach(&bs->zombie_samplers, VkSampler, samp) { VKSCR(DestroySampler)(screen->dev, *samp, NULL); } util_dynarray_clear(&bs->zombie_samplers); - util_dynarray_clear(&bs->persistent_resources); - screen->batch_descriptor_reset(screen, bs); + 
zink_batch_descriptor_reset(screen, bs); + + util_dynarray_foreach(&bs->freed_sparse_backing_bos, struct zink_bo, bo) { + zink_bo_unref(screen, bo); + } + util_dynarray_clear(&bs->freed_sparse_backing_bos); - set_foreach_remove(bs->programs, entry) { + /* programs are refcounted and batch-tracked */ + set_foreach_remove(&bs->programs, entry) { struct zink_program *pg = (struct zink_program*)entry->key; zink_batch_usage_unset(&pg->batch_uses, bs); - if (pg->is_compute) { - struct zink_compute_program *comp = (struct zink_compute_program*)pg; - zink_compute_program_reference(screen, &comp, NULL); - } else { - struct zink_gfx_program *prog = (struct zink_gfx_program*)pg; - zink_gfx_program_reference(screen, &prog, NULL); - } + zink_program_reference(screen, &pg, NULL); } - pipe_resource_reference(&bs->flush_res, NULL); - bs->resource_size = 0; + bs->signal_semaphore = VK_NULL_HANDLE; + util_dynarray_clear(&bs->wait_semaphore_stages); + + bs->present = VK_NULL_HANDLE; + /* check the arrays first to avoid locking unnecessarily */ + if (util_dynarray_contains(&bs->acquires, VkSemaphore) || util_dynarray_contains(&bs->wait_semaphores, VkSemaphore)) { + simple_mtx_lock(&screen->semaphores_lock); + util_dynarray_append_dynarray(&screen->semaphores, &bs->acquires); + util_dynarray_clear(&bs->acquires); + util_dynarray_append_dynarray(&screen->semaphores, &bs->wait_semaphores); + util_dynarray_clear(&bs->wait_semaphores); + simple_mtx_unlock(&screen->semaphores_lock); + } + if (util_dynarray_contains(&bs->signal_semaphores, VkSemaphore) || util_dynarray_contains(&bs->fd_wait_semaphores, VkSemaphore)) { + simple_mtx_lock(&screen->semaphores_lock); + util_dynarray_append_dynarray(&screen->fd_semaphores, &bs->signal_semaphores); + util_dynarray_clear(&bs->signal_semaphores); + util_dynarray_append_dynarray(&screen->fd_semaphores, &bs->fd_wait_semaphores); + util_dynarray_clear(&bs->fd_wait_semaphores); + simple_mtx_unlock(&screen->semaphores_lock); + } + bs->swapchain = NULL; + 
+ util_dynarray_foreach(&bs->fences, struct zink_tc_fence*, mfence) + zink_fence_reference(screen, mfence, NULL); + util_dynarray_clear(&bs->fences); + + bs->unordered_write_access = VK_ACCESS_NONE; + bs->unordered_write_stages = VK_PIPELINE_STAGE_NONE; + /* only increment batch generation if previously in-use to avoid false detection of batch completion */ + if (bs->fence.submitted) + bs->usage.submit_count++; /* only reset submitted here so that tc fence desync can pick up the 'completed' flag * before the state is reused */ bs->fence.submitted = false; bs->has_barriers = false; - bs->scanout_flush = false; + bs->has_unsync = false; if (bs->fence.batch_id) zink_screen_update_last_finished(screen, bs->fence.batch_id); - bs->submit_count++; bs->fence.batch_id = 0; bs->usage.usage = 0; + bs->next = NULL; + bs->last_added_obj = NULL; } +/* this is where deferred resource unrefs occur */ static void unref_resources(struct zink_screen *screen, struct zink_batch_state *bs) { while (util_dynarray_contains(&bs->unref_resources, struct zink_resource_object*)) { struct zink_resource_object *obj = util_dynarray_pop(&bs->unref_resources, struct zink_resource_object*); + /* view pruning may be deferred to avoid ballooning */ + if (obj->view_prune_timeline && zink_screen_check_last_finished(screen, obj->view_prune_timeline)) { + simple_mtx_lock(&obj->view_lock); + /* check again under lock in case multi-context use is in the same place */ + if (obj->view_prune_timeline && zink_screen_check_last_finished(screen, obj->view_prune_timeline)) { + /* prune `view_prune_count` views */ + if (obj->is_buffer) { + VkBufferView *views = obj->views.data; + for (unsigned i = 0; i < obj->view_prune_count; i++) + VKSCR(DestroyBufferView)(screen->dev, views[i], NULL); + } else { + VkImageView *views = obj->views.data; + for (unsigned i = 0; i < obj->view_prune_count; i++) + VKSCR(DestroyImageView)(screen->dev, views[i], NULL); + } + size_t offset = obj->view_prune_count * sizeof(VkBufferView); 
+ uint8_t *data = obj->views.data; + /* shift the view array to the start */ + memcpy(data, data + offset, obj->views.size - offset); + /* adjust the array size */ + obj->views.size -= offset; + obj->view_prune_count = 0; + obj->view_prune_timeline = 0; + } + simple_mtx_unlock(&obj->view_lock); + } + /* this is typically where resource objects get destroyed */ zink_resource_object_reference(screen, &obj, NULL); } } +/* utility for resetting a batch state; called on context destruction */ void zink_clear_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) { @@ -118,20 +244,37 @@ zink_clear_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) unref_resources(zink_screen(ctx->base.screen), bs); } +/* utility for managing the singly-linked batch state list */ +static void +pop_batch_state(struct zink_context *ctx) +{ + const struct zink_batch_state *bs = ctx->batch_states; + ctx->batch_states = bs->next; + ctx->batch_states_count--; + if (ctx->last_batch_state == bs) + ctx->last_batch_state = NULL; +} + +/* reset all batch states and append to the free state list + * only usable after a full stall + */ void zink_batch_reset_all(struct zink_context *ctx) { - simple_mtx_lock(&ctx->batch_mtx); - hash_table_foreach(&ctx->batch_states, entry) { - struct zink_batch_state *bs = entry->data; + while (ctx->batch_states) { + struct zink_batch_state *bs = ctx->batch_states; bs->fence.completed = true; + pop_batch_state(ctx); zink_reset_batch_state(ctx, bs); - _mesa_hash_table_remove(&ctx->batch_states, entry); - util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, bs); + if (ctx->last_free_batch_state) + ctx->last_free_batch_state->next = bs; + else + ctx->free_batch_states = bs; + ctx->last_free_batch_state = bs; } - simple_mtx_unlock(&ctx->batch_mtx); } +/* called only on context destruction */ void zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs) { @@ -143,79 +286,131 @@ 
zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs cnd_destroy(&bs->usage.flush); mtx_destroy(&bs->usage.mtx); - if (bs->fence.fence) - VKSCR(DestroyFence)(screen->dev, bs->fence.fence, NULL); - if (bs->cmdbuf) VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->cmdbuf); - if (bs->barrier_cmdbuf) - VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->barrier_cmdbuf); + if (bs->reordered_cmdbuf) + VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->reordered_cmdbuf); if (bs->cmdpool) VKSCR(DestroyCommandPool)(screen->dev, bs->cmdpool, NULL); - + if (bs->unsynchronized_cmdbuf) + VKSCR(FreeCommandBuffers)(screen->dev, bs->unsynchronized_cmdpool, 1, &bs->unsynchronized_cmdbuf); + if (bs->unsynchronized_cmdpool) + VKSCR(DestroyCommandPool)(screen->dev, bs->unsynchronized_cmdpool, NULL); + free(bs->real_objs.objs); + free(bs->slab_objs.objs); + free(bs->sparse_objs.objs); + util_dynarray_fini(&bs->freed_sparse_backing_bos); + util_dynarray_fini(&bs->dead_querypools); + util_dynarray_fini(&bs->dgc.pipelines); + util_dynarray_fini(&bs->dgc.layouts); + util_dynarray_fini(&bs->swapchain_obj); util_dynarray_fini(&bs->zombie_samplers); - util_dynarray_fini(&bs->dead_framebuffers); util_dynarray_fini(&bs->unref_resources); - _mesa_set_destroy(bs->surfaces, NULL); - _mesa_set_destroy(bs->bufferviews, NULL); - _mesa_set_destroy(bs->programs, NULL); - _mesa_set_destroy(bs->active_queries, NULL); - screen->batch_descriptor_deinit(screen, bs); + util_dynarray_fini(&bs->bindless_releases[0]); + util_dynarray_fini(&bs->bindless_releases[1]); + util_dynarray_fini(&bs->acquires); + util_dynarray_fini(&bs->acquire_flags); + unsigned num_mfences = util_dynarray_num_elements(&bs->fence.mfences, void *); + struct zink_tc_fence **mfence = bs->fence.mfences.data; + for (unsigned i = 0; i < num_mfences; i++) { + mfence[i]->fence = NULL; + } + util_dynarray_fini(&bs->fence.mfences); + zink_batch_descriptor_deinit(screen, bs); ralloc_free(bs); 
} +/* batch states are created: + * - on context creation + * - dynamically up to a threshold if no free ones are available + */ static struct zink_batch_state * create_batch_state(struct zink_context *ctx) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_batch_state *bs = rzalloc(NULL, struct zink_batch_state); - bs->have_timelines = ctx->have_timelines; VkCommandPoolCreateInfo cpci = {0}; cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cpci.queueFamilyIndex = screen->gfx_queue; - cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - if (VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool) != VK_SUCCESS) - goto fail; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); + VRAM_ALLOC_LOOP(result, + VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); + + VkCommandBuffer cmdbufs[2]; VkCommandBufferAllocateInfo cbai = {0}; cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cbai.commandPool = bs->cmdpool; cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cbai.commandBufferCount = 1; + cbai.commandPool = bs->cmdpool; + cbai.commandBufferCount = 2; - if (VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->cmdbuf) != VK_SUCCESS) - goto fail; + VRAM_ALLOC_LOOP(result, + VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); - if (VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->barrier_cmdbuf) != VK_SUCCESS) - goto fail; + bs->cmdbuf = cmdbufs[0]; + bs->reordered_cmdbuf = cmdbufs[1]; + + cbai.commandPool = 
bs->unsynchronized_cmdpool; + cbai.commandBufferCount = 1; + VRAM_ALLOC_LOOP(result, + VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf);, + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); #define SET_CREATE_OR_FAIL(ptr) \ - ptr = _mesa_pointer_set_create(bs); \ - if (!ptr) \ + if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \ goto fail bs->ctx = ctx; - SET_CREATE_OR_FAIL(bs->resources); - SET_CREATE_OR_FAIL(bs->surfaces); - SET_CREATE_OR_FAIL(bs->bufferviews); - SET_CREATE_OR_FAIL(bs->programs); - SET_CREATE_OR_FAIL(bs->active_queries); + SET_CREATE_OR_FAIL(&bs->programs); + SET_CREATE_OR_FAIL(&bs->active_queries); + SET_CREATE_OR_FAIL(&bs->dmabuf_exports); + util_dynarray_init(&bs->signal_semaphores, NULL); + util_dynarray_init(&bs->wait_semaphores, NULL); + util_dynarray_init(&bs->fd_wait_semaphores, NULL); + util_dynarray_init(&bs->fences, NULL); + util_dynarray_init(&bs->dead_querypools, NULL); + util_dynarray_init(&bs->dgc.pipelines, NULL); + util_dynarray_init(&bs->dgc.layouts, NULL); + util_dynarray_init(&bs->wait_semaphore_stages, NULL); + util_dynarray_init(&bs->fd_wait_semaphore_stages, NULL); util_dynarray_init(&bs->zombie_samplers, NULL); - util_dynarray_init(&bs->dead_framebuffers, NULL); - util_dynarray_init(&bs->persistent_resources, NULL); + util_dynarray_init(&bs->freed_sparse_backing_bos, NULL); util_dynarray_init(&bs->unref_resources, NULL); + util_dynarray_init(&bs->acquires, NULL); + util_dynarray_init(&bs->acquire_flags, NULL); + util_dynarray_init(&bs->bindless_releases[0], NULL); + util_dynarray_init(&bs->bindless_releases[1], NULL); + util_dynarray_init(&bs->swapchain_obj, NULL); + util_dynarray_init(&bs->fence.mfences, NULL); cnd_init(&bs->usage.flush); mtx_init(&bs->usage.mtx, mtx_plain); + simple_mtx_init(&bs->exportable_lock, mtx_plain); + memset(&bs->buffer_indices_hashlist, -1, 
sizeof(bs->buffer_indices_hashlist)); - if (!screen->batch_descriptor_init(screen, bs)) - goto fail; - - VkFenceCreateInfo fci = {0}; - fci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - - if (VKSCR(CreateFence)(screen->dev, &fci, NULL, &bs->fence.fence) != VK_SUCCESS) + if (!zink_batch_descriptor_init(screen, bs)) goto fail; util_queue_fence_init(&bs->flush_completed); @@ -226,65 +421,78 @@ fail: return NULL; } +/* a batch state is considered "free" if it is both submitted and completed */ static inline bool -find_unused_state(struct hash_entry *entry) +find_unused_state(struct zink_batch_state *bs) { - struct zink_fence *fence = entry->data; + struct zink_fence *fence = &bs->fence; /* we can't reset these from fence_finish because threads */ bool completed = p_atomic_read(&fence->completed); bool submitted = p_atomic_read(&fence->submitted); return submitted && completed; } +/* find a "free" batch state */ static struct zink_batch_state * get_batch_state(struct zink_context *ctx, struct zink_batch *batch) { + struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_batch_state *bs = NULL; - simple_mtx_lock(&ctx->batch_mtx); - if (util_dynarray_num_elements(&ctx->free_batch_states, struct zink_batch_state*)) - bs = util_dynarray_pop(&ctx->free_batch_states, struct zink_batch_state*); + /* try from the ones that are known to be free first */ + if (ctx->free_batch_states) { + bs = ctx->free_batch_states; + ctx->free_batch_states = bs->next; + if (bs == ctx->last_free_batch_state) + ctx->last_free_batch_state = NULL; + } + /* try from the ones that are given back to the screen next */ if (!bs) { - hash_table_foreach(&ctx->batch_states, he) { - struct zink_fence *fence = he->data; - if (zink_screen_check_last_finished(zink_screen(ctx->base.screen), fence->batch_id) || find_unused_state(he)) { - bs = he->data; - _mesa_hash_table_remove(&ctx->batch_states, he); - break; - } + simple_mtx_lock(&screen->free_batch_states_lock); + if 
(screen->free_batch_states) { + bs = screen->free_batch_states; + bs->ctx = ctx; + screen->free_batch_states = bs->next; + if (bs == screen->last_free_batch_state) + screen->last_free_batch_state = NULL; + } + simple_mtx_unlock(&screen->free_batch_states_lock); + } + /* states are stored sequentially, so if the first one doesn't work, none of them will */ + if (!bs && ctx->batch_states && ctx->batch_states->next) { + /* only a submitted state can be reused */ + if (p_atomic_read(&ctx->batch_states->fence.submitted) && + /* a submitted state must have completed before it can be reused */ + (zink_screen_check_last_finished(screen, ctx->batch_states->fence.batch_id) || + p_atomic_read(&ctx->batch_states->fence.completed))) { + bs = ctx->batch_states; + pop_batch_state(ctx); } } - simple_mtx_unlock(&ctx->batch_mtx); if (bs) { - if (bs->fence.submitted && !bs->fence.completed) - /* this fence is already done, so we need vulkan to release the cmdbuf */ - zink_vkfence_wait(zink_screen(ctx->base.screen), &bs->fence, PIPE_TIMEOUT_INFINITE); zink_reset_batch_state(ctx, bs); } else { if (!batch->state) { /* this is batch init, so create a few more states for later use */ for (int i = 0; i < 3; i++) { struct zink_batch_state *state = create_batch_state(ctx); - util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, state); + if (ctx->last_free_batch_state) + ctx->last_free_batch_state->next = state; + else + ctx->free_batch_states = state; + ctx->last_free_batch_state = state; } } + /* no batch states were available: make a new one */ bs = create_batch_state(ctx); } return bs; } +/* reset the batch object: get a new state and unset 'has_work' to disable flushing */ void zink_reset_batch(struct zink_context *ctx, struct zink_batch *batch) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - - if (ctx->have_timelines && screen->last_finished > ctx->curr_batch && ctx->curr_batch == 1) { - if (!zink_screen_init_semaphore(screen)) { - 
debug_printf("timeline init failed, things are about to go dramatically wrong."); - ctx->have_timelines = false; - } - } - batch->state = get_batch_state(ctx, batch); assert(batch->state); @@ -292,8 +500,34 @@ zink_reset_batch(struct zink_context *ctx, struct zink_batch *batch) } void +zink_batch_bind_db(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_batch *batch = &ctx->batch; + unsigned count = 1; + VkDescriptorBufferBindingInfoEXT infos[2] = {0}; + infos[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + infos[0].address = batch->state->dd.db->obj->bda; + infos[0].usage = batch->state->dd.db->obj->vkusage; + assert(infos[0].usage); + + if (ctx->dd.bindless_init) { + infos[1].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + infos[1].address = ctx->dd.db.bindless_db->obj->bda; + infos[1].usage = ctx->dd.db.bindless_db->obj->vkusage; + assert(infos[1].usage); + count++; + } + VKSCR(CmdBindDescriptorBuffersEXT)(batch->state->cmdbuf, count, infos); + VKSCR(CmdBindDescriptorBuffersEXT)(batch->state->reordered_cmdbuf, count, infos); + batch->state->dd.db_bound = true; +} + +/* called on context creation and after flushing an old batch */ +void zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) { + struct zink_screen *screen = zink_screen(ctx->base.screen); zink_reset_batch(ctx, batch); batch->state->usage.unflushed = true; @@ -301,109 +535,240 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) VkCommandBufferBeginInfo cbbi = {0}; cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - if (VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi) != VK_SUCCESS) - debug_printf("vkBeginCommandBuffer failed\n"); - if (VKCTX(BeginCommandBuffer)(batch->state->barrier_cmdbuf, &cbbi) != VK_SUCCESS) - debug_printf("vkBeginCommandBuffer failed\n"); - batch->state->fence.batch_id = 
ctx->curr_batch; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi), + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + ); + VRAM_ALLOC_LOOP(result, + VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi), + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + ); + VRAM_ALLOC_LOOP(result, + VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi), + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + ); + batch->state->fence.completed = false; - if (ctx->last_fence) { - struct zink_batch_state *last_state = zink_batch_state(ctx->last_fence); + if (ctx->last_batch_state) { + struct zink_batch_state *last_state = ctx->last_batch_state; batch->last_batch_usage = &last_state->usage; } - if (!ctx->queries_disabled) - zink_resume_queries(ctx, batch); +#ifdef HAVE_RENDERDOC_APP_H + if (VKCTX(CmdInsertDebugUtilsLabelEXT) && screen->renderdoc_api) { + VkDebugUtilsLabelEXT capture_label; + /* Magic fallback which lets us bridge the Wine barrier over to Linux RenderDoc. 
*/ + capture_label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + capture_label.pNext = NULL; + capture_label.pLabelName = "vr-marker,frame_end,type,application"; + memset(capture_label.color, 0, sizeof(capture_label.color)); + VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->unsynchronized_cmdbuf, &capture_label); + VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->reordered_cmdbuf, &capture_label); + VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->cmdbuf, &capture_label); + } + + unsigned renderdoc_frame = p_atomic_read(&screen->renderdoc_frame); + if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY) && screen->renderdoc_api && !screen->renderdoc_capturing && + ((screen->renderdoc_capture_all && screen->screen_id == 1) || (renderdoc_frame >= screen->renderdoc_capture_start && renderdoc_frame <= screen->renderdoc_capture_end))) { + screen->renderdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL); + screen->renderdoc_capturing = true; + } +#endif + + /* descriptor buffers must always be bound at the start of a batch */ + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && !(ctx->flags & ZINK_CONTEXT_COPY_ONLY)) + zink_batch_bind_db(ctx); + /* zero init for unordered blits */ + if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) { + VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->cmdbuf, 0); + VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->reordered_cmdbuf, 0); + VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->unsynchronized_cmdbuf, 0); + } } +/* common operations to run post submit; split out for clarity */ static void post_submit(void *data, void *gdata, int thread_index) { struct zink_batch_state *bs = data; + struct zink_screen *screen = zink_screen(bs->ctx->base.screen); if (bs->is_device_lost) { if (bs->ctx->reset.reset) bs->ctx->reset.reset(bs->ctx->reset.data, PIPE_GUILTY_CONTEXT_RESET); - zink_screen(bs->ctx->base.screen)->device_lost = true; + else if 
(screen->abort_on_hang && !screen->robust_ctx_count) + /* if nothing can save us, abort */ + abort(); + screen->device_lost = true; + } else if (bs->ctx->batch_states_count > 5000) { + /* throttle in case something crazy is happening */ + zink_screen_timeline_wait(screen, bs->fence.batch_id - 2500, OS_TIMEOUT_INFINITE); } + /* this resets the buffer hashlist for the state's next use */ + memset(&bs->buffer_indices_hashlist, -1, sizeof(bs->buffer_indices_hashlist)); } +typedef enum { + ZINK_SUBMIT_WAIT_ACQUIRE, + ZINK_SUBMIT_WAIT_FD, + ZINK_SUBMIT_CMDBUF, + ZINK_SUBMIT_SIGNAL, + ZINK_SUBMIT_MAX +} zink_submit; + static void submit_queue(void *data, void *gdata, int thread_index) { struct zink_batch_state *bs = data; struct zink_context *ctx = bs->ctx; struct zink_screen *screen = zink_screen(ctx->base.screen); - VkSubmitInfo si = {0}; - - simple_mtx_lock(&ctx->batch_mtx); + VkSubmitInfo si[ZINK_SUBMIT_MAX] = {0}; + VkSubmitInfo *submit = si; + int num_si = ZINK_SUBMIT_MAX; while (!bs->fence.batch_id) - bs->fence.batch_id = p_atomic_inc_return(&screen->curr_batch); - _mesa_hash_table_insert_pre_hashed(&ctx->batch_states, bs->fence.batch_id, (void*)(uintptr_t)bs->fence.batch_id, bs); + bs->fence.batch_id = (uint32_t)p_atomic_inc_return(&screen->curr_batch); bs->usage.usage = bs->fence.batch_id; bs->usage.unflushed = false; - simple_mtx_unlock(&ctx->batch_mtx); - - VKSCR(ResetFences)(screen->dev, 1, &bs->fence.fence); uint64_t batch_id = bs->fence.batch_id; - si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - si.waitSemaphoreCount = 0; - si.pWaitSemaphores = NULL; - si.signalSemaphoreCount = 0; - si.pSignalSemaphores = NULL; - si.pWaitDstStageMask = NULL; - si.commandBufferCount = bs->has_barriers ? 2 : 1; - VkCommandBuffer cmdbufs[2] = { - bs->barrier_cmdbuf, - bs->cmdbuf, - }; - si.pCommandBuffers = bs->has_barriers ? 
cmdbufs : &cmdbufs[1]; + /* first submit is just for acquire waits since they have a separate array */ + for (unsigned i = 0; i < ARRAY_SIZE(si); i++) + si[i].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + si[ZINK_SUBMIT_WAIT_ACQUIRE].waitSemaphoreCount = util_dynarray_num_elements(&bs->acquires, VkSemaphore); + si[ZINK_SUBMIT_WAIT_ACQUIRE].pWaitSemaphores = bs->acquires.data; + while (util_dynarray_num_elements(&bs->acquire_flags, VkPipelineStageFlags) < si[ZINK_SUBMIT_WAIT_ACQUIRE].waitSemaphoreCount) { + VkPipelineStageFlags mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + util_dynarray_append(&bs->acquire_flags, VkPipelineStageFlags, mask); + } + assert(util_dynarray_num_elements(&bs->acquires, VkSemaphore) <= util_dynarray_num_elements(&bs->acquire_flags, VkPipelineStageFlags)); + si[ZINK_SUBMIT_WAIT_ACQUIRE].pWaitDstStageMask = bs->acquire_flags.data; + + si[ZINK_SUBMIT_WAIT_FD].waitSemaphoreCount = util_dynarray_num_elements(&bs->fd_wait_semaphores, VkSemaphore); + si[ZINK_SUBMIT_WAIT_FD].pWaitSemaphores = bs->fd_wait_semaphores.data; + while (util_dynarray_num_elements(&bs->fd_wait_semaphore_stages, VkPipelineStageFlags) < si[ZINK_SUBMIT_WAIT_FD].waitSemaphoreCount) { + VkPipelineStageFlags mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + util_dynarray_append(&bs->fd_wait_semaphore_stages, VkPipelineStageFlags, mask); + } + assert(util_dynarray_num_elements(&bs->fd_wait_semaphores, VkSemaphore) <= util_dynarray_num_elements(&bs->fd_wait_semaphore_stages, VkPipelineStageFlags)); + si[ZINK_SUBMIT_WAIT_FD].pWaitDstStageMask = bs->fd_wait_semaphore_stages.data; + + if (si[ZINK_SUBMIT_WAIT_ACQUIRE].waitSemaphoreCount == 0) { + num_si--; + submit++; + if (si[ZINK_SUBMIT_WAIT_FD].waitSemaphoreCount == 0) { + num_si--; + submit++; + } + } + /* then the real submit */ + si[ZINK_SUBMIT_CMDBUF].waitSemaphoreCount = util_dynarray_num_elements(&bs->wait_semaphores, VkSemaphore); + si[ZINK_SUBMIT_CMDBUF].pWaitSemaphores = bs->wait_semaphores.data; + 
si[ZINK_SUBMIT_CMDBUF].pWaitDstStageMask = bs->wait_semaphore_stages.data; + VkCommandBuffer cmdbufs[3]; + unsigned c = 0; + if (bs->has_unsync) + cmdbufs[c++] = bs->unsynchronized_cmdbuf; + if (bs->has_barriers) + cmdbufs[c++] = bs->reordered_cmdbuf; + cmdbufs[c++] = bs->cmdbuf; + si[ZINK_SUBMIT_CMDBUF].pCommandBuffers = cmdbufs; + si[ZINK_SUBMIT_CMDBUF].commandBufferCount = c; + /* assorted signal submit from wsi/externals */ + si[ZINK_SUBMIT_CMDBUF].signalSemaphoreCount = util_dynarray_num_elements(&bs->signal_semaphores, VkSemaphore); + si[ZINK_SUBMIT_CMDBUF].pSignalSemaphores = bs->signal_semaphores.data; + + /* then the signal submit with the timeline (fence) semaphore */ + VkSemaphore signals[3]; + si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount = !!bs->signal_semaphore; + signals[0] = bs->signal_semaphore; + si[ZINK_SUBMIT_SIGNAL].pSignalSemaphores = signals; VkTimelineSemaphoreSubmitInfo tsi = {0}; - if (bs->have_timelines) { - tsi.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - si.pNext = &tsi; - tsi.signalSemaphoreValueCount = 1; - tsi.pSignalSemaphoreValues = &batch_id; - si.signalSemaphoreCount = 1; - si.pSignalSemaphores = &screen->sem; + uint64_t signal_values[2] = {0}; + tsi.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; + si[ZINK_SUBMIT_SIGNAL].pNext = &tsi; + tsi.pSignalSemaphoreValues = signal_values; + signal_values[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount] = batch_id; + signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = screen->sem; + tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount; + + if (bs->present) + signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = bs->present; + tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount; + + + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } 
+ ); + if (bs->has_barriers) { + if (bs->unordered_write_access) { + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = bs->unordered_write_access; + mb.dstAccessMask = VK_ACCESS_NONE; + VKSCR(CmdPipelineBarrier)(bs->reordered_cmdbuf, + bs->unordered_write_stages, + screen->info.have_KHR_synchronization2 ? VK_PIPELINE_STAGE_NONE : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); + } + if (bs->has_unsync) { + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); } - struct wsi_memory_signal_submit_info mem_signal = { - .sType = VK_STRUCTURE_TYPE_WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA, - .pNext = si.pNext, - }; + if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount) + num_si--; - if (bs->flush_res && screen->needs_mesa_flush_wsi) { - struct zink_resource *flush_res = zink_resource(bs->flush_res); - mem_signal.memory = zink_bo_get_mem(flush_res->scanout_obj ? 
flush_res->scanout_obj->bo : flush_res->obj->bo); - si.pNext = &mem_signal; - } + simple_mtx_lock(&screen->queue_lock); + VRAM_ALLOC_LOOP(result, + VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + } + ); + simple_mtx_unlock(&screen->queue_lock); - if (VKSCR(EndCommandBuffer)(bs->cmdbuf) != VK_SUCCESS) { - debug_printf("vkEndCommandBuffer failed\n"); - bs->is_device_lost = true; - goto end; - } - if (VKSCR(EndCommandBuffer)(bs->barrier_cmdbuf) != VK_SUCCESS) { - debug_printf("vkEndCommandBuffer failed\n"); - bs->is_device_lost = true; - goto end; - } + unsigned i = 0; + VkSemaphore *sem = bs->signal_semaphores.data; + set_foreach(&bs->dmabuf_exports, entry) { + struct zink_resource *res = (void*)entry->key; + for (; res; res = zink_resource(res->base.b.next)) + zink_screen_import_dmabuf_semaphore(screen, res, sem[i++]); - while (util_dynarray_contains(&bs->persistent_resources, struct zink_resource_object*)) { - struct zink_resource_object *obj = util_dynarray_pop(&bs->persistent_resources, struct zink_resource_object*); - VkMappedMemoryRange range = zink_resource_init_mem_range(screen, obj, 0, obj->size); - VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range); + struct pipe_resource *pres = (void*)entry->key; + pipe_resource_reference(&pres, NULL); } + _mesa_set_clear(&bs->dmabuf_exports, NULL); - if (VKSCR(QueueSubmit)(bs->queue, 1, &si, bs->fence.fence) != VK_SUCCESS) { - debug_printf("ZINK: vkQueueSubmit() failed\n"); - bs->is_device_lost = true; - } - bs->submit_count++; + bs->usage.submit_count++; end: cnd_broadcast(&bs->usage.flush); @@ -411,212 +776,175 @@ end: unref_resources(screen, bs); } - -/* TODO: remove for wsi */ -static void -copy_scanout(struct zink_batch_state *bs, struct zink_resource *res) -{ - if (!bs->scanout_flush) - return; - struct zink_context *ctx = bs->ctx; - - VkImageCopy region = 
{0}; - struct pipe_box box = {0, 0, 0, - u_minify(res->base.b.width0, 0), - u_minify(res->base.b.height0, 0), res->base.b.array_size}; - box.depth = util_num_layers(&res->base.b, 0); - struct pipe_box *src_box = &box; - unsigned dstz = 0; - - region.srcSubresource.aspectMask = res->aspect; - region.srcSubresource.mipLevel = 0; - switch (res->base.b.target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_1D_ARRAY: - /* these use layer */ - region.srcSubresource.baseArrayLayer = src_box->z; - region.srcSubresource.layerCount = src_box->depth; - region.srcOffset.z = 0; - region.extent.depth = 1; - break; - case PIPE_TEXTURE_3D: - /* this uses depth */ - region.srcSubresource.baseArrayLayer = 0; - region.srcSubresource.layerCount = 1; - region.srcOffset.z = src_box->z; - region.extent.depth = src_box->depth; - break; - default: - /* these must only copy one layer */ - region.srcSubresource.baseArrayLayer = 0; - region.srcSubresource.layerCount = 1; - region.srcOffset.z = 0; - region.extent.depth = 1; - } - - region.srcOffset.x = src_box->x; - region.srcOffset.y = src_box->y; - - region.dstSubresource.aspectMask = res->aspect; - region.dstSubresource.mipLevel = 0; - switch (res->base.b.target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_1D_ARRAY: - /* these use layer */ - region.dstSubresource.baseArrayLayer = dstz; - region.dstSubresource.layerCount = src_box->depth; - region.dstOffset.z = 0; - break; - case PIPE_TEXTURE_3D: - /* this uses depth */ - region.dstSubresource.baseArrayLayer = 0; - region.dstSubresource.layerCount = 1; - region.dstOffset.z = dstz; - break; - default: - /* these must only copy one layer */ - region.dstSubresource.baseArrayLayer = 0; - region.dstSubresource.layerCount = 1; - region.dstOffset.z = 0; - } - - region.dstOffset.x = 0; - region.dstOffset.y = 0; - region.extent.width = src_box->width; - region.extent.height 
= src_box->height; - - VkImageMemoryBarrier imb1; - zink_resource_image_barrier_init(&imb1, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - VKCTX(CmdPipelineBarrier)( - bs->cmdbuf, - res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, - 0, NULL, - 0, NULL, - 1, &imb1 - ); - - VkImageSubresourceRange isr = { - res->aspect, - 0, VK_REMAINING_MIP_LEVELS, - 0, VK_REMAINING_ARRAY_LAYERS - }; - VkImageMemoryBarrier imb = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - NULL, - 0, - VK_ACCESS_TRANSFER_WRITE_BIT, - res->scanout_obj_init ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - res->scanout_obj->image, - isr - }; - VKCTX(CmdPipelineBarrier)( - bs->cmdbuf, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, - 0, NULL, - 0, NULL, - 1, &imb - ); - - VKCTX(CmdCopyImage)(bs->cmdbuf, res->obj->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - res->scanout_obj->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, ®ion); - imb.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - imb.dstAccessMask = 0; - imb.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - VKCTX(CmdPipelineBarrier)( - bs->cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - 0, - 0, NULL, - 0, NULL, - 1, &imb - ); - /* separate flag to avoid annoying validation errors for new scanout objs */ - res->scanout_obj_init = true; -} - +/* called during flush */ void zink_end_batch(struct zink_context *ctx, struct zink_batch *batch) { - if (batch->state->flush_res) - copy_scanout(batch->state, zink_resource(batch->state->flush_res)); if (!ctx->queries_disabled) zink_suspend_queries(ctx, batch); - tc_driver_internal_flush_notify(ctx->tc); struct zink_screen *screen = 
zink_screen(ctx->base.screen); + if (ctx->tc && !ctx->track_renderpasses) + tc_driver_internal_flush_notify(ctx->tc); + struct zink_batch_state *bs; + + /* oom flushing is triggered to handle stupid piglit tests like streaming-texture-leak */ + if (ctx->oom_flush || ctx->batch_states_count > 25) { + assert(!ctx->batch_states_count || ctx->batch_states); + while (ctx->batch_states) { + bs = ctx->batch_states; + struct zink_fence *fence = &bs->fence; + /* once an incomplete state is reached, no more will be complete */ + if (!zink_check_batch_completion(ctx, fence->batch_id)) + break; - ctx->last_fence = &batch->state->fence; - if (ctx->oom_flush || _mesa_hash_table_num_entries(&ctx->batch_states) > 10) { - simple_mtx_lock(&ctx->batch_mtx); - hash_table_foreach(&ctx->batch_states, he) { - struct zink_fence *fence = he->data; - struct zink_batch_state *bs = he->data; - if (zink_check_batch_completion(ctx, fence->batch_id, true)) { - zink_reset_batch_state(ctx, he->data); - _mesa_hash_table_remove(&ctx->batch_states, he); - util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, bs); - } + pop_batch_state(ctx); + zink_reset_batch_state(ctx, bs); + if (ctx->last_free_batch_state) + ctx->last_free_batch_state->next = bs; + else + ctx->free_batch_states = bs; + ctx->last_free_batch_state = bs; } - simple_mtx_unlock(&ctx->batch_mtx); - if (_mesa_hash_table_num_entries(&ctx->batch_states) > 50) + if (ctx->batch_states_count > 50) ctx->oom_flush = true; } + + bs = batch->state; + if (ctx->last_batch_state) + ctx->last_batch_state->next = bs; + else { + assert(!ctx->batch_states); + ctx->batch_states = bs; + } + ctx->last_batch_state = bs; + ctx->batch_states_count++; batch->work_count = 0; + /* this is swapchain presentation semaphore handling */ + if (batch->swapchain) { + if (zink_kopper_acquired(batch->swapchain->obj->dt, batch->swapchain->obj->dt_idx) && !batch->swapchain->obj->present) { + batch->state->present = zink_kopper_present(screen, 
batch->swapchain); + batch->state->swapchain = batch->swapchain; + } + batch->swapchain = NULL; + } + if (screen->device_lost) return; - if (screen->threaded) { - batch->state->queue = screen->thread_queue; - util_queue_add_job(&screen->flush_queue, batch->state, &batch->state->flush_completed, + if (ctx->tc) { + set_foreach(&bs->active_queries, entry) + zink_query_sync(ctx, (void*)entry->key); + } + + set_foreach(&bs->dmabuf_exports, entry) { + struct zink_resource *res = (void*)entry->key; + if (screen->info.have_KHR_synchronization2) { + VkImageMemoryBarrier2 imb; + zink_resource_image_barrier2_init(&imb, res, res->layout, 0, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + imb.srcQueueFamilyIndex = screen->gfx_queue; + imb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_FOREIGN_EXT; + VkDependencyInfo dep = { + VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + NULL, + 0, + 0, + NULL, + 0, + NULL, + 1, + &imb + }; + VKCTX(CmdPipelineBarrier2)(bs->cmdbuf, &dep); + } else { + VkImageMemoryBarrier imb; + zink_resource_image_barrier_init(&imb, res, res->layout, 0, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + imb.srcQueueFamilyIndex = screen->gfx_queue; + imb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_FOREIGN_EXT; + VKCTX(CmdPipelineBarrier)( + bs->cmdbuf, + res->obj->access_stage, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, + 0, NULL, + 0, NULL, + 1, &imb + ); + } + res->queue = VK_QUEUE_FAMILY_FOREIGN_EXT; + + for (; res; res = zink_resource(res->base.b.next)) { + VkSemaphore sem = zink_create_exportable_semaphore(screen); + if (sem) + util_dynarray_append(&ctx->batch.state->signal_semaphores, VkSemaphore, sem); + } + } + + if (screen->threaded_submit) { + util_queue_add_job(&screen->flush_queue, bs, &bs->flush_completed, submit_queue, post_submit, 0); } else { - batch->state->queue = screen->queue; - submit_queue(batch->state, NULL, 0); - post_submit(batch->state, NULL, 0); + submit_queue(bs, NULL, 0); + post_submit(bs, NULL, 0); + } +#ifdef HAVE_RENDERDOC_APP_H + if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY) 
&& screen->renderdoc_capturing && p_atomic_read(&screen->renderdoc_frame) > screen->renderdoc_capture_end) { + screen->renderdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL); + screen->renderdoc_capturing = false; } +#endif } -void -zink_batch_resource_usage_set(struct zink_batch *batch, struct zink_resource *res, bool write) +static int +batch_find_resource(struct zink_batch_state *bs, struct zink_resource_object *obj, struct zink_batch_obj_list *list) { - zink_resource_usage_set(res, batch->state, write); - if (write && res->scanout_obj) - batch->state->scanout_flush = true; - /* multiple array entries are fine */ - if (!res->obj->coherent && res->obj->persistent_maps) - util_dynarray_append(&batch->state->persistent_resources, struct zink_resource_object*, res->obj); - - batch->has_work = true; + unsigned hash = obj->bo->unique_id & (BUFFER_HASHLIST_SIZE-1); + int buffer_index = bs->buffer_indices_hashlist[hash]; + + /* not found or found */ + if (buffer_index < 0 || (buffer_index < list->num_buffers && list->objs[buffer_index] == obj)) + return buffer_index; + + /* Hash collision, look for the BO in the list of list->objs linearly. */ + for (int i = list->num_buffers - 1; i >= 0; i--) { + if (list->objs[i] == obj) { + /* Put this buffer in the hash list. + * This will prevent additional hash collisions if there are + * several consecutive lookup_buffer calls for the same buffer. + * + * Example: Assuming list->objs A,B,C collide in the hash list, + * the following sequence of list->objs: + * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC + * will collide here: ^ and here: ^, + * meaning that we should get very few collisions in the end. 
*/ + bs->buffer_indices_hashlist[hash] = i & (BUFFER_HASHLIST_SIZE-1); + return i; + } + } + return -1; } void zink_batch_reference_resource_rw(struct zink_batch *batch, struct zink_resource *res, bool write) { - /* if the resource already has usage of any sort set for this batch, we can skip hashing */ - if (!zink_batch_usage_matches(res->obj->reads, batch->state) && - !zink_batch_usage_matches(res->obj->writes, batch->state)) { + /* if the resource already has usage of any sort set for this batch, */ + if (!zink_resource_usage_matches(res, batch->state) || + /* or if it's bound somewhere */ + !zink_resource_has_binds(res)) + /* then it already has a batch ref and doesn't need one here */ zink_batch_reference_resource(batch, res); - } - zink_batch_resource_usage_set(batch, res, write); + zink_batch_resource_usage_set(batch, res, write, res->obj->is_buffer); } -bool +void +zink_batch_add_wait_semaphore(struct zink_batch *batch, VkSemaphore sem) +{ + util_dynarray_append(&batch->state->acquires, VkSemaphore, sem); +} + +static bool batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr) { bool found = false; @@ -624,6 +952,7 @@ batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr) return !found; } +/* this is a vague, handwave-y estimate */ ALWAYS_INLINE static void check_oom_flush(struct zink_context *ctx, const struct zink_batch *batch) { @@ -634,87 +963,131 @@ check_oom_flush(struct zink_context *ctx, const struct zink_batch *batch) } } +/* this adds a ref (batch tracking) */ void zink_batch_reference_resource(struct zink_batch *batch, struct zink_resource *res) { - if (!batch_ptr_add_usage(batch, batch->state->resources, res->obj)) - return; - pipe_reference(NULL, &res->obj->reference); - batch->state->resource_size += res->obj->size; - check_oom_flush(batch->state->ctx, batch); - batch->has_work = true; + if (!zink_batch_reference_resource_move(batch, res)) + zink_resource_object_reference(NULL, NULL, res->obj); } -void +/* 
this adds batch usage */ +bool zink_batch_reference_resource_move(struct zink_batch *batch, struct zink_resource *res) { - if (!batch_ptr_add_usage(batch, batch->state->resources, res->obj)) - return; - batch->state->resource_size += res->obj->size; - check_oom_flush(batch->state->ctx, batch); - batch->has_work = true; -} + struct zink_batch_state *bs = batch->state; + + simple_mtx_lock(&batch->ref_lock); + /* swapchains are special */ + if (zink_is_swapchain(res)) { + struct zink_resource_object **swapchains = bs->swapchain_obj.data; + unsigned count = util_dynarray_num_elements(&bs->swapchain_obj, struct zink_resource_object*); + for (unsigned i = 0; i < count; i++) { + if (swapchains[i] == res->obj) { + simple_mtx_unlock(&batch->ref_lock); + return true; + } + } + util_dynarray_append(&bs->swapchain_obj, struct zink_resource_object*, res->obj); + simple_mtx_unlock(&batch->ref_lock); + return false; + } + /* Fast exit for no-op calls. + * This is very effective with suballocators and linear uploaders that + * are outside of the winsys. 
+ */ + if (res->obj == bs->last_added_obj) { + simple_mtx_unlock(&batch->ref_lock); + return true; + } -void -zink_batch_reference_bufferview(struct zink_batch *batch, struct zink_buffer_view *buffer_view) -{ - if (!batch_ptr_add_usage(batch, batch->state->bufferviews, buffer_view)) - return; - pipe_reference(NULL, &buffer_view->reference); - batch->has_work = true; -} + struct zink_bo *bo = res->obj->bo; + struct zink_batch_obj_list *list; + if (!(res->base.b.flags & PIPE_RESOURCE_FLAG_SPARSE)) { + if (!bo->mem) { + list = &bs->slab_objs; + } else { + list = &bs->real_objs; + } + } else { + list = &bs->sparse_objs; + } + int idx = batch_find_resource(bs, res->obj, list); + if (idx >= 0) { + simple_mtx_unlock(&batch->ref_lock); + return true; + } -void -zink_batch_reference_surface(struct zink_batch *batch, struct zink_surface *surface) -{ - if (!batch_ptr_add_usage(batch, batch->state->surfaces, surface)) - return; - struct pipe_surface *surf = NULL; - pipe_surface_reference(&surf, &surface->base); + if (list->num_buffers >= list->max_buffers) { + unsigned new_max = MAX2(list->max_buffers + 16, (unsigned)(list->max_buffers * 1.3)); + struct zink_resource_object **objs = realloc(list->objs, new_max * sizeof(void*)); + if (!objs) { + /* things are about to go dramatically wrong anyway */ + mesa_loge("zink: buffer list realloc failed due to oom!\n"); + abort(); + } + list->objs = objs; + list->max_buffers = new_max; + } + idx = list->num_buffers++; + list->objs[idx] = res->obj; + unsigned hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1); + bs->buffer_indices_hashlist[hash] = idx & 0x7fff; + bs->last_added_obj = res->obj; + if (!(res->base.b.flags & PIPE_RESOURCE_FLAG_SPARSE)) { + bs->resource_size += res->obj->size; + } else { + /* Sparse backing pages are not directly referenced by the batch as + * there can be a lot of them. 
+ * Instead, they are kept referenced in one of two ways: + * - While they are committed, they are directly referenced from the + * resource's state. + * - Upon de-commit, they are added to the freed_sparse_backing_bos + * list, which will defer destroying the resource until the batch + * performing unbind finishes. + */ + } + check_oom_flush(batch->state->ctx, batch); batch->has_work = true; + simple_mtx_unlock(&batch->ref_lock); + return false; } -void -zink_batch_reference_sampler_view(struct zink_batch *batch, - struct zink_sampler_view *sv) -{ - if (sv->base.target == PIPE_BUFFER) - zink_batch_reference_bufferview(batch, sv->buffer_view); - else - zink_batch_reference_surface(batch, sv->image_view); -} - +/* this is how programs achieve deferred deletion */ void zink_batch_reference_program(struct zink_batch *batch, struct zink_program *pg) { if (zink_batch_usage_matches(pg->batch_uses, batch->state) || - !batch_ptr_add_usage(batch, batch->state->programs, pg)) + !batch_ptr_add_usage(batch, &batch->state->programs, pg)) return; pipe_reference(NULL, &pg->reference); zink_batch_usage_set(&pg->batch_uses, batch->state); batch->has_work = true; } -void -zink_batch_reference_image_view(struct zink_batch *batch, - struct zink_image_view *image_view) +/* a fast (hopefully) way to check whether a given batch has completed */ +bool +zink_screen_usage_check_completion(struct zink_screen *screen, const struct zink_batch_usage *u) { - if (image_view->base.resource->target == PIPE_BUFFER) - zink_batch_reference_bufferview(batch, image_view->buffer_view); - else - zink_batch_reference_surface(batch, image_view->surface); + if (!zink_batch_usage_exists(u)) + return true; + if (zink_batch_usage_is_unflushed(u)) + return false; + + return zink_screen_timeline_wait(screen, u->usage, 0); } +/* an even faster check that doesn't ioctl */ bool -zink_screen_usage_check_completion(struct zink_screen *screen, const struct zink_batch_usage *u) 
+zink_screen_usage_check_completion_fast(struct zink_screen *screen, const struct zink_batch_usage *u) { if (!zink_batch_usage_exists(u)) return true; if (zink_batch_usage_is_unflushed(u)) return false; - return zink_screen_batch_id_wait(screen, u->usage, 0); + return zink_screen_check_last_finished(screen, u->usage); } bool @@ -724,11 +1097,11 @@ zink_batch_usage_check_completion(struct zink_context *ctx, const struct zink_ba return true; if (zink_batch_usage_is_unflushed(u)) return false; - return zink_check_batch_completion(ctx, u->usage, false); + return zink_check_batch_completion(ctx, u->usage); } -void -zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u) +static void +batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u, bool trywait) { if (!zink_batch_usage_exists(u)) return; @@ -737,9 +1110,25 @@ zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u) ctx->base.flush(&ctx->base, NULL, PIPE_FLUSH_HINT_FINISH); else { //multi-context mtx_lock(&u->mtx); - cnd_wait(&u->flush, &u->mtx); + if (trywait) { + struct timespec ts = {0, 10000}; + cnd_timedwait(&u->flush, &u->mtx, &ts); + } else + cnd_wait(&u->flush, &u->mtx); mtx_unlock(&u->mtx); } } zink_wait_on_batch(ctx, u->usage); } + +void +zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u) +{ + batch_usage_wait(ctx, u, false); +} + +void +zink_batch_usage_try_wait(struct zink_context *ctx, struct zink_batch_usage *u) +{ + batch_usage_wait(ctx, u, true); +} diff --git a/src/gallium/drivers/zink/zink_batch.h b/src/gallium/drivers/zink/zink_batch.h index 4b077646369..67ffa9e0b04 100644 --- a/src/gallium/drivers/zink/zink_batch.h +++ b/src/gallium/drivers/zink/zink_batch.h @@ -24,7 +24,8 @@ #ifndef ZINK_BATCH_H #define ZINK_BATCH_H -#include <vulkan/vulkan.h> +#include <vulkan/vulkan_core.h> +#include "zink_types.h" #include "util/list.h" #include "util/set.h" @@ -36,90 +37,6 @@ extern "C" { #endif -struct pipe_reference; - -struct 
zink_buffer_view; -struct zink_context; -struct zink_descriptor_set; -struct zink_image_view; -struct zink_program; -struct zink_render_pass; -struct zink_resource; -struct zink_sampler_view; -struct zink_surface; - -struct zink_batch_usage { - uint32_t usage; - cnd_t flush; - mtx_t mtx; - bool unflushed; -}; - -/* not real api don't use */ -bool -batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr); - -struct zink_batch_state { - struct zink_fence fence; - - struct zink_batch_usage usage; - struct zink_context *ctx; - VkCommandPool cmdpool; - VkCommandBuffer cmdbuf; - VkCommandBuffer barrier_cmdbuf; - - VkQueue queue; //duplicated from batch for threading - VkSemaphore sem; - - struct util_queue_fence flush_completed; - - struct pipe_resource *flush_res; - - struct set *programs; - - struct set *resources; - struct set *surfaces; - struct set *bufferviews; - - struct util_dynarray unref_resources; - - struct util_dynarray persistent_resources; - struct util_dynarray zombie_samplers; - struct util_dynarray dead_framebuffers; - - struct set *active_queries; /* zink_query objects which were active at some point in this batch */ - - struct zink_batch_descriptor_data *dd; - - VkDeviceSize resource_size; - - /* this is a monotonic int used to disambiguate internal fences from their tc fence references */ - unsigned submit_count; - - bool is_device_lost; - bool have_timelines; - bool has_barriers; - bool scanout_flush; -}; - -struct zink_batch { - struct zink_batch_state *state; - - struct zink_batch_usage *last_batch_usage; - - unsigned work_count; - - bool has_work; - bool last_was_compute; - bool in_rp; //renderpass is currently active -}; - - -static inline struct zink_batch_state * -zink_batch_state(struct zink_fence *fence) -{ - return (struct zink_batch_state *)fence; -} void zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs); @@ -145,7 +62,7 @@ void zink_end_batch(struct zink_context *ctx, struct zink_batch *batch); 
void -zink_batch_resource_usage_set(struct zink_batch *batch, struct zink_resource *res, bool write); +zink_batch_add_wait_semaphore(struct zink_batch *batch, VkSemaphore sem); void zink_batch_reference_resource_rw(struct zink_batch *batch, @@ -154,54 +71,43 @@ zink_batch_reference_resource_rw(struct zink_batch *batch, void zink_batch_reference_resource(struct zink_batch *batch, struct zink_resource *res); -void +bool zink_batch_reference_resource_move(struct zink_batch *batch, struct zink_resource *res); void -zink_batch_reference_sampler_view(struct zink_batch *batch, - struct zink_sampler_view *sv); - -void zink_batch_reference_program(struct zink_batch *batch, struct zink_program *pg); void -zink_batch_reference_image_view(struct zink_batch *batch, - struct zink_image_view *image_view); - -void -zink_batch_reference_bufferview(struct zink_batch *batch, struct zink_buffer_view *buffer_view); -void -zink_batch_reference_surface(struct zink_batch *batch, struct zink_surface *surface); - +zink_batch_bind_db(struct zink_context *ctx); void debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr); -static inline bool +static ALWAYS_INLINE bool zink_batch_usage_is_unflushed(const struct zink_batch_usage *u) { return u && u->unflushed; } -static inline void +static ALWAYS_INLINE void zink_batch_usage_unset(struct zink_batch_usage **u, struct zink_batch_state *bs) { (void)p_atomic_cmpxchg((uintptr_t *)u, (uintptr_t)&bs->usage, (uintptr_t)NULL); } -static inline void +static ALWAYS_INLINE void zink_batch_usage_set(struct zink_batch_usage **u, struct zink_batch_state *bs) { *u = &bs->usage; } -static inline bool +static ALWAYS_INLINE bool zink_batch_usage_matches(const struct zink_batch_usage *u, const struct zink_batch_state *bs) { return u == &bs->usage; } -static inline bool +static ALWAYS_INLINE bool zink_batch_usage_exists(const struct zink_batch_usage *u) { return u && (u->usage || u->unflushed); @@ -209,6 +115,8 @@ 
zink_batch_usage_exists(const struct zink_batch_usage *u) bool zink_screen_usage_check_completion(struct zink_screen *screen, const struct zink_batch_usage *u); +bool +zink_screen_usage_check_completion_fast(struct zink_screen *screen, const struct zink_batch_usage *u); bool zink_batch_usage_check_completion(struct zink_context *ctx, const struct zink_batch_usage *u); @@ -216,6 +124,9 @@ zink_batch_usage_check_completion(struct zink_context *ctx, const struct zink_ba void zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u); +void +zink_batch_usage_try_wait(struct zink_context *ctx, struct zink_batch_usage *u); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_blit.c b/src/gallium/drivers/zink/zink_blit.c index 7085c8e033c..f74921a2a1a 100644 --- a/src/gallium/drivers/zink/zink_blit.c +++ b/src/gallium/drivers/zink/zink_blit.c @@ -1,4 +1,8 @@ +#include "zink_clear.h" #include "zink_context.h" +#include "zink_format.h" +#include "zink_inlines.h" +#include "zink_kopper.h" #include "zink_helpers.h" #include "zink_query.h" #include "zink_resource.h" @@ -21,7 +25,7 @@ apply_dst_clears(struct zink_context *ctx, const struct pipe_blit_info *info, bo } static bool -blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info) +blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info, bool *needs_present_readback) { if (util_format_get_mask(info->dst.format) != info->mask || util_format_get_mask(info->src.format) != info->mask || @@ -30,9 +34,17 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info) info->alpha_blend) return false; - if (info->src.box.width != info->dst.box.width || - info->src.box.height != info->dst.box.height || - info->src.box.depth != info->dst.box.depth) + if (info->src.box.width < 0 || + info->dst.box.width < 0 || + info->src.box.height < 0 || + info->dst.box.height < 0 || + info->src.box.depth < 0 || + info->dst.box.depth < 0) + return false; + /* vulkan 
resolves can't downscale */ + if (info->src.box.width > info->dst.box.width || + info->src.box.height > info->dst.box.height || + info->src.box.depth > info->dst.box.depth) return false; if (info->render_condition_enable && @@ -40,26 +52,39 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info) return false; struct zink_resource *src = zink_resource(info->src.resource); + struct zink_resource *use_src = src; struct zink_resource *dst = zink_resource(info->dst.resource); struct zink_screen *screen = zink_screen(ctx->base.screen); + /* aliased/swizzled formats need u_blitter */ if (src->format != zink_get_format(screen, info->src.format) || dst->format != zink_get_format(screen, info->dst.format)) return false; - if (info->dst.resource->target == PIPE_BUFFER) - util_range_add(info->dst.resource, &dst->valid_buffer_range, - info->dst.box.x, info->dst.box.x + info->dst.box.width); + if (src->format != dst->format) + return false; + apply_dst_clears(ctx, info, false); zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box)); + if (src->obj->dt) + *needs_present_readback = zink_kopper_acquire_readback(ctx, src, &use_src); + struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - zink_batch_reference_resource_rw(batch, src, false); + zink_resource_setup_transfer_layouts(ctx, use_src, dst); + VkCommandBuffer cmdbuf = *needs_present_readback ? 
+ ctx->batch.state->cmdbuf : + zink_get_cmdbuf(ctx, src, dst); + if (cmdbuf == ctx->batch.state->cmdbuf) + zink_flush_dgc_if_enabled(ctx); + zink_batch_reference_resource_rw(batch, use_src, false); zink_batch_reference_resource_rw(batch, dst, true); - zink_resource_setup_transfer_layouts(ctx, src, dst); - + bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "blit_resolve(%s->%s, %dx%d->%dx%d)", + util_format_short_name(info->src.format), + util_format_short_name(info->src.format), + info->src.box.width, info->src.box.height, + info->dst.box.width, info->dst.box.height); VkImageResolve region = {0}; region.srcSubresource.aspectMask = src->aspect; @@ -97,23 +122,28 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info) region.extent.width = info->dst.box.width; region.extent.height = info->dst.box.height; region.extent.depth = info->dst.box.depth; - VKCTX(CmdResolveImage)(batch->state->cmdbuf, src->obj->image, src->layout, + if (region.srcOffset.x + region.extent.width >= u_minify(src->base.b.width0, region.srcSubresource.mipLevel)) + region.extent.width = u_minify(src->base.b.width0, region.srcSubresource.mipLevel) - region.srcOffset.x; + if (region.dstOffset.x + region.extent.width >= u_minify(dst->base.b.width0, region.dstSubresource.mipLevel)) + region.extent.width = u_minify(dst->base.b.width0, region.dstSubresource.mipLevel) - region.dstOffset.x; + if (region.srcOffset.y + region.extent.height >= u_minify(src->base.b.height0, region.srcSubresource.mipLevel)) + region.extent.height = u_minify(src->base.b.height0, region.srcSubresource.mipLevel) - region.srcOffset.y; + if (region.dstOffset.y + region.extent.height >= u_minify(dst->base.b.height0, region.dstSubresource.mipLevel)) + region.extent.height = u_minify(dst->base.b.height0, region.dstSubresource.mipLevel) - region.dstOffset.y; + if (region.srcOffset.z + region.extent.depth >= u_minify(src->base.b.depth0, region.srcSubresource.mipLevel)) + region.extent.depth = 
u_minify(src->base.b.depth0, region.srcSubresource.mipLevel) - region.srcOffset.z; + if (region.dstOffset.z + region.extent.depth >= u_minify(dst->base.b.depth0, region.dstSubresource.mipLevel)) + region.extent.depth = u_minify(dst->base.b.depth0, region.dstSubresource.mipLevel) - region.dstOffset.z; + VKCTX(CmdResolveImage)(cmdbuf, use_src->obj->image, src->layout, dst->obj->image, dst->layout, 1, ®ion); + zink_cmd_debug_marker_end(ctx, cmdbuf, marker); return true; } -static VkFormatFeatureFlags -get_resource_features(struct zink_screen *screen, struct zink_resource *res) -{ - VkFormatProperties props = screen->format_props[res->base.b.format]; - return res->optimal_tiling ? props.optimalTilingFeatures : - props.linearTilingFeatures; -} - static bool -blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) +blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *needs_present_readback) { if (util_format_get_mask(info->dst.format) != info->mask || util_format_get_mask(info->src.format) != info->mask || @@ -126,7 +156,7 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) return false; if (util_format_is_depth_or_stencil(info->dst.format) && - info->dst.format != info->src.format) + (info->dst.format != info->src.format || info->filter == PIPE_TEX_FILTER_LINEAR)) return false; /* vkCmdBlitImage must not be used for multisampled source or destination images. 
*/ @@ -134,15 +164,18 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) return false; struct zink_resource *src = zink_resource(info->src.resource); + struct zink_resource *use_src = src; struct zink_resource *dst = zink_resource(info->dst.resource); struct zink_screen *screen = zink_screen(ctx->base.screen); if (src->format != zink_get_format(screen, info->src.format) || dst->format != zink_get_format(screen, info->dst.format)) return false; + if (src->format != VK_FORMAT_A8_UNORM_KHR && zink_format_is_emulated_alpha(info->src.format)) + return false; - if (!(get_resource_features(screen, src) & VK_FORMAT_FEATURE_BLIT_SRC_BIT) || - !(get_resource_features(screen, dst) & VK_FORMAT_FEATURE_BLIT_DST_BIT)) + if (!(src->obj->vkfeats & VK_FORMAT_FEATURE_BLIT_SRC_BIT) || + !(dst->obj->vkfeats & VK_FORMAT_FEATURE_BLIT_DST_BIT)) return false; if ((util_format_is_pure_sint(info->src.format) != @@ -152,22 +185,10 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) return false; if (info->filter == PIPE_TEX_FILTER_LINEAR && - !(get_resource_features(screen, src) & - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) + !(src->obj->vkfeats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) return false; - apply_dst_clears(ctx, info, false); - zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box)); - struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - zink_batch_reference_resource_rw(batch, src, false); - zink_batch_reference_resource_rw(batch, dst, true); - - zink_resource_setup_transfer_layouts(ctx, src, dst); - if (info->dst.resource->target == PIPE_BUFFER) - util_range_add(info->dst.resource, &dst->valid_buffer_range, - info->dst.box.x, info->dst.box.x + info->dst.box.width); VkImageBlit region = {0}; region.srcSubresource.aspectMask = src->aspect; region.srcSubresource.mipLevel = info->src.level; @@ -176,13 +197,19 @@ blit_native(struct zink_context *ctx, const struct 
pipe_blit_info *info) region.srcOffsets[1].x = info->src.box.x + info->src.box.width; region.srcOffsets[1].y = info->src.box.y + info->src.box.height; - switch (src->base.b.target) { + enum pipe_texture_target src_target = src->base.b.target; + if (src->need_2D) + src_target = src_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY; + switch (src_target) { case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_1D_ARRAY: /* these use layer */ region.srcSubresource.baseArrayLayer = info->src.box.z; + /* VUID-vkCmdBlitImage-srcImage-00240 */ + if (region.srcSubresource.baseArrayLayer && dst->base.b.target == PIPE_TEXTURE_3D) + return false; region.srcSubresource.layerCount = info->src.box.depth; region.srcOffsets[0].z = 0; region.srcOffsets[1].z = 1; @@ -211,13 +238,19 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) assert(region.dstOffsets[0].x != region.dstOffsets[1].x); assert(region.dstOffsets[0].y != region.dstOffsets[1].y); - switch (dst->base.b.target) { + enum pipe_texture_target dst_target = dst->base.b.target; + if (dst->need_2D) + dst_target = dst_target == PIPE_TEXTURE_1D ? 
PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY; + switch (dst_target) { case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_1D_ARRAY: /* these use layer */ region.dstSubresource.baseArrayLayer = info->dst.box.z; + /* VUID-vkCmdBlitImage-srcImage-00240 */ + if (region.dstSubresource.baseArrayLayer && src->base.b.target == PIPE_TEXTURE_3D) + return false; region.dstSubresource.layerCount = info->dst.box.depth; region.dstOffsets[0].z = 0; region.dstOffsets[1].z = 1; @@ -238,14 +271,57 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) } assert(region.dstOffsets[0].z != region.dstOffsets[1].z); - VKCTX(CmdBlitImage)(batch->state->cmdbuf, src->obj->image, src->layout, + apply_dst_clears(ctx, info, false); + zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box)); + + if (src->obj->dt) + *needs_present_readback = zink_kopper_acquire_readback(ctx, src, &use_src); + + struct zink_batch *batch = &ctx->batch; + zink_resource_setup_transfer_layouts(ctx, use_src, dst); + VkCommandBuffer cmdbuf = *needs_present_readback ? 
+ ctx->batch.state->cmdbuf : + zink_get_cmdbuf(ctx, src, dst); + if (cmdbuf == ctx->batch.state->cmdbuf) + zink_flush_dgc_if_enabled(ctx); + zink_batch_reference_resource_rw(batch, use_src, false); + zink_batch_reference_resource_rw(batch, dst, true); + + bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "blit_native(%s->%s, %dx%d->%dx%d)", + util_format_short_name(info->src.format), + util_format_short_name(info->src.format), + info->src.box.width, info->src.box.height, + info->dst.box.width, info->dst.box.height); + + VKCTX(CmdBlitImage)(cmdbuf, use_src->obj->image, src->layout, dst->obj->image, dst->layout, 1, ®ion, zink_filter(info->filter)); + zink_cmd_debug_marker_end(ctx, cmdbuf, marker); + return true; } +static bool +try_copy_region(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_resource *src = zink_resource(info->src.resource); + struct zink_resource *dst = zink_resource(info->dst.resource); + /* if we're copying between resources with matching aspects then we can probably just copy_region */ + if (src->aspect != dst->aspect) + return false; + struct pipe_blit_info new_info = *info; + + if (src->aspect & VK_IMAGE_ASPECT_STENCIL_BIT && + new_info.render_condition_enable && + !ctx->render_condition_active) + new_info.render_condition_enable = false; + + return util_try_blit_via_copy_region(pctx, &new_info, ctx->render_condition_active); +} + void zink_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) @@ -254,9 +330,14 @@ zink_blit(struct pipe_context *pctx, const struct util_format_description *src_desc = util_format_description(info->src.format); const struct util_format_description *dst_desc = util_format_description(info->dst.format); - if (info->render_condition_enable && - unlikely(!zink_screen(pctx->screen)->info.have_EXT_conditional_rendering && !zink_check_conditional_render(ctx))) - return; + struct zink_resource *src = 
zink_resource(info->src.resource); + struct zink_resource *use_src = src; + struct zink_resource *dst = zink_resource(info->dst.resource); + bool needs_present_readback = false; + if (zink_is_swapchain(dst)) { + if (!zink_kopper_acquire(ctx, dst, UINT64_MAX)) + return; + } if (src_desc == dst_desc || src_desc->nr_channels != 4 || src_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || @@ -266,47 +347,158 @@ zink_blit(struct pipe_context *pctx, */ if (info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1) { - if (blit_resolve(ctx, info)) - return; + if (blit_resolve(ctx, info, &needs_present_readback)) + goto end; } else { - if (blit_native(ctx, info)) - return; + if (try_copy_region(pctx, info)) + goto end; + if (blit_native(ctx, info, &needs_present_readback)) + goto end; } } - struct zink_resource *src = zink_resource(info->src.resource); - struct zink_resource *dst = zink_resource(info->dst.resource); - /* if we're copying between resources with matching aspects then we can probably just copy_region */ - if (src->aspect == dst->aspect) { - struct pipe_blit_info new_info = *info; - if (src->aspect & VK_IMAGE_ASPECT_STENCIL_BIT && - new_info.render_condition_enable && - !ctx->render_condition_active) - new_info.render_condition_enable = false; - if (util_try_blit_via_copy_region(pctx, &new_info)) - return; + bool stencil_blit = false; + if (!util_blitter_is_blit_supported(ctx->blitter, info)) { + if (util_format_is_depth_or_stencil(info->src.resource->format)) { + if (info->mask & PIPE_MASK_Z) { + struct pipe_blit_info depth_blit = *info; + depth_blit.mask = PIPE_MASK_Z; + if (util_blitter_is_blit_supported(ctx->blitter, &depth_blit)) { + zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES); + util_blitter_blit(ctx->blitter, &depth_blit); + } else { + mesa_loge("ZINK: depth blit unsupported %s -> %s", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + } + } + 
if (info->mask & PIPE_MASK_S) + stencil_blit = true; + } + if (!stencil_blit) { + mesa_loge("ZINK: blit unsupported %s -> %s", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + goto end; + } } - if (!util_blitter_is_blit_supported(ctx->blitter, info)) { - debug_printf("blit unsupported %s -> %s\n", - util_format_short_name(info->src.resource->format), - util_format_short_name(info->dst.resource->format)); - return; + if (src->obj->dt) { + zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box)); + needs_present_readback = zink_kopper_acquire_readback(ctx, src, &use_src); } /* this is discard_only because we're about to start a renderpass that will * flush all pending clears anyway */ apply_dst_clears(ctx, info, true); + zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box)); + unsigned rp_clears_enabled = ctx->rp_clears_enabled; + unsigned clears_enabled = ctx->clears_enabled; + if (!dst->fb_bind_count) { + /* avoid applying clears from fb unbind by storing and re-setting them after the blit */ + ctx->rp_clears_enabled = 0; + ctx->clears_enabled = 0; + } else { + unsigned bit; + /* convert to PIPE_CLEAR_XYZ */ + if (dst->fb_binds & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) + bit = PIPE_CLEAR_DEPTHSTENCIL; + else + bit = dst->fb_binds << 2; + rp_clears_enabled &= ~bit; + clears_enabled &= ~bit; + ctx->rp_clears_enabled &= bit; + ctx->clears_enabled &= bit; + } - if (info->dst.resource->target == PIPE_BUFFER) - util_range_add(info->dst.resource, &dst->valid_buffer_range, - info->dst.box.x, info->dst.box.x + info->dst.box.width); + /* this will draw a full-resource quad, so ignore existing data */ + bool whole = util_blit_covers_whole_resource(info); + if (whole) + pctx->invalidate_resource(pctx, info->dst.resource); + + zink_flush_dgc_if_enabled(ctx); + ctx->unordered_blitting = !(info->render_condition_enable && ctx->render_condition_active) && + 
zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering && + !needs_present_readback && + zink_get_cmdbuf(ctx, src, dst) == ctx->batch.state->reordered_cmdbuf; + VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf; + VkPipeline pipeline = ctx->gfx_pipeline_state.pipeline; + bool in_rp = ctx->batch.in_rp; + uint64_t tc_data = ctx->dynamic_fb.tc_info.data; + bool queries_disabled = ctx->queries_disabled; + bool rp_changed = ctx->rp_changed || (!ctx->fb_state.zsbuf && util_format_is_depth_or_stencil(info->dst.format)); + unsigned ds3_states = ctx->ds3_states; + bool rp_tc_info_updated = ctx->rp_tc_info_updated; + if (ctx->unordered_blitting) { + /* for unordered blit, swap the unordered cmdbuf for the main one for the whole op to avoid conditional hell */ + ctx->batch.state->cmdbuf = ctx->batch.state->reordered_cmdbuf; + ctx->batch.in_rp = false; + ctx->rp_changed = true; + ctx->queries_disabled = true; + ctx->batch.state->has_barriers = true; + ctx->pipeline_changed[0] = true; + zink_reset_ds3_states(ctx); + zink_select_draw_vbo(ctx); + } zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES); - - util_blitter_blit(ctx->blitter, info); + if (zink_format_needs_mutable(info->src.format, info->src.resource->format)) + zink_resource_object_init_mutable(ctx, src); + if (zink_format_needs_mutable(info->dst.format, info->dst.resource->format)) + zink_resource_object_init_mutable(ctx, dst); + zink_blit_barriers(ctx, use_src, dst, whole); + ctx->blitting = true; + + if (stencil_blit) { + struct pipe_surface *dst_view, dst_templ; + util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z); + dst_view = pctx->create_surface(pctx, info->dst.resource, &dst_templ); + + util_blitter_clear_depth_stencil(ctx->blitter, dst_view, PIPE_CLEAR_STENCIL, + 0, 0, info->dst.box.x, info->dst.box.y, + info->dst.box.width, info->dst.box.height); + zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | 
ZINK_BLIT_SAVE_TEXTURES | ZINK_BLIT_SAVE_FS_CONST_BUF); + util_blitter_stencil_fallback(ctx->blitter, + info->dst.resource, + info->dst.level, + &info->dst.box, + info->src.resource, + info->src.level, + &info->src.box, + info->scissor_enable ? &info->scissor : NULL); + + pipe_surface_release(pctx, &dst_view); + } else { + struct pipe_blit_info new_info = *info; + new_info.src.resource = &use_src->base.b; + util_blitter_blit(ctx->blitter, &new_info); + } + ctx->blitting = false; + ctx->rp_clears_enabled = rp_clears_enabled; + ctx->clears_enabled = clears_enabled; + if (ctx->unordered_blitting) { + zink_batch_no_rp(ctx); + ctx->batch.in_rp = in_rp; + ctx->gfx_pipeline_state.rp_state = zink_update_rendering_info(ctx); + ctx->rp_changed = rp_changed; + ctx->rp_tc_info_updated |= rp_tc_info_updated; + ctx->queries_disabled = queries_disabled; + ctx->dynamic_fb.tc_info.data = tc_data; + ctx->batch.state->cmdbuf = cmdbuf; + ctx->gfx_pipeline_state.pipeline = pipeline; + ctx->pipeline_changed[0] = true; + ctx->ds3_states = ds3_states; + zink_select_draw_vbo(ctx); + } + ctx->unordered_blitting = false; +end: + if (needs_present_readback) { + src->obj->unordered_read = false; + dst->obj->unordered_write = false; + zink_kopper_present_readback(ctx, src); + } } /* similar to radeonsi */ @@ -316,24 +508,27 @@ zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags) util_blitter_save_vertex_elements(ctx->blitter, ctx->element_state); util_blitter_save_viewport(ctx->blitter, ctx->vp_state.viewport_states); - util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffers); - util_blitter_save_vertex_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_VERTEX]); - util_blitter_save_tessctrl_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]); - util_blitter_save_tesseval_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]); - util_blitter_save_geometry_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_GEOMETRY]); + 
util_blitter_save_vertex_buffers(ctx->blitter, ctx->vertex_buffers, + util_last_bit(ctx->gfx_pipeline_state.vertex_buffers_enabled_mask)); + util_blitter_save_vertex_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_VERTEX]); + util_blitter_save_tessctrl_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_TESS_CTRL]); + util_blitter_save_tesseval_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_TESS_EVAL]); + util_blitter_save_geometry_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_GEOMETRY]); util_blitter_save_rasterizer(ctx->blitter, ctx->rast_state); util_blitter_save_so_targets(ctx->blitter, ctx->num_so_targets, ctx->so_targets); + if (flags & ZINK_BLIT_SAVE_FS_CONST_BUF) + util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[MESA_SHADER_FRAGMENT]); + if (flags & ZINK_BLIT_SAVE_FS) { - util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[PIPE_SHADER_FRAGMENT]); util_blitter_save_blend(ctx->blitter, ctx->gfx_pipeline_state.blend_state); util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->dsa_state); util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); - util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask); + util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask, ctx->gfx_pipeline_state.min_samples + 1); util_blitter_save_scissor(ctx->blitter, ctx->vp_state.scissor_states); /* also util_blitter_save_window_rectangles when we have that? 
*/ - util_blitter_save_fragment_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]); + util_blitter_save_fragment_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_FRAGMENT]); } if (flags & ZINK_BLIT_SAVE_FB) @@ -342,27 +537,83 @@ zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags) if (flags & ZINK_BLIT_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states(ctx->blitter, - ctx->di.num_samplers[PIPE_SHADER_FRAGMENT], - (void**)ctx->sampler_states[PIPE_SHADER_FRAGMENT]); + ctx->di.num_samplers[MESA_SHADER_FRAGMENT], + (void**)ctx->sampler_states[MESA_SHADER_FRAGMENT]); util_blitter_save_fragment_sampler_views(ctx->blitter, - ctx->di.num_sampler_views[PIPE_SHADER_FRAGMENT], - ctx->sampler_views[PIPE_SHADER_FRAGMENT]); + ctx->di.num_sampler_views[MESA_SHADER_FRAGMENT], + ctx->sampler_views[MESA_SHADER_FRAGMENT]); } if (flags & ZINK_BLIT_NO_COND_RENDER && ctx->render_condition_active) zink_stop_conditional_render(ctx); } +void +zink_blit_barriers(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst, bool whole_dst) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (src && zink_is_swapchain(src)) { + if (!zink_kopper_acquire(ctx, src, UINT64_MAX)) + return; + } else if (dst && zink_is_swapchain(dst)) { + if (!zink_kopper_acquire(ctx, dst, UINT64_MAX)) + return; + } + + VkAccessFlagBits flags; + VkPipelineStageFlagBits pipeline; + if (util_format_is_depth_or_stencil(dst->base.b.format)) { + flags = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + if (!whole_dst) + flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + pipeline = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + } else { + flags = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + if (!whole_dst) + flags |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + pipeline = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + } + if (src == dst) { + VkImageLayout layout = 
zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout ? + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : + VK_IMAGE_LAYOUT_GENERAL; + screen->image_barrier(ctx, src, layout, VK_ACCESS_SHADER_READ_BIT | flags, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | pipeline); + } else { + if (src) { + VkImageLayout layout = util_format_is_depth_or_stencil(src->base.b.format) && + src->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + screen->image_barrier(ctx, src, layout, + VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + if (!ctx->unordered_blitting) + src->obj->unordered_read = false; + } + VkImageLayout layout = util_format_is_depth_or_stencil(dst->base.b.format) ? + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + screen->image_barrier(ctx, dst, layout, flags, pipeline); + } + if (!ctx->unordered_blitting) + dst->obj->unordered_read = dst->obj->unordered_write = false; +} + bool zink_blit_region_fills(struct u_rect region, unsigned width, unsigned height) { struct u_rect intersect = {0, width, 0, height}; - - if (!u_rect_test_intersection(®ion, &intersect)) + struct u_rect r = { + MIN2(region.x0, region.x1), + MAX2(region.x0, region.x1), + MIN2(region.y0, region.y1), + MAX2(region.y0, region.y1), + }; + + if (!u_rect_test_intersection(&r, &intersect)) /* is this even a thing? 
*/ return false; - u_rect_find_intersection(®ion, &intersect); + u_rect_find_intersection(&r, &intersect); if (intersect.x0 != 0 || intersect.y0 != 0 || intersect.x1 != width || intersect.y1 != height) return false; @@ -373,11 +624,23 @@ zink_blit_region_fills(struct u_rect region, unsigned width, unsigned height) bool zink_blit_region_covers(struct u_rect region, struct u_rect covers) { + struct u_rect r = { + MIN2(region.x0, region.x1), + MAX2(region.x0, region.x1), + MIN2(region.y0, region.y1), + MAX2(region.y0, region.y1), + }; + struct u_rect c = { + MIN2(covers.x0, covers.x1), + MAX2(covers.x0, covers.x1), + MIN2(covers.y0, covers.y1), + MAX2(covers.y0, covers.y1), + }; struct u_rect intersect; - if (!u_rect_test_intersection(®ion, &covers)) + if (!u_rect_test_intersection(&r, &c)) return false; - u_rect_union(&intersect, ®ion, &covers); - return intersect.x0 == covers.x0 && intersect.y0 == covers.y0 && - intersect.x1 == covers.x1 && intersect.y1 == covers.y1; + u_rect_union(&intersect, &r, &c); + return intersect.x0 == c.x0 && intersect.y0 == c.y0 && + intersect.x1 == c.x1 && intersect.y1 == c.y1; } diff --git a/src/gallium/drivers/zink/zink_bo.c b/src/gallium/drivers/zink/zink_bo.c index e673efefb3e..97fbae6de0d 100644 --- a/src/gallium/drivers/zink/zink_bo.c +++ b/src/gallium/drivers/zink/zink_bo.c @@ -29,11 +29,17 @@ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> */ +#include "zink_context.h" #include "zink_bo.h" #include "zink_resource.h" #include "zink_screen.h" #include "util/u_hash_table.h" +#if !defined(__APPLE__) && !defined(_WIN32) +#define ZINK_USE_DMABUF +#include <xf86drm.h> +#endif + struct zink_bo; struct zink_sparse_backing_chunk { @@ -63,7 +69,6 @@ struct zink_sparse_commitment { struct zink_slab { struct pb_slab base; - unsigned entry_size; struct zink_bo *buffer; struct zink_bo *entries; }; @@ -120,9 +125,19 @@ bo_destroy(struct zink_screen *screen, struct pb_buffer *pbuf) { struct zink_bo *bo = zink_bo(pbuf); - 
simple_mtx_lock(&screen->pb.bo_export_table_lock); - _mesa_hash_table_remove_key(screen->pb.bo_export_table, bo); - simple_mtx_unlock(&screen->pb.bo_export_table_lock); +#ifdef ZINK_USE_DMABUF + if (bo->mem && !bo->u.real.use_reusable_pool) { + simple_mtx_lock(&bo->u.real.export_lock); + list_for_each_entry_safe(struct bo_export, export, &bo->u.real.exports, link) { + struct drm_gem_close args = { .handle = export->gem_handle }; + drmIoctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &args); + list_del(&export->link); + free(export); + } + simple_mtx_unlock(&bo->u.real.export_lock); + simple_mtx_destroy(&bo->u.real.export_lock); + } +#endif if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) { bo->u.real.map_count = 1; @@ -141,7 +156,7 @@ bo_can_reclaim(struct zink_screen *screen, struct pb_buffer *pbuf) { struct zink_bo *bo = zink_bo(pbuf); - return zink_screen_usage_check_completion(screen, bo->reads) && zink_screen_usage_check_completion(screen, bo->writes); + return zink_screen_usage_check_completion(screen, bo->reads.u) && zink_screen_usage_check_completion(screen, bo->writes.u); } static bool @@ -156,9 +171,9 @@ static void bo_slab_free(struct zink_screen *screen, struct pb_slab *pslab) { struct zink_slab *slab = zink_slab(pslab); - ASSERTED unsigned slab_size = slab->buffer->base.size; + ASSERTED unsigned slab_size = slab->buffer->base.base.size; - assert(slab->base.num_entries * slab->entry_size <= slab_size); + assert(slab->base.num_entries * slab->base.entry_size <= slab_size); FREE(slab->entries); zink_bo_unref(screen, slab->buffer); FREE(slab); @@ -174,19 +189,21 @@ bo_slab_destroy(struct zink_screen *screen, struct pb_buffer *pbuf) //if (bo->base.usage & RADEON_FLAG_ENCRYPTED) //pb_slab_free(get_slabs(screen, bo->base.size, RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry); //else - pb_slab_free(get_slabs(screen, bo->base.size, 0), &bo->u.slab.entry); + pb_slab_free(get_slabs(screen, bo->base.base.size, 0), &bo->u.slab.entry); } -static void +static bool 
clean_up_buffer_managers(struct zink_screen *screen) { + unsigned num_reclaims = 0; for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { - pb_slabs_reclaim(&screen->pb.bo_slabs[i]); + num_reclaims += pb_slabs_reclaim(&screen->pb.bo_slabs[i]); //if (screen->info.has_tmz_support) //pb_slabs_reclaim(&screen->bo_slabs_encrypted[i]); } - pb_cache_release_all_buffers(&screen->pb.bo_cache); + num_reclaims += pb_cache_release_all_buffers(&screen->pb.bo_cache); + return !!num_reclaims; } static unsigned @@ -211,11 +228,11 @@ bo_destroy_or_cache(struct zink_screen *screen, struct pb_buffer *pbuf) struct zink_bo *bo = zink_bo(pbuf); assert(bo->mem); /* slab buffers have a separate vtbl */ - bo->reads = NULL; - bo->writes = NULL; + bo->reads.u = NULL; + bo->writes.u = NULL; if (bo->u.real.use_reusable_pool) - pb_cache_add_buffer(bo->cache_entry); + pb_cache_add_buffer(&screen->pb.bo_cache, bo->cache_entry); else bo_destroy(screen, pbuf); } @@ -231,52 +248,84 @@ bo_create_internal(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, + unsigned mem_type_idx, unsigned flags, const void *pNext) { - struct zink_bo *bo; + struct zink_bo *bo = NULL; bool init_pb_cache; - /* too big for vk alloc */ - if (size > UINT32_MAX) - return NULL; - alignment = get_optimal_alignment(screen, size, alignment); - /* all non-suballocated bo can cache */ - init_pb_cache = true; - - bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry)); - if (!bo) { - return NULL; - } - - if (init_pb_cache) { - bo->u.real.use_reusable_pool = true; - pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base, heap); - } + VkMemoryAllocateFlagsInfo ai; + ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; + ai.pNext = pNext; + ai.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + ai.deviceMask = 0; + if (screen->info.have_KHR_buffer_device_address) + pNext = &ai; + + VkMemoryPriorityAllocateInfoEXT prio = { + 
VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT, + pNext, + (flags & ZINK_ALLOC_NO_SUBALLOC) ? 1.0 : 0.5, + }; + if (screen->info.have_EXT_memory_priority) + pNext = &prio; VkMemoryAllocateInfo mai; mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; mai.pNext = pNext; mai.allocationSize = size; - mai.memoryTypeIndex = screen->heap_map[heap]; + mai.memoryTypeIndex = mem_type_idx; if (screen->info.mem_props.memoryTypes[mai.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment); - mai.allocationSize = align(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment); + mai.allocationSize = align64(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment); + } + unsigned vk_heap_idx = screen->info.mem_props.memoryTypes[mem_type_idx].heapIndex; + if (mai.allocationSize > screen->info.mem_props.memoryHeaps[vk_heap_idx].size) { + mesa_loge("zink: can't allocate %"PRIu64" bytes from heap that's only %"PRIu64" bytes!\n", mai.allocationSize, screen->info.mem_props.memoryHeaps[vk_heap_idx].size); + return NULL; } + + /* all non-suballocated bo can cache */ + init_pb_cache = !pNext; + + if (!bo) + bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry)); + if (!bo) { + return NULL; + } + VkResult ret = VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &bo->mem); - if (!zink_screen_handle_vkresult(screen, ret)) + if (!zink_screen_handle_vkresult(screen, ret)) { + mesa_loge("zink: couldn't allocate memory: heap=%u size=%" PRIu64, heap, size); + if (zink_debug & ZINK_DEBUG_MEM) { + zink_debug_mem_print_stats(screen); + /* abort with mem debug to allow debugging */ + abort(); + } goto fail; + } + + if (init_pb_cache) { + bo->u.real.use_reusable_pool = true; + pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base.base, mem_type_idx); + } else { +#ifdef ZINK_USE_DMABUF + list_inithead(&bo->u.real.exports); + 
simple_mtx_init(&bo->u.real.export_lock, mtx_plain); +#endif + } + simple_mtx_init(&bo->lock, mtx_plain); - pipe_reference_init(&bo->base.reference, 1); - bo->base.alignment_log2 = util_logbase2(alignment); - bo->base.size = size; + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.alignment_log2 = util_logbase2(alignment); + bo->base.base.size = mai.allocationSize; bo->base.vtbl = &bo_vtbl; - bo->base.placement = vk_domain_from_heap(heap); - bo->base.usage = flags; - bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id); + bo->base.base.placement = mem_type_idx; + bo->base.base.usage = flags; return bo; @@ -333,15 +382,15 @@ sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo, return NULL; } - assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE)); + assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE)); - size = MIN3(bo->base.size / 16, + size = MIN3(bo->base.base.size / 16, 8 * 1024 * 1024, - bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE); + bo->base.base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE); size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE); buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE, - bo->base.placement, ZINK_ALLOC_NO_SUBALLOC, NULL); + ZINK_HEAP_DEVICE_LOCAL, 0, screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][0], NULL); if (!buf) { FREE(best_backing->chunks); FREE(best_backing); @@ -349,7 +398,7 @@ sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo, } /* We might have gotten a bigger buffer than requested via caching. 
*/ - pages = buf->size / ZINK_SPARSE_BUFFER_PAGE_SIZE; + pages = buf->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE; best_backing->bo = zink_bo(buf); best_backing->num_chunks = 1; @@ -380,7 +429,7 @@ static void sparse_free_backing_buffer(struct zink_screen *screen, struct zink_bo *bo, struct zink_sparse_backing *backing) { - bo->u.sparse.num_backing_pages -= backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE; + bo->u.sparse.num_backing_pages -= backing->bo->base.base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE; list_del(&backing->list); zink_bo_unref(screen, backing->bo); @@ -447,7 +496,7 @@ sparse_backing_free(struct zink_screen *screen, struct zink_bo *bo, } if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 && - backing->chunks[0].end == backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE) + backing->chunks[0].end == backing->bo->base.base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE) sparse_free_backing_buffer(screen, bo, backing); return true; @@ -458,7 +507,7 @@ bo_sparse_destroy(struct zink_screen *screen, struct pb_buffer *pbuf) { struct zink_bo *bo = zink_bo(pbuf); - assert(!bo->mem && bo->base.usage & ZINK_ALLOC_SPARSE); + assert(!bo->mem && bo->base.base.usage & ZINK_ALLOC_SPARSE); while (!list_is_empty(&bo->u.sparse.backing)) { sparse_free_backing_buffer(screen, bo, @@ -494,13 +543,15 @@ bo_sparse_create(struct zink_screen *screen, uint64_t size) return NULL; simple_mtx_init(&bo->lock, mtx_plain); - pipe_reference_init(&bo->base.reference, 1); - bo->base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE); - bo->base.size = size; + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE); + bo->base.base.size = size; bo->base.vtbl = &bo_sparse_vtbl; - bo->base.placement = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + unsigned placement = zink_mem_type_idx_from_types(screen, ZINK_HEAP_DEVICE_LOCAL_SPARSE, UINT32_MAX); + assert(placement != UINT32_MAX); + bo->base.base.placement = 
placement; bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id); - bo->base.usage = ZINK_ALLOC_SPARSE; + bo->base.base.usage = ZINK_ALLOC_SPARSE; bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE); bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages, @@ -519,7 +570,7 @@ error_alloc_commitments: } struct pb_buffer * -zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext) +zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, unsigned mem_type_idx, const void *pNext) { struct zink_bo *bo; /* pull in sparse flag */ @@ -527,9 +578,9 @@ zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, en //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ? //screen->bo_slabs_encrypted : screen->bo_slabs; - struct pb_slabs *slabs = screen->pb.bo_slabs; + struct pb_slabs *bo_slabs = screen->pb.bo_slabs; - struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1]; + struct pb_slabs *last_slab = &bo_slabs[NUM_SLAB_ALLOCATORS - 1]; unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1); /* Sub-allocate small buffers from slabs. 
*/ @@ -563,20 +614,33 @@ zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, en } struct pb_slabs *slabs = get_slabs(screen, alloc_size, flags); - entry = pb_slab_alloc(slabs, alloc_size, heap); + bool reclaim_all = false; + if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE && !screen->resizable_bar) { + unsigned low_bound = 128 * 1024 * 1024; //128MB is a very small BAR + if (screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) + low_bound *= 2; //nvidia has fat textures or something + unsigned vk_heap_idx = screen->info.mem_props.memoryTypes[mem_type_idx].heapIndex; + reclaim_all = screen->info.mem_props.memoryHeaps[vk_heap_idx].size <= low_bound; + if (reclaim_all) + reclaim_all = clean_up_buffer_managers(screen); + } + entry = pb_slab_alloc_reclaimed(slabs, alloc_size, mem_type_idx, reclaim_all); if (!entry) { /* Clean up buffer managers and try again. */ - clean_up_buffer_managers(screen); - - entry = pb_slab_alloc(slabs, alloc_size, heap); + if (clean_up_buffer_managers(screen)) + entry = pb_slab_alloc_reclaimed(slabs, alloc_size, mem_type_idx, true); } if (!entry) return NULL; bo = container_of(entry, struct zink_bo, u.slab.entry); - pipe_reference_init(&bo->base.reference, 1); - bo->base.size = size; - assert(alignment <= 1 << bo->base.alignment_log2); + assert(bo->base.base.placement == mem_type_idx); + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.size = size; + memset(&bo->reads, 0, sizeof(bo->reads)); + memset(&bo->writes, 0, sizeof(bo->writes)); + bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id); + assert(alignment <= 1 << bo->base.base.alignment_log2); return &bo->base; } @@ -602,21 +666,25 @@ no_slab: if (use_reusable_pool) { /* Get a buffer from the cache. 
*/ bo = (struct zink_bo*) - pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, heap); - if (bo) + pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, mem_type_idx); + assert(!bo || bo->base.base.placement == mem_type_idx); + if (bo) { + memset(&bo->reads, 0, sizeof(bo->reads)); + memset(&bo->writes, 0, sizeof(bo->writes)); return &bo->base; + } } /* Create a new one. */ - bo = bo_create_internal(screen, size, alignment, heap, flags, pNext); + bo = bo_create_internal(screen, size, alignment, heap, mem_type_idx, flags, pNext); if (!bo) { /* Clean up buffer managers and try again. */ - clean_up_buffer_managers(screen); - - bo = bo_create_internal(screen, size, alignment, heap, flags, pNext); + if (clean_up_buffer_managers(screen)) + bo = bo_create_internal(screen, size, alignment, heap, mem_type_idx, flags, pNext); if (!bo) return NULL; } + assert(bo->base.base.placement == mem_type_idx); return &bo->base; } @@ -642,11 +710,16 @@ zink_bo_map(struct zink_screen *screen, struct zink_bo *bo) * be atomic thanks to the lock. 
*/ cpu = real->u.real.cpu_ptr; if (!cpu) { - VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.size, 0, &cpu); + VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.base.size, 0, &cpu); if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkMapMemory failed (%s)", vk_Result_to_str(result)); simple_mtx_unlock(&real->lock); return NULL; } + if (unlikely(zink_debug & ZINK_DEBUG_MAP)) { + p_atomic_add(&screen->mapped_vram, real->base.base.size); + mesa_loge("NEW MAP(%"PRIu64") TOTAL(%"PRIu64")", real->base.base.size, screen->mapped_vram); + } p_atomic_set(&real->u.real.cpu_ptr, cpu); } simple_mtx_unlock(&real->lock); @@ -665,95 +738,73 @@ zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo) if (p_atomic_dec_zero(&real->u.real.map_count)) { p_atomic_set(&real->u.real.cpu_ptr, NULL); + if (unlikely(zink_debug & ZINK_DEBUG_MAP)) { + p_atomic_add(&screen->mapped_vram, -real->base.base.size); + mesa_loge("UNMAP(%"PRIu64") TOTAL(%"PRIu64")", real->base.base.size, screen->mapped_vram); + } VKSCR(UnmapMemory)(screen->dev, real->mem); } } - -static inline struct zink_screen ** -get_screen_ptr_for_commit(uint8_t *mem) -{ - return (struct zink_screen**)(mem + sizeof(VkBindSparseInfo) + sizeof(VkSparseBufferMemoryBindInfo) + sizeof(VkSparseMemoryBind)); -} - -static bool -resource_commit(struct zink_screen *screen, VkBindSparseInfo *sparse) -{ - VkQueue queue = screen->threaded ? 
screen->thread_queue : screen->queue; - - VkResult ret = VKSCR(QueueBindSparse)(queue, 1, sparse, VK_NULL_HANDLE); - return zink_screen_handle_vkresult(screen, ret); -} - +/* see comment in zink_batch_reference_resource_move for how references on sparse backing buffers are organized */ static void -submit_resource_commit(void *data, void *gdata, int thread_index) +track_freed_sparse_bo(struct zink_context *ctx, struct zink_sparse_backing *backing) { - struct zink_screen **screen = get_screen_ptr_for_commit(data); - resource_commit(*screen, data); - free(data); + pipe_reference(NULL, &backing->bo->base.base.reference); + util_dynarray_append(&ctx->batch.state->freed_sparse_backing_bos, struct zink_bo*, backing->bo); } -static bool -do_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t offset, uint32_t size, bool commit) +static VkSemaphore +buffer_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, uint32_t size, bool commit, VkSemaphore wait) { - - uint8_t *mem = malloc(sizeof(VkBindSparseInfo) + sizeof(VkSparseBufferMemoryBindInfo) + sizeof(VkSparseMemoryBind) + sizeof(void*)); - if (!mem) - return false; - VkBindSparseInfo *sparse = (void*)mem; - sparse->sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; - sparse->pNext = NULL; - sparse->waitSemaphoreCount = 0; - sparse->bufferBindCount = 1; - sparse->imageOpaqueBindCount = 0; - sparse->imageBindCount = 0; - sparse->signalSemaphoreCount = 0; - - VkSparseBufferMemoryBindInfo *sparse_bind = (void*)(mem + sizeof(VkBindSparseInfo)); - sparse_bind->buffer = res->obj->buffer; - sparse_bind->bindCount = 1; - sparse->pBufferBinds = sparse_bind; - - VkSparseMemoryBind *mem_bind = (void*)(mem + sizeof(VkBindSparseInfo) + sizeof(VkSparseBufferMemoryBindInfo)); - mem_bind->resourceOffset = offset; - mem_bind->size = MIN2(res->base.b.width0 - offset, size); - mem_bind->memory = commit ? 
bo->mem : VK_NULL_HANDLE; - mem_bind->memoryOffset = 0; - mem_bind->flags = 0; - sparse_bind->pBinds = mem_bind; - - struct zink_screen **ptr = get_screen_ptr_for_commit(mem); - *ptr = screen; - - if (screen->threaded) { - /* this doesn't need any kind of fencing because any access to this resource - * will be automagically synchronized by queue dispatch */ - util_queue_add_job(&screen->flush_queue, mem, NULL, submit_resource_commit, NULL, 0); - } else { - bool ret = resource_commit(screen, sparse); - free(sparse); - return ret; - } - return true; + VkSemaphore sem = zink_create_semaphore(screen); + VkBindSparseInfo sparse = {0}; + sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + sparse.bufferBindCount = res->obj->storage_buffer ? 2 : 1; + sparse.waitSemaphoreCount = !!wait; + sparse.pWaitSemaphores = &wait; + sparse.signalSemaphoreCount = 1; + sparse.pSignalSemaphores = &sem; + + VkSparseBufferMemoryBindInfo sparse_bind[2]; + sparse_bind[0].buffer = res->obj->buffer; + sparse_bind[1].buffer = res->obj->storage_buffer; + sparse_bind[0].bindCount = 1; + sparse_bind[1].bindCount = 1; + sparse.pBufferBinds = sparse_bind; + + VkSparseMemoryBind mem_bind; + mem_bind.resourceOffset = offset; + mem_bind.size = MIN2(res->base.b.width0 - offset, size); + mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE; + mem_bind.memoryOffset = bo_offset * ZINK_SPARSE_BUFFER_PAGE_SIZE + (commit ? (bo->mem ? 
0 : bo->offset) : 0); + mem_bind.flags = 0; + sparse_bind[0].pBinds = &mem_bind; + sparse_bind[1].pBinds = &mem_bind; + + VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE); + if (zink_screen_handle_vkresult(screen, ret)) + return sem; + VKSCR(DestroySemaphore)(screen->dev, sem, NULL); + return VK_NULL_HANDLE; } -bool -zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit) +static bool +buffer_bo_commit(struct zink_context *ctx, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit, VkSemaphore *sem) { bool ok = true; + struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_bo *bo = res->obj->bo; assert(offset % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0); - assert(offset <= bo->base.size); - assert(size <= bo->base.size - offset); - assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == bo->base.size); + assert(offset <= bo->base.base.size); + assert(size <= bo->base.base.size - offset); + assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == res->obj->size); struct zink_sparse_commitment *comm = bo->u.sparse.commitments; uint32_t va_page = offset / ZINK_SPARSE_BUFFER_PAGE_SIZE; uint32_t end_va_page = va_page + DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE); - - simple_mtx_lock(&bo->lock); - + VkSemaphore cur_sem = VK_NULL_HANDLE; if (commit) { while (va_page < end_va_page) { uint32_t span_va_page; @@ -780,10 +831,10 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o ok = false; goto out; } - if (!do_commit_single(screen, res, backing->bo, - (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE, - (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true)) { - + cur_sem = buffer_commit_single(screen, res, backing->bo, backing_start, + (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE, + (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true, cur_sem); + if (!cur_sem) { ok 
= sparse_backing_free(screen, bo, backing, backing_start, backing_size); assert(ok && "sufficient memory should already be allocated"); @@ -801,13 +852,8 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o } } } else { - if (!do_commit_single(screen, res, NULL, - (uint64_t)va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE, - (uint64_t)(end_va_page - va_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false)) { - ok = false; - goto out; - } - + bool done = false; + uint32_t base_page = va_page; while (va_page < end_va_page) { struct zink_sparse_backing *backing; uint32_t backing_start; @@ -819,6 +865,17 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o continue; } + if (!done) { + cur_sem = buffer_commit_single(screen, res, NULL, 0, + (uint64_t)base_page * ZINK_SPARSE_BUFFER_PAGE_SIZE, + (uint64_t)(end_va_page - base_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false, cur_sem); + if (!cur_sem) { + ok = false; + goto out; + } + } + done = true; + /* Group contiguous spans of pages. 
*/ backing = comm[va_page].backing; backing_start = comm[va_page].page; @@ -835,6 +892,7 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o span_pages++; } + track_freed_sparse_bo(ctx, backing); if (!sparse_backing_free(screen, bo, backing, backing_start, span_pages)) { /* Couldn't allocate tracking data structures, so we have to leak */ fprintf(stderr, "zink: leaking sparse backing memory\n"); @@ -843,11 +901,292 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o } } out: + *sem = cur_sem; + return ok; +} + +static VkSemaphore +texture_commit_single(struct zink_screen *screen, struct zink_resource *res, VkSparseImageMemoryBind *ibind, unsigned num_binds, bool commit, VkSemaphore wait) +{ + VkSemaphore sem = zink_create_semaphore(screen); + VkBindSparseInfo sparse = {0}; + sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + sparse.imageBindCount = 1; + sparse.waitSemaphoreCount = !!wait; + sparse.pWaitSemaphores = &wait; + sparse.signalSemaphoreCount = 1; + sparse.pSignalSemaphores = &sem; + + VkSparseImageMemoryBindInfo sparse_ibind; + sparse_ibind.image = res->obj->image; + sparse_ibind.bindCount = num_binds; + sparse_ibind.pBinds = ibind; + sparse.pImageBinds = &sparse_ibind; + + VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE); + if (zink_screen_handle_vkresult(screen, ret)) + return sem; + VKSCR(DestroySemaphore)(screen->dev, sem, NULL); + return VK_NULL_HANDLE; +} + +static VkSemaphore +texture_commit_miptail(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, bool commit, VkSemaphore wait) +{ + VkSemaphore sem = zink_create_semaphore(screen); + VkBindSparseInfo sparse = {0}; + sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + sparse.imageOpaqueBindCount = 1; + sparse.waitSemaphoreCount = !!wait; + sparse.pWaitSemaphores = &wait; + sparse.signalSemaphoreCount = 1; + sparse.pSignalSemaphores = 
&sem; + + VkSparseImageOpaqueMemoryBindInfo sparse_bind; + sparse_bind.image = res->obj->image; + sparse_bind.bindCount = 1; + sparse.pImageOpaqueBinds = &sparse_bind; + + VkSparseMemoryBind mem_bind; + mem_bind.resourceOffset = offset; + mem_bind.size = MIN2(ZINK_SPARSE_BUFFER_PAGE_SIZE, res->sparse.imageMipTailSize - offset); + mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE; + mem_bind.memoryOffset = bo_offset + (commit ? (bo->mem ? 0 : bo->offset) : 0); + mem_bind.flags = 0; + sparse_bind.pBinds = &mem_bind; + + VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE); + if (zink_screen_handle_vkresult(screen, ret)) + return sem; + VKSCR(DestroySemaphore)(screen->dev, sem, NULL); + return VK_NULL_HANDLE; +} + +bool +zink_bo_commit(struct zink_context *ctx, struct zink_resource *res, unsigned level, struct pipe_box *box, bool commit, VkSemaphore *sem) +{ + bool ok = true; + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_bo *bo = res->obj->bo; + VkSemaphore cur_sem = VK_NULL_HANDLE; + + simple_mtx_lock(&screen->queue_lock); + simple_mtx_lock(&bo->lock); + if (res->base.b.target == PIPE_BUFFER) { + ok = buffer_bo_commit(ctx, res, box->x, box->width, commit, &cur_sem); + goto out; + } + + int gwidth, gheight, gdepth; + gwidth = res->sparse.formatProperties.imageGranularity.width; + gheight = res->sparse.formatProperties.imageGranularity.height; + gdepth = res->sparse.formatProperties.imageGranularity.depth; + assert(gwidth && gheight && gdepth); + + struct zink_sparse_commitment *comm = bo->u.sparse.commitments; + VkImageSubresource subresource = { res->aspect, level, 0 }; + unsigned nwidth = DIV_ROUND_UP(box->width, gwidth); + unsigned nheight = DIV_ROUND_UP(box->height, gheight); + unsigned ndepth = DIV_ROUND_UP(box->depth, gdepth); + VkExtent3D lastBlockExtent = { + (box->width % gwidth) ? box->width % gwidth : gwidth, + (box->height % gheight) ? 
box->height % gheight : gheight, + (box->depth % gdepth) ? box->depth % gdepth : gdepth + }; +#define NUM_BATCHED_BINDS 50 + VkSparseImageMemoryBind ibind[NUM_BATCHED_BINDS]; + uint32_t backing_start[NUM_BATCHED_BINDS], backing_size[NUM_BATCHED_BINDS]; + struct zink_sparse_backing *backing[NUM_BATCHED_BINDS]; + unsigned i = 0; + bool commits_pending = false; + uint32_t va_page_offset = 0; + for (unsigned l = 0; l < level; l++) { + unsigned mipwidth = DIV_ROUND_UP(MAX2(res->base.b.width0 >> l, 1), gwidth); + unsigned mipheight = DIV_ROUND_UP(MAX2(res->base.b.height0 >> l, 1), gheight); + unsigned mipdepth = DIV_ROUND_UP(res->base.b.array_size > 1 ? res->base.b.array_size : MAX2(res->base.b.depth0 >> l, 1), gdepth); + va_page_offset += mipwidth * mipheight * mipdepth; + } + for (unsigned d = 0; d < ndepth; d++) { + for (unsigned h = 0; h < nheight; h++) { + for (unsigned w = 0; w < nwidth; w++) { + ibind[i].subresource = subresource; + ibind[i].flags = 0; + // Offset + ibind[i].offset.x = w * gwidth; + ibind[i].offset.y = h * gheight; + if (res->base.b.array_size > 1) { + ibind[i].subresource.arrayLayer = d * gdepth; + ibind[i].offset.z = 0; + } else { + ibind[i].offset.z = d * gdepth; + } + // Size of the page + ibind[i].extent.width = (w == nwidth - 1) ? lastBlockExtent.width : gwidth; + ibind[i].extent.height = (h == nheight - 1) ? lastBlockExtent.height : gheight; + ibind[i].extent.depth = (d == ndepth - 1 && res->base.b.target != PIPE_TEXTURE_CUBE) ? lastBlockExtent.depth : gdepth; + uint32_t va_page = va_page_offset + + (d + (box->z / gdepth)) * ((MAX2(res->base.b.width0 >> level, 1) / gwidth) * (MAX2(res->base.b.height0 >> level, 1) / gheight)) + + (h + (box->y / gheight)) * (MAX2(res->base.b.width0 >> level, 1) / gwidth) + + (w + (box->x / gwidth)); + + uint32_t end_va_page = va_page + 1; + + if (commit) { + while (va_page < end_va_page) { + uint32_t span_va_page; + + /* Skip pages that are already committed. 
*/ + if (comm[va_page].backing) { + va_page++; + continue; + } + + /* Determine length of uncommitted span. */ + span_va_page = va_page; + while (va_page < end_va_page && !comm[va_page].backing) + va_page++; + + /* Fill the uncommitted span with chunks of backing memory. */ + while (span_va_page < va_page) { + backing_size[i] = va_page - span_va_page; + backing[i] = sparse_backing_alloc(screen, bo, &backing_start[i], &backing_size[i]); + if (!backing[i]) { + ok = false; + goto out; + } + if (level >= res->sparse.imageMipTailFirstLod) { + uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride; + cur_sem = texture_commit_miptail(screen, res, backing[i]->bo, backing_start[i], offset, commit, cur_sem); + if (!cur_sem) + goto out; + } else { + ibind[i].memory = backing[i]->bo->mem ? backing[i]->bo->mem : backing[i]->bo->u.slab.real->mem; + ibind[i].memoryOffset = backing_start[i] * ZINK_SPARSE_BUFFER_PAGE_SIZE + + (backing[i]->bo->mem ? 0 : backing[i]->bo->offset); + commits_pending = true; + } + + while (backing_size[i]) { + comm[span_va_page].backing = backing[i]; + comm[span_va_page].page = backing_start[i]; + span_va_page++; + backing_start[i]++; + backing_size[i]--; + } + i++; + } + } + } else { + ibind[i].memory = VK_NULL_HANDLE; + ibind[i].memoryOffset = 0; + + while (va_page < end_va_page) { + /* Skip pages that are already uncommitted. */ + if (!comm[va_page].backing) { + va_page++; + continue; + } + + /* Group contiguous spans of pages. 
*/ + backing[i] = comm[va_page].backing; + backing_start[i] = comm[va_page].page; + comm[va_page].backing = NULL; + + backing_size[i] = 1; + va_page++; + + while (va_page < end_va_page && + comm[va_page].backing == backing[i] && + comm[va_page].page == backing_start[i] + backing_size[i]) { + comm[va_page].backing = NULL; + va_page++; + backing_size[i]++; + } + if (level >= res->sparse.imageMipTailFirstLod) { + uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride; + cur_sem = texture_commit_miptail(screen, res, NULL, 0, offset, commit, cur_sem); + if (!cur_sem) + goto out; + } else { + commits_pending = true; + } + i++; + } + } + if (i == ARRAY_SIZE(ibind)) { + cur_sem = texture_commit_single(screen, res, ibind, ARRAY_SIZE(ibind), commit, cur_sem); + if (!cur_sem) { + for (unsigned s = 0; s < i; s++) { + ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]); + if (!ok) { + /* Couldn't allocate tracking data structures, so we have to leak */ + fprintf(stderr, "zink: leaking sparse backing memory\n"); + } + } + ok = false; + goto out; + } + commits_pending = false; + i = 0; + } + } + } + } + if (commits_pending) { + cur_sem = texture_commit_single(screen, res, ibind, i, commit, cur_sem); + if (!cur_sem) { + for (unsigned s = 0; s < i; s++) { + ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]); + if (!ok) { + /* Couldn't allocate tracking data structures, so we have to leak */ + fprintf(stderr, "zink: leaking sparse backing memory\n"); + } + } + ok = false; + } + } +out: simple_mtx_unlock(&bo->lock); + simple_mtx_unlock(&screen->queue_lock); + *sem = cur_sem; return ok; } +bool +zink_bo_get_kms_handle(struct zink_screen *screen, struct zink_bo *bo, int fd, uint32_t *handle) +{ +#ifdef ZINK_USE_DMABUF + assert(bo->mem && !bo->u.real.use_reusable_pool); + simple_mtx_lock(&bo->u.real.export_lock); + list_for_each_entry(struct bo_export, export, 
&bo->u.real.exports, link) { + if (export->drm_fd == fd) { + simple_mtx_unlock(&bo->u.real.export_lock); + *handle = export->gem_handle; + return true; + } + } + struct bo_export *export = CALLOC_STRUCT(bo_export); + if (!export) { + simple_mtx_unlock(&bo->u.real.export_lock); + return false; + } + bool success = drmPrimeFDToHandle(screen->drm_fd, fd, handle) == 0; + if (success) { + list_addtail(&export->link, &bo->u.real.exports); + export->gem_handle = *handle; + export->drm_fd = screen->drm_fd; + } else { + mesa_loge("zink: failed drmPrimeFDToHandle %s", strerror(errno)); + FREE(export); + } + simple_mtx_unlock(&bo->u.real.export_lock); + return success; +#else + return false; +#endif +} + static const struct pb_vtbl bo_slab_vtbl = { /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */ (void*)bo_slab_destroy @@ -855,11 +1194,9 @@ static const struct pb_vtbl bo_slab_vtbl = { }; static struct pb_slab * -bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index, bool encrypted) +bo_slab_alloc(void *priv, unsigned mem_type_idx, unsigned entry_size, unsigned group_index, bool encrypted) { struct zink_screen *screen = priv; - VkMemoryPropertyFlags domains = vk_domain_from_heap(heap); - uint32_t base_id; unsigned slab_size = 0; struct zink_slab *slab = CALLOC_STRUCT(zink_slab); @@ -898,40 +1235,32 @@ bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_ind } assert(slab_size != 0); - slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, heap, 0, NULL)); + slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, zink_heap_from_domain_flags(screen->info.mem_props.memoryTypes[mem_type_idx].propertyFlags, 0), + 0, mem_type_idx, NULL)); if (!slab->buffer) goto fail; - slab_size = slab->buffer->base.size; + slab_size = slab->buffer->base.base.size; slab->base.num_entries = slab_size / entry_size; slab->base.num_free = slab->base.num_entries; - 
slab->entry_size = entry_size; + slab->base.group_index = group_index; + slab->base.entry_size = entry_size; slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); if (!slab->entries) goto fail_buffer; list_inithead(&slab->base.free); -#ifdef _MSC_VER - /* C11 too hard for msvc, no __sync_fetch_and_add */ - base_id = p_atomic_add_return(&screen->pb.next_bo_unique_id, slab->base.num_entries) - slab->base.num_entries; -#else - base_id = __sync_fetch_and_add(&screen->pb.next_bo_unique_id, slab->base.num_entries); -#endif for (unsigned i = 0; i < slab->base.num_entries; ++i) { struct zink_bo *bo = &slab->entries[i]; simple_mtx_init(&bo->lock, mtx_plain); - bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size)); - bo->base.size = entry_size; + bo->base.base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size)); + bo->base.base.size = entry_size; bo->base.vtbl = &bo_slab_vtbl; bo->offset = slab->buffer->offset + i * entry_size; - bo->base.placement = domains; - bo->unique_id = base_id + i; bo->u.slab.entry.slab = &slab->base; - bo->u.slab.entry.group_index = group_index; - bo->u.slab.entry.entry_size = entry_size; if (slab->buffer->mem) { /* The slab is not suballocated. 
*/ @@ -941,6 +1270,7 @@ bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_ind bo->u.slab.real = slab->buffer->u.slab.real; assert(bo->u.slab.real->mem); } + bo->base.base.placement = bo->u.slab.real->base.base.placement; list_addtail(&bo->u.slab.entry.head, &slab->base.free); } @@ -958,9 +1288,9 @@ fail: } static struct pb_slab * -bo_slab_alloc_normal(void *priv, unsigned heap, unsigned entry_size, unsigned group_index) +bo_slab_alloc_normal(void *priv, unsigned mem_type_idx, unsigned entry_size, unsigned group_index) { - return bo_slab_alloc(priv, heap, entry_size, group_index, false); + return bo_slab_alloc(priv, mem_type_idx, entry_size, group_index, false); } bool @@ -970,12 +1300,12 @@ zink_bo_init(struct zink_screen *screen) for (uint32_t i = 0; i < screen->info.mem_props.memoryHeapCount; ++i) total_mem += screen->info.mem_props.memoryHeaps[i].size; /* Create managers. */ - pb_cache_init(&screen->pb.bo_cache, ZINK_HEAP_MAX, + pb_cache_init(&screen->pb.bo_cache, screen->info.mem_props.memoryTypeCount, 500000, 2.0f, 0, - total_mem / 8, screen, + total_mem / 8, offsetof(struct zink_bo, cache_entry), screen, (void*)bo_destroy, (void*)bo_can_reclaim); - unsigned min_slab_order = 8; /* 256 bytes */ + unsigned min_slab_order = MIN_SLAB_ORDER; /* 256 bytes */ unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */ unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS; @@ -988,7 +1318,7 @@ zink_bo_init(struct zink_screen *screen) if (!pb_slabs_init(&screen->pb.bo_slabs[i], min_order, max_order, - ZINK_HEAP_MAX, true, + screen->info.mem_props.memoryTypeCount, true, screen, bo_can_reclaim_slab, bo_slab_alloc_normal, @@ -998,8 +1328,6 @@ zink_bo_init(struct zink_screen *screen) min_slab_order = max_order + 1; } screen->pb.min_alloc_size = 1 << screen->pb.bo_slabs[0].min_order; - screen->pb.bo_export_table = util_hash_table_create_ptr_keys(); - simple_mtx_init(&screen->pb.bo_export_table_lock, 
mtx_plain); return true; } @@ -1011,6 +1339,4 @@ zink_bo_deinit(struct zink_screen *screen) pb_slabs_deinit(&screen->pb.bo_slabs[i]); } pb_cache_deinit(&screen->pb.bo_cache); - _mesa_hash_table_destroy(screen->pb.bo_export_table, NULL); - simple_mtx_destroy(&screen->pb.bo_export_table_lock); } diff --git a/src/gallium/drivers/zink/zink_bo.h b/src/gallium/drivers/zink/zink_bo.h index 0fd74cb7923..cd7338aff5a 100644 --- a/src/gallium/drivers/zink/zink_bo.h +++ b/src/gallium/drivers/zink/zink_bo.h @@ -26,116 +26,48 @@ #ifndef ZINK_BO_H #define ZINK_BO_H -#include <vulkan/vulkan.h> -#include "pipebuffer/pb_cache.h" -#include "pipebuffer/pb_slab.h" +#include "zink_types.h" #include "zink_batch.h" #define VK_VIS_VRAM (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) -enum zink_resource_access { - ZINK_RESOURCE_ACCESS_READ = 1, - ZINK_RESOURCE_ACCESS_WRITE = 32, - ZINK_RESOURCE_ACCESS_RW = ZINK_RESOURCE_ACCESS_READ | ZINK_RESOURCE_ACCESS_WRITE, -}; - - -enum zink_heap { - ZINK_HEAP_DEVICE_LOCAL, - ZINK_HEAP_DEVICE_LOCAL_SPARSE, - ZINK_HEAP_DEVICE_LOCAL_VISIBLE, - ZINK_HEAP_HOST_VISIBLE_COHERENT, - ZINK_HEAP_HOST_VISIBLE_CACHED, - ZINK_HEAP_MAX, -}; - -enum zink_alloc_flag { - ZINK_ALLOC_SPARSE = 1<<0, - ZINK_ALLOC_NO_SUBALLOC = 1<<1, -}; - - -struct zink_bo { - struct pb_buffer base; - - union { - struct { - void *cpu_ptr; /* for user_ptr and permanent maps */ - int map_count; - - bool is_user_ptr; - bool use_reusable_pool; - - /* Whether buffer_get_handle or buffer_from_handle has been called, - * it can only transition from false to true. Protected by lock. - */ - bool is_shared; - } real; - struct { - struct pb_slab_entry entry; - struct zink_bo *real; - } slab; - struct { - uint32_t num_va_pages; - uint32_t num_backing_pages; - - struct list_head backing; - - /* Commitment information for each page of the virtual memory area. 
*/ - struct zink_sparse_commitment *commitments; - } sparse; - } u; - - VkDeviceMemory mem; - uint64_t offset; - - uint32_t unique_id; - - simple_mtx_t lock; - - struct zink_batch_usage *reads; - struct zink_batch_usage *writes; - - struct pb_cache_entry cache_entry[]; -}; - -static inline struct zink_bo * -zink_bo(struct pb_buffer *pbuf) -{ - return (struct zink_bo*)pbuf; -} +#define VK_STAGING_RAM (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT) +#define VK_LAZY_VRAM (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + -static inline enum zink_alloc_flag +static ALWAYS_INLINE enum zink_alloc_flag zink_alloc_flags_from_heap(enum zink_heap heap) { - enum zink_alloc_flag flags = 0; switch (heap) { case ZINK_HEAP_DEVICE_LOCAL_SPARSE: - flags |= ZINK_ALLOC_SPARSE; + return ZINK_ALLOC_SPARSE; break; default: break; } - return flags; + return (enum zink_alloc_flag)0; } -static inline VkMemoryPropertyFlags +static ALWAYS_INLINE VkMemoryPropertyFlags vk_domain_from_heap(enum zink_heap heap) { - VkMemoryPropertyFlags domains = 0; + VkMemoryPropertyFlags domains = (VkMemoryPropertyFlags)0; switch (heap) { case ZINK_HEAP_DEVICE_LOCAL: case ZINK_HEAP_DEVICE_LOCAL_SPARSE: domains = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; + case ZINK_HEAP_DEVICE_LOCAL_LAZY: + domains = VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; case ZINK_HEAP_DEVICE_LOCAL_VISIBLE: domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; case ZINK_HEAP_HOST_VISIBLE_COHERENT: domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; break; - case ZINK_HEAP_HOST_VISIBLE_CACHED: - domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + case ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED: + domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; break; default: 
break; @@ -143,7 +75,7 @@ vk_domain_from_heap(enum zink_heap heap) return domains; } -static inline enum zink_heap +static ALWAYS_INLINE enum zink_heap zink_heap_from_domain_flags(VkMemoryPropertyFlags domains, enum zink_alloc_flag flags) { if (flags & ZINK_ALLOC_SPARSE) @@ -156,11 +88,22 @@ zink_heap_from_domain_flags(VkMemoryPropertyFlags domains, enum zink_alloc_flag return ZINK_HEAP_DEVICE_LOCAL; if (domains & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) - return ZINK_HEAP_HOST_VISIBLE_CACHED; + return ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED; return ZINK_HEAP_HOST_VISIBLE_COHERENT; } +static ALWAYS_INLINE unsigned +zink_mem_type_idx_from_types(struct zink_screen *screen, enum zink_heap heap, uint32_t types) +{ + for (unsigned i = 0; i < screen->heap_count[heap]; i++) { + if (types & BITFIELD_BIT(screen->heap_map[heap][i])) { + return screen->heap_map[heap][i]; + } + } + return UINT32_MAX; +} + bool zink_bo_init(struct zink_screen *screen); @@ -168,24 +111,27 @@ void zink_bo_deinit(struct zink_screen *screen); struct pb_buffer * -zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext); +zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, unsigned mem_type_idx, const void *pNext); -static inline uint64_t +bool +zink_bo_get_kms_handle(struct zink_screen *screen, struct zink_bo *bo, int fd, uint32_t *handle); + +static ALWAYS_INLINE uint64_t zink_bo_get_offset(const struct zink_bo *bo) { return bo->offset; } -static inline VkDeviceMemory +static ALWAYS_INLINE VkDeviceMemory zink_bo_get_mem(const struct zink_bo *bo) { return bo->mem ? bo->mem : bo->u.slab.real->mem; } -static inline VkDeviceSize +static ALWAYS_INLINE VkDeviceSize zink_bo_get_size(const struct zink_bo *bo) { - return bo->mem ? bo->base.size : bo->u.slab.real->base.size; + return bo->mem ? 
bo->base.base.size : bo->u.slab.real->base.base.size; } void * @@ -194,67 +140,90 @@ void zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo); bool -zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit); +zink_bo_commit(struct zink_context *ctx, struct zink_resource *res, unsigned level, struct pipe_box *box, bool commit, VkSemaphore *sem); -static inline bool +static ALWAYS_INLINE bool zink_bo_has_unflushed_usage(const struct zink_bo *bo) { - return zink_batch_usage_is_unflushed(bo->reads) || - zink_batch_usage_is_unflushed(bo->writes); + return zink_batch_usage_is_unflushed(bo->reads.u) || + zink_batch_usage_is_unflushed(bo->writes.u); } -static inline bool +static ALWAYS_INLINE bool zink_bo_has_usage(const struct zink_bo *bo) { - return zink_batch_usage_exists(bo->reads) || - zink_batch_usage_exists(bo->writes); + return zink_bo_has_unflushed_usage(bo) || + (zink_batch_usage_exists(bo->reads.u) && bo->reads.submit_count == bo->reads.u->submit_count) || + (zink_batch_usage_exists(bo->writes.u) && bo->writes.submit_count == bo->writes.u->submit_count); } -static inline bool +static ALWAYS_INLINE bool zink_bo_usage_matches(const struct zink_bo *bo, const struct zink_batch_state *bs) { - return zink_batch_usage_matches(bo->reads, bs) || - zink_batch_usage_matches(bo->writes, bs); + return (zink_batch_usage_matches(bo->reads.u, bs) && bo->reads.submit_count == bo->reads.u->submit_count) || + (zink_batch_usage_matches(bo->writes.u, bs) && bo->writes.submit_count == bo->writes.u->submit_count); } -static inline bool +static ALWAYS_INLINE bool zink_bo_usage_check_completion(struct zink_screen *screen, struct zink_bo *bo, enum zink_resource_access access) { - if (access & ZINK_RESOURCE_ACCESS_READ && !zink_screen_usage_check_completion(screen, bo->reads)) + if (access & ZINK_RESOURCE_ACCESS_READ && !zink_screen_usage_check_completion(screen, bo->reads.u)) + return false; + if (access & 
ZINK_RESOURCE_ACCESS_WRITE && !zink_screen_usage_check_completion(screen, bo->writes.u)) + return false; + return true; +} + +static ALWAYS_INLINE bool +zink_bo_usage_check_completion_fast(struct zink_screen *screen, struct zink_bo *bo, enum zink_resource_access access) +{ + if (access & ZINK_RESOURCE_ACCESS_READ && !zink_screen_usage_check_completion_fast(screen, bo->reads.u)) return false; - if (access & ZINK_RESOURCE_ACCESS_WRITE && !zink_screen_usage_check_completion(screen, bo->writes)) + if (access & ZINK_RESOURCE_ACCESS_WRITE && !zink_screen_usage_check_completion_fast(screen, bo->writes.u)) return false; return true; } -static inline void +static ALWAYS_INLINE void zink_bo_usage_wait(struct zink_context *ctx, struct zink_bo *bo, enum zink_resource_access access) { if (access & ZINK_RESOURCE_ACCESS_READ) - zink_batch_usage_wait(ctx, bo->reads); + zink_batch_usage_wait(ctx, bo->reads.u); if (access & ZINK_RESOURCE_ACCESS_WRITE) - zink_batch_usage_wait(ctx, bo->writes); + zink_batch_usage_wait(ctx, bo->writes.u); } -static inline void +static ALWAYS_INLINE void +zink_bo_usage_try_wait(struct zink_context *ctx, struct zink_bo *bo, enum zink_resource_access access) +{ + if (access & ZINK_RESOURCE_ACCESS_READ) + zink_batch_usage_try_wait(ctx, bo->reads.u); + if (access & ZINK_RESOURCE_ACCESS_WRITE) + zink_batch_usage_try_wait(ctx, bo->writes.u); +} + +static ALWAYS_INLINE void zink_bo_usage_set(struct zink_bo *bo, struct zink_batch_state *bs, bool write) { - if (write) - zink_batch_usage_set(&bo->writes, bs); - else - zink_batch_usage_set(&bo->reads, bs); + if (write) { + zink_batch_usage_set(&bo->writes.u, bs); + bo->writes.submit_count = bs->usage.submit_count; + } else { + zink_batch_usage_set(&bo->reads.u, bs); + bo->reads.submit_count = bs->usage.submit_count; + } } -static inline bool +static ALWAYS_INLINE bool zink_bo_usage_unset(struct zink_bo *bo, struct zink_batch_state *bs) { - zink_batch_usage_unset(&bo->reads, bs); - 
zink_batch_usage_unset(&bo->writes, bs); - return bo->reads || bo->writes; + zink_batch_usage_unset(&bo->reads.u, bs); + zink_batch_usage_unset(&bo->writes.u, bs); + return bo->reads.u || bo->writes.u; } -static inline void +static ALWAYS_INLINE void zink_bo_unref(struct zink_screen *screen, struct zink_bo *bo) { struct pb_buffer *pbuf = &bo->base; diff --git a/src/gallium/drivers/zink/zink_clear.c b/src/gallium/drivers/zink/zink_clear.c index 660203f7f07..236c67703ab 100644 --- a/src/gallium/drivers/zink/zink_clear.c +++ b/src/gallium/drivers/zink/zink_clear.c @@ -21,13 +21,14 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "zink_batch.h" +#include "zink_clear.h" #include "zink_context.h" +#include "zink_format.h" +#include "zink_inlines.h" #include "zink_query.h" -#include "zink_resource.h" -#include "zink_screen.h" #include "util/u_blitter.h" -#include "util/u_dynarray.h" #include "util/format/u_format.h" #include "util/format_srgb.h" #include "util/u_framebuffer.h" @@ -37,24 +38,6 @@ #include "util/u_helpers.h" static inline bool -check_3d_layers(struct pipe_surface *psurf) -{ - if (psurf->texture->target != PIPE_TEXTURE_3D) - return true; - /* SPEC PROBLEM: - * though the vk spec doesn't seem to explicitly address this, currently drivers - * are claiming that all 3D images have a single "3D" layer regardless of layercount, - * so we can never clear them if we aren't trying to clear only layer 0 - */ - if (psurf->u.tex.first_layer) - return false; - - if (psurf->u.tex.last_layer - psurf->u.tex.first_layer > 0) - return false; - return true; -} - -static inline bool scissor_states_equal(const struct pipe_scissor_state *a, const struct pipe_scissor_state *b) { return a->minx == b->minx && a->miny == b->miny && a->maxx == b->maxx && a->maxy == b->maxy; @@ -70,15 +53,17 @@ clear_in_rp(struct pipe_context *pctx, struct zink_context *ctx = zink_context(pctx); struct pipe_framebuffer_state *fb = &ctx->fb_state; + zink_flush_dgc_if_enabled(ctx); + 
VkClearAttachment attachments[1 + PIPE_MAX_COLOR_BUFS]; int num_attachments = 0; if (buffers & PIPE_CLEAR_COLOR) { VkClearColorValue color; - color.float32[0] = pcolor->f[0]; - color.float32[1] = pcolor->f[1]; - color.float32[2] = pcolor->f[2]; - color.float32[3] = pcolor->f[3]; + color.uint32[0] = pcolor->ui[0]; + color.uint32[1] = pcolor->ui[1]; + color.uint32[2] = pcolor->ui[2]; + color.uint32[3] = pcolor->ui[3]; for (unsigned i = 0; i < fb->nr_cbufs; i++) { if (!(buffers & (PIPE_CLEAR_COLOR0 << i)) || !fb->cbufs[i]) @@ -98,6 +83,8 @@ clear_in_rp(struct pipe_context *pctx, if (buffers & PIPE_CLEAR_STENCIL) aspect |= VK_IMAGE_ASPECT_STENCIL_BIT; + assert(zink_is_zsbuf_used(ctx)); + attachments[num_attachments].aspectMask = aspect; attachments[num_attachments].clearValue.depthStencil.depth = depth; attachments[num_attachments].clearValue.depthStencil.stencil = stencil; @@ -106,10 +93,13 @@ clear_in_rp(struct pipe_context *pctx, VkClearRect cr = {0}; if (scissor_state) { + /* invalid clear */ + if (scissor_state->minx > ctx->fb_state.width || scissor_state->miny > ctx->fb_state.height) + return; cr.rect.offset.x = scissor_state->minx; cr.rect.offset.y = scissor_state->miny; - cr.rect.extent.width = MIN2(fb->width, scissor_state->maxx - scissor_state->minx); - cr.rect.extent.height = MIN2(fb->height, scissor_state->maxy - scissor_state->miny); + cr.rect.extent.width = MIN2(fb->width - cr.rect.offset.x, scissor_state->maxx - scissor_state->minx); + cr.rect.extent.height = MIN2(fb->height - cr.rect.offset.y, scissor_state->maxy - scissor_state->miny); } else { cr.rect.extent.width = fb->width; cr.rect.extent.height = fb->height; @@ -117,75 +107,42 @@ clear_in_rp(struct pipe_context *pctx, cr.baseArrayLayer = 0; cr.layerCount = util_framebuffer_get_num_layers(fb); struct zink_batch *batch = &ctx->batch; - zink_batch_rp(ctx); + assert(batch->in_rp); VKCTX(CmdClearAttachments)(batch->state->cmdbuf, num_attachments, attachments, 1, &cr); + /* + Rendering within a subpass 
containing a feedback loop creates a data race, except in the following + cases: + • If a memory dependency is inserted between when the attachment is written and when it is + subsequently read by later fragments. Pipeline barriers expressing a subpass self-dependency + are the only way to achieve this, and one must be inserted every time a fragment will read + values at a particular sample (x, y, layer, sample) coordinate, if those values have been written + since the most recent pipeline barrier + + VK 1.3.211, Chapter 8: Render Pass + */ + if (ctx->fbfetch_outputs) + ctx->base.texture_barrier(&ctx->base, PIPE_TEXTURE_BARRIER_FRAMEBUFFER); } -static void -clear_color_no_rp(struct zink_context *ctx, struct zink_resource *res, const union pipe_color_union *pcolor, unsigned level, unsigned layer, unsigned layerCount) -{ - struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - VkImageSubresourceRange range = {0}; - range.baseMipLevel = level; - range.levelCount = 1; - range.baseArrayLayer = layer; - range.layerCount = layerCount; - range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - - VkClearColorValue color; - color.float32[0] = pcolor->f[0]; - color.float32[1] = pcolor->f[1]; - color.float32[2] = pcolor->f[2]; - color.float32[3] = pcolor->f[3]; - - if (zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_GENERAL, 0, 0) && - zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0)) - zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0); - zink_batch_reference_resource_rw(batch, res, true); - VKCTX(CmdClearColorImage)(batch->state->cmdbuf, res->obj->image, res->layout, &color, 1, &range); -} - -static void -clear_zs_no_rp(struct zink_context *ctx, struct zink_resource *res, VkImageAspectFlags aspects, double depth, unsigned stencil, unsigned level, unsigned layer, unsigned layerCount) +static struct zink_framebuffer_clear_data * +add_new_clear(struct zink_framebuffer_clear *fb_clear) { - struct 
zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - VkImageSubresourceRange range = {0}; - range.baseMipLevel = level; - range.levelCount = 1; - range.baseArrayLayer = layer; - range.layerCount = layerCount; - range.aspectMask = aspects; - - VkClearDepthStencilValue zs_value = {depth, stencil}; - - if (zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_GENERAL, 0, 0) && - zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0)) - zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0); - zink_batch_reference_resource_rw(batch, res, true); - VKCTX(CmdClearDepthStencilImage)(batch->state->cmdbuf, res->obj->image, res->layout, &zs_value, 1, &range); + struct zink_framebuffer_clear_data cd = {0}; + util_dynarray_append(&fb_clear->clears, struct zink_framebuffer_clear_data, cd); + return zink_fb_clear_element(fb_clear, zink_fb_clear_count(fb_clear) - 1); } - - static struct zink_framebuffer_clear_data * get_clear_data(struct zink_context *ctx, struct zink_framebuffer_clear *fb_clear, const struct pipe_scissor_state *scissor_state) { - struct zink_framebuffer_clear_data *clear = NULL; unsigned num_clears = zink_fb_clear_count(fb_clear); if (num_clears) { struct zink_framebuffer_clear_data *last_clear = zink_fb_clear_element(fb_clear, num_clears - 1); /* if we're completely overwriting the previous clear, merge this into the previous clear */ if (!scissor_state || (last_clear->has_scissor && scissor_states_equal(&last_clear->scissor, scissor_state))) - clear = last_clear; - } - if (!clear) { - struct zink_framebuffer_clear_data cd = {0}; - util_dynarray_append(&fb_clear->clears, struct zink_framebuffer_clear_data, cd); - clear = zink_fb_clear_element(fb_clear, zink_fb_clear_count(fb_clear) - 1); + return last_clear; } - return clear; + return add_new_clear(fb_clear); } void @@ -196,22 +153,98 @@ zink_clear(struct pipe_context *pctx, double depth, unsigned stencil) { struct zink_context *ctx = 
zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); struct pipe_framebuffer_state *fb = &ctx->fb_state; struct zink_batch *batch = &ctx->batch; bool needs_rp = false; - if (unlikely(!zink_screen(pctx->screen)->info.have_EXT_conditional_rendering && !zink_check_conditional_render(ctx))) - return; - if (scissor_state) { struct u_rect scissor = {scissor_state->minx, scissor_state->maxx, scissor_state->miny, scissor_state->maxy}; needs_rp = !zink_blit_region_fills(scissor, fb->width, fb->height); } + if (unlikely(ctx->fb_layer_mismatch)) { + /* this is a terrible scenario: + * at least one attachment has a layerCount greater than the others, + * so iterate over all the mismatched attachments and pre-clear them separately, + * then continue to flag them as need (additional) clearing + * to avoid loadOp=LOAD + */ + unsigned x = 0; + unsigned y = 0; + unsigned w = ctx->fb_state.width; + unsigned h = ctx->fb_state.height; + if (scissor_state) { + x = scissor_state->minx; + y = scissor_state->miny; + w = scissor_state->minx + scissor_state->maxx; + h = scissor_state->miny + scissor_state->maxy; + } + unsigned clear_buffers = buffers >> 2; + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) { + if (ctx->fb_state.cbufs[i] && + (ctx->fb_layer_mismatch & clear_buffers & BITFIELD_BIT(i))) { + if (ctx->void_clears & (PIPE_CLEAR_COLOR0 << i)) { + union pipe_color_union color; + color.f[0] = color.f[1] = color.f[2] = 0; + color.f[3] = 1.0; + pctx->clear_render_target(pctx, ctx->fb_state.cbufs[i], &color, + 0, 0, + ctx->fb_state.cbufs[i]->width, ctx->fb_state.cbufs[i]->height, + ctx->render_condition_active); + } + pctx->clear_render_target(pctx, ctx->fb_state.cbufs[i], pcolor, + x, y, w, h, ctx->render_condition_active); + } + } + if (ctx->fb_state.zsbuf && (buffers & PIPE_CLEAR_DEPTHSTENCIL)) + pctx->clear_depth_stencil(pctx, ctx->fb_state.zsbuf, buffers & PIPE_CLEAR_DEPTHSTENCIL, depth, stencil, + x, y, w, h, ctx->render_condition_active); + } if 
(batch->in_rp) { - clear_in_rp(pctx, buffers, scissor_state, pcolor, depth, stencil); - return; + if (buffers & PIPE_CLEAR_DEPTHSTENCIL && (ctx->zsbuf_unused || ctx->zsbuf_readonly)) { + /* this will need a layout change */ + assert(!ctx->track_renderpasses); + zink_batch_no_rp(ctx); + } else { + clear_in_rp(pctx, buffers, scissor_state, pcolor, depth, stencil); + return; + } + } + + unsigned rp_clears_enabled = ctx->rp_clears_enabled; + + if (ctx->void_clears & buffers) { + unsigned void_clears = ctx->void_clears & buffers; + ctx->void_clears &= ~buffers; + union pipe_color_union color; + color.f[0] = color.f[1] = color.f[2] = 0; + color.f[3] = 1.0; + for (unsigned i = 0; i < fb->nr_cbufs; i++) { + if ((void_clears & (PIPE_CLEAR_COLOR0 << i)) && fb->cbufs[i]) { + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[i]; + unsigned num_clears = zink_fb_clear_count(fb_clear); + if (num_clears) { + if (zink_fb_clear_first_needs_explicit(fb_clear)) { + /* a scissored clear exists: + * - extend the clear array + * - shift existing clears back by one position + * - inject void clear base of array + */ + add_new_clear(fb_clear); + struct zink_framebuffer_clear_data *clear = fb_clear->clears.data; + memmove(clear + 1, clear, num_clears); + memcpy(&clear->color, &color, sizeof(color)); + } else { + /* no void clear needed */ + } + void_clears &= ~(PIPE_CLEAR_COLOR0 << i); + } + } + } + if (void_clears) + pctx->clear(pctx, void_clears, NULL, &color, 0, 0); } if (buffers & PIPE_CLEAR_COLOR) { @@ -224,11 +257,10 @@ zink_clear(struct pipe_context *pctx, ctx->clears_enabled |= PIPE_CLEAR_COLOR0 << i; clear->conditional = ctx->render_condition_active; clear->has_scissor = needs_rp; + memcpy(&clear->color, pcolor, sizeof(union pipe_color_union)); + zink_convert_color(screen, psurf->format, &clear->color, pcolor); if (scissor_state && needs_rp) clear->scissor = *scissor_state; - clear->color.color = *pcolor; - clear->color.srgb = psurf->format != psurf->texture->format && - 
!util_format_is_srgb(psurf->format) && util_format_is_srgb(psurf->texture->format); if (zink_fb_clear_first_needs_explicit(fb_clear)) ctx->rp_clears_enabled &= ~(PIPE_CLEAR_COLOR0 << i); else @@ -250,11 +282,18 @@ zink_clear(struct pipe_context *pctx, if (buffers & PIPE_CLEAR_STENCIL) clear->zs.stencil = stencil; clear->zs.bits |= (buffers & PIPE_CLEAR_DEPTHSTENCIL); - if (zink_fb_clear_first_needs_explicit(fb_clear)) + if (zink_fb_clear_first_needs_explicit(fb_clear)) { ctx->rp_clears_enabled &= ~PIPE_CLEAR_DEPTHSTENCIL; - else + if (!ctx->track_renderpasses) + ctx->dynamic_fb.tc_info.zsbuf_clear_partial = true; + } else { ctx->rp_clears_enabled |= (buffers & PIPE_CLEAR_DEPTHSTENCIL); + if (!ctx->track_renderpasses) + ctx->dynamic_fb.tc_info.zsbuf_clear = true; + } } + assert(!ctx->batch.in_rp); + ctx->rp_changed |= ctx->rp_clears_enabled != rp_clears_enabled; } static inline bool @@ -297,7 +336,7 @@ zink_clear_framebuffer(struct zink_context *ctx, unsigned clear_buffers) goto out; /* colors don't match, fire this one off */ - if (!colors_equal(&a->color.color, &b->color.color)) + if (!colors_equal(&a->color, &b->color)) goto out; } } else { @@ -341,7 +380,7 @@ out: } zink_clear(&ctx->base, clear_bits, clear->has_scissor ? &clear->scissor : NULL, - &clear->color.color, + &clear->color, zsclear ? zsclear->zs.depth : 0, zsclear ? 
zsclear->zs.stencil : 0); } @@ -358,8 +397,10 @@ out: } to_clear = 0; } - for (int i = 0; i < ARRAY_SIZE(ctx->fb_clears); i++) - zink_fb_clear_reset(ctx, i); + if (ctx->clears_enabled & PIPE_CLEAR_DEPTHSTENCIL) + zink_fb_clear_reset(ctx, PIPE_MAX_COLOR_BUFS); + u_foreach_bit(i, ctx->clears_enabled >> 2) + zink_fb_clear_reset(ctx, i); } static struct pipe_surface * @@ -374,6 +415,101 @@ create_clear_surface(struct pipe_context *pctx, struct pipe_resource *pres, unsi return pctx->create_surface(pctx, pres, &tmpl); } +static void +set_clear_fb(struct pipe_context *pctx, struct pipe_surface *psurf, struct pipe_surface *zsurf) +{ + struct pipe_framebuffer_state fb_state = {0}; + fb_state.width = psurf ? psurf->width : zsurf->width; + fb_state.height = psurf ? psurf->height : zsurf->height; + fb_state.nr_cbufs = !!psurf; + fb_state.cbufs[0] = psurf; + fb_state.zsbuf = zsurf; + pctx->set_framebuffer_state(pctx, &fb_state); +} + +void +zink_clear_texture_dynamic(struct pipe_context *pctx, + struct pipe_resource *pres, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); + struct zink_resource *res = zink_resource(pres); + + bool full_clear = 0 <= box->x && u_minify(pres->width0, level) >= box->x + box->width && + 0 <= box->y && u_minify(pres->height0, level) >= box->y + box->height && + 0 <= box->z && u_minify(pres->target == PIPE_TEXTURE_3D ? pres->depth0 : pres->array_size, level) >= box->z + box->depth; + + struct pipe_surface *surf = create_clear_surface(pctx, pres, level, box); + + VkRenderingAttachmentInfo att = {0}; + att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO; + att.imageView = zink_csurface(surf)->image_view; + att.imageLayout = res->aspect & VK_IMAGE_ASPECT_COLOR_BIT ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + att.loadOp = full_clear ? 
VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + att.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + + VkRenderingInfo info = {0}; + info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO; + info.renderArea.offset.x = box->x; + info.renderArea.offset.y = box->y; + info.renderArea.extent.width = box->width; + info.renderArea.extent.height = box->height; + info.layerCount = MAX2(box->depth, 1); + + union pipe_color_union color, tmp; + float depth = 0.0; + uint8_t stencil = 0; + if (res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) { + util_format_unpack_rgba(pres->format, tmp.ui, data, 1); + zink_convert_color(screen, surf->format, &color, &tmp); + } else { + if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT) + util_format_unpack_z_float(pres->format, &depth, data, 1); + + if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT) + util_format_unpack_s_8uint(pres->format, &stencil, data, 1); + } + + zink_blit_barriers(ctx, NULL, res, full_clear); + VkCommandBuffer cmdbuf = zink_get_cmdbuf(ctx, NULL, res); + if (cmdbuf == ctx->batch.state->cmdbuf && ctx->batch.in_rp) + zink_batch_no_rp(ctx); + + if (res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) { + memcpy(&att.clearValue, &color, sizeof(float) * 4); + info.colorAttachmentCount = 1; + info.pColorAttachments = &att; + } else { + att.clearValue.depthStencil.depth = depth; + att.clearValue.depthStencil.stencil = stencil; + if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT) + info.pDepthAttachment = &att; + if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT) + info.pStencilAttachment = &att; + } + VKCTX(CmdBeginRendering)(cmdbuf, &info); + if (!full_clear) { + VkClearRect rect; + rect.rect = info.renderArea; + rect.baseArrayLayer = box->z; + rect.layerCount = box->depth; + + VkClearAttachment clear_att; + clear_att.aspectMask = res->aspect; + clear_att.colorAttachment = 0; + clear_att.clearValue = att.clearValue; + + VKCTX(CmdClearAttachments)(cmdbuf, 1, &clear_att, 1, &rect); + } + VKCTX(CmdEndRendering)(cmdbuf); + zink_batch_reference_resource_rw(&ctx->batch, res, 
true); + /* this will never destroy the surface */ + pipe_surface_reference(&surf, NULL); +} + void zink_clear_texture(struct pipe_context *pctx, struct pipe_resource *pres, @@ -383,27 +519,24 @@ zink_clear_texture(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); struct zink_resource *res = zink_resource(pres); - struct pipe_screen *pscreen = pctx->screen; - struct u_rect region = zink_rect_from_box(box); - bool needs_rp = !zink_blit_region_fills(region, pres->width0, pres->height0) || ctx->render_condition_active; struct pipe_surface *surf = NULL; + struct pipe_scissor_state scissor = {box->x, box->y, box->x + box->width, box->y + box->height}; if (res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) { union pipe_color_union color; util_format_unpack_rgba(pres->format, color.ui, data, 1); - if (pscreen->is_format_supported(pscreen, pres->format, pres->target, 0, 0, - PIPE_BIND_RENDER_TARGET) && !needs_rp) { - zink_batch_no_rp(ctx); - clear_color_no_rp(ctx, res, &color, level, box->z, box->depth); - } else { - surf = create_clear_surface(pctx, pres, level, box); - zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS); - util_blitter_clear_render_target(ctx->blitter, surf, &color, box->x, box->y, box->width, box->height); - } - if (res->base.b.target == PIPE_BUFFER) - util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width); + surf = create_clear_surface(pctx, pres, level, box); + util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state); + set_clear_fb(pctx, surf, NULL); + zink_blit_barriers(ctx, NULL, res, false); + ctx->blitting = true; + ctx->queries_disabled = true; + pctx->clear(pctx, PIPE_CLEAR_COLOR0, &scissor, &color, 0, 0); + util_blitter_restore_fb_state(ctx->blitter); + ctx->queries_disabled = false; + ctx->blitting = false; } else { float depth = 0.0; uint8_t stencil = 0; @@ -414,19 +547,21 @@ zink_clear_texture(struct pipe_context *pctx, if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT) 
util_format_unpack_s_8uint(pres->format, &stencil, data, 1); - if (!needs_rp) { - zink_batch_no_rp(ctx); - clear_zs_no_rp(ctx, res, res->aspect, depth, stencil, level, box->z, box->depth); - } else { - unsigned flags = 0; - if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT) - flags |= PIPE_CLEAR_DEPTH; - if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT) - flags |= PIPE_CLEAR_STENCIL; - surf = create_clear_surface(pctx, pres, level, box); - zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS); - util_blitter_clear_depth_stencil(ctx->blitter, surf, flags, depth, stencil, box->x, box->y, box->width, box->height); - } + unsigned flags = 0; + if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT) + flags |= PIPE_CLEAR_DEPTH; + if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT) + flags |= PIPE_CLEAR_STENCIL; + surf = create_clear_surface(pctx, pres, level, box); + util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state); + zink_blit_barriers(ctx, NULL, res, false); + ctx->blitting = true; + set_clear_fb(pctx, NULL, surf); + ctx->queries_disabled = true; + pctx->clear(pctx, flags, &scissor, NULL, depth, stencil); + util_blitter_restore_fb_state(ctx->blitter); + ctx->queries_disabled = false; + ctx->blitting = false; } /* this will never destroy the surface */ pipe_surface_reference(&surf, NULL); @@ -454,11 +589,10 @@ zink_clear_buffer(struct pipe_context *pctx, - size is the number of bytes to fill, and must be either a multiple of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the buffer */ - struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - zink_batch_reference_resource_rw(batch, res, true); - util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + size); - VKCTX(CmdFillBuffer)(batch->state->cmdbuf, res->obj->buffer, offset, size, *(uint32_t*)clear_value); + zink_resource_buffer_transfer_dst_barrier(ctx, res, offset, size); + VkCommandBuffer cmdbuf = zink_get_cmdbuf(ctx, NULL, res); + zink_batch_reference_resource_rw(&ctx->batch, 
res, true); + VKCTX(CmdFillBuffer)(cmdbuf, res->obj->buffer, offset, size, *(uint32_t*)clear_value); return; } struct pipe_transfer *xfer; @@ -484,10 +618,23 @@ zink_clear_render_target(struct pipe_context *pctx, struct pipe_surface *dst, bool render_condition_enabled) { struct zink_context *ctx = zink_context(pctx); - zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | (render_condition_enabled ? 0 : ZINK_BLIT_NO_COND_RENDER)); - util_blitter_clear_render_target(ctx->blitter, dst, color, dstx, dsty, width, height); - if (!render_condition_enabled && ctx->render_condition_active) + zink_flush_dgc_if_enabled(ctx); + bool render_condition_active = ctx->render_condition_active; + if (!render_condition_enabled && render_condition_active) { + zink_stop_conditional_render(ctx); + ctx->render_condition_active = false; + } + util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state); + set_clear_fb(pctx, dst, NULL); + struct pipe_scissor_state scissor = {dstx, dsty, dstx + width, dsty + height}; + zink_blit_barriers(ctx, NULL, zink_resource(dst->texture), false); + ctx->blitting = true; + pctx->clear(pctx, PIPE_CLEAR_COLOR0, &scissor, color, 0, 0); + util_blitter_restore_fb_state(ctx->blitter); + ctx->blitting = false; + if (!render_condition_enabled && render_condition_active) zink_start_conditional_render(ctx); + ctx->render_condition_active = render_condition_active; } void @@ -497,10 +644,36 @@ zink_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *dst, bool render_condition_enabled) { struct zink_context *ctx = zink_context(pctx); - zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | (render_condition_enabled ? 
0 : ZINK_BLIT_NO_COND_RENDER)); - util_blitter_clear_depth_stencil(ctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); - if (!render_condition_enabled && ctx->render_condition_active) + /* check for stencil fallback */ + bool blitting = ctx->blitting; + zink_flush_dgc_if_enabled(ctx); + bool render_condition_active = ctx->render_condition_active; + if (!render_condition_enabled && render_condition_active) { + zink_stop_conditional_render(ctx); + ctx->render_condition_active = false; + } + bool cur_attachment = zink_csurface(ctx->fb_state.zsbuf) == zink_csurface(dst); + if (dstx > ctx->fb_state.width || dsty > ctx->fb_state.height || + dstx + width > ctx->fb_state.width || + dsty + height > ctx->fb_state.height) + cur_attachment = false; + if (!cur_attachment) { + if (!blitting) { + util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state); + set_clear_fb(pctx, NULL, dst); + zink_blit_barriers(ctx, NULL, zink_resource(dst->texture), false); + ctx->blitting = true; + } + } + struct pipe_scissor_state scissor = {dstx, dsty, dstx + width, dsty + height}; + pctx->clear(pctx, clear_flags, &scissor, NULL, depth, stencil); + if (!cur_attachment && !blitting) { + util_blitter_restore_fb_state(ctx->blitter); + ctx->blitting = false; + } + if (!render_condition_enabled && render_condition_active) zink_start_conditional_render(ctx); + ctx->render_condition_active = render_condition_active; } bool @@ -519,65 +692,43 @@ zink_fb_clear_first_needs_explicit(struct zink_framebuffer_clear *fb_clear) return zink_fb_clear_element_needs_explicit(zink_fb_clear_element(fb_clear, 0)); } -void -zink_fb_clear_util_unpack_clear_color(struct zink_framebuffer_clear_data *clear, enum pipe_format format, union pipe_color_union *color) -{ - const struct util_format_description *desc = util_format_description(format); - if (clear->color.srgb) { - /* if SRGB mode is disabled for the fb with a backing srgb image then we have to - * convert this to srgb color - */ - for 
(unsigned j = 0; j < MIN2(3, desc->nr_channels); j++) { - assert(desc->channel[j].normalized); - color->f[j] = util_format_srgb_to_linear_float(clear->color.color.f[j]); - } - color->f[3] = clear->color.color.f[3]; - } else { - for (unsigned i = 0; i < 4; i++) - color->f[i] = clear->color.color.f[i]; - } -} - static void fb_clears_apply_internal(struct zink_context *ctx, struct pipe_resource *pres, int i) { - struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[i]; - if (!zink_fb_clear_enabled(ctx, i)) return; if (ctx->batch.in_rp) zink_clear_framebuffer(ctx, BITFIELD_BIT(i)); - else if (zink_resource(pres)->aspect == VK_IMAGE_ASPECT_COLOR_BIT) { - if (zink_fb_clear_needs_explicit(fb_clear) || !check_3d_layers(ctx->fb_state.cbufs[i])) - /* this will automatically trigger all the clears */ - zink_batch_rp(ctx); - else { - struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); - union pipe_color_union color; - zink_fb_clear_util_unpack_clear_color(clear, psurf->format, &color); - - clear_color_no_rp(ctx, zink_resource(pres), &color, - psurf->u.tex.level, psurf->u.tex.first_layer, - psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1); + else { + struct zink_resource *res = zink_resource(pres); + bool queries_disabled = ctx->queries_disabled; + VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf; + /* slightly different than the u_blitter handling: + * this can be called recursively while unordered_blitting=true + */ + bool can_reorder = zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering && + !ctx->render_condition_active && + !ctx->unordered_blitting && + zink_get_cmdbuf(ctx, NULL, res) == ctx->batch.state->reordered_cmdbuf; + if (can_reorder) { + /* set unordered_blitting but NOT blitting: + * let begin_rendering handle layouts + */ + ctx->unordered_blitting = true; + /* for unordered clears, swap the unordered cmdbuf for the main one for the whole op to avoid 
conditional hell */ + ctx->batch.state->cmdbuf = ctx->batch.state->reordered_cmdbuf; + ctx->rp_changed = true; + ctx->queries_disabled = true; + ctx->batch.state->has_barriers = true; } - zink_fb_clear_reset(ctx, i); - return; - } else { - if (zink_fb_clear_needs_explicit(fb_clear) || !check_3d_layers(ctx->fb_state.zsbuf)) - /* this will automatically trigger all the clears */ - zink_batch_rp(ctx); - else { - struct pipe_surface *psurf = ctx->fb_state.zsbuf; - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); - VkImageAspectFlags aspects = 0; - if (clear->zs.bits & PIPE_CLEAR_DEPTH) - aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if (clear->zs.bits & PIPE_CLEAR_STENCIL) - aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - clear_zs_no_rp(ctx, zink_resource(pres), aspects, clear->zs.depth, clear->zs.stencil, - psurf->u.tex.level, psurf->u.tex.first_layer, - psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1); + /* this will automatically trigger all the clears */ + zink_batch_rp(ctx); + if (can_reorder) { + zink_batch_no_rp(ctx); + ctx->unordered_blitting = false; + ctx->rp_changed = true; + ctx->queries_disabled = queries_disabled; + ctx->batch.state->cmdbuf = cmdbuf; } } zink_fb_clear_reset(ctx, i); @@ -586,6 +737,7 @@ fb_clears_apply_internal(struct zink_context *ctx, struct pipe_resource *pres, i void zink_fb_clear_reset(struct zink_context *ctx, unsigned i) { + unsigned rp_clears_enabled = ctx->clears_enabled; util_dynarray_clear(&ctx->fb_clears[i].clears); if (i == PIPE_MAX_COLOR_BUFS) { ctx->clears_enabled &= ~PIPE_CLEAR_DEPTHSTENCIL; @@ -594,6 +746,8 @@ zink_fb_clear_reset(struct zink_context *ctx, unsigned i) ctx->clears_enabled &= ~(PIPE_CLEAR_COLOR0 << i); ctx->rp_clears_enabled &= ~(PIPE_CLEAR_COLOR0 << i); } + if (ctx->rp_clears_enabled != rp_clears_enabled) + ctx->rp_loadop_changed = true; } void @@ -603,7 +757,6 @@ zink_fb_clears_apply(struct zink_context *ctx, struct pipe_resource *pres) for (int i = 0; i < ctx->fb_state.nr_cbufs; 
i++) { if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) { fb_clears_apply_internal(ctx, pres, i); - return; } } } else { @@ -621,7 +774,6 @@ zink_fb_clears_discard(struct zink_context *ctx, struct pipe_resource *pres) if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) { if (zink_fb_clear_enabled(ctx, i)) { zink_fb_clear_reset(ctx, i); - return; } } } @@ -695,7 +847,6 @@ zink_fb_clears_apply_or_discard(struct zink_context *ctx, struct pipe_resource * for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) { fb_clears_apply_or_discard_internal(ctx, pres, region, discard_only, false, i); - return; } } } else { @@ -712,7 +863,6 @@ zink_fb_clears_apply_region(struct zink_context *ctx, struct pipe_resource *pres for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) { fb_clears_apply_or_discard_internal(ctx, pres, region, false, true, i); - return; } } } else { @@ -721,3 +871,32 @@ zink_fb_clears_apply_region(struct zink_context *ctx, struct pipe_resource *pres } } } + +void +zink_fb_clear_rewrite(struct zink_context *ctx, unsigned idx, enum pipe_format before, enum pipe_format after) +{ + /* if the values for the clear color are incompatible, they must be rewritten; + * this occurs if: + * - the formats' srgb-ness does not match + * - the formats' signedness does not match + */ + const struct util_format_description *bdesc = util_format_description(before); + const struct util_format_description *adesc = util_format_description(after); + int bfirst_non_void_chan = util_format_get_first_non_void_channel(before); + int afirst_non_void_chan = util_format_get_first_non_void_channel(after); + bool bsigned = false, asigned = false; + if (bfirst_non_void_chan > 0) + bsigned = bdesc->channel[bfirst_non_void_chan].type == UTIL_FORMAT_TYPE_SIGNED; + if (afirst_non_void_chan > 0) + asigned = 
adesc->channel[afirst_non_void_chan].type == UTIL_FORMAT_TYPE_SIGNED; + if (util_format_is_srgb(before) == util_format_is_srgb(after) && + bsigned == asigned) + return; + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[idx]; + for (int j = 0; j < zink_fb_clear_count(fb_clear); j++) { + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, j); + uint32_t data[4]; + util_format_pack_rgba(before, data, clear->color.ui, 1); + util_format_unpack_rgba(after, clear->color.ui, data, 1); + } +} diff --git a/src/gallium/drivers/zink/zink_clear.h b/src/gallium/drivers/zink/zink_clear.h index 5f6492a17a7..8df6c70aa42 100644 --- a/src/gallium/drivers/zink/zink_clear.h +++ b/src/gallium/drivers/zink/zink_clear.h @@ -24,34 +24,9 @@ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> */ -#include "util/u_dynarray.h" -#include "pipe/p_state.h" -#include <vulkan/vulkan.h> #include "util/u_rect.h" - -struct zink_context; -struct zink_resource; - -struct zink_framebuffer_clear_data { - union { - struct { - union pipe_color_union color; - bool srgb; - } color; - struct { - float depth; - unsigned stencil; - uint8_t bits : 2; // PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL - } zs; - }; - struct pipe_scissor_state scissor; - bool has_scissor; - bool conditional; -}; - -struct zink_framebuffer_clear { - struct util_dynarray clears; -}; +#include "zink_types.h" +#include "zink_screen.h" void zink_clear(struct pipe_context *pctx, @@ -66,6 +41,12 @@ zink_clear_texture(struct pipe_context *ctx, const struct pipe_box *box, const void *data); void +zink_clear_texture_dynamic(struct pipe_context *ctx, + struct pipe_resource *p_res, + unsigned level, + const struct pipe_box *box, + const void *data); +void zink_clear_buffer(struct pipe_context *pctx, struct pipe_resource *pres, unsigned offset, @@ -115,6 +96,13 @@ zink_fb_clear_element_needs_explicit(struct zink_framebuffer_clear_data *clear) return clear->has_scissor || clear->conditional; } +static inline bool 
+zink_fb_clear_full_exists(struct zink_context *ctx, unsigned clear_buffer) +{ + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[clear_buffer]; + return zink_fb_clear_count(fb_clear) && !zink_fb_clear_first_needs_explicit(fb_clear); +} + void zink_clear_apply_conditionals(struct zink_context *ctx); @@ -131,4 +119,4 @@ void zink_fb_clears_apply_region(struct zink_context *ctx, struct pipe_resource *pres, struct u_rect region); void -zink_fb_clear_util_unpack_clear_color(struct zink_framebuffer_clear_data *clear, enum pipe_format format, union pipe_color_union *color); +zink_fb_clear_rewrite(struct zink_context *ctx, unsigned idx, enum pipe_format before, enum pipe_format after); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index dd8c5311a1c..63cc33c3e4c 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -21,8 +21,10 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "nir_opcodes.h" #include "zink_context.h" #include "zink_compiler.h" +#include "zink_descriptors.h" #include "zink_program.h" #include "zink_screen.h" #include "nir_to_spirv/nir_to_spirv.h" @@ -30,313 +32,992 @@ #include "pipe/p_state.h" #include "nir.h" +#include "nir_xfb_info.h" +#include "nir/nir_draw_helpers.h" #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_serialize.h" +#include "compiler/nir/nir_builtin_builder.h" #include "nir/tgsi_to_nir.h" #include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_from_mesa.h" #include "util/u_memory.h" +#include "compiler/spirv/nir_spirv.h" +#include "vk_util.h" + +bool +zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask); + + static void -create_vs_pushconst(nir_shader *nir) +copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src) { - nir_variable *vs_pushconst; - /* create compatible layout for the ntv push constant loader */ - struct glsl_struct_field *fields = rzalloc_array(nir, struct 
glsl_struct_field, 2); - fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0); - fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed"); - fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed); - fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0); - fields[1].name = ralloc_asprintf(nir, "draw_id"); - fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id); - vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const, - glsl_struct_type(fields, 2, "struct", false), "vs_pushconst"); - vs_pushconst->data.location = INT_MAX; //doesn't really matter + assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type)); + if (glsl_type_is_struct_or_ifc(dst->type)) { + for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) { + copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i)); + } + } else if (glsl_type_is_array_or_matrix(dst->type)) { + unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type); + for (unsigned i = 0; i < count; i++) { + copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i)); + } + } else { + nir_def *load = nir_load_deref(b, src); + nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components)); + } +} + +static bool +is_clipcull_dist(int location) +{ + switch (location) { + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_CULL_DIST1: + return true; + default: break; + } + return false; } +#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field) + static void -create_cs_pushconst(nir_shader *nir) +create_gfx_pushconst(nir_shader *nir) { - nir_variable *cs_pushconst; +#define PUSHCONST_MEMBER(member_idx, field) \ +fields[member_idx].type = \ + glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \ +fields[member_idx].name = ralloc_asprintf(nir, 
#field); \ +fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field); + + nir_variable *pushconst; /* create compatible layout for the ntv push constant loader */ - struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field)); - fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0); - fields[0].name = ralloc_asprintf(nir, "work_dim"); - fields[0].offset = 0; - cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const, - glsl_struct_type(fields, 1, "struct", false), "cs_pushconst"); - cs_pushconst->data.location = INT_MAX; //doesn't really matter + struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale); + PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width); + + pushconst = nir_variable_create(nir, nir_var_mem_push_const, + glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false), + "gfx_pushconst"); + pushconst->data.location = INT_MAX; //doesn't really matter + +#undef PUSHCONST_MEMBER } static bool -reads_work_dim(nir_shader *shader) +lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data) { - return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM); + if (instr->intrinsic != nir_intrinsic_load_base_vertex) + return false; + + b->cursor = nir_after_instr(&instr->instr); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, 
nir_intrinsic_load_push_constant_zink); + load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED)); + load->num_components = 1; + nir_def_init(&load->instr, &load->def, 1, 32); + nir_builder_instr_insert(b, &load->instr); + + nir_def *composite = nir_build_alu(b, nir_op_bcsel, + nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL), + &instr->def, + nir_imm_int(b, 0), + NULL); + + nir_def_rewrite_uses_after(&instr->def, composite, + composite->parent_instr); + return true; } static bool -lower_discard_if_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data) +lower_basevertex(nir_shader *shader) { - if (instr_->type != nir_instr_type_intrinsic) + if (shader->info.stage != MESA_SHADER_VERTEX) return false; - nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_); + if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) + return false; + + return nir_shader_intrinsics_pass(shader, lower_basevertex_instr, + nir_metadata_dominance, NULL); +} - if (instr->intrinsic == nir_intrinsic_discard_if) { - b->cursor = nir_before_instr(&instr->instr); - nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1)); - nir_discard(b); - nir_pop_if(b, if_stmt); - nir_instr_remove(&instr->instr); +static bool +lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data) +{ + if (instr->intrinsic != nir_intrinsic_load_draw_id) + return false; + + b->cursor = nir_before_instr(&instr->instr); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink); + load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID)); + load->num_components = 1; + nir_def_init(&load->instr, &load->def, 1, 32); + nir_builder_instr_insert(b, &load->instr); + + nir_def_rewrite_uses(&instr->def, &load->def); + + return true; +} + +static bool +lower_drawid(nir_shader *shader) +{ + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + + 
if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) + return false; + + return nir_shader_intrinsics_pass(shader, lower_drawid_instr, + nir_metadata_dominance, NULL); +} + +struct lower_gl_point_state { + nir_variable *gl_pos_out; + nir_variable *gl_point_size; +}; + +static bool +lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data) +{ + struct lower_gl_point_state *state = data; + nir_def *vp_scale, *pos; + + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter && + intrin->intrinsic != nir_intrinsic_emit_vertex) + return false; + + if (nir_intrinsic_stream_id(intrin) != 0) + return false; + + if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter || + intrin->intrinsic == nir_intrinsic_end_primitive) { + nir_instr_remove(&intrin->instr); return true; } - /* a shader like this (shaders@glsl-fs-discard-04): - uniform int j, k; + b->cursor = nir_before_instr(instr); + + // viewport-map endpoints + nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE); + vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos); + + // Load point info values + nir_def *point_size = nir_load_var(b, state->gl_point_size); + nir_def *point_pos = nir_load_var(b, state->gl_pos_out); + + // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w + nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0)); + w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3)); + // halt_w_delta = w_delta / 2 + nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5); + + // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w + nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1)); + h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3)); + // halt_h_delta = h_delta / 2 + nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5); + 
+ nir_def *point_dir[4][2] = { + { nir_imm_float(b, -1), nir_imm_float(b, -1) }, + { nir_imm_float(b, -1), nir_imm_float(b, 1) }, + { nir_imm_float(b, 1), nir_imm_float(b, -1) }, + { nir_imm_float(b, 1), nir_imm_float(b, 1) } + }; - void main() - { - for (int i = 0; i < j; i++) { - if (i > k) - continue; - discard; - } - gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0); - } + nir_def *point_pos_x = nir_channel(b, point_pos, 0); + nir_def *point_pos_y = nir_channel(b, point_pos, 1); + for (size_t i = 0; i < 4; i++) { + pos = nir_vec4(b, + nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x), + nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y), + nir_channel(b, point_pos, 2), + nir_channel(b, point_pos, 3)); + nir_store_var(b, state->gl_pos_out, pos, 0xf); - will generate nir like: + nir_emit_vertex(b); + } - loop { - //snip - if ssa_11 { - block block_5: - / preds: block_4 / - vec1 32 ssa_17 = iadd ssa_50, ssa_31 - / succs: block_7 / - } else { - block block_6: - / preds: block_4 / - intrinsic discard () () <-- not last instruction - vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment - / succs: block_7 / - } - //snip - } + nir_end_primitive(b); - which means that we can't assert like this: + nir_instr_remove(&intrin->instr); - assert(instr->intrinsic != nir_intrinsic_discard || - nir_block_last_instr(instr->instr.block) == &instr->instr); + return true; +} +static bool +lower_gl_point_gs(nir_shader *shader) +{ + struct lower_gl_point_state state; - and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard - */ + shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP; + shader->info.gs.vertices_out *= 4; + + // Gets the gl_Position in and out + state.gl_pos_out = + nir_find_variable_with_location(shader, nir_var_shader_out, + VARYING_SLOT_POS); + state.gl_point_size = + nir_find_variable_with_location(shader, nir_var_shader_out, + VARYING_SLOT_PSIZ); + + // if position in or gl_PointSize aren't 
written, we have nothing to do + if (!state.gl_pos_out || !state.gl_point_size) + return false; + + return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr, + nir_metadata_dominance, &state); +} + +struct lower_pv_mode_state { + nir_variable *varyings[VARYING_SLOT_MAX][4]; + nir_variable *pos_counter; + nir_variable *out_pos_counter; + nir_variable *ring_offset; + unsigned ring_size; + unsigned primitive_vert_count; + unsigned prim; +}; + +static nir_def* +lower_pv_mode_gs_ring_index(nir_builder *b, + struct lower_pv_mode_state *state, + nir_def *index) +{ + nir_def *ring_offset = nir_load_var(b, state->ring_offset); + return nir_imod_imm(b, nir_iadd(b, index, ring_offset), + state->ring_size); +} + +/* Given the final deref of chain of derefs this function will walk up the chain + * until it finds a var deref. + * + * It will then recreate an identical chain that ends with the provided deref. + */ +static nir_deref_instr* +replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new) +{ + nir_deref_instr *parent = nir_deref_instr_parent(old); + if (!parent) + return new; + switch(old->deref_type) { + case nir_deref_type_var: + return new; + case nir_deref_type_array: + return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa); + case nir_deref_type_struct: + return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index); + case nir_deref_type_array_wildcard: + case nir_deref_type_ptr_as_array: + case nir_deref_type_cast: + unreachable("unexpected deref type"); + } + unreachable("impossible deref type"); +} + +static bool +lower_pv_mode_gs_store(nir_builder *b, + nir_intrinsic_instr *intrin, + struct lower_pv_mode_state *state) +{ + b->cursor = nir_before_instr(&intrin->instr); + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (nir_deref_mode_is(deref, nir_var_shader_out)) { + nir_variable *var = nir_deref_instr_get_variable(deref); + + gl_varying_slot location = 
var->data.location; + unsigned location_frac = var->data.location_frac; + assert(state->varyings[location][location_frac]); + nir_def *pos_counter = nir_load_var(b, state->pos_counter); + nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter); + nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]); + nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index); + // recreate the chain of deref that lead to the store. + nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref); + nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin)); + nir_instr_remove(&intrin->instr); + return true; + } return false; } +static void +lower_pv_mode_emit_rotated_prim(nir_builder *b, + struct lower_pv_mode_state *state, + nir_def *current_vertex) +{ + nir_def *two = nir_imm_int(b, 2); + nir_def *three = nir_imm_int(b, 3); + bool is_triangle = state->primitive_vert_count == 3; + /* This shader will always see the last three vertices emitted by the user gs. + * The following table is used to to rotate primitives within a strip generated + * by the user gs such that the last vertex becomes the first. + * + * [lines, tris][even/odd index][vertex mod 3] + */ + static const unsigned vert_maps[2][2][3] = { + {{1, 0, 0}, {1, 0, 0}}, + {{2, 0, 1}, {2, 1, 0}} + }; + /* When the primive supplied to the gs comes from a strip, the last provoking vertex + * is either the last or the second, depending on whether the triangle is at an odd + * or even position within the strip. 
+ * + * odd or even primitive within draw + */ + nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two); + for (unsigned i = 0; i < state->primitive_vert_count; i++) { + /* odd or even triangle within strip emitted by user GS + * this is handled using the table + */ + nir_def *odd_user_prim = nir_imod(b, current_vertex, two); + unsigned offset_even = vert_maps[is_triangle][0][i]; + unsigned offset_odd = vert_maps[is_triangle][1][i]; + nir_def *offset_even_value = nir_imm_int(b, offset_even); + nir_def *offset_odd_value = nir_imm_int(b, offset_odd); + nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim), + offset_odd_value, offset_even_value); + /* Here we account for how triangles are provided to the gs from a strip. + * For even primitives we rotate by 3, meaning we do nothing. + * For odd primitives we rotate by 2, combined with the previous rotation this + * means the second vertex becomes the last. + */ + if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP) + rotated_i = nir_imod(b, nir_iadd(b, rotated_i, + nir_isub(b, three, + odd_prim)), + three); + /* Triangles that come from fans are provided to the gs the same way as + * odd triangles from a strip so always rotate by 2. 
+ */ + else if (state->prim == ZINK_PVE_PRIMITIVE_FAN) + rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2), + three); + rotated_i = nir_iadd(b, rotated_i, current_vertex); + nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) { + gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + if (state->varyings[location][location_frac]) { + nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i); + nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index); + copy_vars(b, nir_build_deref_var(b, var), value); + } + } + nir_emit_vertex(b); + } +} + static bool -lower_discard_if(nir_shader *shader) +lower_pv_mode_gs_emit_vertex(nir_builder *b, + nir_intrinsic_instr *intrin, + struct lower_pv_mode_state *state) { - return nir_shader_instructions_pass(shader, - lower_discard_if_instr, - nir_metadata_dominance, - NULL); + b->cursor = nir_before_instr(&intrin->instr); + + // increment pos_counter + nir_def *pos_counter = nir_load_var(b, state->pos_counter); + nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1); + + nir_instr_remove(&intrin->instr); + return true; } static bool -lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data) +lower_pv_mode_gs_end_primitive(nir_builder *b, + nir_intrinsic_instr *intrin, + struct lower_pv_mode_state *state) { - if (in->type != nir_instr_type_intrinsic) - return false; - nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); - if (instr->intrinsic != nir_intrinsic_load_work_dim) - return false; + b->cursor = nir_before_instr(&intrin->instr); - if (instr->intrinsic == nir_intrinsic_load_work_dim) { - b->cursor = nir_after_instr(&instr->instr); - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_intrinsic_set_range(load, 3 * sizeof(uint32_t)); - load->num_components = 
1; - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim"); - nir_builder_instr_insert(b, &load->instr); + nir_def *pos_counter = nir_load_var(b, state->pos_counter); + nir_push_loop(b); + { + nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter); + nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter), + nir_imm_int(b, state->primitive_vert_count))); + nir_jump(b, nir_jump_break); + nir_pop_if(b, NULL); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa); + lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter); + nir_end_primitive(b); + + nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1); } + nir_pop_loop(b, NULL); + /* Set the ring offset such that when position 0 is + * read we get the last value written + */ + nir_store_var(b, state->ring_offset, pos_counter, 1); + nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1); + nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1); + nir_instr_remove(&intrin->instr); return true; } static bool -lower_work_dim(nir_shader *shader) +lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data) { - if (shader->info.stage != MESA_SHADER_KERNEL) + if (instr->type != nir_instr_type_intrinsic) return false; - if (!reads_work_dim(shader)) + struct lower_pv_mode_state *state = data; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_store_deref: + return lower_pv_mode_gs_store(b, intrin, state); + case nir_intrinsic_copy_deref: + unreachable("should be lowered"); + case nir_intrinsic_emit_vertex_with_counter: + case nir_intrinsic_emit_vertex: + return lower_pv_mode_gs_emit_vertex(b, intrin, state); + case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: + return lower_pv_mode_gs_end_primitive(b, intrin, state); + default: return false; + } +} + +static bool +lower_pv_mode_gs(nir_shader *shader, unsigned prim) +{ + nir_builder b; + 
struct lower_pv_mode_state state; + memset(state.varyings, 0, sizeof(state.varyings)); + + nir_function_impl *entry = nir_shader_get_entrypoint(shader); + b = nir_builder_at(nir_before_impl(entry)); + + state.primitive_vert_count = + mesa_vertices_per_prim(shader->info.gs.output_primitive); + state.ring_size = shader->info.gs.vertices_out; + + nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { + gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + + char name[100]; + snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac); + state.varyings[location][location_frac] = + nir_local_variable_create(entry, + glsl_array_type(var->type, + state.ring_size, + false), + name); + } + + state.pos_counter = nir_local_variable_create(entry, + glsl_uint_type(), + "__pos_counter"); + + state.out_pos_counter = nir_local_variable_create(entry, + glsl_uint_type(), + "__out_pos_counter"); - return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL); + state.ring_offset = nir_local_variable_create(entry, + glsl_uint_type(), + "__ring_offset"); + + state.prim = prim; + + // initialize pos_counter and out_pos_counter + nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1); + nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1); + nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1); + + shader->info.gs.vertices_out = (shader->info.gs.vertices_out - + (state.primitive_vert_count - 1)) * + state.primitive_vert_count; + return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr, + nir_metadata_dominance, &state); +} + +struct lower_line_stipple_state { + nir_variable *pos_out; + nir_variable *stipple_out; + nir_variable *prev_pos; + nir_variable *pos_counter; + nir_variable *stipple_counter; + bool line_rectangular; +}; + +static nir_def * +viewport_map(nir_builder *b, nir_def *vert, + nir_def *scale) +{ + nir_def *w_recip = nir_frcp(b, 
nir_channel(b, vert, 3)); + nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2), + w_recip); + return nir_fmul(b, ndc_point, scale); } static bool -lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data) +lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_deref) - return false; - nir_deref_instr *deref = nir_instr_as_deref(instr); - if (deref->deref_type != nir_deref_type_var) + struct lower_line_stipple_state *state = data; + if (instr->type != nir_instr_type_intrinsic) return false; - nir_variable *var = nir_deref_instr_get_variable(deref); - if (var->data.mode != nir_var_shader_in) + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter && + intrin->intrinsic != nir_intrinsic_emit_vertex) return false; - if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3) + + b->cursor = nir_before_instr(instr); + + nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0)); + // viewport-map endpoints + nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32, + nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE)); + nir_def *prev = nir_load_var(b, state->prev_pos); + nir_def *curr = nir_load_var(b, state->pos_out); + prev = viewport_map(b, prev, vp_scale); + curr = viewport_map(b, curr, vp_scale); + + // calculate length of line + nir_def *len; + if (state->line_rectangular) + len = nir_fast_distance(b, prev, curr); + else { + nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr)); + len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1)); + } + // update stipple_counter + nir_store_var(b, state->stipple_counter, + nir_fadd(b, nir_load_var(b, state->stipple_counter), + len), 1); + nir_pop_if(b, NULL); + // emit stipple out + nir_copy_var(b, state->stipple_out, state->stipple_counter); + nir_copy_var(b, state->prev_pos, 
state->pos_out); + + // update prev_pos and pos_counter for next vertex + b->cursor = nir_after_instr(instr); + nir_store_var(b, state->pos_counter, + nir_iadd_imm(b, nir_load_var(b, state->pos_counter), + 1), 1); + + return true; +} + +static bool +lower_line_stipple_gs(nir_shader *shader, bool line_rectangular) +{ + nir_builder b; + struct lower_line_stipple_state state; + + state.pos_out = + nir_find_variable_with_location(shader, nir_var_shader_out, + VARYING_SLOT_POS); + + // if position isn't written, we have nothing to do + if (!state.pos_out) return false; - /* create second variable for the split */ - nir_variable *var2 = nir_variable_clone(var, b->shader); - /* split new variable into second slot */ - var2->data.driver_location++; - nir_shader_add_variable(b->shader, var2); + state.stipple_out = nir_variable_create(shader, nir_var_shader_out, + glsl_float_type(), + "__stipple"); + state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + state.stipple_out->data.driver_location = shader->num_outputs++; + state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0); + shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location); + + // create temp variables + state.prev_pos = nir_variable_create(shader, nir_var_shader_temp, + glsl_vec4_type(), + "__prev_pos"); + state.pos_counter = nir_variable_create(shader, nir_var_shader_temp, + glsl_uint_type(), + "__pos_counter"); + state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp, + glsl_float_type(), + "__stipple_counter"); + + state.line_rectangular = line_rectangular; + // initialize pos_counter and stipple_counter + nir_function_impl *entry = nir_shader_get_entrypoint(shader); + b = nir_builder_at(nir_before_impl(entry)); + nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1); + nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1); + + return nir_shader_instructions_pass(shader, 
lower_line_stipple_gs_instr, + nir_metadata_dominance, &state); +} - unsigned total_num_components = glsl_get_vector_elements(var->type); - /* new variable is the second half of the dvec */ - var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2); - /* clamp original variable to a dvec2 */ - deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2); +static bool +lower_line_stipple_fs(nir_shader *shader) +{ + nir_builder b; + nir_function_impl *entry = nir_shader_get_entrypoint(shader); + b = nir_builder_at(nir_after_impl(entry)); + + // create stipple counter + nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in, + glsl_float_type(), + "__stipple"); + stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + stipple->data.driver_location = shader->num_inputs++; + stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0); + shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location); + + nir_variable *sample_mask_out = + nir_find_variable_with_location(shader, nir_var_shader_out, + FRAG_RESULT_SAMPLE_MASK); + if (!sample_mask_out) { + sample_mask_out = nir_variable_create(shader, nir_var_shader_out, + glsl_uint_type(), "sample_mask"); + sample_mask_out->data.driver_location = shader->num_outputs++; + sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK; + } - /* create deref instr for new variable */ - b->cursor = nir_after_instr(instr); - nir_deref_instr *deref2 = nir_build_deref_var(b, var2); - - nir_foreach_use_safe(use_src, &deref->dest.ssa) { - nir_instr *use_instr = use_src->parent_instr; - assert(use_instr->type == nir_instr_type_intrinsic && - nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref); - - /* this is a load instruction for the deref, and we need to split it into two instructions that we can - * then zip back into a single ssa def */ - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr); - 
/* clamp the first load to 2 64bit components */ - intr->num_components = intr->dest.ssa.num_components = 2; - b->cursor = nir_after_instr(use_instr); - /* this is the second load instruction for the second half of the dvec3/4 components */ - nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref); - intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa); - intr2->num_components = total_num_components - 2; - nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL); - nir_builder_instr_insert(b, &intr2->instr); - - nir_ssa_def *def[4]; - /* create a new dvec3/4 comprised of all the loaded components from both variables */ - def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0)); - def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1)); - def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0)); - if (total_num_components == 4) - def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1)); - nir_ssa_def *new_vec = nir_vec(b, def, total_num_components); - /* use the assembled dvec3/4 for all other uses of the load */ - nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec, - new_vec->parent_instr); + nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32, + nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN)); + nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16)); + pattern = nir_iand_imm(&b, pattern, 0xffff); + + nir_def *sample_mask_in = nir_load_sample_mask_in(&b); + nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL); + nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL); + nir_store_var(&b, v, sample_mask_in, 1); + nir_store_var(&b, sample_mask, sample_mask_in, 1); + nir_push_loop(&b); + { + nir_def *value = nir_load_var(&b, v); + nir_def *index = nir_ufind_msb(&b, value); + nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index); + nir_def *new_value = nir_ixor(&b, value, index_mask); 
+ nir_store_var(&b, v, new_value, 1); + nir_push_if(&b, nir_ieq_imm(&b, value, 0)); + nir_jump(&b, nir_jump_break); + nir_pop_if(&b, NULL); + + nir_def *stipple_pos = + nir_interp_deref_at_sample(&b, 1, 32, + &nir_build_deref_var(&b, stipple)->def, index); + stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor), + nir_imm_float(&b, 16.0)); + stipple_pos = nir_f2i32(&b, stipple_pos); + nir_def *bit = + nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1); + nir_push_if(&b, nir_ieq_imm(&b, bit, 0)); + { + nir_def *sample_mask_value = nir_load_var(&b, sample_mask); + sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask); + nir_store_var(&b, sample_mask, sample_mask_value, 1); + } + nir_pop_if(&b, NULL); } + nir_pop_loop(&b, NULL); + nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1); return true; } -/* "64-bit three- and four-component vectors consume two consecutive locations." - * - 14.1.4. Location Assignment - * - * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which - * are assigned to consecutive locations, loaded separately, and then assembled back into a - * composite value that's used in place of the original loaded ssa src - */ +struct lower_line_smooth_state { + nir_variable *pos_out; + nir_variable *line_coord_out; + nir_variable *prev_pos; + nir_variable *pos_counter; + nir_variable *prev_varyings[VARYING_SLOT_MAX][4], + *varyings[VARYING_SLOT_MAX][4]; // location_frac +}; + static bool -lower_64bit_vertex_attribs(nir_shader *shader) +lower_line_smooth_gs_store(nir_builder *b, + nir_intrinsic_instr *intrin, + struct lower_line_smooth_state *state) { - if (shader->info.stage != MESA_SHADER_VERTEX) - return false; + b->cursor = nir_before_instr(&intrin->instr); + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (nir_deref_mode_is(deref, nir_var_shader_out)) { + nir_variable *var = nir_deref_instr_get_variable(deref); + + // we take care of position elsewhere + 
gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + if (location != VARYING_SLOT_POS) { + assert(state->varyings[location]); + nir_store_var(b, state->varyings[location][location_frac], + intrin->src[1].ssa, + nir_intrinsic_write_mask(intrin)); + nir_instr_remove(&intrin->instr); + return true; + } + } - return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL); + return false; } static bool -lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data) +lower_line_smooth_gs_emit_vertex(nir_builder *b, + nir_intrinsic_instr *intrin, + struct lower_line_smooth_state *state) { - if (in->type != nir_instr_type_intrinsic) - return false; - nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); - if (instr->intrinsic != nir_intrinsic_load_base_vertex) - return false; + b->cursor = nir_before_instr(&intrin->instr); + + nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0)); + nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32, + nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE)); + nir_def *prev = nir_load_var(b, state->prev_pos); + nir_def *curr = nir_load_var(b, state->pos_out); + nir_def *prev_vp = viewport_map(b, prev, vp_scale); + nir_def *curr_vp = viewport_map(b, curr, vp_scale); + + nir_def *width = nir_load_push_constant_zink(b, 1, 32, + nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH)); + nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5); + + const unsigned yx[2] = { 1, 0 }; + nir_def *vec = nir_fsub(b, curr_vp, prev_vp); + nir_def *len = nir_fast_length(b, vec); + nir_def *dir = nir_normalize(b, vec); + nir_def *half_length = nir_fmul_imm(b, len, 0.5); + half_length = nir_fadd_imm(b, half_length, 0.5); + + nir_def *vp_scale_rcp = nir_frcp(b, vp_scale); + nir_def *tangent = + nir_fmul(b, + nir_fmul(b, + nir_swizzle(b, dir, yx, 2), + nir_imm_vec2(b, 1.0, -1.0)), + vp_scale_rcp); + tangent = nir_fmul(b, tangent, 
half_width); + tangent = nir_pad_vector_imm_int(b, tangent, 0, 4); + dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5); + + nir_def *line_offets[8] = { + nir_fadd(b, tangent, nir_fneg(b, dir)), + nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)), + tangent, + nir_fneg(b, tangent), + tangent, + nir_fneg(b, tangent), + nir_fadd(b, tangent, dir), + nir_fadd(b, nir_fneg(b, tangent), dir), + }; + nir_def *line_coord = + nir_vec4(b, half_width, half_width, half_length, half_length); + nir_def *line_coords[8] = { + nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)), + nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)), + }; - b->cursor = nir_after_instr(&instr->instr); - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_intrinsic_set_range(load, 4); - load->num_components = 1; - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed"); - nir_builder_instr_insert(b, &load->instr); + /* emit first end-cap, and start line */ + for (int i = 0; i < 4; ++i) { + nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) { + gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + if (state->prev_varyings[location][location_frac]) + nir_copy_var(b, var, state->prev_varyings[location][location_frac]); + } + nir_store_var(b, state->pos_out, + nir_fadd(b, prev, nir_fmul(b, line_offets[i], + nir_channel(b, prev, 3))), 0xf); + nir_store_var(b, state->line_coord_out, line_coords[i], 0xf); + nir_emit_vertex(b); + } - nir_ssa_def *composite = nir_build_alu(b, 
nir_op_bcsel, - nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL), - &instr->dest.ssa, - nir_imm_int(b, 0), - NULL); + /* finish line and emit last end-cap */ + for (int i = 4; i < 8; ++i) { + nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) { + gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + if (state->varyings[location][location_frac]) + nir_copy_var(b, var, state->varyings[location][location_frac]); + } + nir_store_var(b, state->pos_out, + nir_fadd(b, curr, nir_fmul(b, line_offets[i], + nir_channel(b, curr, 3))), 0xf); + nir_store_var(b, state->line_coord_out, line_coords[i], 0xf); + nir_emit_vertex(b); + } + nir_end_primitive(b); - nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite, - composite->parent_instr); + nir_pop_if(b, NULL); + + nir_copy_var(b, state->prev_pos, state->pos_out); + nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) { + gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + if (state->varyings[location][location_frac]) + nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]); + } + + // update prev_pos and pos_counter for next vertex + b->cursor = nir_after_instr(&intrin->instr); + nir_store_var(b, state->pos_counter, + nir_iadd_imm(b, nir_load_var(b, state->pos_counter), + 1), 1); + + nir_instr_remove(&intrin->instr); return true; } static bool -lower_basevertex(nir_shader *shader) +lower_line_smooth_gs_end_primitive(nir_builder *b, + nir_intrinsic_instr *intrin, + struct lower_line_smooth_state *state) { - if (shader->info.stage != MESA_SHADER_VERTEX) - return false; + b->cursor = nir_before_instr(&intrin->instr); - if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) - return false; + // reset line counter + nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1); - return 
nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL); + nir_instr_remove(&intrin->instr); + return true; } - static bool -lower_drawid_instr(nir_builder *b, nir_instr *in, void *data) +lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data) { - if (in->type != nir_instr_type_intrinsic) + if (instr->type != nir_instr_type_intrinsic) return false; - nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); - if (instr->intrinsic != nir_intrinsic_load_draw_id) + + struct lower_line_smooth_state *state = data; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_store_deref: + return lower_line_smooth_gs_store(b, intrin, state); + case nir_intrinsic_copy_deref: + unreachable("should be lowered"); + case nir_intrinsic_emit_vertex_with_counter: + case nir_intrinsic_emit_vertex: + return lower_line_smooth_gs_emit_vertex(b, intrin, state); + case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: + return lower_line_smooth_gs_end_primitive(b, intrin, state); + default: return false; + } +} - b->cursor = nir_before_instr(&instr->instr); - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1)); - nir_intrinsic_set_range(load, 4); - load->num_components = 1; - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id"); - nir_builder_instr_insert(b, &load->instr); +static bool +lower_line_smooth_gs(nir_shader *shader) +{ + nir_builder b; + struct lower_line_smooth_state state; + + memset(state.varyings, 0, sizeof(state.varyings)); + memset(state.prev_varyings, 0, sizeof(state.prev_varyings)); + nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { + gl_varying_slot location = var->data.location; + unsigned location_frac = var->data.location_frac; + if (location == VARYING_SLOT_POS) + continue; - 
nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa); + char name[100]; + snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac); + state.varyings[location][location_frac] = + nir_variable_create(shader, nir_var_shader_temp, + var->type, name); - return true; + snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac); + state.prev_varyings[location][location_frac] = + nir_variable_create(shader, nir_var_shader_temp, + var->type, name); + } + + state.pos_out = + nir_find_variable_with_location(shader, nir_var_shader_out, + VARYING_SLOT_POS); + + // if position isn't written, we have nothing to do + if (!state.pos_out) + return false; + + unsigned location = 0; + nir_foreach_shader_in_variable(var, shader) { + if (var->data.driver_location >= location) + location = var->data.driver_location + 1; + } + + state.line_coord_out = + nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(), + "__line_coord"); + state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + state.line_coord_out->data.driver_location = location; + state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0); + shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location); + shader->num_outputs++; + + // create temp variables + state.prev_pos = nir_variable_create(shader, nir_var_shader_temp, + glsl_vec4_type(), + "__prev_pos"); + state.pos_counter = nir_variable_create(shader, nir_var_shader_temp, + glsl_uint_type(), + "__pos_counter"); + + // initialize pos_counter + nir_function_impl *entry = nir_shader_get_entrypoint(shader); + b = nir_builder_at(nir_before_impl(entry)); + nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1); + + shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out; + shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP; + + return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr, + nir_metadata_dominance, 
&state); } static bool -lower_drawid(nir_shader *shader) +lower_line_smooth_fs(nir_shader *shader, bool lower_stipple) { - if (shader->info.stage != MESA_SHADER_VERTEX) - return false; + int dummy; + nir_builder b; - if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) - return false; + nir_variable *stipple_counter = NULL, *stipple_pattern = NULL; + if (lower_stipple) { + stipple_counter = nir_variable_create(shader, nir_var_shader_in, + glsl_float_type(), + "__stipple"); + stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + stipple_counter->data.driver_location = shader->num_inputs++; + stipple_counter->data.location = + MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0); + shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location); + + stipple_pattern = nir_variable_create(shader, nir_var_shader_temp, + glsl_uint_type(), + "stipple_pattern"); + + // initialize stipple_pattern + nir_function_impl *entry = nir_shader_get_entrypoint(shader); + b = nir_builder_at(nir_before_impl(entry)); + nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32, + nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN)); + nir_store_var(&b, stipple_pattern, pattern, 1); + } - return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL); + nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern); + return true; } static bool @@ -353,11 +1034,314 @@ lower_dual_blend(nir_shader *shader) return progress; } +static bool +lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_alu) + return false; + nir_alu_instr *alu_instr = (nir_alu_instr *) instr; + if (alu_instr->op != nir_op_pack_64_2x32 && + alu_instr->op != nir_op_unpack_64_2x32) + return false; + b->cursor = nir_before_instr(&alu_instr->instr); + nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0); + nir_def *dest; + switch (alu_instr->op) { + case 
nir_op_pack_64_2x32: + dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1)); + break; + case nir_op_unpack_64_2x32: + dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src)); + break; + default: + unreachable("Impossible opcode"); + } + nir_def_rewrite_uses(&alu_instr->def, dest); + nir_instr_remove(&alu_instr->instr); + return true; +} + +static bool +lower_64bit_pack(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, lower_64bit_pack_instr, + nir_metadata_block_index | nir_metadata_dominance, NULL); +} + +nir_shader * +zink_create_quads_emulation_gs(const nir_shader_compiler_options *options, + const nir_shader *prev_stage) +{ + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, + options, + "filled quad gs"); + + nir_shader *nir = b.shader; + nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY; + nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP; + nir->info.gs.vertices_in = 4; + nir->info.gs.vertices_out = 6; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 1; + + nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings; + memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride)); + if (prev_stage->xfb_info) { + size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count); + nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size); + } + + nir_variable *in_vars[VARYING_SLOT_MAX]; + nir_variable *out_vars[VARYING_SLOT_MAX]; + unsigned num_vars = 0; + + /* Create input/output variables. 
*/ + nir_foreach_shader_out_variable(var, prev_stage) { + assert(!var->data.patch); + + /* input vars can't be created for those */ + if (var->data.location == VARYING_SLOT_LAYER || + var->data.location == VARYING_SLOT_VIEW_INDEX || + /* psiz not needed for quads */ + var->data.location == VARYING_SLOT_PSIZ) + continue; + + char name[100]; + if (var->name) + snprintf(name, sizeof(name), "in_%s", var->name); + else + snprintf(name, sizeof(name), "in_%d", var->data.driver_location); + + nir_variable *in = nir_variable_clone(var, nir); + ralloc_free(in->name); + in->name = ralloc_strdup(in, name); + in->type = glsl_array_type(var->type, 4, false); + in->data.mode = nir_var_shader_in; + nir_shader_add_variable(nir, in); + + if (var->name) + snprintf(name, sizeof(name), "out_%s", var->name); + else + snprintf(name, sizeof(name), "out_%d", var->data.driver_location); + + nir_variable *out = nir_variable_clone(var, nir); + ralloc_free(out->name); + out->name = ralloc_strdup(out, name); + out->data.mode = nir_var_shader_out; + nir_shader_add_variable(nir, out); + + in_vars[num_vars] = in; + out_vars[num_vars++] = out; + } + + int mapping_first[] = {0, 1, 2, 0, 2, 3}; + int mapping_last[] = {0, 1, 3, 1, 2, 3}; + nir_def *last_pv_vert_def = nir_load_provoking_last(&b); + last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0); + for (unsigned i = 0; i < 6; ++i) { + /* swap indices 2 and 3 */ + nir_def *idx = nir_bcsel(&b, last_pv_vert_def, + nir_imm_int(&b, mapping_last[i]), + nir_imm_int(&b, mapping_first[i])); + /* Copy inputs to outputs. 
*/ + for (unsigned j = 0; j < num_vars; ++j) { + if (in_vars[j]->data.location == VARYING_SLOT_EDGE) { + continue; + } + nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx); + copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value); + } + nir_emit_vertex(&b, 0); + if (i == 2) + nir_end_primitive(&b, 0); + } + + nir_end_primitive(&b, 0); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + nir_validate_shader(nir, "in zink_create_quads_emulation_gs"); + return nir; +} + +static bool +lower_system_values_to_inlined_uniforms_instr(nir_builder *b, + nir_intrinsic_instr *intrin, + void *data) +{ + int inlined_uniform_offset; + switch (intrin->intrinsic) { + case nir_intrinsic_load_flat_mask: + inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t); + break; + case nir_intrinsic_load_provoking_last: + inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t); + break; + default: + return false; + } + + b->cursor = nir_before_instr(&intrin->instr); + assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64); + /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore + * anything with a different bit_size) so we need to split the load. 
*/ + int num_dwords = intrin->def.bit_size / 32; + nir_def *dwords[2] = {NULL}; + for (unsigned i = 0; i < num_dwords; i++) + dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0), + nir_imm_int(b, inlined_uniform_offset + i), + .align_mul = intrin->def.bit_size / 8, + .align_offset = 0, + .range_base = 0, .range = ~0); + nir_def *new_dest_def; + if (intrin->def.bit_size == 32) + new_dest_def = dwords[0]; + else + new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]); + nir_def_rewrite_uses(&intrin->def, new_dest_def); + nir_instr_remove(&intrin->instr); + return true; +} + +bool +zink_lower_system_values_to_inlined_uniforms(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, + lower_system_values_to_inlined_uniforms_instr, + nir_metadata_dominance, NULL); +} + +/* from radeonsi */ +static unsigned +amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer) +{ + /* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */ + + switch (consumer->info.stage) { + case MESA_SHADER_TESS_CTRL: /* VS->TCS */ + /* Non-amplifying shaders can always have their variyng expressions + * moved into later shaders. + */ + return UINT_MAX; + + case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */ + return consumer->info.gs.vertices_in == 1 ? UINT_MAX : + consumer->info.gs.vertices_in == 2 ? 20 : 14; + + case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */ + case MESA_SHADER_FRAGMENT: + /* Up to 3 uniforms and 5 ALUs. */ + return 14; + + default: + unreachable("unexpected shader stage"); + } +} + +/* from radeonsi */ +static unsigned +amd_varying_estimate_instr_cost(nir_instr *instr) +{ + unsigned dst_bit_size, src_bit_size, num_dst_dwords; + nir_op alu_op; + + /* This is a very loose approximation based on gfx10. 
*/ + switch (instr->type) { + case nir_instr_type_alu: + dst_bit_size = nir_instr_as_alu(instr)->def.bit_size; + src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size; + alu_op = nir_instr_as_alu(instr)->op; + num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); + + switch (alu_op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec5: + case nir_op_vec8: + case nir_op_vec16: + case nir_op_fabs: + case nir_op_fneg: + case nir_op_fsat: + return 0; + + case nir_op_imul: + case nir_op_umul_low: + return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords; + + case nir_op_imul_high: + case nir_op_umul_high: + case nir_op_imul_2x32_64: + case nir_op_umul_2x32_64: + return 4; + + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_frcp: + case nir_op_frsq: + case nir_op_fsqrt: + case nir_op_fsin: + case nir_op_fcos: + case nir_op_fsin_amd: + case nir_op_fcos_amd: + return 4; /* FP16 & FP32. */ + + case nir_op_fpow: + return 4 + 1 + 4; /* log2 + mul + exp2 */ + + case nir_op_fsign: + return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */ + + case nir_op_idiv: + case nir_op_udiv: + case nir_op_imod: + case nir_op_umod: + case nir_op_irem: + return dst_bit_size == 64 ? 80 : 40; + + case nir_op_fdiv: + return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */ + + case nir_op_fmod: + case nir_op_frem: + return dst_bit_size == 64 ? 80 : 8; + + default: + /* Double opcodes. Comparisons have always full performance. 
*/ + if ((dst_bit_size == 64 && + nir_op_infos[alu_op].output_type & nir_type_float) || + (dst_bit_size >= 8 && src_bit_size == 64 && + nir_op_infos[alu_op].input_types[0] & nir_type_float)) + return 16; + + return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32); + } + + case nir_instr_type_intrinsic: + dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size; + num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); + + switch (nir_instr_as_intrinsic(instr)->intrinsic) { + case nir_intrinsic_load_deref: + /* Uniform or UBO load. + * Set a low cost to balance the number of scalar loads and ALUs. + */ + return 3 * num_dst_dwords; + + default: + unreachable("unexpected intrinsic"); + } + + default: + unreachable("unexpected instr type"); + } +} + void zink_screen_init_compiler(struct zink_screen *screen) { static const struct nir_shader_compiler_options default_options = { + .io_options = nir_io_glsl_lower_derefs, .lower_ffma16 = true, .lower_ffma32 = true, .lower_ffma64 = true, @@ -366,25 +1350,41 @@ zink_screen_init_compiler(struct zink_screen *screen) .lower_flrp32 = true, .lower_fpow = true, .lower_fsat = true, + .lower_hadd = true, + .lower_iadd_sat = true, + .lower_fisnormal = true, .lower_extract_byte = true, .lower_extract_word = true, .lower_insert_byte = true, .lower_insert_word = true, + + /* We can only support 32-bit ldexp, but NIR doesn't have a flag + * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit + * ldexp, so we don't just always lower it in NIR). Given that ldexp is + * effectively unused (no instances in shader-db), it's not worth the + * effort to do so. 
+ * */ + .lower_ldexp = true, + .lower_mul_high = true, - .lower_rotate = true, + .lower_to_scalar = true, .lower_uadd_carry = true, - .lower_pack_64_2x32_split = true, - .lower_unpack_64_2x32_split = true, - .lower_pack_32_2x16_split = true, - .lower_unpack_32_2x16_split = true, + .compact_arrays = true, + .lower_usub_borrow = true, + .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_vector_cmp = true, .lower_int64_options = 0, - .lower_doubles_options = ~nir_lower_fp64_full_software, + .lower_doubles_options = nir_lower_dround_even, .lower_uniforms_to_ubo = true, .has_fsub = true, .has_isub = true, .lower_mul_2x32_64 = true, .support_16bit_alu = true, /* not quite what it sounds like */ + .support_indirect_inputs = BITFIELD_MASK(MESA_SHADER_COMPUTE), + .support_indirect_outputs = BITFIELD_MASK(MESA_SHADER_COMPUTE), + .max_unroll_iterations = 0, + .use_interpolated_input_intrinsics = true, }; screen->nir_options = default_options; @@ -396,13 +1396,49 @@ zink_screen_init_compiler(struct zink_screen *screen) screen->nir_options.lower_doubles_options = ~0; screen->nir_options.lower_flrp64 = true; screen->nir_options.lower_ffma64 = true; + /* soft fp64 function inlining will blow up loop bodies and effectively + * stop Vulkan drivers from unrolling the loops. 
+ */ + screen->nir_options.max_unroll_iterations_fp64 = 32; + } + + if (screen->driver_workarounds.io_opt) { + screen->nir_options.io_options |= nir_io_glsl_opt_varyings; + + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_RADV: + case VK_DRIVER_ID_AMD_OPEN_SOURCE: + case VK_DRIVER_ID_AMD_PROPRIETARY: + screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost; + screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost; + break; + default: + mesa_logw("zink: instruction costs not implemented for this implementation!"); + screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost; + screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost; + } } + + /* + The OpFRem and OpFMod instructions use cheap approximations of remainder, + and the error can be large due to the discontinuity in trunc() and floor(). + This can produce mathematically unexpected results in some cases, such as + FMod(x,x) computing x rather than 0, and can also cause the result to have + a different sign than the infinitely precise result. + + -Table 84. 
Precision of core SPIR-V Instructions + * for drivers that are known to have imprecise fmod for doubles, lower dmod + */ + if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV || + screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE || + screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY) + screen->nir_options.lower_doubles_options = nir_lower_dmod; } const void * zink_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, - enum pipe_shader_type shader) + gl_shader_stage shader) { assert(ir == PIPE_SHADER_IR_NIR); return &zink_screen(pscreen)->nir_options; @@ -420,23 +1456,201 @@ zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens) return tgsi_to_nir(tokens, screen, false); } + +static bool +def_is_64bit(nir_def *def, void *state) +{ + bool *lower = (bool *)state; + if (def && (def->bit_size == 64)) { + *lower = true; + return false; + } + return true; +} + +static bool +src_is_64bit(nir_src *src, void *state) +{ + bool *lower = (bool *)state; + if (src && (nir_src_bit_size(*src) == 64)) { + *lower = true; + return false; + } + return true; +} + +static bool +filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data) +{ + bool lower = false; + /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_* + * doesn't have const variants, so do the ugly const_cast here. 
*/ + nir_instr *instr = (nir_instr *)const_instr; + + nir_foreach_def(instr, def_is_64bit, &lower); + if (lower) + return true; + nir_foreach_src(instr, src_is_64bit, &lower); + return lower; +} + +static bool +filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data) +{ + nir_instr *instr = (nir_instr *)const_instr; + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_pack_64_2x32_split: + case nir_op_pack_32_2x16_split: + case nir_op_unpack_32_2x16_split_x: + case nir_op_unpack_32_2x16_split_y: + case nir_op_unpack_64_2x32_split_x: + case nir_op_unpack_64_2x32_split_y: + return true; + default: + break; + } + return false; +} + + +struct bo_vars { + nir_variable *uniforms[5]; + nir_variable *ubo[5]; + nir_variable *ssbo[5]; + uint32_t first_ubo; + uint32_t first_ssbo; +}; + +static struct bo_vars +get_bo_vars(struct zink_shader *zs, nir_shader *shader) +{ + struct bo_vars bo; + memset(&bo, 0, sizeof(bo)); + if (zs->ubos_used) + bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2; + assert(bo.first_ssbo < PIPE_MAX_CONSTANT_BUFFERS); + if (zs->ssbos_used) + bo.first_ssbo = ffs(zs->ssbos_used) - 1; + assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS); + nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) { + unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1; + if (var->data.mode == nir_var_mem_ssbo) { + assert(!bo.ssbo[idx]); + bo.ssbo[idx] = var; + } else { + if (var->data.driver_location) { + assert(!bo.ubo[idx]); + bo.ubo[idx] = var; + } else { + assert(!bo.uniforms[idx]); + bo.uniforms[idx] = var; + } + } + } + return bo; +} + +static bool +bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) +{ + struct bo_vars *bo = data; + if (instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_variable *var = NULL; + nir_def *offset = NULL; + bool is_load = true; 
+ b->cursor = nir_before_instr(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_store_ssbo: + var = bo->ssbo[intr->def.bit_size >> 4]; + offset = intr->src[2].ssa; + is_load = false; + break; + case nir_intrinsic_load_ssbo: + var = bo->ssbo[intr->def.bit_size >> 4]; + offset = intr->src[1].ssa; + break; + case nir_intrinsic_load_ubo: + if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0) + var = bo->uniforms[intr->def.bit_size >> 4]; + else + var = bo->ubo[intr->def.bit_size >> 4]; + offset = intr->src[1].ssa; + break; + default: + return false; + } + nir_src offset_src = nir_src_for_ssa(offset); + if (!nir_src_is_const(offset_src)) + return false; + + unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32; + const struct glsl_type *strct_type = glsl_get_array_element(var->type); + unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0)); + bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0; + if (has_unsized || offset_bytes + intr->num_components - 1 < size) + return false; + + unsigned rewrites = 0; + nir_def *result[2]; + for (unsigned i = 0; i < intr->num_components; i++) { + if (offset_bytes + i >= size) { + rewrites++; + if (is_load) + result[i] = nir_imm_zero(b, 1, intr->def.bit_size); + } + } + assert(rewrites == intr->num_components); + if (is_load) { + nir_def *load = nir_vec(b, result, intr->num_components); + nir_def_rewrite_uses(&intr->def, load); + } + nir_instr_remove(instr); + return true; +} + +static bool +bound_bo_access(nir_shader *shader, struct zink_shader *zs) +{ + struct bo_vars bo = get_bo_vars(zs, shader); + return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo); +} + static void -optimize_nir(struct nir_shader *s) +optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink) { bool progress; do { progress = false; + if (s->options->lower_int64_options) + 
NIR_PASS_V(s, nir_lower_int64); + if (s->options->lower_doubles_options & nir_lower_fp64_full_software) + NIR_PASS_V(s, lower_64bit_pack); NIR_PASS_V(s, nir_lower_vars_to_ssa); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL); + NIR_PASS(progress, s, nir_opt_copy_prop_vars); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); + if (s->options->lower_int64_options) { + NIR_PASS(progress, s, nir_lower_64bit_phis); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL); + } NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); + NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); NIR_PASS(progress, s, nir_opt_cse); NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, zink_nir_lower_b2b); + if (zs) + NIR_PASS(progress, s, bound_bo_access, zs); + if (can_shrink) + NIR_PASS(progress, s, nir_opt_shrink_vectors, false); } while (progress); do { @@ -458,16 +1672,17 @@ optimize_nir(struct nir_shader *s) static bool lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data) { + bool ms = data != NULL; if (instr->type != nir_instr_type_intrinsic) return false; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (intr->intrinsic != nir_intrinsic_load_deref) return false; - nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); - if (var != data) + nir_variable *var = nir_intrinsic_get_var(intr, 0); + if (!var->data.fb_fetch_output) return false; b->cursor = nir_after_instr(instr); - nir_variable *fbfetch = nir_variable_clone(data, b->shader); + nir_variable *fbfetch = nir_variable_clone(var, b->shader); /* If Dim is SubpassData, ... 
Image Format must be Unknown * - SPIRV OpTypeImage specification */ @@ -475,18 +1690,20 @@ lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data) fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */ fbfetch->data.mode = nir_var_uniform; fbfetch->data.binding = ZINK_FBFETCH_BINDING; - fbfetch->type = glsl_image_type(GLSL_SAMPLER_DIM_SUBPASS, false, GLSL_TYPE_FLOAT); + fbfetch->data.binding = ZINK_FBFETCH_BINDING; + fbfetch->data.sample = ms; + enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS; + fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); nir_shader_add_variable(b->shader, fbfetch); - nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa; - nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), nir_ssa_undef(b, 1, 32), nir_imm_int(b, 0)); - unsigned swiz[4] = {2, 1, 0, 3}; - nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle); + nir_def *deref = &nir_build_deref_var(b, fbfetch)->def; + nir_def *sample = ms ? 
nir_load_sample_id(b) : nir_undef(b, 1, 32); + nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0)); + nir_def_rewrite_uses(&intr->def, load); return true; } static bool -lower_fbfetch(nir_shader *shader, nir_variable **fbfetch) +lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms) { nir_foreach_shader_out_variable(var, shader) { if (var->data.fb_fetch_output) { @@ -497,71 +1714,375 @@ lower_fbfetch(nir_shader *shader, nir_variable **fbfetch) assert(*fbfetch); if (!*fbfetch) return false; - return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, *fbfetch); + return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms); +} + +/* + * Add a check for out of bounds LOD for every texel fetch op + * It boils down to: + * - if (lod < query_levels(tex)) + * - res = txf(tex) + * - else + * - res = (0, 0, 0, 1) + */ +static bool +lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data) +{ + if (in->type != nir_instr_type_tex) + return false; + nir_tex_instr *txf = nir_instr_as_tex(in); + if (txf->op != nir_texop_txf) + return false; + + b->cursor = nir_before_instr(in); + int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod); + assert(lod_idx >= 0); + nir_src lod_src = txf->src[lod_idx].src; + if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0) + return false; + + nir_def *lod = lod_src.ssa; + + int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset); + int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle); + nir_tex_instr *levels = nir_tex_instr_create(b->shader, + !!(offset_idx >= 0) + !!(handle_idx >= 0)); + levels->op = nir_texop_query_levels; + levels->texture_index = txf->texture_index; + levels->dest_type = nir_type_int | lod->bit_size; + if (offset_idx >= 0) { + levels->src[0].src_type = nir_tex_src_texture_offset; + levels->src[0].src = 
nir_src_for_ssa(txf->src[offset_idx].src.ssa); + } + if (handle_idx >= 0) { + levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle; + levels->src[!!(offset_idx >= 0)].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa); + } + nir_def_init(&levels->instr, &levels->def, + nir_tex_instr_dest_size(levels), 32); + nir_builder_instr_insert(b, &levels->instr); + + nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def)); + nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in)); + nir_builder_instr_insert(b, &new_txf->instr); + + nir_if *lod_oob_else = nir_push_else(b, lod_oob_if); + nir_const_value oob_values[4] = {0}; + unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type); + oob_values[3] = (txf->dest_type & nir_type_float) ? + nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size); + nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values); + + nir_pop_if(b, lod_oob_else); + nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val); + + nir_def_rewrite_uses(&txf->def, robust_txf); + nir_instr_remove_v(in); + return true; +} + +/* This pass is used to workaround the lack of out of bounds LOD robustness + * for texel fetch ops in VK_EXT_image_robustness. 
+ */ +static bool +lower_txf_lod_robustness(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL); } /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */ static bool check_psiz(struct nir_shader *s) { + bool have_psiz = false; nir_foreach_shader_out_variable(var, s) { if (var->data.location == VARYING_SLOT_PSIZ) { /* genuine PSIZ outputs will have this set */ - return !!var->data.explicit_location; + have_psiz |= !!var->data.explicit_location; } } - return false; + return have_psiz; +} + +static nir_variable * +find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode) +{ + assert((int)location >= 0); + + nir_foreach_variable_with_modes(var, nir, mode) { + if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) { + unsigned num_components = glsl_get_vector_elements(var->type); + if (glsl_type_is_64bit(glsl_without_array(var->type))) + num_components *= 2; + if (is_clipcull_dist(var->data.location)) + num_components = glsl_get_aoa_size(var->type); + if (var->data.location_frac <= location_frac && + var->data.location_frac + num_components > location_frac) + return var; + } + } + return NULL; +} + +static bool +is_inlined(const bool *inlined, const nir_xfb_output_info *output) +{ + unsigned num_components = util_bitcount(output->component_mask); + for (unsigned i = 0; i < num_components; i++) + if (!inlined[output->component_offset + i]) + return false; + return true; } static void -update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info, - uint64_t outputs_written, bool have_psiz) -{ - uint8_t reverse_map[64] = {0}; - unsigned slot = 0; - /* semi-copied from iris */ - while (outputs_written) { - int bit = u_bit_scan64(&outputs_written); - /* PSIZ from nir_lower_point_size_mov breaks stream output, so always 
skip it */ - if (bit == VARYING_SLOT_PSIZ && !have_psiz) - continue; - reverse_map[slot++] = bit; +update_psiz_location(nir_shader *nir, nir_variable *psiz) +{ + uint32_t last_output = util_last_bit64(nir->info.outputs_written); + if (last_output < VARYING_SLOT_VAR0) + last_output = VARYING_SLOT_VAR0; + else + last_output++; + /* this should get fixed up by slot remapping */ + psiz->data.location = last_output; +} + +static const struct glsl_type * +clamp_slot_type(const struct glsl_type *type, unsigned slot) +{ + /* could be dvec/dmat/mat: each member is the same */ + const struct glsl_type *plain = glsl_without_array_or_matrix(type); + /* determine size of each member type */ + unsigned slot_count = glsl_count_vec4_slots(plain, false, false); + /* normalize slot idx to current type's size */ + slot %= slot_count; + unsigned slot_components = glsl_get_components(plain); + if (glsl_base_type_is_64bit(glsl_get_base_type(plain))) + slot_components *= 2; + /* create a vec4 mask of the selected slot's components out of all the components */ + uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4); + /* return a vecN of the selected components */ + slot_components = util_bitcount(mask); + return glsl_vec_type(slot_components); +} + +static const struct glsl_type * +unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx) +{ + const struct glsl_type *type = slot_type; + unsigned slot_count = 0; + unsigned cur_slot = 0; + /* iterate over all the members in the struct, stopping once the slot idx is reached */ + for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) { + /* use array type for slot counting but return array member type for unroll */ + const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i); + type = glsl_without_array(arraytype); + slot_count = glsl_count_vec4_slots(arraytype, false, false); } + *slot_idx -= (cur_slot - slot_count); + if 
(!glsl_type_is_struct_or_ifc(type)) + /* this is a fully unrolled struct: find the number of vec components to output */ + type = clamp_slot_type(type, *slot_idx); + return type; +} + +static unsigned +get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot) +{ + assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false)); + const struct glsl_type *orig_type = var->type; + const struct glsl_type *type = glsl_without_array(var->type); + unsigned slot_idx = slot - so_slot; + if (type != orig_type) + slot_idx %= glsl_count_vec4_slots(type, false, false); + /* need to find the vec4 that's being exported by this slot */ + while (glsl_type_is_struct_or_ifc(type)) + type = unroll_struct_type(type, &slot_idx); + + /* arrays here are already fully unrolled from their structs, so slot handling is implicit */ + unsigned num_components = glsl_get_components(glsl_without_array(type)); + /* special handling: clip/cull distance are arrays with vector semantics */ + if (is_clipcull_dist(var->data.location)) { + num_components = glsl_array_size(type); + if (slot_idx) + /* this is the second vec4 */ + num_components %= 4; + else + /* this is the first vec4 */ + num_components = MIN2(num_components, 4); + } + assert(num_components); + /* gallium handles xfb in terms of 32bit units */ + if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type)))) + num_components *= 2; + return num_components; +} + +static unsigned +get_var_slot_count(nir_shader *nir, nir_variable *var) +{ + assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out); + const struct glsl_type *type = var->type; + if (nir_is_arrayed_io(var, nir->info.stage)) + type = glsl_get_array_element(type); + unsigned slot_count = 0; + if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && var->data.location >= VERT_ATTRIB_GENERIC0) || + var->data.location >= VARYING_SLOT_VAR0) + slot_count = 
glsl_count_vec4_slots(type, false, false); + else if (glsl_type_is_array(type)) + slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4); + else + slot_count = 1; + return slot_count; +} - nir_foreach_shader_out_variable(var, zs->nir) - var->data.explicit_xfb_buffer = 0; - bool inlined[64] = {0}; - for (unsigned i = 0; i < so_info->num_outputs; i++) { - const struct pipe_stream_output *output = &so_info->output[i]; - unsigned slot = reverse_map[output->register_index]; +static const nir_xfb_output_info * +find_packed_output(const nir_xfb_info *xfb_info, unsigned slot) +{ + for (unsigned i = 0; i < xfb_info->output_count; i++) { + const nir_xfb_output_info *packed_output = &xfb_info->outputs[i]; + if (packed_output->location == slot) + return packed_output; + } + return NULL; +} + +static void +update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz) +{ + bool inlined[VARYING_SLOT_MAX][4] = {0}; + uint64_t packed = 0; + uint8_t packed_components[VARYING_SLOT_MAX] = {0}; + uint8_t packed_streams[VARYING_SLOT_MAX] = {0}; + uint8_t packed_buffers[VARYING_SLOT_MAX] = {0}; + uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0}; + for (unsigned i = 0; i < nir->xfb_info->output_count; i++) { + const nir_xfb_output_info *output = &nir->xfb_info->outputs[i]; + unsigned xfb_components = util_bitcount(output->component_mask); /* always set stride to be used during draw */ - zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer]; - if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) && - !output->start_component) { + zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride; + if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) { + for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) { + unsigned slot = output->location; + if 
(inlined[slot][output->component_offset + c]) + continue; + nir_variable *var = NULL; + while (!var && slot < VARYING_SLOT_TESS_MAX) + var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out); + slot = output->location; + unsigned slot_count = var ? get_var_slot_count(nir, var) : 0; + if (!var || var->data.location > slot || var->data.location + slot_count <= slot) { + /* if no variable is found for the xfb output, no output exists */ + inlined[slot][c + output->component_offset] = true; + continue; + } + if (var->data.explicit_xfb_buffer) { + /* handle dvec3 where gallium splits streamout over 2 registers */ + for (unsigned j = 0; j < xfb_components; j++) + inlined[slot][c + output->component_offset + j] = true; + } + if (is_inlined(inlined[slot], output)) + continue; + assert(!glsl_type_is_array(var->type) || is_clipcull_dist(var->data.location)); + assert(!glsl_type_is_struct_or_ifc(var->type)); + unsigned num_components = glsl_type_is_array(var->type) ? 
glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type); + if (glsl_type_is_64bit(glsl_without_array(var->type))) + num_components *= 2; + /* if this is the entire variable, try to blast it out during the initial declaration + * structs must be handled later to ensure accurate analysis + */ + if ((num_components == xfb_components || + num_components < xfb_components || + (num_components > xfb_components && xfb_components == 4))) { + var->data.explicit_xfb_buffer = 1; + var->data.xfb.buffer = output->buffer; + var->data.xfb.stride = zs->sinfo.stride[output->buffer]; + var->data.offset = (output->offset + c * sizeof(uint32_t)); + var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer]; + for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++) + inlined[slot][c + output->component_offset + j] = true; + } else { + /* otherwise store some metadata for later */ + packed |= BITFIELD64_BIT(slot); + packed_components[slot] += xfb_components; + packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]); + packed_buffers[slot] |= BITFIELD_BIT(output->buffer); + for (unsigned j = 0; j < xfb_components; j++) + packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t); + } + } + } + } + + /* if this was flagged as a packed output before, and if all the components are + * being output with the same stream on the same buffer with increasing offsets, this entire variable + * can be consolidated into a single output to conserve locations + */ + for (unsigned i = 0; i < nir->xfb_info->output_count; i++) { + const nir_xfb_output_info *output = &nir->xfb_info->outputs[i]; + unsigned slot = output->location; + if (is_inlined(inlined[slot], output)) + continue; + if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) { nir_variable *var = NULL; while (!var) - var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--); - 
slot++; - if (inlined[slot]) - continue; - assert(var && var->data.location == slot); - /* if this is the entire variable, try to blast it out during the initial declaration */ - if (glsl_get_components(var->type) == output->num_components) { - var->data.explicit_xfb_buffer = 1; - var->data.xfb.buffer = output->output_buffer; - var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; - var->data.offset = output->dst_offset * 4; - var->data.stream = output->stream; - inlined[slot] = true; + var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out); + slot = output->location; + unsigned slot_count = var ? get_var_slot_count(nir, var) : 0; + if (!var || var->data.location > slot || var->data.location + slot_count <= slot) continue; + /* this is a lowered 64bit variable that can't be exported due to packing */ + if (var->data.is_xfb) + goto out; + + unsigned num_slots = is_clipcull_dist(var->data.location) ? + glsl_array_size(var->type) / 4 : + glsl_count_vec4_slots(var->type, false, false); + /* for each variable, iterate over all the variable's slots and inline the outputs */ + for (unsigned j = 0; j < num_slots; j++) { + slot = var->data.location + j; + const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot); + if (!packed_output) + goto out; + + /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */ + if (!(packed & BITFIELD64_BIT(slot)) || + util_bitcount(packed_streams[slot]) != 1 || + util_bitcount(packed_buffers[slot]) != 1) + goto out; + + /* if all the components the variable exports to this slot aren't captured, skip consolidation */ + unsigned num_components = get_slot_components(var, slot, var->data.location); + if (num_components != packed_components[slot]) + goto out; + + /* in order to pack the xfb output, all the offsets must be sequentially incrementing */ + uint32_t prev_offset = packed_offsets[packed_output->location][0]; + for 
(unsigned k = 1; k < num_components; k++) { + /* if the offsets are not incrementing as expected, skip consolidation */ + if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t)) + goto out; + prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset]; + } } + /* this output can be consolidated: blast out all the data inlined */ + var->data.explicit_xfb_buffer = 1; + var->data.xfb.buffer = output->buffer; + var->data.xfb.stride = zs->sinfo.stride[output->buffer]; + var->data.offset = output->offset; + var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer]; + /* mark all slot components inlined to skip subsequent loop iterations */ + for (unsigned j = 0; j < num_slots; j++) { + slot = var->data.location + j; + for (unsigned k = 0; k < packed_components[slot]; k++) + inlined[slot][k] = true; + packed &= ~BITFIELD64_BIT(slot); + } + continue; } - zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output; - /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ - zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index]; +out: + unreachable("xfb should be inlined by now!"); } - zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs; } struct decompose_state { @@ -585,7 +2106,7 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data) return false; unsigned num_components = glsl_get_vector_elements(split[0]->type); b->cursor = nir_after_instr(instr); - nir_ssa_def *loads[4]; + nir_def *loads[4]; for (unsigned i = 0; i < (state->needs_w ? 
num_components - 1 : num_components); i++) loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1])); if (state->needs_w) { @@ -593,8 +2114,8 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data) loads[3] = nir_channel(b, loads[0], 3); loads[0] = nir_channel(b, loads[0], 0); } - nir_ssa_def *new_load = nir_vec(b, loads, num_components); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load); + nir_def *new_load = nir_vec(b, loads, num_components); + nir_def_rewrite_uses(&intr->def, new_load); nir_instr_remove_v(instr); return true; } @@ -633,7 +2154,489 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose } nir_fixup_deref_modes(nir); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); - optimize_nir(nir); + optimize_nir(nir, NULL, true); + return true; +} + +static bool +rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) +{ + struct zink_screen *screen = data; + const bool has_int64 = screen->info.feats.features.shaderInt64; + if (instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + b->cursor = nir_before_instr(instr); + switch (intr->intrinsic) { + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: { + /* convert offset to uintN_t[idx] */ + nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8); + nir_src_rewrite(&intr->src[1], offset); + return true; + } + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_ubo: { + /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */ + bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo && + nir_src_is_const(intr->src[0]) && + nir_src_as_uint(intr->src[0]) == 0 && + intr->def.bit_size == 64 && + nir_intrinsic_align_offset(intr) % 8 != 0; + force_2x32 |= intr->def.bit_size == 64 && !has_int64; + nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 
32 : intr->def.bit_size) / 8); + nir_src_rewrite(&intr->src[1], offset); + /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ + if (force_2x32) { + /* this is always scalarized */ + assert(intr->def.num_components == 1); + /* rewrite as 2x32 */ + nir_def *load[2]; + for (unsigned i = 0; i < 2; i++) { + if (intr->intrinsic == nir_intrinsic_load_ssbo) + load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0); + else + load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4); + nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr)); + } + /* cast back to 64bit */ + nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]); + nir_def_rewrite_uses(&intr->def, casted); + nir_instr_remove(instr); + } + return true; + } + case nir_intrinsic_load_scratch: + case nir_intrinsic_load_shared: { + b->cursor = nir_before_instr(instr); + bool force_2x32 = intr->def.bit_size == 64 && !has_int64; + nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 
32 : intr->def.bit_size) / 8); + nir_src_rewrite(&intr->src[0], offset); + /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ + if (force_2x32) { + /* this is always scalarized */ + assert(intr->def.num_components == 1); + /* rewrite as 2x32 */ + nir_def *load[2]; + for (unsigned i = 0; i < 2; i++) + load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0); + /* cast back to 64bit */ + nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]); + nir_def_rewrite_uses(&intr->def, casted); + nir_instr_remove(instr); + return true; + } + break; + } + case nir_intrinsic_store_ssbo: { + b->cursor = nir_before_instr(instr); + bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64; + nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8); + nir_src_rewrite(&intr->src[2], offset); + /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ + if (force_2x32) { + /* this is always scalarized */ + assert(intr->src[0].ssa->num_components == 1); + nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)}; + for (unsigned i = 0; i < 2; i++) + nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0); + nir_instr_remove(instr); + } + return true; + } + case nir_intrinsic_store_scratch: + case nir_intrinsic_store_shared: { + b->cursor = nir_before_instr(instr); + bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64; + nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 
32 : nir_src_bit_size(intr->src[0])) / 8); + nir_src_rewrite(&intr->src[1], offset); + /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ + if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) { + /* this is always scalarized */ + assert(intr->src[0].ssa->num_components == 1); + nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)}; + for (unsigned i = 0; i < 2; i++) + nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0); + nir_instr_remove(instr); + } + return true; + } + default: + break; + } + return false; +} + +static bool +rewrite_bo_access(nir_shader *shader, struct zink_screen *screen) +{ + return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen); +} + +static nir_variable * +get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size) +{ + nir_variable *var, **ptr; + unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1; + + if (ssbo) + ptr = &bo->ssbo[bit_size >> 4]; + else { + if (!idx) { + ptr = &bo->uniforms[bit_size >> 4]; + } else + ptr = &bo->ubo[bit_size >> 4]; + } + var = *ptr; + if (!var) { + if (ssbo) + var = bo->ssbo[32 >> 4]; + else { + if (!idx) + var = bo->uniforms[32 >> 4]; + else + var = bo->ubo[32 >> 4]; + } + var = nir_variable_clone(var, shader); + if (ssbo) + var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size); + else + var->name = ralloc_asprintf(shader, "%s@%u", idx ? 
"ubos" : "uniform_0", bit_size); + *ptr = var; + nir_shader_add_variable(shader, var); + + struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2); + fields[0].name = ralloc_strdup(shader, "base"); + fields[1].name = ralloc_strdup(shader, "unsized"); + unsigned array_size = glsl_get_length(var->type); + const struct glsl_type *bare_type = glsl_without_array(var->type); + const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0); + unsigned length = glsl_get_length(array_type); + const struct glsl_type *type; + const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8); + if (bit_size > 32) { + assert(bit_size == 64); + type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8); + } else { + type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8); + } + fields[0].type = type; + fields[1].type = unsized; + var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0); + var->data.driver_location = idx; + } + return var; +} + +static void +rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo) +{ + nir_intrinsic_op op; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_ssbo_atomic) + op = nir_intrinsic_deref_atomic; + else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap) + op = nir_intrinsic_deref_atomic_swap; + else + unreachable("unknown intrinsic"); + nir_def *offset = intr->src[1].ssa; + nir_src *src = &intr->src[0]; + nir_variable *var = get_bo_var(b->shader, bo, true, src, + intr->def.bit_size); + nir_deref_instr *deref_var = nir_build_deref_var(b, var); + nir_def *idx = src->ssa; + if (bo->first_ssbo) + idx = nir_iadd_imm(b, idx, -bo->first_ssbo); + nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx); + nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0); 
+ + /* generate new atomic deref ops for every component */ + nir_def *result[4]; + unsigned num_components = intr->def.num_components; + for (unsigned i = 0; i < num_components; i++) { + nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset); + nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op); + nir_def_init(&new_instr->instr, &new_instr->def, 1, + intr->def.bit_size); + nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr)); + new_instr->src[0] = nir_src_for_ssa(&deref_arr->def); + /* deref ops have no offset src, so copy the srcs after it */ + for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++) + new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa); + nir_builder_instr_insert(b, &new_instr->instr); + + result[i] = &new_instr->def; + offset = nir_iadd_imm(b, offset, 1); + } + + nir_def *load = nir_vec(b, result, num_components); + nir_def_rewrite_uses(&intr->def, load); + nir_instr_remove(instr); +} + +static bool +remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) +{ + struct bo_vars *bo = data; + if (instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_variable *var = NULL; + nir_def *offset = NULL; + bool is_load = true; + b->cursor = nir_before_instr(instr); + nir_src *src; + bool ssbo = true; + switch (intr->intrinsic) { + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: + rewrite_atomic_ssbo_instr(b, instr, bo); + return true; + case nir_intrinsic_store_ssbo: + src = &intr->src[1]; + var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0])); + offset = intr->src[2].ssa; + is_load = false; + break; + case nir_intrinsic_load_ssbo: + src = &intr->src[0]; + var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size); + offset = intr->src[1].ssa; + break; + case nir_intrinsic_load_ubo: + src = &intr->src[0]; + var = get_bo_var(b->shader, bo, 
false, src, intr->def.bit_size); + offset = intr->src[1].ssa; + ssbo = false; + break; + default: + return false; + } + assert(var); + assert(offset); + nir_deref_instr *deref_var = nir_build_deref_var(b, var); + nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa; + if (!ssbo && bo->first_ubo && var->data.driver_location) + idx = nir_iadd_imm(b, idx, -bo->first_ubo); + else if (ssbo && bo->first_ssbo) + idx = nir_iadd_imm(b, idx, -bo->first_ssbo); + nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, + nir_i2iN(b, idx, deref_var->def.bit_size)); + nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0); + assert(intr->num_components <= 2); + if (is_load) { + nir_def *result[2]; + for (unsigned i = 0; i < intr->num_components; i++) { + nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, + nir_i2iN(b, offset, deref_struct->def.bit_size)); + result[i] = nir_load_deref(b, deref_arr); + if (intr->intrinsic == nir_intrinsic_load_ssbo) + nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr)); + offset = nir_iadd_imm(b, offset, 1); + } + nir_def *load = nir_vec(b, result, intr->num_components); + nir_def_rewrite_uses(&intr->def, load); + } else { + nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, + nir_i2iN(b, offset, deref_struct->def.bit_size)); + nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr)); + } + nir_instr_remove(instr); + return true; +} + +static bool +remove_bo_access(nir_shader *shader, struct zink_shader *zs) +{ + struct bo_vars bo = get_bo_vars(zs, shader); + return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo); +} + +static bool +filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp) +{ + switch (intr->intrinsic) { + case 
nir_intrinsic_load_interpolated_input: + *is_interp = true; + FALLTHROUGH; + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + *is_input = true; + FALLTHROUGH; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_primitive_output: + *is_load = true; + FALLTHROUGH; + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_primitive_output: + case nir_intrinsic_store_per_vertex_output: + break; + default: + return false; + } + return true; +} + +static bool +io_instr_is_arrayed(nir_intrinsic_instr *intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_primitive_output: + case nir_intrinsic_store_per_primitive_output: + case nir_intrinsic_store_per_vertex_output: + return true; + default: + break; + } + return false; +} + +static bool +find_var_deref(nir_shader *nir, nir_variable *var) +{ + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_deref) + continue; + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type == nir_deref_type_var && deref->var == var) + return true; + } + } + } + return false; +} + +static bool +find_var_io(nir_shader *nir, nir_variable *var) +{ + nir_foreach_function(function, nir) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + continue; + if (var->data.mode == nir_var_shader_in && !is_input) + continue; + if (var->data.mode == nir_var_shader_out && is_input) + continue; + unsigned slot_offset = 0; + if 
(var->data.fb_fetch_output && !is_load) + continue; + if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index) + continue; + nir_src *src_offset = nir_get_io_offset_src(intr); + if (src_offset && nir_src_is_const(*src_offset)) + slot_offset = nir_src_as_uint(*src_offset); + unsigned slot_count = get_var_slot_count(nir, var); + if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) && + var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output && + var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset && + var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset) + return true; + } + } + } + return false; +} + +struct clamp_layer_output_state { + nir_variable *original; + nir_variable *clamped; +}; + +static void +clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state) +{ + nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32, + nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED)); + nir_deref_instr *original_deref = nir_build_deref_var(b, state->original); + nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped); + nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1), + nir_load_deref(b, original_deref), + nir_imm_int(b, 0)); + nir_store_deref(b, clamped_deref, layer, 0); +} + +static bool +clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data) +{ + struct clamp_layer_output_state *state = data; + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter && + intr->intrinsic != nir_intrinsic_emit_vertex) + return false; + b->cursor = nir_before_instr(instr); + clamp_layer_output_emit(b, state); + return true; + } + default: return false; + } +} + +static bool +clamp_layer_output(nir_shader *vs, 
nir_shader *fs, unsigned *next_location) +{ + switch (vs->info.stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_TESS_EVAL: + break; + default: + unreachable("invalid last vertex stage!"); + } + struct clamp_layer_output_state state = {0}; + state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER); + if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original))) + return false; + state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped"); + state.clamped->data.location = VARYING_SLOT_LAYER; + nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER); + if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) { + state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot + state.original->data.driver_location = (*next_location)++; + if (fs_var) { + fs_var->data.location = state.original->data.location; + fs_var->data.driver_location = state.original->data.driver_location; + } + } else { + if (state.original->data.explicit_xfb_buffer) { + /* Will xfb the clamped output but still better than nothing */ + state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer; + state.clamped->data.xfb.buffer = state.original->data.xfb.buffer; + state.clamped->data.xfb.stride = state.original->data.xfb.stride; + state.clamped->data.offset = state.original->data.offset; + state.clamped->data.stream = state.original->data.stream; + } + state.original->data.mode = nir_var_shader_temp; + nir_fixup_deref_modes(vs); + } + if (vs->info.stage == MESA_SHADER_GEOMETRY) { + nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state); + } else { + nir_builder b; + nir_function_impl *impl = nir_shader_get_entrypoint(vs); + b = nir_builder_at(nir_after_impl(impl)); + 
assert(impl->end_block->predecessors->entries == 1); + clamp_layer_output_emit(&b, &state); + nir_metadata_preserve(impl, nir_metadata_dominance); + } + optimize_nir(vs, NULL, true); + NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL); return true; } @@ -641,9 +2644,9 @@ static void assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) { unsigned slot = var->data.location; - switch (var->data.location) { + switch (slot) { + case -1: case VARYING_SLOT_POS: - case VARYING_SLOT_PNTC: case VARYING_SLOT_PSIZ: case VARYING_SLOT_LAYER: case VARYING_SLOT_PRIMITIVE_ID: @@ -659,21 +2662,22 @@ assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser default: if (var->data.patch) { - assert(var->data.location >= VARYING_SLOT_PATCH0); - slot = var->data.location - VARYING_SLOT_PATCH0; - } else if (var->data.location >= VARYING_SLOT_VAR0 && - var->data.mode == nir_var_shader_in && - stage == MESA_SHADER_TESS_EVAL) { - slot = var->data.location - VARYING_SLOT_VAR0; - } else { - if (slot_map[var->data.location] == 0xff) { - assert(*reserved < MAX_VARYING); - slot_map[var->data.location] = *reserved; - *reserved += glsl_count_vec4_slots(var->type, false, false); - } - slot = slot_map[var->data.location]; - assert(slot < MAX_VARYING); + assert(slot >= VARYING_SLOT_PATCH0); + slot -= VARYING_SLOT_PATCH0; } + if (slot_map[slot] == 0xff) { + assert(*reserved < MAX_VARYING); + unsigned num_slots; + if (nir_is_arrayed_io(var, stage)) + num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false); + else + num_slots = glsl_count_vec4_slots(var->type, false, false); + assert(*reserved + num_slots <= MAX_VARYING); + for (unsigned i = 0; i < num_slots; i++) + slot_map[slot + i] = (*reserved)++; + } + slot = slot_map[slot]; + assert(slot < MAX_VARYING); var->data.driver_location = slot; } } @@ -690,9 +2694,9 @@ is_texcoord(gl_shader_stage stage, const nir_variable *var) 
static bool assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) { - switch (var->data.location) { + unsigned slot = var->data.location; + switch (slot) { case VARYING_SLOT_POS: - case VARYING_SLOT_PNTC: case VARYING_SLOT_PSIZ: case VARYING_SLOT_LAYER: case VARYING_SLOT_PRIMITIVE_ID: @@ -707,87 +2711,1298 @@ assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser break; default: if (var->data.patch) { - assert(var->data.location >= VARYING_SLOT_PATCH0); - var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0; - } else if (var->data.location >= VARYING_SLOT_VAR0 && - stage == MESA_SHADER_TESS_CTRL && - var->data.mode == nir_var_shader_out) - var->data.driver_location = var->data.location - VARYING_SLOT_VAR0; - else { - if (slot_map[var->data.location] == (unsigned char)-1) { - if (!is_texcoord(stage, var)) - /* dead io */ - return false; - /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */ - slot_map[var->data.location] = (*reserved)++; + assert(slot >= VARYING_SLOT_PATCH0); + slot -= VARYING_SLOT_PATCH0; + } + if (slot_map[slot] == (unsigned char)-1) { + /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE, + * so keep for now and eliminate later + */ + if (is_texcoord(stage, var)) { + var->data.driver_location = -1; + return true; } - var->data.driver_location = slot_map[var->data.location]; + /* patch variables may be read in the workgroup */ + if (stage != MESA_SHADER_TESS_CTRL) + /* dead io */ + return false; + unsigned num_slots; + if (nir_is_arrayed_io(var, stage)) + num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false); + else + num_slots = glsl_count_vec4_slots(var->type, false, false); + assert(*reserved + num_slots <= MAX_VARYING); + for (unsigned i = 0; i < num_slots; i++) + slot_map[slot + i] = (*reserved)++; } + var->data.driver_location = slot_map[slot]; } return true; } static bool 
-rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data) +rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data) { nir_variable *var = data; if (instr->type != nir_instr_type_intrinsic) return false; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_deref) + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + return false; + if (!is_load) return false; - nir_variable *deref_var = nir_intrinsic_get_var(intr, 0); - if (deref_var != var) + unsigned location = nir_intrinsic_io_semantics(intr).location; + if (location != var->data.location) return false; - nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest)); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef); + b->cursor = nir_before_instr(instr); + nir_def *zero = nir_imm_zero(b, intr->def.num_components, + intr->def.bit_size); + if (b->shader->info.stage == MESA_SHADER_FRAGMENT) { + switch (location) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + /* default color is 0,0,0,1 */ + if (intr->def.num_components == 4) + zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3); + break; + default: + break; + } + } + nir_def_rewrite_uses(&intr->def, zero); + nir_instr_remove(instr); return true; } + + +static bool +delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + switch (intr->intrinsic) { + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_primitive_output: + case nir_intrinsic_store_per_vertex_output: + break; + default: + return false; + } + if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ) + return false; + if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) { + nir_instr_remove(&intr->instr); + return true; + } + return 
false; +} + +static bool +delete_psiz_store(nir_shader *nir, bool one) +{ + bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr, + nir_metadata_dominance, one ? nir : NULL); + if (progress) + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + return progress; +} + +struct write_components { + unsigned slot; + uint32_t component_mask; +}; + +static bool +fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + struct write_components *wc = data; + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + return false; + if (!is_input) + return false; + nir_io_semantics s = nir_intrinsic_io_semantics(intr); + if (wc->slot < s.location || wc->slot >= s.location + s.num_slots) + return false; + unsigned num_components = intr->num_components; + unsigned c = nir_intrinsic_component(intr); + if (intr->def.bit_size == 64) + num_components *= 2; + nir_src *src_offset = nir_get_io_offset_src(intr); + if (!nir_src_is_const(*src_offset)) + return false; + unsigned slot_offset = nir_src_as_uint(*src_offset); + if (s.location + slot_offset != wc->slot) + return false; + uint32_t readmask = BITFIELD_MASK(intr->num_components) << c; + if (intr->def.bit_size == 64) + readmask |= readmask << (intr->num_components + c); + /* handle dvec3/dvec4 */ + if (num_components + c > 4) + readmask >>= 4; + if ((wc->component_mask & readmask) == readmask) + return false; + uint32_t rewrite_mask = readmask & ~wc->component_mask; + if (!rewrite_mask) + return false; + b->cursor = nir_after_instr(&intr->instr); + nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size); + if (b->shader->info.stage == MESA_SHADER_FRAGMENT) { + switch (wc->slot) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + /* default color is 0,0,0,1 */ + if (intr->def.num_components == 4) + zero = nir_vector_insert_imm(b, 
zero, nir_imm_float(b, 1.0), 3); + break; + default: + break; + } + } + rewrite_mask >>= c; + nir_def *dest = &intr->def; + u_foreach_bit(component, rewrite_mask) + dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component); + nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr); + return true; +} + +static bool +find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + struct write_components *wc = data; + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + return false; + if (is_input || is_load) + return false; + nir_io_semantics s = nir_intrinsic_io_semantics(intr); + if (wc->slot < s.location || wc->slot >= s.location + s.num_slots) + return false; + unsigned location = s.location; + unsigned c = nir_intrinsic_component(intr); + uint32_t wrmask = nir_intrinsic_write_mask(intr) << c; + if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) { + unsigned num_components = intr->num_components * 2; + nir_src *src_offset = nir_get_io_offset_src(intr); + if (nir_src_is_const(*src_offset)) { + if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4) + return false; + } + wrmask |= wrmask << intr->num_components; + /* handle dvec3/dvec4 */ + if (num_components + c > 4) + wrmask >>= 4; + } + wc->component_mask |= wrmask; + return false; +} + void -zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer) +zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer) { unsigned reserved = 0; unsigned char slot_map[VARYING_SLOT_MAX]; memset(slot_map, -1, sizeof(slot_map)); bool do_fixup = false; nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? 
producer : consumer; + nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ); + if (var) { + bool can_remove = false; + if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) { + /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */ + if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true)) + can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ); + else if (consumer->info.stage != MESA_SHADER_FRAGMENT) + can_remove = !var->data.explicit_location; + } + /* remove injected pointsize from all but the last vertex stage */ + if (can_remove) { + var->data.mode = nir_var_shader_temp; + nir_fixup_deref_modes(producer); + delete_psiz_store(producer, false); + NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL); + optimize_nir(producer, NULL, true); + } + } + if (consumer->info.stage != MESA_SHADER_FRAGMENT) { + producer->info.has_transform_feedback_varyings = false; + nir_foreach_shader_out_variable(var_out, producer) + var_out->data.explicit_xfb_buffer = false; + } if (producer->info.stage == MESA_SHADER_TESS_CTRL) { /* never assign from tcs -> tes, always invert */ - nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) - assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map); - nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) { - if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map)) + nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in) + assign_producer_var_io(consumer->info.stage, var_in, &reserved, slot_map); + nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) { + if (!assign_consumer_var_io(producer->info.stage, var_out, &reserved, slot_map)) /* this is an output, nothing more needs to be done for it to be dropped */ do_fixup = true; } } else { - 
nir_foreach_variable_with_modes(var, producer, nir_var_shader_out) - assign_producer_var_io(producer->info.stage, var, &reserved, slot_map); - nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) { - if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) { + nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out) + assign_producer_var_io(producer->info.stage, var_out, &reserved, slot_map); + nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) { + if (!assign_consumer_var_io(consumer->info.stage, var_in, &reserved, slot_map)) { do_fixup = true; - /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */ - nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var); + /* input needs to be rewritten */ + nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in); } } + if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer) + do_fixup |= clamp_layer_output(producer, consumer, &reserved); + } + nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer)); + if (producer->info.io_lowered && consumer->info.io_lowered) { + u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) { + struct write_components wc = {slot, 0}; + nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc); + assert(wc.component_mask); + if (wc.component_mask != BITFIELD_MASK(4)) + do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc); + } } if (!do_fixup) return; nir_fixup_deref_modes(nir); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); - optimize_nir(nir); + optimize_nir(nir, NULL, true); +} + +/* all types that hit this function contain something that is 64bit */ +static const struct glsl_type * +rewrite_64bit_type(nir_shader *nir, const 
struct glsl_type *type, nir_variable *var, bool doubles_only) +{ + if (glsl_type_is_array(type)) { + const struct glsl_type *child = glsl_get_array_element(type); + unsigned elements = glsl_array_size(type); + unsigned stride = glsl_get_explicit_stride(type); + return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride); + } + /* rewrite structs recursively */ + if (glsl_type_is_struct_or_ifc(type)) { + unsigned nmembers = glsl_get_length(type); + struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2); + unsigned xfb_offset = 0; + for (unsigned i = 0; i < nmembers; i++) { + const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i); + fields[i] = *f; + xfb_offset += glsl_get_component_slots(fields[i].type) * 4; + if (i < nmembers - 1 && xfb_offset % 8 && + (glsl_contains_double(glsl_get_struct_field(type, i + 1)) || + (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) { + var->data.is_xfb = true; + } + fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only); + } + return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type)); + } + if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only)) + return type; + if (doubles_only && glsl_type_is_vector_or_scalar(type)) + return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type)); + enum glsl_base_type base_type; + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT64: + base_type = GLSL_TYPE_UINT; + break; + case GLSL_TYPE_INT64: + base_type = GLSL_TYPE_INT; + break; + case GLSL_TYPE_DOUBLE: + base_type = GLSL_TYPE_FLOAT; + break; + default: + unreachable("unknown 64-bit vertex attribute format!"); + } + if (glsl_type_is_scalar(type)) + return glsl_vector_type(base_type, 2); + unsigned num_components; + if (glsl_type_is_matrix(type)) { + /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */ + unsigned 
vec_components = glsl_get_vector_elements(type); + if (vec_components == 3) + vec_components = 4; + num_components = vec_components * 2 * glsl_get_matrix_columns(type); + } else { + num_components = glsl_get_vector_elements(type) * 2; + if (num_components <= 4) + return glsl_vector_type(base_type, num_components); + } + /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */ + struct glsl_struct_field fields[8] = {0}; + unsigned remaining = num_components; + unsigned nfields = 0; + for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) { + assert(i < ARRAY_SIZE(fields)); + fields[i].name = ""; + fields[i].offset = i * 16; + fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining)); + } + char buf[64]; + snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type)); + return glsl_struct_type(fields, nfields, buf, true); } -VkShaderModule -zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key) +static const struct glsl_type * +deref_is_matrix(nir_deref_instr *deref) { - VkShaderModule mod = VK_NULL_HANDLE; - void *streamout = NULL; - nir_shader *nir = nir_shader_clone(NULL, base_nir); + if (glsl_type_is_matrix(deref->type)) + return deref->type; + nir_deref_instr *parent = nir_deref_instr_parent(deref); + if (parent) + return deref_is_matrix(parent); + return NULL; +} +static bool +lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var, + struct hash_table *derefs, struct set *deletes, bool doubles_only) +{ + bool func_progress = false; + nir_builder b = nir_builder_create(impl); + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + switch (instr->type) { + case nir_instr_type_deref: { + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (!(deref->modes & var->data.mode)) + continue; + if (nir_deref_instr_get_variable(deref) != var) + continue; + + /* matrix types are 
special: store the original deref type for later use */ + const struct glsl_type *matrix = deref_is_matrix(deref); + nir_deref_instr *parent = nir_deref_instr_parent(deref); + if (!matrix) { + /* if this isn't a direct matrix deref, it's maybe a matrix row deref */ + hash_table_foreach(derefs, he) { + /* propagate parent matrix type to row deref */ + if (he->key == parent) + matrix = he->data; + } + } + if (matrix) + _mesa_hash_table_insert(derefs, deref, (void*)matrix); + if (deref->deref_type == nir_deref_type_var) + deref->type = var->type; + else + deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only); + } + break; + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_deref && + intr->intrinsic != nir_intrinsic_load_deref) + break; + if (nir_intrinsic_get_var(intr, 0) != var) + break; + if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) || + (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64)) + break; + b.cursor = nir_before_instr(instr); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + unsigned num_components = intr->num_components * 2; + nir_def *comp[NIR_MAX_VEC_COMPONENTS]; + /* this is the stored matrix type from the deref */ + struct hash_entry *he = _mesa_hash_table_search(derefs, deref); + const struct glsl_type *matrix = he ? 
he->data : NULL; + if (doubles_only && !matrix) + break; + func_progress = true; + if (intr->intrinsic == nir_intrinsic_store_deref) { + /* first, unpack the src data to 32bit vec2 components */ + for (unsigned i = 0; i < intr->num_components; i++) { + nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i)); + comp[i * 2] = nir_channel(&b, ssa, 0); + comp[i * 2 + 1] = nir_channel(&b, ssa, 1); + } + unsigned wrmask = nir_intrinsic_write_mask(intr); + unsigned mask = 0; + /* expand writemask for doubled components */ + for (unsigned i = 0; i < intr->num_components; i++) { + if (wrmask & BITFIELD_BIT(i)) + mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1); + } + if (matrix) { + /* matrix types always come from array (row) derefs */ + assert(deref->deref_type == nir_deref_type_array); + nir_deref_instr *var_deref = nir_deref_instr_parent(deref); + /* let optimization clean up consts later */ + nir_def *index = deref->arr.index.ssa; + /* this might be an indirect array index: + * - iterate over matrix columns + * - add if blocks for each column + * - perform the store in the block + */ + for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) { + nir_push_if(&b, nir_ieq_imm(&b, index, idx)); + unsigned vec_components = glsl_get_vector_elements(matrix); + /* always clamp dvec3 to 4 components */ + if (vec_components == 3) + vec_components = 4; + unsigned start_component = idx * vec_components * 2; + /* struct member */ + unsigned member = start_component / 4; + /* number of components remaining */ + unsigned remaining = num_components; + for (unsigned i = 0; i < num_components; member++) { + if (!(mask & BITFIELD_BIT(i))) + continue; + assert(member < glsl_get_length(var_deref->type)); + /* deref the rewritten struct to the appropriate vec4/vec2 */ + nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member); + unsigned incr = MIN2(remaining, 4); + /* assemble the write component vec */ + nir_def *val = nir_vec(&b, 
&comp[i], incr); + /* use the number of components being written as the writemask */ + if (glsl_get_vector_elements(strct->type) > val->num_components) + val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type)); + nir_store_deref(&b, strct, val, BITFIELD_MASK(incr)); + remaining -= incr; + i += incr; + } + nir_pop_if(&b, NULL); + } + _mesa_set_add(deletes, &deref->instr); + } else if (num_components <= 4) { + /* simple store case: just write out the components */ + nir_def *dest = nir_vec(&b, comp, num_components); + nir_store_deref(&b, deref, dest, mask); + } else { + /* writing > 4 components: access the struct and write to the appropriate vec4 members */ + for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) { + if (!(mask & BITFIELD_MASK(4))) + continue; + nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i); + nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4)); + if (glsl_get_vector_elements(strct->type) > dest->num_components) + dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type)); + nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4)); + mask >>= 4; + } + } + } else { + nir_def *dest = NULL; + if (matrix) { + /* matrix types always come from array (row) derefs */ + assert(deref->deref_type == nir_deref_type_array); + nir_deref_instr *var_deref = nir_deref_instr_parent(deref); + /* let optimization clean up consts later */ + nir_def *index = deref->arr.index.ssa; + /* this might be an indirect array index: + * - iterate over matrix columns + * - add if blocks for each column + * - phi the loads using the array index + */ + unsigned cols = glsl_get_matrix_columns(matrix); + nir_def *dests[4]; + for (unsigned idx = 0; idx < cols; idx++) { + /* don't add an if for the final row: this will be handled in the else */ + if (idx < cols - 1) + nir_push_if(&b, nir_ieq_imm(&b, index, idx)); + unsigned vec_components = glsl_get_vector_elements(matrix); + /* always clamp dvec3 to 4 
components */ + if (vec_components == 3) + vec_components = 4; + unsigned start_component = idx * vec_components * 2; + /* struct member */ + unsigned member = start_component / 4; + /* number of components remaining */ + unsigned remaining = num_components; + /* component index */ + unsigned comp_idx = 0; + for (unsigned i = 0; i < num_components; member++) { + assert(member < glsl_get_length(var_deref->type)); + nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member); + nir_def *load = nir_load_deref(&b, strct); + unsigned incr = MIN2(remaining, 4); + /* repack the loads to 64bit */ + for (unsigned c = 0; c < incr / 2; c++, comp_idx++) + comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2))); + remaining -= incr; + i += incr; + } + dest = dests[idx] = nir_vec(&b, comp, intr->num_components); + if (idx < cols - 1) + nir_push_else(&b, NULL); + } + /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */ + for (unsigned idx = cols - 1; idx >= 1; idx--) { + nir_pop_if(&b, NULL); + dest = nir_if_phi(&b, dests[idx - 1], dest); + } + _mesa_set_add(deletes, &deref->instr); + } else if (num_components <= 4) { + /* simple load case */ + nir_def *load = nir_load_deref(&b, deref); + /* pack 32bit loads into 64bit: this will automagically get optimized out later */ + for (unsigned i = 0; i < intr->num_components; i++) { + comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2))); + } + dest = nir_vec(&b, comp, intr->num_components); + } else { + /* writing > 4 components: access the struct and load the appropriate vec4 members */ + for (unsigned i = 0; i < 2; i++, num_components -= 4) { + nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i); + nir_def *load = nir_load_deref(&b, strct); + comp[i * 2] = nir_pack_64_2x32(&b, + nir_trim_vector(&b, load, 2)); + if (num_components > 2) + comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 
2))); + } + dest = nir_vec(&b, comp, intr->num_components); + } + nir_def_rewrite_uses_after(&intr->def, dest, instr); + } + _mesa_set_add(deletes, instr); + break; + } + break; + default: break; + } + } + } + if (func_progress) + nir_metadata_preserve(impl, nir_metadata_none); + /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */ + set_foreach_remove(deletes, he) + nir_instr_remove((void*)he->key); + return func_progress; +} + +static bool +lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs, + struct set *deletes, bool doubles_only) +{ + if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type))) + return false; + var->type = rewrite_64bit_type(shader, var->type, var, doubles_only); + /* once type is rewritten, rewrite all loads and stores */ + nir_foreach_function_impl(impl, shader) + lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only); + return true; +} + +/* rewrite all input/output variables using 32bit types and load/stores */ +static bool +lower_64bit_vars(nir_shader *shader, bool doubles_only) +{ + bool progress = false; + struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + nir_foreach_function_impl(impl, shader) { + nir_foreach_function_temp_variable(var, impl) { + if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type))) + continue; + var->type = rewrite_64bit_type(shader, var->type, var, doubles_only); + progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only); + } + } + ralloc_free(deletes); + ralloc_free(derefs); + if (progress) { + nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL); + nir_lower_phis_to_scalar(shader, false); + optimize_nir(shader, NULL, true); + } + return progress; +} + +static void 
+zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file) +{ + FILE *fp = fopen(file, "wb"); + if (fp) { + fwrite(words, 1, size, fp); + fclose(fp); + fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file); + } +} + +static VkShaderStageFlagBits +zink_get_next_stage(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT | + VK_SHADER_STAGE_FRAGMENT_BIT; + case MESA_SHADER_TESS_CTRL: + return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + case MESA_SHADER_TESS_EVAL: + return VK_SHADER_STAGE_GEOMETRY_BIT | + VK_SHADER_STAGE_FRAGMENT_BIT; + case MESA_SHADER_GEOMETRY: + return VK_SHADER_STAGE_FRAGMENT_BIT; + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + return 0; + default: + unreachable("invalid shader stage"); + } +} + +struct zink_shader_object +zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg) +{ + VkShaderModuleCreateInfo smci = {0}; + VkShaderCreateInfoEXT sci = {0}; + + if (!spirv) + spirv = zs->spirv; + + if (zink_debug & ZINK_DEBUG_SPIRV) { + char buf[256]; + static int i; + snprintf(buf, sizeof(buf), "dump%02d.spv", i++); + zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf); + } + + sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT; + sci.stage = mesa_to_vk_shader_stage(zs->info.stage); + sci.nextStage = zink_get_next_stage(zs->info.stage); + sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT; + sci.codeSize = spirv->num_words * sizeof(uint32_t); + sci.pCode = spirv->words; + sci.pName = "main"; + VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0}; + if (pg) { + sci.setLayoutCount = pg->num_dsl; + sci.pSetLayouts = pg->dsl; + } else { + sci.setLayoutCount = zs->info.stage + 1; + dsl[zs->info.stage] = zs->precompile.dsl;; + 
sci.pSetLayouts = dsl; + } + VkPushConstantRange pcr; + pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + pcr.offset = 0; + pcr.size = sizeof(struct zink_gfx_push_constant); + sci.pushConstantRangeCount = 1; + sci.pPushConstantRanges = &pcr; + + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = spirv->num_words * sizeof(uint32_t); + smci.pCode = spirv->words; + +#ifndef NDEBUG + if (zink_debug & ZINK_DEBUG_VALIDATION) { + static const struct spirv_to_nir_options spirv_options = { + .environment = NIR_SPIRV_VULKAN, + .caps = { + .float64 = true, + .int16 = true, + .int64 = true, + .tessellation = true, + .float_controls = true, + .image_ms_array = true, + .image_read_without_format = true, + .image_write_without_format = true, + .storage_image_ms = true, + .geometry_streams = true, + .storage_8bit = true, + .storage_16bit = true, + .variable_pointers = true, + .stencil_export = true, + .post_depth_coverage = true, + .transform_feedback = true, + .device_group = true, + .draw_parameters = true, + .shader_viewport_index_layer = true, + .multiview = true, + .physical_storage_buffer_address = true, + .int64_atomics = true, + .subgroup_arithmetic = true, + .subgroup_basic = true, + .subgroup_ballot = true, + .subgroup_quad = true, + .subgroup_shuffle = true, + .subgroup_vote = true, + .vk_memory_model = true, + .vk_memory_model_device_scope = true, + .int8 = true, + .float16 = true, + .demote_to_helper_invocation = true, + .sparse_residency = true, + .min_lod = true, + .workgroup_memory_explicit_layout = true, + }, + .ubo_addr_format = nir_address_format_32bit_index_offset, + .ssbo_addr_format = nir_address_format_32bit_index_offset, + .phys_ssbo_addr_format = nir_address_format_64bit_global, + .push_const_addr_format = nir_address_format_logical, + .shared_addr_format = nir_address_format_32bit_offset, + }; + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + VkSpecializationInfo sinfo = {0}; + 
VkSpecializationMapEntry me[3]; + uint32_t size[3] = {1,1,1}; + if (!zs->info.workgroup_size[0]) { + sinfo.mapEntryCount = 3; + sinfo.pMapEntries = &me[0]; + sinfo.dataSize = sizeof(uint32_t) * 3; + sinfo.pData = size; + uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z}; + for (int i = 0; i < 3; i++) { + me[i].size = sizeof(uint32_t); + me[i].constantID = ids[i]; + me[i].offset = i * sizeof(uint32_t); + } + spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries); + } + nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words, + spec_entries, num_spec_entries, + clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options); + assert(nir); + ralloc_free(nir); + free(spec_entries); + } +#endif + + VkResult ret; + struct zink_shader_object obj = {0}; + if (!can_shobj || !screen->info.have_EXT_shader_object) + ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod); + else + ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj); + ASSERTED bool success = zink_screen_handle_vkresult(screen, ret); + assert(success); + return obj; +} + +static void +prune_io(nir_shader *nir) +{ + nir_foreach_shader_in_variable_safe(var, nir) { + if (!find_var_deref(nir, var) && !find_var_io(nir, var)) + var->data.mode = nir_var_shader_temp; + } + nir_foreach_shader_out_variable_safe(var, nir) { + if (!find_var_deref(nir, var) && !find_var_io(nir, var)) + var->data.mode = nir_var_shader_temp; + } + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); +} + +static void +flag_shadow_tex(nir_variable *var, struct zink_shader *zs) +{ + /* unconvert from zink_binding() */ + uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT); + assert(sampler_id < 32); //bitfield size for tracking + zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id); +} + +static nir_def * +rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs) +{ + 
assert(var); + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type ret_type = glsl_get_sampler_result_type(type); + bool is_int = glsl_base_type_is_integer(ret_type); + unsigned bit_size = glsl_base_type_get_bit_size(ret_type); + unsigned dest_size = tex->def.bit_size; + b->cursor = nir_after_instr(&tex->instr); + unsigned num_components = tex->def.num_components; + bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse; + if (bit_size == dest_size && !rewrite_depth) + return NULL; + nir_def *dest = &tex->def; + if (rewrite_depth && zs) { + if (nir_def_components_read(dest) & ~1) { + /* this needs recompiles */ + if (b->shader->info.stage == MESA_SHADER_FRAGMENT) + flag_shadow_tex(var, zs); + else + mesa_loge("unhandled old-style shadow sampler in non-fragment stage!"); + return NULL; + } + /* If only .x is used in the NIR, then it's effectively not a legacy depth + * sample anyway and we don't want to ask for shader recompiles. This is + * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or + * LUMINANCE, so apps just use the first channel. 
+ */ + tex->def.num_components = 1; + tex->is_new_style_shadow = true; + } + if (bit_size != dest_size) { + tex->def.bit_size = bit_size; + tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type); + + if (is_int) { + if (glsl_unsigned_base_type_of(ret_type) == ret_type) + dest = nir_u2uN(b, &tex->def, dest_size); + else + dest = nir_i2iN(b, &tex->def, dest_size); + } else { + dest = nir_f2fN(b, &tex->def, dest_size); + } + if (rewrite_depth) + return dest; + nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr); + } else if (rewrite_depth) { + return dest; + } + return dest; +} + +struct lower_zs_swizzle_state { + bool shadow_only; + unsigned base_sampler_id; + const struct zink_zs_swizzle_key *swizzle; +}; + +static bool +lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data) +{ + struct lower_zs_swizzle_state *state = data; + const struct zink_zs_swizzle_key *swizzle_key = state->swizzle; + assert(state->shadow_only || swizzle_key); + if (instr->type != nir_instr_type_tex) + return false; + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->op == nir_texop_txs || tex->op == nir_texop_lod || + (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow) + return false; + if (tex->is_shadow && tex->op == nir_texop_tg4) + /* Will not even try to emulate the shadow comparison */ + return false; + int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle); + nir_variable *var = NULL; + if (handle != -1) + /* gtfo bindless depth texture mode */ + return false; + nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(img->type))) { + unsigned size = glsl_type_is_array(img->type) ? 
glsl_get_aoa_size(img->type) : 1; + if (tex->texture_index >= img->data.driver_location && + tex->texture_index < img->data.driver_location + size) { + var = img; + break; + } + } + } + assert(var); + uint32_t sampler_id = var->data.binding - state->base_sampler_id; + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type ret_type = glsl_get_sampler_result_type(type); + bool is_int = glsl_base_type_is_integer(ret_type); + unsigned num_components = tex->def.num_components; + if (tex->is_shadow) + tex->is_new_style_shadow = true; + nir_def *dest = rewrite_tex_dest(b, tex, var, NULL); + assert(dest || !state->shadow_only); + if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id))) + return false; + else if (!dest) + dest = &tex->def; + else + tex->def.num_components = 1; + if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) { + /* these require manual swizzles */ + if (tex->op == nir_texop_tg4) { + assert(!tex->is_shadow); + nir_def *swizzle; + switch (swizzle_key->swizzle[sampler_id].s[tex->component]) { + case PIPE_SWIZZLE_0: + swizzle = nir_imm_zero(b, 4, tex->def.bit_size); + break; + case PIPE_SWIZZLE_1: + if (is_int) + swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size); + else + swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size); + break; + default: + if (!tex->component) + return false; + tex->component = 0; + return true; + } + nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr); + return true; + } + nir_def *vec[4]; + for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) { + switch (swizzle_key->swizzle[sampler_id].s[i]) { + case PIPE_SWIZZLE_0: + vec[i] = nir_imm_zero(b, 1, tex->def.bit_size); + break; + case PIPE_SWIZZLE_1: + if (is_int) + vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size); + else + vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size); + break; + default: + vec[i] = dest->num_components == 1 ? 
dest : nir_channel(b, dest, i); + break; + } + } + nir_def *swizzle = nir_vec(b, vec, num_components); + nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr); + } else { + assert(tex->is_shadow); + nir_def *vec[4] = {dest, dest, dest, dest}; + nir_def *splat = nir_vec(b, vec, num_components); + nir_def_rewrite_uses_after(dest, splat, splat->parent_instr); + } + return true; +} + +/* Applies in-shader swizzles when necessary for depth/shadow sampling. + * + * SPIRV only has new-style (scalar result) shadow sampling, so to emulate + * !is_new_style_shadow (vec4 result) shadow sampling we lower to a + * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR + * shader to expand out to vec4. Since this depends on sampler state, it's a + * draw-time shader recompile to do so. + * + * We may also need to apply shader swizzles for + * driver_workarounds.needs_zs_shader_swizzle. + */ +static bool +lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only) +{ + /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */ + unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 
0 : PIPE_MAX_SAMPLERS * nir->info.stage; + struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle}; + return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state); +} + +static bool +invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr, + void *data) +{ + if (intr->intrinsic != nir_intrinsic_load_point_coord) + return false; + b->cursor = nir_after_instr(&intr->instr); + nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0), + nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1))); + nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr); + return true; +} + +static bool +invert_point_coord(nir_shader *nir) +{ + if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD)) + return false; + return nir_shader_intrinsics_pass(nir, invert_point_coord_instr, + nir_metadata_dominance, NULL); +} + +static bool +lower_sparse_instr(nir_builder *b, nir_instr *instr, void *data) +{ + b->cursor = nir_after_instr(instr); + + switch (instr->type) { + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (!tex->is_sparse) + return false; + + nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &tex->def)); + nir_def *vec = nir_vector_insert_imm(b, &tex->def, res, + tex->def.num_components - 1); + nir_def_rewrite_uses_after(&tex->def, vec, vec->parent_instr); + return true; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_image_deref_sparse_load: { + nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &intrin->def)); + nir_def *vec = nir_vector_insert_imm(b, &intrin->def, res, 4); + nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr); + return true; + } + + case nir_intrinsic_sparse_residency_code_and: { + nir_def *res = nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa); + 
nir_def_rewrite_uses(&intrin->def, res); + return true; + } + + case nir_intrinsic_is_sparse_texels_resident: { + nir_def *res = nir_i2b(b, intrin->src[0].ssa); + nir_def_rewrite_uses(&intrin->def, res); + return true; + } + + default: + return false; + } + } + + default: + return false; + } +} + +static bool +lower_sparse(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, lower_sparse_instr, + nir_metadata_dominance, NULL); +} + +static bool +add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + return false; + bool is_special_io = (b->shader->info.stage == MESA_SHADER_VERTEX && is_input) || + (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_input); + unsigned loc = nir_intrinsic_io_semantics(intr).location; + nir_src *src_offset = nir_get_io_offset_src(intr); + const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0; + unsigned location = loc + slot_offset; + unsigned frac = nir_intrinsic_component(intr); + unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]); + /* set c aligned/rounded down to dword */ + unsigned c = frac; + if (frac && bit_size < 32) + c = frac * bit_size / 32; + /* loop over all the variables and rewrite corresponding access */ + nir_foreach_variable_with_modes(var, b->shader, is_input ? 
nir_var_shader_in : nir_var_shader_out) { + const struct glsl_type *type = var->type; + if (nir_is_arrayed_io(var, b->shader->info.stage)) + type = glsl_get_array_element(type); + unsigned slot_count = get_var_slot_count(b->shader, var); + /* filter access that isn't specific to this variable */ + if (var->data.location > location || var->data.location + slot_count <= location) + continue; + if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output) + continue; + if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index) + continue; + + unsigned size = 0; + bool is_struct = glsl_type_is_struct(glsl_without_array(type)); + if (is_struct) + size = get_slot_components(var, var->data.location + slot_offset, var->data.location); + else if (!is_special_io && var->data.compact) + size = glsl_get_aoa_size(type); + else + size = glsl_get_vector_elements(glsl_without_array(type)); + assert(size); + if (glsl_type_is_64bit(glsl_without_array(var->type))) + size *= 2; + if (var->data.location != location && size > 4 && size % 4 && !is_struct) { + /* adjust for dvec3-type slot overflow */ + assert(location > var->data.location); + size -= (location - var->data.location) * 4; + } + assert(size); + if (var->data.location_frac + size <= c || var->data.location_frac > c) + continue; + + b->cursor = nir_before_instr(&intr->instr); + nir_deref_instr *deref = nir_build_deref_var(b, var); + if (nir_is_arrayed_io(var, b->shader->info.stage)) { + assert(intr->intrinsic != nir_intrinsic_store_output); + deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa); + } + if (glsl_type_is_array(type)) { + /* unroll array derefs */ + unsigned idx = var->data.compact ? 
(frac - var->data.location_frac) : 0; + assert(src_offset); + if (var->data.location < VARYING_SLOT_VAR0) { + if (src_offset) { + /* clip/cull dist and tess levels use different array offset semantics */ + bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) && + is_clipcull_dist(var->data.location); + bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL && + var->data.location >= VARYING_SLOT_TESS_LEVEL_INNER && var->data.location >= VARYING_SLOT_TESS_LEVEL_OUTER; + bool is_builtin_array = is_clipdist || is_tess_level; + /* this is explicit for ease of debugging but could be collapsed at some point in the future*/ + if (nir_src_is_const(*src_offset)) { + unsigned offset = slot_offset; + if (is_builtin_array) + offset *= 4; + if (is_clipdist) { + if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1) + offset += 4; + } + deref = nir_build_deref_array_imm(b, deref, offset + idx); + } else { + nir_def *offset = src_offset->ssa; + if (is_builtin_array) + nir_imul_imm(b, offset, 4); + deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : src_offset->ssa); + } + } else { + deref = nir_build_deref_array_imm(b, deref, idx); + } + type = glsl_get_array_element(type); + } else { + idx += location - var->data.location; + /* need to convert possible N*M to [N][M] */ + nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa; + while (glsl_type_is_array(type)) { + const struct glsl_type *elem = glsl_get_array_element(type); + unsigned type_size = glsl_count_vec4_slots(elem, false, false); + nir_def *n = glsl_type_is_array(elem) ? 
nir_udiv_imm(b, nm, type_size) : nm; + if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2) + n = nir_udiv_imm(b, n, 2); + deref = nir_build_deref_array(b, deref, n); + nm = nir_umod_imm(b, nm, type_size); + type = glsl_get_array_element(type); + } + } + } else if (glsl_type_is_struct(type)) { + deref = nir_build_deref_struct(b, deref, slot_offset); + } + assert(!glsl_type_is_array(type)); + unsigned num_components = glsl_get_vector_elements(type); + if (is_load) { + nir_def *load; + if (is_interp) { + nir_def *interp = intr->src[0].ssa; + nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr); + assert(interp_intr); + var->data.interpolation = nir_intrinsic_interp_mode(interp_intr); + switch (interp_intr->intrinsic) { + case nir_intrinsic_load_barycentric_centroid: + load = nir_interp_deref_at_centroid(b, num_components, bit_size, &deref->def); + break; + case nir_intrinsic_load_barycentric_sample: + var->data.sample = 1; + load = nir_load_deref(b, deref); + break; + case nir_intrinsic_load_barycentric_pixel: + load = nir_load_deref(b, deref); + break; + case nir_intrinsic_load_barycentric_at_sample: + load = nir_interp_deref_at_sample(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa); + break; + case nir_intrinsic_load_barycentric_at_offset: + load = nir_interp_deref_at_offset(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa); + break; + default: + unreachable("unhandled interp!"); + } + } else { + load = nir_load_deref(b, deref); + } + /* filter needed components */ + if (intr->num_components < load->num_components) + load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac)); + nir_def_rewrite_uses(&intr->def, load); + } else { + nir_def *store = intr->src[0].ssa; + /* pad/filter components to match deref type */ + if (intr->num_components < num_components) { + nir_def *zero = nir_imm_zero(b, 1, bit_size); + 
nir_def *vec[4] = {zero, zero, zero, zero}; + u_foreach_bit(i, nir_intrinsic_write_mask(intr)) + vec[c - var->data.location_frac + i] = nir_channel(b, store, i); + store = nir_vec(b, vec, num_components); + } if (store->num_components > num_components) { + store = nir_channels(b, store, nir_intrinsic_write_mask(intr)); + } + if (store->bit_size != glsl_get_bit_size(type)) { + /* this should be some weird bindless io conversion */ + assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32); + assert(num_components != store->num_components); + store = nir_unpack_64_2x32(b, store); + } + nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components)); + } + nir_instr_remove(&intr->instr); + return true; + } + unreachable("failed to find variable for explicit io!"); + return true; +} + +static bool +add_derefs(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, add_derefs_instr, + nir_metadata_dominance, NULL); +} + +static struct zink_shader_object +compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg) +{ + struct zink_shader_info *sinfo = &zs->sinfo; + prune_io(nir); + + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + NIR_PASS_V(nir, nir_divergence_analysis); + break; + default: break; + } + NIR_PASS_V(nir, nir_convert_from_ssa, true); + + if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)) + nir_index_ssa_defs(nir_shader_get_entrypoint(nir)); + if (zink_debug & ZINK_DEBUG_NIR) { + fprintf(stderr, "NIR shader:\n---8<---\n"); + nir_print_shader(nir, stderr); + fprintf(stderr, "---8<---\n"); + } + + struct zink_shader_object obj = {0}; + struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version); + if (spirv) + obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg); + + /* TODO: determine if there's any reason to cache spirv output? 
*/ + if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) + zs->spirv = spirv; + else + obj.spirv = spirv; + return obj; +} + +static bool +remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, void *data) +{ + if (interp->intrinsic != nir_intrinsic_interp_deref_at_sample) + return false; + + b->cursor = nir_before_instr(&interp->instr); + nir_def *res = nir_load_deref(b, nir_src_as_deref(interp->src[0])); + nir_def_rewrite_uses(&interp->def, res); + + return true; +} + +struct zink_shader_object +zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, + nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg) +{ + bool need_optimize = true; + bool inlined_uniforms = false; + + NIR_PASS_V(nir, add_derefs); + NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8); if (key) { if (key->inline_uniforms) { NIR_PASS_V(nir, nir_inline_uniforms, @@ -795,54 +4010,101 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad key->base.inlined_uniform_values, nir->info.inlinable_uniform_dw_offsets); - optimize_nir(nir); - - /* This must be done again. 
*/ - NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | - nir_var_shader_out); + inlined_uniforms = true; } /* TODO: use a separate mem ctx here for ralloc */ - switch (zs->nir->info.stage) { - case MESA_SHADER_VERTEX: { - uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0; - const struct zink_vs_key *vs_key = zink_vs_key(key); - switch (vs_key->size) { - case 4: - decomposed_attrs = vs_key->u32.decomposed_attrs; - decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w; + + if (!screen->optimal_keys) { + switch (zs->info.stage) { + case MESA_SHADER_VERTEX: { + uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0; + const struct zink_vs_key *vs_key = zink_vs_key(key); + switch (vs_key->size) { + case 4: + decomposed_attrs = vs_key->u32.decomposed_attrs; + decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w; + break; + case 2: + decomposed_attrs = vs_key->u16.decomposed_attrs; + decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w; + break; + case 1: + decomposed_attrs = vs_key->u8.decomposed_attrs; + decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w; + break; + default: break; + } + if (decomposed_attrs || decomposed_attrs_without_w) + NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w); break; - case 2: - decomposed_attrs = vs_key->u16.decomposed_attrs; - decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w; + } + + case MESA_SHADER_GEOMETRY: + if (zink_gs_key(key)->lower_line_stipple) { + NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular); + NIR_PASS_V(nir, nir_lower_var_copies); + need_optimize = true; + } + + if (zink_gs_key(key)->lower_line_smooth) { + NIR_PASS_V(nir, lower_line_smooth_gs); + NIR_PASS_V(nir, nir_lower_var_copies); + need_optimize = true; + } + + if (zink_gs_key(key)->lower_gl_point) { + NIR_PASS_V(nir, lower_gl_point_gs); + need_optimize = true; + } + + if 
(zink_gs_key(key)->lower_pv_mode) { + NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode); + need_optimize = true; //TODO verify that this is required + } break; - case 1: - decomposed_attrs = vs_key->u8.decomposed_attrs; - decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w; + + default: break; - default: break; } - if (decomposed_attrs || decomposed_attrs_without_w) - NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w); - FALLTHROUGH; } + + switch (zs->info.stage) { + case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: if (zink_vs_key_base(key)->last_vertex_stage) { - if (zs->streamout.have_xfb) - streamout = &zs->streamout; - - if (!zink_vs_key_base(key)->clip_halfz) { + if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) { NIR_PASS_V(nir, nir_lower_clip_halfz); } if (zink_vs_key_base(key)->push_drawid) { NIR_PASS_V(nir, lower_drawid); } + } else { + nir->xfb_info = NULL; } + if (zink_vs_key_base(key)->robust_access) + NIR_PASS(need_optimize, nir, lower_txf_lod_robustness); break; case MESA_SHADER_FRAGMENT: - if (!zink_fs_key(key)->samples && - nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) { + if (zink_fs_key(key)->lower_line_smooth) { + NIR_PASS_V(nir, lower_line_smooth_fs, + zink_fs_key(key)->lower_line_stipple); + need_optimize = true; + } else if (zink_fs_key(key)->lower_line_stipple) + NIR_PASS_V(nir, lower_line_stipple_fs); + + if (zink_fs_key(key)->lower_point_smooth) { + NIR_PASS_V(nir, nir_lower_point_smooth); + NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf); + nir->info.fs.uses_discard = true; + need_optimize = true; + } + + if (zink_fs_key(key)->robust_access) + NIR_PASS(need_optimize, nir, lower_txf_lod_robustness); + + if (!zink_fs_key_base(key)->samples && zink_shader_uses_samples(zs)) { /* VK will always use gl_SampleMask[] values even if sample count is 0, * so we need to skip this write 
here to mimic GL's behavior of ignoring it */ @@ -852,73 +4114,158 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad } nir_fixup_deref_modes(nir); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); - optimize_nir(nir); + NIR_PASS_V(nir, nir_shader_intrinsics_pass, remove_interpolate_at_sample, + nir_metadata_dominance | nir_metadata_block_index, NULL); + + need_optimize = true; } - if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) { + if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) { NIR_PASS_V(nir, lower_dual_blend); } - if (zink_fs_key(key)->coord_replace_bits) { - NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits, - false, zink_fs_key(key)->coord_replace_yinvert); + if (zink_fs_key_base(key)->coord_replace_bits) + NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false); + if (zink_fs_key_base(key)->point_coord_yinvert) + NIR_PASS_V(nir, invert_point_coord); + if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) { + nir_foreach_shader_in_variable(var, nir) + var->data.sample = true; + nir->info.fs.uses_sample_qualifier = true; + nir->info.fs.uses_sample_shading = true; } + if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle) + NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? 
extra_data : NULL, true); if (nir->info.fs.uses_fbfetch_output) { nir_variable *fbfetch = NULL; - NIR_PASS_V(nir, lower_fbfetch, &fbfetch); + NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms); /* old variable must be deleted to avoid spirv errors */ fbfetch->data.mode = nir_var_shader_temp; nir_fixup_deref_modes(nir); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); - optimize_nir(nir); + need_optimize = true; } + nir_foreach_shader_in_variable_safe(var, nir) { + if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1) + continue; + nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var); + var->data.mode = nir_var_shader_temp; + nir_fixup_deref_modes(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); + need_optimize = true; + } + break; + case MESA_SHADER_COMPUTE: + if (zink_cs_key(key)->robust_access) + NIR_PASS(need_optimize, nir, lower_txf_lod_robustness); break; default: break; } + if (key->base.needs_zs_shader_swizzle) { + assert(extra_data); + NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false); + } + if (key->base.nonseamless_cube_mask) { + NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask); + need_optimize = true; + } } - NIR_PASS_V(nir, nir_convert_from_ssa, true); - - struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version); - if (!spirv) - goto done; + if (screen->driconf.inline_uniforms) { + NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL); + NIR_PASS_V(nir, rewrite_bo_access, screen); + NIR_PASS_V(nir, remove_bo_access, zs); + need_optimize = true; + } + if (inlined_uniforms) { + optimize_nir(nir, zs, true); + + /* This must be done again. 
*/ + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | + nir_var_shader_out); + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT) + zs->can_inline = false; + } else if (need_optimize) + optimize_nir(nir, zs, true); + bool has_sparse = false; + NIR_PASS(has_sparse, nir, lower_sparse); + if (has_sparse) + optimize_nir(nir, zs, false); + + struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg); + ralloc_free(nir); + return obj; +} - if (zink_debug & ZINK_DEBUG_SPIRV) { - char buf[256]; - static int i; - snprintf(buf, sizeof(buf), "dump%02d.spv", i++); - FILE *fp = fopen(buf, "wb"); - if (fp) { - fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp); - fclose(fp); - fprintf(stderr, "wrote '%s'...\n", buf); +struct zink_shader_object +zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs) +{ + nir_shader *nir = zink_shader_deserialize(screen, zs); + /* TODO: maybe compile multiple variants for different set counts for compact mode? 
*/ + int set = zs->info.stage == MESA_SHADER_FRAGMENT; + if (screen->info.have_EXT_shader_object) + set = zs->info.stage; + unsigned offsets[4]; + zink_descriptor_shader_get_binding_offsets(zs, offsets); + nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) { + if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]) + continue; + var->data.descriptor_set = set; + switch (var->data.mode) { + case nir_var_mem_ubo: + var->data.binding = !!var->data.driver_location; + break; + case nir_var_uniform: + if (glsl_type_is_sampler(glsl_without_array(var->type))) + var->data.binding += offsets[1]; + break; + case nir_var_mem_ssbo: + var->data.binding += offsets[2]; + break; + case nir_var_image: + var->data.binding += offsets[3]; + break; + default: break; + } + } + NIR_PASS_V(nir, add_derefs); + NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8); + if (screen->driconf.inline_uniforms) { + NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL); + NIR_PASS_V(nir, rewrite_bo_access, screen); + NIR_PASS_V(nir, remove_bo_access, zs); + } + optimize_nir(nir, zs, true); + zink_descriptor_shader_init(screen, zs); + nir_shader *nir_clone = NULL; + if (screen->info.have_EXT_shader_object) + nir_clone = nir_shader_clone(nir, nir); + struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL); + if (screen->info.have_EXT_shader_object && !zs->info.internal) { + /* always try to pre-generate a tcs in case it's needed */ + if (zs->info.stage == MESA_SHADER_TESS_EVAL) { + nir_shader *nir_tcs = NULL; + /* use max pcp for compat */ + zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs); + nir_tcs->info.separate_shader = true; + zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs); + ralloc_free(nir_tcs); } } - 
- VkShaderModuleCreateInfo smci = {0}; - smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - smci.codeSize = spirv->num_words * sizeof(uint32_t); - smci.pCode = spirv->words; - - if (VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod) != VK_SUCCESS) - mod = VK_NULL_HANDLE; - -done: ralloc_free(nir); - - /* TODO: determine if there's any reason to cache spirv output? */ - ralloc_free(spirv); - return mod; + spirv_shader_delete(obj.spirv); + obj.spirv = NULL; + return obj; } static bool -lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data) +lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr, + void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (intr->intrinsic != nir_intrinsic_load_instance_id) return false; - b->cursor = nir_after_instr(instr); - nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b)); - nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr); + b->cursor = nir_after_instr(&intr->instr); + nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b)); + nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr); return true; } @@ -927,146 +4274,379 @@ lower_baseinstance(nir_shader *shader) { if (shader->info.stage != MESA_SHADER_VERTEX) return false; - return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL); + return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr, + nir_metadata_dominance, NULL); } -bool nir_lower_dynamic_bo_access(nir_shader *shader); - /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access * so instead we delete all those broken variables and just make new ones */ static bool -unbreak_bos(nir_shader *shader) +unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size) { - uint32_t ssbo_used = 0; - uint32_t ubo_used = 0; uint64_t max_ssbo_size = 0; uint64_t 
max_ubo_size = 0; - bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false}; + uint64_t max_uniform_size = 0; - if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms) + if (!shader->info.num_ssbos && !shader->info.num_ubos) return false; + + nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) { + const struct glsl_type *type = glsl_without_array(var->type); + if (type_is_counter(type)) + continue; + /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */ + unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false); + const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL; + if (interface_type) { + unsigned block_size = glsl_get_explicit_size(interface_type, true); + if (glsl_get_length(interface_type) == 1) { + /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */ + const struct glsl_type *f = glsl_get_struct_field(interface_type, 0); + if (glsl_type_is_array(f) && !glsl_array_size(f)) + block_size = 0; + } + if (block_size) { + block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4); + size = MAX2(size, block_size); + } + } + if (var->data.mode == nir_var_mem_ubo) { + if (var->data.driver_location) + max_ubo_size = MAX2(max_ubo_size, size); + else + max_uniform_size = MAX2(max_uniform_size, size); + } else { + max_ssbo_size = MAX2(max_ssbo_size, size); + if (interface_type) { + if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1))) + needs_size = true; + } + } + var->data.mode = nir_var_shader_temp; + } + nir_fixup_deref_modes(shader); + NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL); + optimize_nir(shader, NULL, true); + + struct glsl_struct_field field = {0}; + field.name = ralloc_strdup(shader, "base"); + if (shader->info.num_ubos) { + if 
(shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) { + field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4); + nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, + glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0), + "uniform_0@32"); + var->interface_type = var->type; + var->data.mode = nir_var_mem_ubo; + var->data.driver_location = 0; + } + + unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo; + uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0); + if (num_ubos && ubos_used) { + field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4); + /* shrink array as much as possible */ + unsigned first_ubo = ffs(ubos_used) - 2; + assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS); + num_ubos -= first_ubo; + assert(num_ubos); + nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, + glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0), + "ubos@32"); + var->interface_type = var->type; + var->data.mode = nir_var_mem_ubo; + var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo; + } + } + if (shader->info.num_ssbos && zs->ssbos_used) { + /* shrink array as much as possible */ + unsigned first_ssbo = ffs(zs->ssbos_used) - 1; + assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS); + unsigned num_ssbos = shader->info.num_ssbos - first_ssbo; + assert(num_ssbos); + const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 
0 : max_ssbo_size * 4, 4); + field.type = ssbo_type; + nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo, + glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0), + "ssbos@32"); + var->interface_type = var->type; + var->data.mode = nir_var_mem_ssbo; + var->data.driver_location = first_ssbo; + } + return true; +} + +static uint32_t +get_src_mask_ssbo(unsigned total, nir_src src) +{ + if (nir_src_is_const(src)) + return BITFIELD_BIT(nir_src_as_uint(src)); + return BITFIELD_MASK(total); +} + +static uint32_t +get_src_mask_ubo(unsigned total, nir_src src) +{ + if (nir_src_is_const(src)) + return BITFIELD_BIT(nir_src_as_uint(src)); + return BITFIELD_MASK(total) & ~BITFIELD_BIT(0); +} + +static bool +analyze_io(struct zink_shader *zs, nir_shader *shader) +{ + bool ret = false; nir_function_impl *impl = nir_shader_get_entrypoint(shader); nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { + if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) { + /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */ + nir_tex_instr *tex = nir_instr_as_tex(instr); + nir_foreach_variable_with_modes(img, shader, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(img->type))) { + unsigned size = glsl_type_is_array(img->type) ? 
glsl_get_aoa_size(img->type) : 1; + if (tex->texture_index >= img->data.driver_location && + tex->texture_index < img->data.driver_location + size) { + BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1)); + break; + } + } + } + continue; + } if (instr->type != nir_instr_type_intrinsic) continue; - + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { case nir_intrinsic_store_ssbo: - ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1])); + zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]); break; - + case nir_intrinsic_get_ssbo_size: { - uint32_t slot = nir_src_as_uint(intrin->src[0]); - ssbo_used |= BITFIELD_BIT(slot); - ssbo_sizes[slot] = true; + zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]); + ret = true; break; } - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_ssbo_atomic_fmin: - case nir_intrinsic_ssbo_atomic_fmax: - case nir_intrinsic_ssbo_atomic_fcomp_swap: + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: case nir_intrinsic_load_ssbo: - ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0])); + zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]); break; case nir_intrinsic_load_ubo: case nir_intrinsic_load_ubo_vec4: - ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0])); + zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]); break; default: break; } } } + return ret; +} - nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) { - const struct 
glsl_type *type = glsl_without_array(var->type); - if (type_is_counter(type)) - continue; - unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false); - if (var->data.mode == nir_var_mem_ubo) - max_ubo_size = MAX2(max_ubo_size, size); - else - max_ssbo_size = MAX2(max_ssbo_size, size); - var->data.mode = nir_var_shader_temp; +struct zink_bindless_info { + nir_variable *bindless[4]; + unsigned bindless_set; +}; + +/* this is a "default" bindless texture used if the shader has no texture variables */ +static nir_variable * +create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set) +{ + unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0; + nir_variable *var; + + const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT); + var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture"); + var->data.descriptor_set = descriptor_set; + var->data.driver_location = var->data.binding = binding; + return var; +} + +/* this is a "default" bindless image used if the shader has no image variables */ +static nir_variable * +create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set) +{ + unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 
3 : 2; + nir_variable *var; + + const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); + var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image"); + var->data.descriptor_set = descriptor_set; + var->data.driver_location = var->data.binding = binding; + var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM; + return var; +} + +/* rewrite bindless instructions as array deref instructions */ +static bool +lower_bindless_instr(nir_builder *b, nir_instr *in, void *data) +{ + struct zink_bindless_info *bindless = data; + + if (in->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(in); + int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle); + if (idx == -1) + return false; + + nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0]; + if (!var) { + var = create_bindless_texture(b->shader, tex, bindless->bindless_set); + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) + bindless->bindless[1] = var; + else + bindless->bindless[0] = var; + } + b->cursor = nir_before_instr(in); + nir_deref_instr *deref = nir_build_deref_var(b, var); + if (glsl_type_is_array(var->type)) + deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32)); + nir_src_rewrite(&tex->src[idx].src, &deref->def); + + /* bindless sampling uses the variable type directly, which means the tex instr has to exactly + * match up with it in contrast to normal sampler ops where things are a bit more flexible; + * this results in cases where a shader is passed with sampler2DArray but the tex instr only has + * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors + * + * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing + * - Warhammer 40k: Dawn of War III + */ + unsigned needed_components = 
glsl_get_sampler_coordinate_components(glsl_without_array(var->type)); + unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord); + unsigned coord_components = nir_src_num_components(tex->src[c].src); + if (coord_components < needed_components) { + nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components); + nir_src_rewrite(&tex->src[c].src, def); + tex->coord_components = needed_components; + } + return true; } + if (in->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); + + nir_intrinsic_op op; +#define OP_SWAP(OP) \ + case nir_intrinsic_bindless_image_##OP: \ + op = nir_intrinsic_image_deref_##OP; \ + break; + + + /* convert bindless intrinsics to deref intrinsics */ + switch (instr->intrinsic) { + OP_SWAP(atomic) + OP_SWAP(atomic_swap) + OP_SWAP(format) + OP_SWAP(load) + OP_SWAP(order) + OP_SWAP(samples) + OP_SWAP(size) + OP_SWAP(store) + default: + return false; + } + + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? 
bindless->bindless[3] : bindless->bindless[2]; + if (!var) + var = create_bindless_image(b->shader, dim, bindless->bindless_set); + instr->intrinsic = op; + b->cursor = nir_before_instr(in); + nir_deref_instr *deref = nir_build_deref_var(b, var); + if (glsl_type_is_array(var->type)) + deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32)); + nir_src_rewrite(&instr->src[0], &deref->def); + return true; +} + +static bool +lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless) +{ + if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless)) + return false; nir_fixup_deref_modes(shader); NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL); - optimize_nir(shader); + optimize_nir(shader, NULL, true); + return true; +} - if (!ssbo_used && !ubo_used) +/* convert shader image/texture io variables to int64 handles for bindless indexing */ +static bool +lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr, + void *data) +{ + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(instr, &is_load, &is_input, &is_interp)) return false; - struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2); - fields[0].name = ralloc_strdup(shader, "base"); - fields[1].name = ralloc_strdup(shader, "unsized"); - if (ubo_used) { - const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4); - fields[0].type = ubo_type; - u_foreach_bit(slot, ubo_used) { - char buf[64]; - snprintf(buf, sizeof(buf), "ubo_slot_%u", slot); - nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf); - var->interface_type = var->type; - var->data.driver_location = slot; - } - } - if (ssbo_used) { - const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4); - const struct glsl_type *unsized = 
glsl_array_type(glsl_uint_type(), 0, 4); - fields[0].type = ssbo_type; - u_foreach_bit(slot, ssbo_used) { - char buf[64]; - snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot); - if (ssbo_sizes[slot]) - fields[1].type = unsized; - else - fields[1].type = NULL; - nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo, - glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf); - var->interface_type = var->type; - var->data.driver_location = slot; - } - } + nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out); + if (var->data.bindless) + return false; + if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out) + return false; + if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type)) + return false; + + var->type = glsl_vector_type(GLSL_TYPE_INT, 2); + var->data.bindless = 1; return true; } +static bool +lower_bindless_io(nir_shader *shader) +{ + return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr, + nir_metadata_dominance, NULL); +} + static uint32_t -zink_binding(gl_shader_stage stage, VkDescriptorType type, int index) +zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors) { if (stage == MESA_SHADER_NONE) { unreachable("not supported"); } else { + unsigned base = stage; + /* clamp compute bindings for better driver efficiency */ + if (gl_shader_stage_is_compute(stage)) + base = 0; switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - assert(index < PIPE_MAX_CONSTANT_BUFFERS); - return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index; + return base * 2 + !!index; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + assert(stage == MESA_SHADER_KERNEL); + FALLTHROUGH; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + if (stage == 
MESA_SHADER_KERNEL) { + assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS); + return index + PIPE_MAX_SAMPLERS; + } + FALLTHROUGH; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + assert(index < PIPE_MAX_SAMPLERS); + assert(stage != MESA_SHADER_KERNEL); + return (base * PIPE_MAX_SAMPLERS) + index; + + case VK_DESCRIPTOR_TYPE_SAMPLER: assert(index < PIPE_MAX_SAMPLERS); - return (stage * PIPE_MAX_SAMPLERS) + index; + assert(stage == MESA_SHADER_KERNEL); + return index; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - assert(index < PIPE_MAX_SHADER_BUFFERS); - return (stage * PIPE_MAX_SHADER_BUFFERS) + index; + return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2)); case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - assert(index < PIPE_MAX_SHADER_IMAGES); - return (stage * PIPE_MAX_SHADER_IMAGES) + index; + assert(index < ZINK_MAX_SHADER_IMAGES); + if (stage == MESA_SHADER_KERNEL) + return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0); + return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS)); default: unreachable("unexpected type"); @@ -1074,40 +4654,1542 @@ zink_binding(gl_shader_stage stage, VkDescriptorType type, int index) } } +static void +handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless) +{ + if (glsl_type_is_struct(type)) { + for (unsigned i = 0; i < glsl_get_length(type); i++) + handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless); + return; + } + + /* just a random scalar in a struct */ + if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type)) + return; + + VkDescriptorType vktype = glsl_type_is_image(type) ? 
zink_image_type(type) : zink_sampler_type(type); + unsigned binding; + switch (vktype) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + binding = 0; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + binding = 1; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + binding = 2; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + binding = 3; + break; + default: + unreachable("unknown"); + } + if (!bindless->bindless[binding]) { + bindless->bindless[binding] = nir_variable_clone(var, nir); + bindless->bindless[binding]->data.bindless = 0; + bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set; + bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0); + bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding; + if (!bindless->bindless[binding]->data.image.format) + bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM; + nir_shader_add_variable(nir, bindless->bindless[binding]); + } else { + assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type))); + } + var->data.mode = nir_var_shader_temp; +} + +static bool +convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data) +{ + struct zink_screen *screen = data; + if (instr->type != nir_instr_type_tex) + return false; + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow) + return false; + if (tex->is_sparse && screen->need_2D_sparse) { + /* no known case of this exists: only nvidia can hit it, and nothing uses it */ + mesa_loge("unhandled/unsupported 1D sparse texture!"); + abort(); + } + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + b->cursor = nir_before_instr(instr); + tex->coord_components++; + unsigned srcs[] = { + nir_tex_src_coord, + nir_tex_src_offset, + nir_tex_src_ddx, + nir_tex_src_ddy, + }; + for (unsigned i = 0; i < 
ARRAY_SIZE(srcs); i++) { + unsigned c = nir_tex_instr_src_index(tex, srcs[i]); + if (c == -1) + continue; + if (tex->src[c].src.ssa->num_components == tex->coord_components) + continue; + nir_def *def; + nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size); + if (tex->src[c].src.ssa->num_components == 1) + def = nir_vec2(b, tex->src[c].src.ssa, zero); + else + def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1)); + nir_src_rewrite(&tex->src[c].src, def); + } + b->cursor = nir_after_instr(instr); + unsigned needed_components = nir_tex_instr_dest_size(tex); + unsigned num_components = tex->def.num_components; + if (needed_components > num_components) { + tex->def.num_components = needed_components; + assert(num_components < 3); + /* take either xz or just x since this is promoted to 2D from 1D */ + uint32_t mask = num_components == 2 ? (1|4) : 1; + nir_def *dst = nir_channels(b, &tex->def, mask); + nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr); + } + return true; +} + +static bool +lower_1d_shadow(nir_shader *shader, struct zink_screen *screen) +{ + bool found = false; + nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) { + const struct glsl_type *type = glsl_without_array(var->type); + unsigned length = glsl_get_length(var->type); + if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D) + continue; + const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type)); + var->type = type != var->type ? 
glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler; + + found = true; + } + if (found) + nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen); + return found; +} + +static void +scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs) +{ + nir_foreach_function_impl(impl, shader) { + nir_foreach_block_safe(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + zs->sinfo.have_sparse |= tex->is_sparse; + } + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_image_deref_load || + intr->intrinsic == nir_intrinsic_image_deref_sparse_load || + intr->intrinsic == nir_intrinsic_image_deref_store || + intr->intrinsic == nir_intrinsic_image_deref_atomic || + intr->intrinsic == nir_intrinsic_image_deref_atomic_swap || + intr->intrinsic == nir_intrinsic_image_deref_size || + intr->intrinsic == nir_intrinsic_image_deref_samples || + intr->intrinsic == nir_intrinsic_image_deref_format || + intr->intrinsic == nir_intrinsic_image_deref_order) { + + nir_variable *var = nir_intrinsic_get_var(intr, 0); + + /* Structs have been lowered already, so get_aoa_size is sufficient. */ + const unsigned size = + glsl_type_is_array(var->type) ? 
glsl_get_aoa_size(var->type) : 1; + BITSET_SET_RANGE(shader->info.images_used, var->data.binding, + var->data.binding + (MAX2(size, 1) - 1)); + } + if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) + zs->uses_sample = true; + if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident || + intr->intrinsic == nir_intrinsic_image_deref_sparse_load) + zs->sinfo.have_sparse = true; + + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (filter_io_instr(intr, &is_load, &is_input, &is_interp)) { + nir_io_semantics s = nir_intrinsic_io_semantics(intr); + if (io_instr_is_arrayed(intr) && s.location < VARYING_SLOT_PATCH0) { + if (is_input) + zs->arrayed_inputs |= BITFIELD64_BIT(s.location); + else + zs->arrayed_outputs |= BITFIELD64_BIT(s.location); + } + /* TODO: delete this once #10826 is fixed */ + if (!(is_input && shader->info.stage == MESA_SHADER_VERTEX)) { + if (is_clipcull_dist(s.location)) { + unsigned frac = nir_intrinsic_component(intr) + 1; + if (s.location < VARYING_SLOT_CULL_DIST0) { + if (s.location == VARYING_SLOT_CLIP_DIST1) + frac += 4; + shader->info.clip_distance_array_size = MAX3(shader->info.clip_distance_array_size, frac, s.num_slots); + } else { + if (s.location == VARYING_SLOT_CULL_DIST1) + frac += 4; + shader->info.cull_distance_array_size = MAX3(shader->info.cull_distance_array_size, frac, s.num_slots); + } + } + } + } + + static bool warned = false; + if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) { + switch (intr->intrinsic) { + case nir_intrinsic_image_deref_atomic: { + nir_variable *var = nir_intrinsic_get_var(intr, 0); + if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd && + util_format_is_float(var->data.image.format)) + fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n"); + break; + } + default: + break; + } + } + } + } + } +} + +static bool +match_tex_dests_instr(nir_builder *b, nir_instr *in, void 
*data) +{ + if (in->type != nir_instr_type_tex) + return false; + nir_tex_instr *tex = nir_instr_as_tex(in); + if (tex->op == nir_texop_txs || tex->op == nir_texop_lod) + return false; + int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle); + nir_variable *var = NULL; + if (handle != -1) { + var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src)); + } else { + nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(img->type))) { + unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1; + if (tex->texture_index >= img->data.driver_location && + tex->texture_index < img->data.driver_location + size) { + var = img; + break; + } + } + } + } + return !!rewrite_tex_dest(b, tex, var, data); +} + +static bool +match_tex_dests(nir_shader *shader, struct zink_shader *zs) +{ + return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs); +} + +static bool +split_bitfields_instr(nir_builder *b, nir_instr *in, void *data) +{ + if (in->type != nir_instr_type_alu) + return false; + nir_alu_instr *alu = nir_instr_as_alu(in); + switch (alu->op) { + case nir_op_ubitfield_extract: + case nir_op_ibitfield_extract: + case nir_op_bitfield_insert: + break; + default: + return false; + } + unsigned num_components = alu->def.num_components; + if (num_components == 1) + return false; + b->cursor = nir_before_instr(in); + nir_def *dests[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < num_components; i++) { + if (alu->op == nir_op_bitfield_insert) + dests[i] = nir_bitfield_insert(b, + nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]), + nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]), + nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]), + nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i])); + else if (alu->op == nir_op_ubitfield_extract) + dests[i] = nir_ubitfield_extract(b, + nir_channel(b, 
alu->src[0].src.ssa, alu->src[0].swizzle[i]), + nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]), + nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i])); + else + dests[i] = nir_ibitfield_extract(b, + nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]), + nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]), + nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i])); + } + nir_def *dest = nir_vec(b, dests, num_components); + nir_def_rewrite_uses_after(&alu->def, dest, in); + nir_instr_remove(in); + return true; +} + + +static bool +split_bitfields(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL); +} + +static bool +strip_tex_ms_instr(nir_builder *b, nir_instr *in, void *data) +{ + if (in->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(in); + switch (intr->intrinsic) { + case nir_intrinsic_image_deref_samples: + b->cursor = nir_before_instr(in); + nir_def_rewrite_uses_after(&intr->def, nir_imm_zero(b, 1, intr->def.bit_size), in); + nir_instr_remove(in); + break; + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_load: + break; + default: + return false; + } + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr); + if (dim != GLSL_SAMPLER_DIM_MS) + return false; + + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + nir_deref_instr *parent = nir_deref_instr_parent(deref); + if (parent) { + parent->type = var->type; + deref->type = glsl_without_array(var->type); + } else { + deref->type = var->type; + } + nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D); + return true; +} + + +static bool +strip_tex_ms(nir_shader *shader) +{ + bool progress = false; + nir_foreach_image_variable(var, shader) { + const struct glsl_type *bare_type = glsl_without_array(var->type); + if (glsl_get_sampler_dim(bare_type) != 
GLSL_SAMPLER_DIM_MS) + continue; + unsigned array_size = 0; + if (glsl_type_is_array(var->type)) + array_size = glsl_array_size(var->type); + + const struct glsl_type *new_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, glsl_sampler_type_is_array(bare_type), glsl_get_sampler_result_type(bare_type)); + if (array_size) + new_type = glsl_array_type(new_type, array_size, glsl_get_explicit_stride(var->type)); + var->type = new_type; + progress = true; + } + if (!progress) + return false; + return nir_shader_instructions_pass(shader, strip_tex_ms_instr, nir_metadata_all, NULL); +} + +static void +rewrite_cl_derefs(nir_shader *nir, nir_variable *var) +{ + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_deref) + continue; + nir_deref_instr *deref = nir_instr_as_deref(instr); + nir_variable *img = nir_deref_instr_get_variable(deref); + if (img != var) + continue; + if (glsl_type_is_array(var->type)) { + if (deref->deref_type == nir_deref_type_array) + deref->type = glsl_without_array(var->type); + else + deref->type = var->type; + } else { + deref->type = var->type; + } + } + } + } +} + +static void +type_image(nir_shader *nir, nir_variable *var) +{ + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_image_deref_load || + intr->intrinsic == nir_intrinsic_image_deref_sparse_load || + intr->intrinsic == nir_intrinsic_image_deref_store || + intr->intrinsic == nir_intrinsic_image_deref_atomic || + intr->intrinsic == nir_intrinsic_image_deref_atomic_swap || + intr->intrinsic == nir_intrinsic_image_deref_samples || + intr->intrinsic == nir_intrinsic_image_deref_format || + intr->intrinsic == nir_intrinsic_image_deref_order) { + nir_deref_instr *deref = 
nir_src_as_deref(intr->src[0]); + nir_variable *img = nir_deref_instr_get_variable(deref); + if (img != var) + continue; + nir_alu_type alu_type = nir_intrinsic_src_type(intr); + const struct glsl_type *type = glsl_without_array(var->type); + if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) { + assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type)); + continue; + } + const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type)); + if (glsl_type_is_array(var->type)) + img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type)); + var->type = img_type; + rewrite_cl_derefs(nir, var); + return; + } + } + } + } + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_image_deref_size) + continue; + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *img = nir_deref_instr_get_variable(deref); + if (img != var) + continue; + nir_alu_type alu_type = nir_type_uint32; + const struct glsl_type *type = glsl_without_array(var->type); + if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) { + continue; + } + const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type)); + if (glsl_type_is_array(var->type)) + img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type)); + var->type = img_type; + rewrite_cl_derefs(nir, var); + return; + } + } + } + var->data.mode = nir_var_shader_temp; +} + +static bool +type_sampler_vars(nir_shader *nir, unsigned *sampler_mask) +{ + bool progress = false; + 
nir_foreach_function_impl(impl, nir) { + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (nir_tex_instr_need_sampler(tex)) + *sampler_mask |= BITFIELD_BIT(tex->sampler_index); + nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index); + assert(var); + if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID && + nir_tex_instr_is_query(tex)) + continue; + const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type)); + unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1; + if (size > 1) + img_type = glsl_array_type(img_type, size, 0); + var->type = img_type; + progress = true; + } + } + } + return progress; +} + +static bool +delete_samplers(nir_shader *nir) +{ + bool progress = false; + nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(var->type))) { + var->data.mode = nir_var_shader_temp; + progress = true; + } + } + return progress; +} + +static bool +type_images(nir_shader *nir, unsigned *sampler_mask) +{ + bool progress = false; + progress |= delete_samplers(nir); + progress |= type_sampler_vars(nir, sampler_mask); + nir_foreach_variable_with_modes(var, nir, nir_var_image) { + type_image(nir, var); + progress = true; + } + return progress; +} + +/* attempt to assign io for separate shaders */ +static bool +fixup_io_locations(nir_shader *nir) +{ + nir_variable_mode modes; + if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX) + modes = nir_var_shader_in | nir_var_shader_out; + else + modes = nir->info.stage == MESA_SHADER_FRAGMENT ? 
nir_var_shader_in : nir_var_shader_out; + u_foreach_bit(mode, modes) { + nir_variable_mode m = BITFIELD_BIT(mode); + if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) || + (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) { + /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules: + * - i/o interface blocks don't need to match + * - any location can be present or not + * - it just has to work + * + * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS + * since it's a builtin and yolo it with all the other legacy crap + */ + nir_foreach_variable_with_modes(var, nir, m) { + if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE)) + continue; + if (var->data.location == VARYING_SLOT_VAR0) + var->data.driver_location = 0; + else if (var->data.patch) + var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0; + else + var->data.driver_location = var->data.location; + } + continue; + } + /* i/o interface blocks are required to be EXACT matches between stages: + * iterate over all locations and set locations incrementally + */ + unsigned slot = 0; + for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++) { + if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE)) + continue; + bool found = false; + unsigned size = 0; + nir_foreach_variable_with_modes(var, nir, m) { + if (var->data.location != i) + continue; + /* only add slots for non-component vars or first-time component vars */ + if (!var->data.location_frac || !size) { + /* ensure variable is given enough slots */ + if (nir_is_arrayed_io(var, nir->info.stage)) + size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false); + else + size += glsl_count_vec4_slots(var->type, false, false); + } + if (var->data.patch) + 
var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0; + else + var->data.driver_location = slot; + found = true; + } + slot += size; + if (found) { + /* ensure the consumed slots aren't double iterated */ + i += size - 1; + } else { + /* locations used between stages are not required to be contiguous */ + if (i >= VARYING_SLOT_VAR0) + slot++; + } + } + } + return true; +} + +static uint64_t +zink_flat_flags(struct nir_shader *shader) +{ + uint64_t flat_flags = 0; + nir_foreach_shader_in_variable(var, shader) { + if (var->data.interpolation == INTERP_MODE_FLAT) + flat_flags |= BITFIELD64_BIT(var->data.location); + } + + return flat_flags; +} + +struct rework_io_state { + /* these are search criteria */ + bool indirect_only; + unsigned location; + nir_variable_mode mode; + gl_shader_stage stage; + nir_shader *nir; + const char *name; + + /* these are found by scanning */ + bool arrayed_io; + bool medium_precision; + bool fb_fetch_output; + bool dual_source_blend_index; + uint32_t component_mask; + uint32_t ignored_component_mask; + unsigned array_size; + unsigned bit_size; + unsigned base; + nir_alu_type type; + /* must be last */ + char *newname; +}; + +/* match an existing variable against the rework state */ +static nir_variable * +find_rework_var(nir_shader *nir, struct rework_io_state *ris) +{ + nir_foreach_variable_with_modes(var, nir, ris->mode) { + const struct glsl_type *type = var->type; + if (nir_is_arrayed_io(var, nir->info.stage)) + type = glsl_get_array_element(type); + if (var->data.fb_fetch_output != ris->fb_fetch_output) + continue; + if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index) + continue; + unsigned num_slots = var->data.compact ? 
DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false); + if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location) + continue; + unsigned num_components = glsl_get_vector_elements(glsl_without_array(type)); + assert(!glsl_type_contains_64bit(type)); + uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4); + if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask) + return var; + } + return NULL; +} + +static void +update_io_var_name(struct rework_io_state *ris, const char *name) +{ + if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))) + return; + if (!name) + return; + if (ris->name && !strcmp(ris->name, name)) + return; + if (ris->newname && !strcmp(ris->newname, name)) + return; + if (ris->newname) { + ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name); + } else if (ris->name) { + ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name); + } else { + ris->newname = ralloc_strdup(ris->nir, name); + } +} + +/* check/update tracking state for variable info */ +static void +update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris) +{ + bool is_load = false; + bool is_input = false; + bool is_interp = false; + filter_io_instr(intr, &is_load, &is_input, &is_interp); + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + unsigned frac = nir_intrinsic_component(intr); + /* the mask of components for the instruction */ + uint32_t cmask = is_load ? 
BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac); + + /* always check for existing variables first */ + struct rework_io_state test = { + .location = ris->location, + .mode = ris->mode, + .stage = ris->stage, + .arrayed_io = io_instr_is_arrayed(intr), + .medium_precision = sem.medium_precision, + .fb_fetch_output = sem.fb_fetch_output, + .dual_source_blend_index = sem.dual_source_blend_index, + .component_mask = cmask, + .array_size = sem.num_slots > 1 ? sem.num_slots : 0, + }; + if (find_rework_var(ris->nir, &test)) + return; + + /* filter ignored components to scan later: + * - ignore no-overlapping-components case + * - always match fbfetch and dual src blend + */ + if (ris->component_mask && + (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) { + ris->ignored_component_mask |= cmask; + return; + } + + assert(!ris->indirect_only || sem.num_slots > 1); + if (sem.num_slots > 1) + ris->array_size = MAX2(ris->array_size, sem.num_slots); + + assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr)); + ris->arrayed_io = io_instr_is_arrayed(intr); + + ris->component_mask |= cmask; + + unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]); + assert(!ris->bit_size || ris->bit_size == bit_size); + ris->bit_size = bit_size; + + nir_alu_type type = is_load ? 
nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr); + if (ris->type) { + /* in the case of clashing types, this heuristic guarantees some semblance of a match */ + if (ris->type & nir_type_float || type & nir_type_float) { + ris->type = nir_type_float | bit_size; + } else if (ris->type & nir_type_int || type & nir_type_int) { + ris->type = nir_type_int | bit_size; + } else if (ris->type & nir_type_uint || type & nir_type_uint) { + ris->type = nir_type_uint | bit_size; + } else { + assert(bit_size == 1); + ris->type = nir_type_bool; + } + } else { + ris->type = type; + } + + update_io_var_name(ris, intr->name); + + ris->medium_precision |= sem.medium_precision; + ris->fb_fetch_output |= sem.fb_fetch_output; + ris->dual_source_blend_index |= sem.dual_source_blend_index; + if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) + ris->base = nir_intrinsic_base(intr); +} + +/* instruction-level scanning for variable data */ +static bool +scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + struct rework_io_state *ris = data; + bool is_load = false; + bool is_input = false; + bool is_interp = false; + /* mode-based filtering */ + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + return false; + if (ris->mode == nir_var_shader_in) { + if (!is_input) + return false; + } else { + if (is_input) + return false; + } + /* location-based filtering */ + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location)) + return false; + + /* only scan indirect i/o when indirect_only is set */ + nir_src *src_offset = nir_get_io_offset_src(intr); + if (!nir_src_is_const(*src_offset)) { + if (!ris->indirect_only) + return false; + update_io_var_state(intr, ris); + return false; + } + + /* don't scan direct i/o when indirect_only is set */ + if (ris->indirect_only) + return false; + + update_io_var_state(intr, 
ris); + return false; +} + +/* scan a given i/o slot for state info */ +static struct rework_io_state +scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects) +{ + struct rework_io_state ris = { + .location = location, + .mode = mode, + .stage = nir->info.stage, + .nir = nir, + }; + + struct rework_io_state test; + do { + update_io_var_name(&test, ris.newname ? ris.newname : ris.name); + test = ris; + /* always run indirect scan first to detect potential overlaps */ + if (scan_indirects) { + ris.indirect_only = true; + nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris); + } + ris.indirect_only = false; + nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris); + /* keep scanning until no changes found */ + } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname))); + return ris; +} + +/* create a variable using explicit/scan info */ +static void +create_io_var(nir_shader *nir, struct rework_io_state *ris) +{ + char name[1024]; + assert(ris->component_mask); + if (ris->newname || ris->name) { + snprintf(name, sizeof(name), "%s", ris->newname ? 
ris->newname : ris->name); + /* always use builtin name where possible */ + } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) { + snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location)); + } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) { + snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location)); + } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) { + snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage)); + } else { + int c = ffs(ris->component_mask) - 1; + if (c) + snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c); + else + snprintf(name, sizeof(name), "slot_%u", ris->location); + } + /* calculate vec/array type */ + int frac = ffs(ris->component_mask) - 1; + int num_components = util_last_bit(ris->component_mask) - frac; + assert(ris->component_mask == BITFIELD_RANGE(frac, num_components)); + const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components); + if (ris->array_size) + vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type)); + if (ris->arrayed_io) { + /* tess size may be unknown with generated tcs */ + unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ? 
+ nir->info.gs.vertices_in : 32 /* MAX_PATCH_VERTICES */; + vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type)); + } + nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name); + var->data.location_frac = frac; + var->data.location = ris->location; + /* gallium vertex inputs use intrinsic 'base' indexing */ + if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) + var->data.driver_location = ris->base; + var->data.patch = ris->location >= VARYING_SLOT_PATCH0 || + ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) && + (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER)); + /* set flat by default: add_derefs will fill this in later after more shader passes */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in) + var->data.interpolation = INTERP_MODE_FLAT; + var->data.fb_fetch_output = ris->fb_fetch_output; + var->data.index = ris->dual_source_blend_index; + var->data.precision = ris->medium_precision; + /* only clip/cull dist and tess levels are compact */ + if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in) + var->data.compact = is_clipcull_dist(ris->location) || (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER); +} + +/* loop the i/o mask and generate variables for specified locations */ +static void +loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask) +{ + bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in; + u_foreach_bit64(slot, mask) { + if (patch) + slot += VARYING_SLOT_PATCH0; + + /* this should've been handled explicitly */ + assert(is_vertex_input || !is_clipcull_dist(slot)); + + unsigned remaining = 0; + do { + /* scan the slot for usage */ + struct rework_io_state ris = scan_io_var_slot(nir, mode, slot, indirect); + 
/* one of these must be true or things have gone very wrong */ + assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining); + /* release builds only */ + if (!ris.component_mask) + break; + + /* whatever reaches this point is either enough info to create a variable or an existing variable */ + if (!find_rework_var(nir, &ris)) + create_io_var(nir, &ris); + /* scanning may detect multiple potential variables per location at component offsets: process again */ + remaining = ris.ignored_component_mask; + } while (remaining); + } +} + +/* for a given mode, generate variables */ +static void +rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs) +{ + assert(mode == nir_var_shader_out || mode == nir_var_shader_in); + assert(util_bitcount(mode) == 1); + bool found = false; + /* if no i/o, skip */ + if (mode == nir_var_shader_out) + found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read; + else + found = nir->info.inputs_read || nir->info.patch_inputs_read; + if (!found) + return; + + /* use local copies to enable incremental processing */ + uint64_t inputs_read = nir->info.inputs_read; + uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly; + uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read; + uint64_t outputs_accessed_indirectly = nir->info.outputs_accessed_indirectly; + + /* fragment outputs are special: handle separately */ + if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) { + assert(!outputs_accessed_indirectly); + u_foreach_bit64(slot, outputs_accessed) { + struct rework_io_state ris = { + .location = slot, + .mode = mode, + .stage = nir->info.stage, + }; + /* explicitly handle builtins */ + switch (slot) { + case FRAG_RESULT_DEPTH: + case FRAG_RESULT_STENCIL: + case FRAG_RESULT_SAMPLE_MASK: + ris.bit_size = 32; + ris.component_mask = 0x1; + ris.type = slot == FRAG_RESULT_DEPTH 
? nir_type_float32 : nir_type_uint32; + create_io_var(nir, &ris); + outputs_accessed &= ~BITFIELD64_BIT(slot); + break; + default: + break; + } + } + /* the rest of the outputs can be generated normally */ + loop_io_var_mask(nir, mode, false, false, outputs_accessed); + return; + } + + /* vertex inputs are special: handle separately */ + if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) { + assert(!inputs_read_indirectly); + u_foreach_bit64(slot, inputs_read) { + /* explicitly handle builtins */ + if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE) + continue; + + uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf; + struct rework_io_state ris = { + .location = slot, + .mode = mode, + .stage = nir->info.stage, + .bit_size = 32, + .component_mask = component_mask, + .type = nir_type_float32, + .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname, + }; + create_io_var(nir, &ris); + inputs_read &= ~BITFIELD64_BIT(slot); + } + /* the rest of the inputs can be generated normally */ + loop_io_var_mask(nir, mode, false, false, inputs_read); + return; + } + + /* these are the masks to process based on the mode: nothing "special" as above */ + uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed; + uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly; + u_foreach_bit64(slot, mask) { + struct rework_io_state ris = { + .location = slot, + .mode = mode, + .stage = nir->info.stage, + .arrayed_io = (mode == nir_var_shader_in ? 
zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot), + }; + /* explicitly handle builtins */ + unsigned max_components = 0; + switch (slot) { + case VARYING_SLOT_FOGC: + /* use intr components */ + break; + case VARYING_SLOT_POS: + case VARYING_SLOT_CLIP_VERTEX: + case VARYING_SLOT_PNTC: + case VARYING_SLOT_BOUNDING_BOX0: + case VARYING_SLOT_BOUNDING_BOX1: + max_components = 4; + ris.type = nir_type_float32; + break; + case VARYING_SLOT_CLIP_DIST0: + max_components = nir->info.clip_distance_array_size; + assert(max_components); + ris.type = nir_type_float32; + break; + case VARYING_SLOT_CULL_DIST0: + max_components = nir->info.cull_distance_array_size; + assert(max_components); + ris.type = nir_type_float32; + break; + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CULL_DIST1: + mask &= ~BITFIELD64_BIT(slot); + indirect_mask &= ~BITFIELD64_BIT(slot); + continue; + case VARYING_SLOT_TESS_LEVEL_OUTER: + max_components = 4; + ris.type = nir_type_float32; + break; + case VARYING_SLOT_TESS_LEVEL_INNER: + max_components = 2; + ris.type = nir_type_float32; + break; + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_LAYER: + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_FACE: + case VARYING_SLOT_VIEW_INDEX: + case VARYING_SLOT_VIEWPORT_MASK: + ris.type = nir_type_int32; + max_components = 1; + break; + case VARYING_SLOT_PSIZ: + max_components = 1; + ris.type = nir_type_float32; + break; + default: + break; + } + if (!max_components) + continue; + switch (slot) { + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_CULL_DIST1: + case VARYING_SLOT_TESS_LEVEL_OUTER: + case VARYING_SLOT_TESS_LEVEL_INNER: + /* compact arrays */ + ris.component_mask = 0x1; + ris.array_size = max_components; + break; + default: + ris.component_mask = BITFIELD_MASK(max_components); + break; + } + ris.bit_size = 32; + create_io_var(nir, &ris); + mask &= ~BITFIELD64_BIT(slot); + /* eliminate clip/cull distance 
scanning early */ + indirect_mask &= ~BITFIELD64_BIT(slot); + } + + /* patch i/o */ + if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) || + (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) { + uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written; + uint64_t indirect_patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly : nir->info.patch_outputs_accessed_indirectly; + uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed; + + loop_io_var_mask(nir, mode, true, true, indirect_patch_mask); + loop_io_var_mask(nir, mode, false, true, patch_mask); + } + + /* regular i/o */ + loop_io_var_mask(nir, mode, true, false, indirect_mask); + loop_io_var_mask(nir, mode, false, false, mask); +} + +static int +zink_type_size(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +static nir_mem_access_size_align +mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, + uint8_t bit_size, uint32_t align, + uint32_t align_offset, bool offset_is_const, + const void *cb_data) +{ + align = nir_combined_align(align, align_offset); + + assert(util_is_power_of_two_nonzero(align)); + + /* simply drop the bit_size for unaligned load/stores */ + if (align < (bit_size / 8)) { + return (nir_mem_access_size_align){ + .num_components = MIN2(bytes / align, 4), + .bit_size = align * 8, + .align = align, + }; + } else { + return (nir_mem_access_size_align){ + .num_components = MIN2(bytes / (bit_size / 8), 4), + .bit_size = bit_size, + .align = bit_size / 8, + }; + } +} + +static nir_mem_access_size_align +mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, + uint8_t bit_size, uint32_t align, + uint32_t align_offset, bool offset_is_const, + const void *cb_data) +{ + bit_size = *(const uint8_t *)cb_data; + align = nir_combined_align(align, align_offset); + + 
assert(util_is_power_of_two_nonzero(align)); + + return (nir_mem_access_size_align){ + .num_components = MIN2(bytes / (bit_size / 8), 4), + .bit_size = bit_size, + .align = bit_size / 8, + }; +} + +static bool +alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data) +{ + uint8_t *bit_size = data; + switch (instr->intrinsic) { + case nir_intrinsic_load_scratch: + *bit_size = MIN2(*bit_size, instr->def.bit_size); + return false; + case nir_intrinsic_store_scratch: + *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size); + return false; + default: + return false; + } +} + +static bool +alias_scratch_memory(nir_shader *nir) +{ + uint8_t bit_size = 64; + + nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size); + nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = { + .modes = nir_var_function_temp, + .may_lower_unaligned_stores_to_atomics = true, + .callback = mem_access_scratch_size_align_cb, + .cb_data = &bit_size, + }; + return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options); +} + +static uint8_t +lower_vec816_alu(const nir_instr *instr, const void *cb_data) +{ + return 4; +} + +static bool +fix_vertex_input_locations_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp) || !is_input) + return false; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location < VERT_ATTRIB_GENERIC0) + return false; + sem.location = VERT_ATTRIB_GENERIC0 + nir_intrinsic_base(intr); + nir_intrinsic_set_io_semantics(intr, sem); + return true; +} + +static bool +fix_vertex_input_locations(nir_shader *nir) +{ + if (nir->info.stage != MESA_SHADER_VERTEX) + return false; + + return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL); +} + +struct 
trivial_revectorize_state { + bool has_xfb; + uint32_t component_mask; + nir_intrinsic_instr *base; + nir_intrinsic_instr *next_emit_vertex; + nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS]; + struct set *deletions; +}; + +/* always skip xfb; scalarized xfb is preferred */ +static bool +intr_has_xfb(nir_intrinsic_instr *intr) +{ + if (!nir_intrinsic_has_io_xfb(intr)) + return false; + for (unsigned i = 0; i < 2; i++) { + if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) { + return true; + } + } + return false; +} + +/* helper to avoid vectorizing i/o for different vertices */ +static nir_intrinsic_instr * +find_next_emit_vertex(nir_intrinsic_instr *intr) +{ + bool found = false; + nir_foreach_instr_safe(instr, intr->instr.block) { + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr); + if (!found && test_intr != intr) + continue; + if (!found) { + assert(intr == test_intr); + found = true; + continue; + } + if (test_intr->intrinsic == nir_intrinsic_emit_vertex) + return test_intr; + } + } + return NULL; +} + +/* scan for vectorizable instrs on a given location */ +static bool +trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state) +{ + nir_intrinsic_instr *base = state->base; + + if (intr == base) + return false; + + if (intr->intrinsic != base->intrinsic) + return false; + + if (_mesa_set_search(state->deletions, intr)) + return false; + + bool is_load = false; + bool is_input = false; + bool is_interp = false; + filter_io_instr(intr, &is_load, &is_input, &is_interp); + + nir_io_semantics base_sem = nir_intrinsic_io_semantics(base); + nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr); + nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base); + nir_alu_type test_type = is_load ? 
nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr); + int c = nir_intrinsic_component(intr); + /* already detected */ + if (state->component_mask & BITFIELD_BIT(c)) + return false; + /* not a match */ + if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type) + return false; + /* only vectorize when all srcs match */ + for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) { + if (!nir_srcs_equal(intr->src[i], base->src[i])) + return false; + } + /* never match xfb */ + state->has_xfb |= intr_has_xfb(intr); + if (state->has_xfb) + return false; + if (nir->info.stage == MESA_SHADER_GEOMETRY) { + /* only match same vertex */ + if (state->next_emit_vertex != find_next_emit_vertex(intr)) + return false; + } + uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c); + state->component_mask |= mask; + u_foreach_bit(component, mask) + state->merge[component] = intr; + + return true; +} + +static bool +trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + bool is_load = false; + bool is_input = false; + bool is_interp = false; + if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) + return false; + if (intr->num_components != 1) + return false; + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) { + /* always ignore compact arrays */ + switch (sem.location) { + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_CULL_DIST1: + case VARYING_SLOT_TESS_LEVEL_INNER: + case VARYING_SLOT_TESS_LEVEL_OUTER: + return false; + default: break; + } + } + /* always ignore to-be-deleted instrs */ + if (_mesa_set_search(data, intr)) + return false; + + /* never vectorize xfb */ + if (intr_has_xfb(intr)) + return false; + + int ic = nir_intrinsic_component(intr); + 
uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic); + /* already vectorized */ + if (util_bitcount(mask) == 4) + return false; + struct trivial_revectorize_state state = { + .component_mask = mask, + .base = intr, + /* avoid clobbering i/o for different vertices */ + .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL, + .deletions = data, + }; + u_foreach_bit(bit, mask) + state.merge[bit] = intr; + bool progress = false; + nir_foreach_instr(instr, intr->instr.block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr); + /* no matching across vertex emission */ + if (test_intr->intrinsic == nir_intrinsic_emit_vertex) + break; + progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state); + } + if (!progress || state.has_xfb) + return false; + + /* verify nothing crazy happened */ + assert(state.component_mask); + for (unsigned i = 0; i < 4; i++) { + assert(!state.merge[i] || !intr_has_xfb(state.merge[i])); + } + + unsigned first_component = ffs(state.component_mask) - 1; + unsigned num_components = util_bitcount(state.component_mask); + unsigned num_contiguous = 0; + uint32_t contiguous_mask = 0; + for (unsigned i = 0; i < num_components; i++) { + unsigned c = i + first_component; + /* calc mask of contiguous components to vectorize */ + if (state.component_mask & BITFIELD_BIT(c)) { + num_contiguous++; + contiguous_mask |= BITFIELD_BIT(c); + } + /* on the first gap or the last component, vectorize */ + if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) { + if (num_contiguous > 1) { + /* reindex to enable easy src/dest index comparison */ + nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader)); + /* determine the first/last instr to use for the base (vectorized) load/store */ + unsigned first_c = ffs(contiguous_mask) - 1; + nir_intrinsic_instr 
*base = NULL; + unsigned test_idx = is_load ? UINT32_MAX : 0; + for (unsigned j = 0; j < num_contiguous; j++) { + unsigned merge_c = j + first_c; + nir_intrinsic_instr *merge_intr = state.merge[merge_c]; + /* avoid breaking ssa ordering by using: + * - first instr for vectorized load + * - last instr for vectorized store + * this guarantees all srcs have been seen + */ + if ((is_load && merge_intr->def.index < test_idx) || + (!is_load && merge_intr->src[0].ssa->index >= test_idx)) { + test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index; + base = merge_intr; + } + } + assert(base); + /* update instr components */ + nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c])); + unsigned orig_components = base->num_components; + base->num_components = num_contiguous; + /* do rewrites after loads and before stores */ + b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr); + if (is_load) { + base->def.num_components = num_contiguous; + /* iterate the contiguous loaded components and rewrite merged dests */ + for (unsigned j = 0; j < num_contiguous; j++) { + unsigned merge_c = j + first_c; + nir_intrinsic_instr *merge_intr = state.merge[merge_c]; + /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */ + unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components; + nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components)); + nir_def_rewrite_uses_after(&merge_intr->def, swiz, merge_intr == base ? 
swiz->parent_instr : &merge_intr->instr); + j += use_components - 1; + } + } else { + nir_def *comp[NIR_MAX_VEC_COMPONENTS]; + /* generate swizzled vec of store components and rewrite store src */ + for (unsigned j = 0; j < num_contiguous; j++) { + unsigned merge_c = j + first_c; + nir_intrinsic_instr *merge_intr = state.merge[merge_c]; + /* detect if the merged instr stored multiple components and extract them for rewrite */ + unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components; + for (unsigned k = 0; k < use_components; k++) + comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k); + j += use_components - 1; + } + nir_def *val = nir_vec(b, comp, num_contiguous); + nir_src_rewrite(&base->src[0], val); + nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous)); + } + /* deleting instructions during a foreach explodes the compiler, so delete later */ + for (unsigned j = 0; j < num_contiguous; j++) { + unsigned merge_c = j + first_c; + nir_intrinsic_instr *merge_intr = state.merge[merge_c]; + if (merge_intr != base) + _mesa_set_add(data, &merge_intr->instr); + } + } + contiguous_mask = 0; + num_contiguous = 0; + } + } + + return true; +} + +/* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */ +static bool +trivial_revectorize(nir_shader *nir) +{ + struct set deletions; + + if (nir->info.stage > MESA_SHADER_FRAGMENT) + return false; + + _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_dominance, &deletions); + /* now it's safe to delete */ + set_foreach_remove(&deletions, entry) { + nir_instr *instr = (void*)entry->key; + nir_instr_remove(instr); + } + ralloc_free(deletions.table); + return progress; +} + struct zink_shader * -zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, - const struct pipe_stream_output_info *so_info) +zink_shader_create(struct zink_screen 
*screen, struct nir_shader *nir) { - struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + struct zink_shader *ret = rzalloc(NULL, struct zink_shader); bool have_psiz = false; + ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX && + nir->info.outputs_written & VARYING_BIT_EDGE; + + ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model; + ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout; + if (screen->info.have_KHR_shader_float_controls) { + if (screen->info.props12.shaderDenormFlushToZeroFloat16) + ret->sinfo.float_controls.flush_denorms |= 0x1; + if (screen->info.props12.shaderDenormFlushToZeroFloat32) + ret->sinfo.float_controls.flush_denorms |= 0x2; + if (screen->info.props12.shaderDenormFlushToZeroFloat64) + ret->sinfo.float_controls.flush_denorms |= 0x4; + + if (screen->info.props12.shaderDenormPreserveFloat16) + ret->sinfo.float_controls.preserve_denorms |= 0x1; + if (screen->info.props12.shaderDenormPreserveFloat32) + ret->sinfo.float_controls.preserve_denorms |= 0x2; + if (screen->info.props12.shaderDenormPreserveFloat64) + ret->sinfo.float_controls.preserve_denorms |= 0x4; + + ret->sinfo.float_controls.denorms_all_independence = + screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL; + + ret->sinfo.float_controls.denorms_32_bit_independence = + ret->sinfo.float_controls.denorms_all_independence || + screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY; + } + ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]; + + util_queue_fence_init(&ret->precompile.fence); + util_dynarray_init(&ret->pipeline_libs, ret); ret->hash = _mesa_hash_pointer(ret); ret->programs = _mesa_pointer_set_create(NULL); simple_mtx_init(&ret->lock, mtx_plain); - nir_variable_mode indirect_derefs_modes = nir_var_function_temp; - if (nir->info.stage == MESA_SHADER_TESS_CTRL || - 
nir->info.stage == MESA_SHADER_TESS_EVAL) - indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out; + if (nir->info.stage == MESA_SHADER_KERNEL) { + nir_lower_mem_access_bit_sizes_options lower_mem_access_options = { + .modes = nir_var_all ^ nir_var_function_temp, + .may_lower_unaligned_stores_to_atomics = true, + .callback = mem_access_size_align_cb, + .cb_data = screen, + }; + NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options); + NIR_PASS_V(nir, alias_scratch_memory); + NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL); + NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs); + } - NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes, - UINT32_MAX); + NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL); + optimize_nir(nir, NULL, true); + nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) { + if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) { + NIR_PASS_V(nir, lower_bindless_io); + break; + } + } + if (nir->info.stage < MESA_SHADER_FRAGMENT) + nir_gather_xfb_info_from_intrinsics(nir); + NIR_PASS_V(nir, fix_vertex_input_locations); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + scan_nir(screen, nir, ret); + NIR_PASS_V(nir, nir_opt_vectorize, NULL, NULL); + NIR_PASS_V(nir, trivial_revectorize); + if (nir->info.io_lowered) { + rework_io_vars(nir, nir_var_shader_in, ret); + rework_io_vars(nir, nir_var_shader_out, ret); + nir_sort_variables_by_location(nir, nir_var_shader_in); + nir_sort_variables_by_location(nir, nir_var_shader_out); + } - if (nir->info.stage == MESA_SHADER_VERTEX) - create_vs_pushconst(nir); - else if (nir->info.stage == MESA_SHADER_TESS_CTRL || + if (nir->info.stage < MESA_SHADER_COMPUTE) + create_gfx_pushconst(nir); + + if (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); - else if 
(nir->info.stage == MESA_SHADER_KERNEL) - create_cs_pushconst(nir); if (nir->info.stage < MESA_SHADER_FRAGMENT) have_psiz = check_psiz(nir); + if (nir->info.stage == MESA_SHADER_FRAGMENT) + ret->flat_flags = zink_flat_flags(nir); + + if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader) + NIR_PASS_V(nir, fixup_io_locations); + NIR_PASS_V(nir, lower_basevertex); - NIR_PASS_V(nir, lower_work_dim); - NIR_PASS_V(nir, nir_lower_regs_to_ssa); NIR_PASS_V(nir, lower_baseinstance); + NIR_PASS_V(nir, split_bitfields); + if (!screen->info.feats.features.shaderStorageImageMultisample) + NIR_PASS_V(nir, strip_tex_ms); + NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */ + + if (screen->info.have_EXT_shader_demote_to_helper_invocation) { + NIR_PASS_V(nir, nir_lower_discard_or_demote, true); + } + + if (screen->need_2D_zs) + NIR_PASS_V(nir, lower_1d_shadow, screen); { nir_lower_subgroups_options subgroup_options = {0}; @@ -1116,25 +6198,57 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, subgroup_options.ballot_bit_size = 32; subgroup_options.ballot_components = 4; subgroup_options.lower_subgroup_masks = true; + if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) { + subgroup_options.subgroup_size = 1; + subgroup_options.lower_vote_trivial = true; + } + subgroup_options.lower_inverse_ballot = true; NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options); } - optimize_nir(nir); + optimize_nir(nir, NULL, true); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); - NIR_PASS_V(nir, lower_discard_if); - NIR_PASS_V(nir, nir_lower_fragcolor, - nir->info.fs.color_is_dual_source ? 
1 : 8); - NIR_PASS_V(nir, lower_64bit_vertex_attribs); - NIR_PASS_V(nir, unbreak_bos); + NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf | + nir_lower_demote_if_to_cf | + nir_lower_terminate_if_to_cf)); + + bool needs_size = analyze_io(ret, nir); + NIR_PASS_V(nir, unbreak_bos, ret, needs_size); + /* run in compile if there could be inlined uniforms */ + if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) { + NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL); + NIR_PASS_V(nir, rewrite_bo_access, screen); + NIR_PASS_V(nir, remove_bo_access, ret); + } - if (zink_debug & ZINK_DEBUG_NIR) { - fprintf(stderr, "NIR shader:\n---8<---\n"); - nir_print_shader(nir, stderr); - fprintf(stderr, "---8<---\n"); + struct zink_bindless_info bindless = {0}; + bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]; + nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) + var->data.is_xfb = false; + + optimize_nir(nir, NULL, true); + prune_io(nir); + + unsigned sampler_mask = 0; + if (nir->info.stage == MESA_SHADER_KERNEL) { + NIR_PASS_V(nir, type_images, &sampler_mask); + enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW; + VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER; + u_foreach_bit(s, sampler_mask) { + ret->bindings[ztype][ret->num_bindings[ztype]].index = s; + ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors); + ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; + ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; + ret->num_bindings[ztype]++; + } + ret->sinfo.sampler_mask = sampler_mask; } - foreach_list_typed_reverse(nir_variable, var, node, &nir->variables) { + unsigned ubo_binding_mask = 0; + unsigned ssbo_binding_mask = 0; + foreach_list_typed_reverse_safe(nir_variable, var, node, 
&nir->variables) { if (_nir_shader_variable_has_mode(var, nir_var_uniform | + nir_var_image | nir_var_mem_ubo | nir_var_mem_ssbo)) { enum zink_descriptor_type ztype; @@ -1143,42 +6257,56 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, ztype = ZINK_DESCRIPTOR_TYPE_UBO; /* buffer 0 is a push descriptor */ var->data.descriptor_set = !!var->data.driver_location; - var->data.binding = !var->data.driver_location ? nir->info.stage : + var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) : zink_binding(nir->info.stage, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - var->data.driver_location); + var->data.driver_location, + screen->compact_descriptors); assert(var->data.driver_location || var->data.binding < 10); VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; int binding = var->data.binding; - ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; - ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding; - ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; - ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; - ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index); - ret->num_bindings[ztype]++; + if (!var->data.driver_location) { + ret->has_uniforms = true; + } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) { + ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; + ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding; + ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; + ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); + assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); + ret->num_bindings[ztype]++; + ubo_binding_mask |= BITFIELD_BIT(binding); + } } else if (var->data.mode == nir_var_mem_ssbo) { ztype = ZINK_DESCRIPTOR_TYPE_SSBO; - var->data.descriptor_set = ztype + 1; - 
var->data.binding = zink_binding(nir->info.stage, + var->data.descriptor_set = screen->desc_set_id[ztype]; + var->data.binding = zink_binding(clamp_stage(&nir->info), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - var->data.driver_location); - ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; - ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index); - ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; - ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; - ret->num_bindings[ztype]++; + var->data.driver_location, + screen->compact_descriptors); + if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) { + ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; + ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; + ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); + assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); + ret->num_bindings[ztype]++; + ssbo_binding_mask |= BITFIELD_BIT(var->data.binding); + } } else { - assert(var->data.mode == nir_var_uniform); - if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) { + assert(var->data.mode == nir_var_uniform || + var->data.mode == nir_var_image); + if (var->data.bindless) { + ret->bindless = true; + handle_bindless_var(nir, var, type, &bindless); + } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) { VkDescriptorType vktype = glsl_type_is_image(type) ? 
zink_image_type(type) : zink_sampler_type(type); - if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) - ret->num_texel_buffers++; + if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; ztype = zink_desc_type_from_vktype(vktype); var->data.driver_location = var->data.binding; - var->data.descriptor_set = ztype + 1; - var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location); + var->data.descriptor_set = screen->desc_set_id[ztype]; + var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors); ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; @@ -1187,14 +6315,33 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, else ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; ret->num_bindings[ztype]++; + } else if (var->data.mode == nir_var_uniform) { + /* this is a dead uniform */ + var->data.mode = 0; + exec_node_remove(&var->node); } } } } - - ret->nir = nir; - if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings) - update_so_info(ret, so_info, nir->info.outputs_written, have_psiz); + bool bindless_lowered = false; + NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless); + ret->bindless |= bindless_lowered; + + if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64) + NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64); + if (nir->info.stage != MESA_SHADER_KERNEL) + NIR_PASS_V(nir, match_tex_dests, ret); + + if (!nir->info.internal) + nir_foreach_shader_out_variable(var, nir) + var->data.explicit_xfb_buffer = 0; + if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written) + 
update_so_info(ret, nir, nir->info.outputs_written, have_psiz); + zink_shader_serialize_blob(nir, &ret->blob); + memcpy(&ret->info, &nir->info, sizeof(nir->info)); + ret->info.name = ralloc_strdup(ret, nir->info.name); + + ret->can_inline = true; return ret; } @@ -1205,18 +6352,24 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr) struct zink_screen *screen = zink_screen(pscreen); nir_shader *nir = nirptr; - if (!screen->info.feats.features.shaderImageGatherExtended) { - nir_lower_tex_options tex_opts = {0}; + nir_lower_tex_options tex_opts = { + .lower_invalid_implicit_lod = true, + }; + /* + Sampled Image must be an object whose type is OpTypeSampledImage. + The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D, + or Rect, and the Arrayed and MS operands must be 0. + - SPIRV, OpImageSampleProj* opcodes + */ + tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | + BITFIELD_BIT(GLSL_SAMPLER_DIM_MS); + tex_opts.lower_txp_array = true; + if (!screen->info.feats.features.shaderImageGatherExtended) tex_opts.lower_tg4_offsets = true; - NIR_PASS_V(nir, nir_lower_tex, &tex_opts); - } - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false); - if (nir->info.stage == MESA_SHADER_GEOMETRY) - NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream); - optimize_nir(nir); - if (nir->info.num_ubos || nir->info.num_ssbos) - NIR_PASS_V(nir, nir_lower_dynamic_bo_access); - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + NIR_PASS_V(nir, nir_lower_tex, &tex_opts); + optimize_nir(nir, NULL, false); + if (nir->info.stage == MESA_SHADER_VERTEX) + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); if (screen->driconf.inline_uniforms) nir_find_inlinable_uniforms(nir); @@ -1224,39 +6377,133 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr) } void -zink_shader_free(struct zink_context *ctx, struct zink_shader *shader) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - 
set_foreach(shader->programs, entry) { - if (shader->nir->info.stage == MESA_SHADER_COMPUTE) { - struct zink_compute_program *comp = (void*)entry->key; - if (!comp->base.removed) { - _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader); - comp->base.removed = true; +zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) +{ + _mesa_set_destroy(shader->programs, NULL); + util_queue_fence_wait(&shader->precompile.fence); + util_queue_fence_destroy(&shader->precompile.fence); + zink_descriptor_shader_deinit(screen, shader); + if (screen->info.have_EXT_shader_object) { + VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL); + } else { + if (shader->precompile.obj.mod) + VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL); + if (shader->precompile.gpl) + VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL); + } + blob_finish(&shader->blob); + ralloc_free(shader->spirv); + free(shader->precompile.bindings); + ralloc_free(shader); +} + +static bool +gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader) +{ + /* this shader may still be precompiling, so access here must be locked and singular */ + simple_mtx_lock(&shader->lock); + struct set_entry *entry = _mesa_set_next_entry(shader->programs, NULL); + struct zink_gfx_program *prog = (void*)(entry ? 
entry->key : NULL); + if (entry) + _mesa_set_remove(shader->programs, entry); + simple_mtx_unlock(&shader->lock); + if (!prog) + return false; + gl_shader_stage stage = shader->info.stage; + assert(stage < ZINK_GFX_SHADER_COUNT); + unsigned stages_present = prog->stages_present; + if (prog->shaders[MESA_SHADER_TESS_CTRL] && + prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) + stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + unsigned idx = zink_program_cache_stages(stages_present); + if (!prog->base.removed && prog->stages_present == prog->stages_remaining && + (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) { + struct hash_table *ht = &prog->base.ctx->program_cache[idx]; + simple_mtx_lock(&prog->base.ctx->program_lock[idx]); + struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders); + assert(he && he->data == prog); + _mesa_hash_table_remove(ht, he); + prog->base.removed = true; + simple_mtx_unlock(&prog->base.ctx->program_lock[idx]); + util_queue_fence_wait(&prog->base.cache_fence); + + for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + hash_table_foreach(&prog->pipelines[r][i], table_entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data; + + util_queue_fence_wait(&pc_entry->fence); + } } - comp->shader = NULL; - zink_compute_program_reference(screen, &comp, NULL); - } else { - struct zink_gfx_program *prog = (void*)entry->key; - enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage); - assert(pstage < ZINK_SHADER_COUNT); - if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) { - _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders); - prog->base.removed = true; + } + } + if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) { + prog->shaders[stage] = NULL; + prog->stages_remaining &= 
~BITFIELD_BIT(stage); + } + /* only remove generated tcs during parent tes destruction */ + if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs) + prog->shaders[MESA_SHADER_TESS_CTRL] = NULL; + if (stage != MESA_SHADER_FRAGMENT && + prog->shaders[MESA_SHADER_GEOMETRY] && + prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent == + shader) { + prog->shaders[MESA_SHADER_GEOMETRY] = NULL; + } + zink_gfx_program_reference(screen, &prog, NULL); + return true; +} + +void +zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader) +{ + assert(shader->info.stage != MESA_SHADER_COMPUTE); + util_queue_fence_wait(&shader->precompile.fence); + + /* if the shader is still precompiling, the program set must be pruned under lock */ + while (gfx_shader_prune(screen, shader)); + + while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) { + struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*); + if (!libs->removed) { + libs->removed = true; + unsigned idx = zink_program_cache_stages(libs->stages_present); + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + _mesa_set_remove_key(&screen->pipeline_libs[idx], libs); + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + } + zink_gfx_lib_cache_unref(screen, libs); + } + if (shader->info.stage == MESA_SHADER_TESS_EVAL && + shader->non_fs.generated_tcs) { + /* automatically destroy generated tcs shaders when tes is destroyed */ + zink_gfx_shader_free(screen, shader->non_fs.generated_tcs); + shader->non_fs.generated_tcs = NULL; + } + if (shader->info.stage != MESA_SHADER_FRAGMENT) { + for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) { + for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) { + if (shader->non_fs.generated_gs[i][j]) { + /* automatically destroy generated gs shaders when owner is destroyed */ + zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]); + 
shader->non_fs.generated_gs[i][j] = NULL; + } } - prog->shaders[pstage] = NULL; - if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) - /* automatically destroy generated tcs shaders when tes is destroyed */ - zink_shader_free(ctx, shader->generated); - zink_gfx_program_reference(screen, &prog, NULL); } } - _mesa_set_destroy(shader->programs, NULL); - ralloc_free(shader->nir); - FREE(shader); + zink_shader_free(screen, shader); } +struct zink_shader_object +zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg) +{ + assert(zs->info.stage == MESA_SHADER_TESS_CTRL); + /* shortcut all the nir passes since we just have to change this one word */ + zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices; + return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg); +} + /* creating a passthrough tcs shader that's roughly: #version 150 @@ -1279,9 +6526,10 @@ void main() */ struct zink_shader * -zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch) +zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret) { - struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + struct zink_shader *ret = rzalloc(NULL, struct zink_shader); + util_queue_fence_init(&ret->precompile.fence); ret->hash = _mesa_hash_pointer(ret); ret->programs = _mesa_pointer_set_create(NULL); simple_mtx_init(&ret->lock, mtx_plain); @@ -1291,20 +6539,22 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig fn->is_entrypoint = true; nir_function_impl *impl = nir_function_impl_create(fn); - nir_builder b; - nir_builder_init(&b, impl); - b.cursor = nir_before_block(nir_start_block(impl)); + nir_builder b = nir_builder_at(nir_before_impl(impl)); - nir_ssa_def *invocation_id = nir_load_invocation_id(&b); + nir_def *invocation_id = 
nir_load_invocation_id(&b); - nir_foreach_shader_out_variable(var, vs->nir) { - const struct glsl_type *type = var->type; + nir_foreach_shader_in_variable(var, tes) { + if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) + continue; const struct glsl_type *in_type = var->type; const struct glsl_type *out_type = var->type; char buf[1024]; snprintf(buf, sizeof(buf), "%s_out", var->name); - in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0); - out_type = glsl_array_type(type, vertices_per_patch, 0); + if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) { + const struct glsl_type *type = var->type; + in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0); + out_type = glsl_array_type(type, vertices_per_patch, 0); + } nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name); nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf); @@ -1318,15 +6568,10 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig implementation-dependent maximum patch size (gl_MaxPatchVertices). 
- ARB_tessellation_shader */ - for (unsigned i = 0; i < vertices_per_patch; i++) { - /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */ - nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i))); - nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id); - nir_ssa_def *load = nir_load_deref(&b, in_array_var); - nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i); - nir_store_deref(&b, out_array_var, load, 0xff); - nir_pop_if(&b, start_block); - } + /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */ + nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id); + nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id); + copy_vars(&b, out_value, in_value); } nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner"); gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER; @@ -1335,24 +6580,12 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER; gl_TessLevelOuter->data.patch = 1; - /* hacks so we can size these right for now */ - struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3); - /* just use a single blob for padding here because it's easier */ - fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0); - fields[0].name = ralloc_asprintf(nir, "padding"); - fields[0].offset = 0; - fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0); - fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner"); - fields[1].offset = offsetof(struct zink_gfx_push_constant, 
default_inner_level); - fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0); - fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter"); - fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level); - nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const, - glsl_struct_type(fields, 3, "struct", false), "pushconst"); - pushconst->data.location = VARYING_SLOT_VAR0; - - nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8); - nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16); + create_gfx_pushconst(nir); + + nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32, + nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL)); + nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32, + nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL)); for (unsigned i = 0; i < 2; i++) { nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i); @@ -1366,13 +6599,58 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig nir->info.tess.tcs_vertices_out = vertices_per_patch; nir_validate_shader(nir, "created"); - NIR_PASS_V(nir, nir_lower_regs_to_ssa); - optimize_nir(nir); + optimize_nir(nir, NULL, true); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); - NIR_PASS_V(nir, lower_discard_if); NIR_PASS_V(nir, nir_convert_from_ssa, true); - ret->nir = nir; - ret->is_generated = true; + *nir_ret = nir; + zink_shader_serialize_blob(nir, &ret->blob); + memcpy(&ret->info, &nir->info, sizeof(nir->info)); + ret->non_fs.is_generated = true; return ret; } + +bool +zink_shader_has_cubes(nir_shader *nir) +{ + nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { + const struct glsl_type *type = glsl_without_array(var->type); + if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE) + return true; + } + 
return false; +} + +nir_shader * +zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob) +{ + struct blob_reader blob_reader; + blob_reader_init(&blob_reader, blob->data, blob->size); + return nir_deserialize(NULL, &screen->nir_options, &blob_reader); +} + +nir_shader * +zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs) +{ + return zink_shader_blob_deserialize(screen, &zs->blob); +} + +void +zink_shader_serialize_blob(nir_shader *nir, struct blob *blob) +{ + blob_init(blob); +#ifndef NDEBUG + bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI)); +#else + bool strip = false; +#endif + nir_serialize(blob, nir, strip); +} + +void +zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp) +{ + nir_shader *nir = zink_shader_deserialize(screen, zs); + nir_print_shader(nir, fp); + ralloc_free(nir); +} diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index 270bf12c54b..e901ee45f7b 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -24,95 +24,72 @@ #ifndef ZINK_COMPILER_H #define ZINK_COMPILER_H -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "compiler/nir/nir.h" -#include "compiler/shader_info.h" -#include "util/u_live_shader_cache.h" - -#include <vulkan/vulkan.h> -#include "zink_descriptors.h" +#include "zink_types.h" #define ZINK_WORKGROUP_SIZE_X 1 #define ZINK_WORKGROUP_SIZE_Y 2 #define ZINK_WORKGROUP_SIZE_Z 3 +#define ZINK_VARIABLE_SHARED_MEM 4 +#define ZINK_INLINE_VAL_FLAT_MASK 0 +#define ZINK_INLINE_VAL_PV_LAST_VERT 2 -struct pipe_screen; -struct zink_context; -struct zink_screen; -struct zink_shader_key; -struct zink_shader_module; -struct zink_gfx_program; - -struct nir_shader_compiler_options; -struct nir_shader; +/* stop inlining shaders if they have >limit ssa vals after inlining: + * recompile time isn't worth the inline + */ +#define 
ZINK_ALWAYS_INLINE_LIMIT 1500 -struct set; +struct zink_shader_key; +struct spirv_shader; struct tgsi_token; -struct zink_so_info { - struct pipe_stream_output_info so_info; - unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS]; - bool have_xfb; -}; +static inline gl_shader_stage +clamp_stage(const shader_info *info) +{ + return info->stage == MESA_SHADER_KERNEL ? MESA_SHADER_COMPUTE : info->stage; +} const void * zink_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir, - enum pipe_shader_type shader); + gl_shader_stage shader); struct nir_shader * zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens); -struct zink_shader { - struct util_live_shader base; - uint32_t hash; - struct nir_shader *nir; - - struct zink_so_info streamout; - - struct { - int index; - int binding; - VkDescriptorType type; - unsigned char size; - } bindings[ZINK_DESCRIPTOR_TYPES][ZINK_MAX_DESCRIPTORS_PER_TYPE]; - size_t num_bindings[ZINK_DESCRIPTOR_TYPES]; - unsigned num_texel_buffers; - uint32_t ubos_used; // bitfield of which ubo indices are used - uint32_t ssbos_used; // bitfield of which ssbo indices are used - - simple_mtx_t lock; - struct set *programs; - - union { - struct zink_shader *generated; // a generated shader that this shader "owns" - bool is_generated; // if this is a driver-created shader (e.g., tcs) - nir_variable *fbfetch; //for fs output - }; -}; +nir_shader* +zink_create_quads_emulation_gs(const nir_shader_compiler_options *options, + const nir_shader *prev_stage); + +bool +zink_lower_system_values_to_inlined_uniforms(nir_shader *nir); void zink_screen_init_compiler(struct zink_screen *screen); void -zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer); -VkShaderModule -zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key); - +zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer); +/* pass very large shader 
key data with extra_data */ +struct zink_shader_object +zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg); +struct zink_shader_object +zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs); struct zink_shader * -zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, - const struct pipe_stream_output_info *so_info); +zink_shader_create(struct zink_screen *screen, struct nir_shader *nir); char * zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr); void -zink_shader_free(struct zink_context *ctx, struct zink_shader *shader); +zink_shader_free(struct zink_screen *screen, struct zink_shader *shader); +void +zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader); +struct zink_shader_object +zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg); +struct zink_shader_object +zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg); struct zink_shader * -zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch); +zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret); static inline bool zink_shader_descriptor_is_buffer(struct zink_shader *zs, enum zink_descriptor_type type, unsigned i) @@ -121,4 +98,14 @@ zink_shader_descriptor_is_buffer(struct zink_shader *zs, enum zink_descriptor_ty zs->bindings[type][i].type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; } +bool +zink_shader_has_cubes(nir_shader *nir); +nir_shader * +zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob); +nir_shader * +zink_shader_deserialize(struct zink_screen *screen, struct zink_shader 
*zs); +void +zink_shader_serialize_blob(nir_shader *nir, struct blob *blob); +void +zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp); #endif diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 1d98bcbed86..77b60828366 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -21,34 +21,45 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "zink_clear.h" #include "zink_context.h" - -#include "zink_batch.h" -#include "zink_compiler.h" +#include "zink_descriptors.h" #include "zink_fence.h" #include "zink_format.h" #include "zink_framebuffer.h" #include "zink_helpers.h" -#include "zink_program.h" +#include "zink_inlines.h" +#include "zink_kopper.h" #include "zink_pipeline.h" +#include "zink_program.h" #include "zink_query.h" #include "zink_render_pass.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_state.h" #include "zink_surface.h" -#include "zink_inlines.h" + +#include "nir/pipe_nir.h" #include "util/u_blitter.h" #include "util/u_debug.h" #include "util/format_srgb.h" #include "util/format/u_format.h" #include "util/u_helpers.h" #include "util/u_inlines.h" +#include "util/u_sample_positions.h" +#include "util/u_string.h" #include "util/u_thread.h" +#include "util/perf/u_trace.h" #include "util/u_cpu_detect.h" +#include "util/thread_sched.h" #include "util/strndup.h" #include "nir.h" +#include "nir_builder.h" + +#include "vk_format.h" + +#include "driver_trace/tr_context.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -57,11 +68,27 @@ #include "util/xxhash.h" static void -calc_descriptor_hash_sampler_state(struct zink_sampler_state *sampler_state) +update_tc_info(struct zink_context *ctx) { - void *hash_data = &sampler_state->sampler; - size_t data_size = sizeof(VkSampler); - sampler_state->hash = XXH32(hash_data, data_size, 0); + if (ctx->track_renderpasses) { + const struct tc_renderpass_info *info = 
threaded_context_get_renderpass_info(ctx->tc); + ctx->rp_changed |= ctx->dynamic_fb.tc_info.data != info->data; + ctx->dynamic_fb.tc_info.data = info->data; + } else { + struct tc_renderpass_info info = ctx->dynamic_fb.tc_info; + bool zsbuf_used = !ctx->zsbuf_unused; + bool zsbuf_write = zink_is_zsbuf_write(ctx); + ctx->dynamic_fb.tc_info.data32[0] = 0; + if (ctx->clears_enabled & PIPE_CLEAR_DEPTHSTENCIL) + ctx->dynamic_fb.tc_info.zsbuf_clear_partial = true; + if (ctx->rp_clears_enabled & PIPE_CLEAR_DEPTHSTENCIL) + ctx->dynamic_fb.tc_info.zsbuf_clear = true; + if (ctx->dynamic_fb.tc_info.zsbuf_clear != info.zsbuf_clear) + ctx->rp_loadop_changed = true; + if (zink_is_zsbuf_write(ctx) != zsbuf_write) + ctx->rp_layout_changed = true; + ctx->rp_changed |= zink_is_zsbuf_used(ctx) != zsbuf_used; + } } void @@ -73,8 +100,18 @@ debug_describe_zink_buffer_view(char *buf, const struct zink_buffer_view *ptr) ALWAYS_INLINE static void check_resource_for_batch_ref(struct zink_context *ctx, struct zink_resource *res) { - if (!zink_resource_has_binds(res)) - zink_batch_reference_resource(&ctx->batch, res); + if (!zink_resource_has_binds(res)) { + /* avoid desync between usage and tracking: + * - if usage exists, it must be removed before the context is destroyed + * - having usage does not imply having tracking + * - if tracking will be added here, also reapply usage to avoid dangling usage once tracking is removed + * TODO: somehow fix this for perf because it's an extra hash lookup + */ + if (!res->obj->dt && zink_resource_has_usage(res)) + zink_batch_reference_resource_rw(&ctx->batch, res, !!res->obj->bo->writes.u); + else + zink_batch_reference_resource(&ctx->batch, res); + } } static void @@ -83,10 +120,32 @@ zink_context_destroy(struct pipe_context *pctx) struct zink_context *ctx = zink_context(pctx); struct zink_screen *screen = zink_screen(pctx->screen); - if (screen->queue && !screen->device_lost && VKSCR(QueueWaitIdle)(screen->queue) != VK_SUCCESS) - 
debug_printf("vkQueueWaitIdle failed\n"); + struct pipe_framebuffer_state fb = {0}; + pctx->set_framebuffer_state(pctx, &fb); + + if (util_queue_is_initialized(&screen->flush_queue)) + util_queue_finish(&screen->flush_queue); + if (ctx->batch.state && !screen->device_lost) { + simple_mtx_lock(&screen->queue_lock); + VkResult result = VKSCR(QueueWaitIdle)(screen->queue); + simple_mtx_unlock(&screen->queue_lock); + + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(result)); + } + + for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_cache); i++) { + simple_mtx_lock((&ctx->program_lock[i])); + hash_table_foreach(&ctx->program_cache[i], entry) { + struct zink_program *pg = entry->data; + zink_program_finish(ctx, pg); + pg->removed = true; + } + simple_mtx_unlock((&ctx->program_lock[i])); + } - util_blitter_destroy(ctx->blitter); + if (ctx->blitter) + util_blitter_destroy(ctx->blitter); for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) pipe_surface_release(&ctx->base, &ctx->fb_state.cbufs[i]); pipe_surface_release(&ctx->base, &ctx->fb_state.zsbuf); @@ -98,47 +157,124 @@ zink_context_destroy(struct pipe_context *pctx) pipe_surface_release(&ctx->base, &ctx->dummy_surface[i]); zink_buffer_view_reference(screen, &ctx->dummy_bufferview, NULL); - simple_mtx_destroy(&ctx->batch_mtx); - zink_clear_batch_state(ctx, ctx->batch.state); - zink_batch_state_destroy(screen, ctx->batch.state); - hash_table_foreach(&ctx->batch_states, entry) { - struct zink_batch_state *bs = entry->data; + zink_descriptors_deinit_bindless(ctx); + + struct zink_batch_state *bs = ctx->batch_states; + while (bs) { + struct zink_batch_state *bs_next = bs->next; zink_clear_batch_state(ctx, bs); - zink_batch_state_destroy(screen, bs); + /* restore link as we insert them into the screens free_batch_states + * list below + */ + bs->next = bs_next; + bs = bs_next; } - util_dynarray_foreach(&ctx->free_batch_states, struct zink_batch_state*, bs) { - 
zink_clear_batch_state(ctx, *bs); - zink_batch_state_destroy(screen, *bs); + bs = ctx->free_batch_states; + while (bs) { + struct zink_batch_state *bs_next = bs->next; + zink_clear_batch_state(ctx, bs); + bs->ctx = NULL; + /* restore link as we insert them into the screens free_batch_states + * list below + */ + bs->next = bs_next; + bs = bs_next; } - - if (screen->info.have_KHR_imageless_framebuffer) { - hash_table_foreach(&ctx->framebuffer_cache, he) - zink_destroy_framebuffer(screen, he->data); - } else if (ctx->framebuffer) { - simple_mtx_lock(&screen->framebuffer_mtx); - struct hash_entry *entry = _mesa_hash_table_search(&screen->framebuffer_cache, &ctx->framebuffer->state); - if (zink_framebuffer_reference(screen, &ctx->framebuffer, NULL)) - _mesa_hash_table_remove(&screen->framebuffer_cache, entry); - simple_mtx_unlock(&screen->framebuffer_mtx); + simple_mtx_lock(&screen->free_batch_states_lock); + if (ctx->batch_states) { + if (screen->free_batch_states) + screen->last_free_batch_state->next = ctx->batch_states; + else { + screen->free_batch_states = ctx->batch_states; + screen->last_free_batch_state = screen->free_batch_states; + } + } + while (screen->last_free_batch_state && screen->last_free_batch_state->next) + screen->last_free_batch_state = screen->last_free_batch_state->next; + if (ctx->free_batch_states) { + if (screen->free_batch_states) + screen->last_free_batch_state->next = ctx->free_batch_states; + else { + screen->free_batch_states = ctx->free_batch_states; + screen->last_free_batch_state = ctx->last_free_batch_state; + } + } + while (screen->last_free_batch_state && screen->last_free_batch_state->next) + screen->last_free_batch_state = screen->last_free_batch_state->next; + if (ctx->batch.state) { + zink_clear_batch_state(ctx, ctx->batch.state); + if (screen->free_batch_states) + screen->last_free_batch_state->next = ctx->batch.state; + else { + screen->free_batch_states = ctx->batch.state; + screen->last_free_batch_state = 
screen->free_batch_states; + } + } + while (screen->last_free_batch_state && screen->last_free_batch_state->next) + screen->last_free_batch_state = screen->last_free_batch_state->next; + simple_mtx_unlock(&screen->free_batch_states_lock); + + for (unsigned i = 0; i < 2; i++) { + util_idalloc_fini(&ctx->di.bindless[i].tex_slots); + util_idalloc_fini(&ctx->di.bindless[i].img_slots); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + free(ctx->di.bindless[i].db.buffer_infos); + else + free(ctx->di.bindless[i].t.buffer_infos); + free(ctx->di.bindless[i].img_infos); + util_dynarray_fini(&ctx->di.bindless[i].updates); + util_dynarray_fini(&ctx->di.bindless[i].resident); } + if (ctx->null_fs) + pctx->delete_fs_state(pctx, ctx->null_fs); + + hash_table_foreach(&ctx->framebuffer_cache, he) + zink_destroy_framebuffer(screen, he->data); + hash_table_foreach(ctx->render_pass_cache, he) zink_destroy_render_pass(screen, he->data); + zink_context_destroy_query_pools(ctx); + set_foreach(&ctx->gfx_inputs, he) { + struct zink_gfx_input_key *ikey = (void*)he->key; + VKSCR(DestroyPipeline)(screen->dev, ikey->pipeline, NULL); + } + set_foreach(&ctx->gfx_outputs, he) { + struct zink_gfx_output_key *okey = (void*)he->key; + VKSCR(DestroyPipeline)(screen->dev, okey->pipeline, NULL); + } u_upload_destroy(pctx->stream_uploader); u_upload_destroy(pctx->const_uploader); slab_destroy_child(&ctx->transfer_pool); for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_cache); i++) _mesa_hash_table_clear(&ctx->program_cache[i], NULL); - _mesa_hash_table_clear(&ctx->compute_program_cache, NULL); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++) + simple_mtx_destroy(&ctx->program_lock[i]); _mesa_hash_table_destroy(ctx->render_pass_cache, NULL); slab_destroy_child(&ctx->transfer_pool_unsync); - screen->descriptors_deinit(ctx); + if (zink_debug & ZINK_DEBUG_DGC) { + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.upload); i++) + u_upload_destroy(ctx->dgc.upload[i]); + for (unsigned i = 0; i 
< ARRAY_SIZE(ctx->dgc.buffers); i++) { + if (!ctx->dgc.buffers[i]) + continue; + struct pipe_resource *pres = &ctx->dgc.buffers[i]->base.b; + pipe_resource_reference(&pres, NULL); + } + util_dynarray_fini(&ctx->dgc.pipelines); + } + + zink_descriptors_deinit(ctx); - zink_descriptor_layouts_deinit(ctx); + if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY)) + p_atomic_dec(&screen->base.num_contexts); - p_atomic_dec(&screen->base.num_contexts); + util_dynarray_foreach(&ctx->di.global_bindings, struct pipe_resource *, res) { + pipe_resource_reference(res, NULL); + } + util_dynarray_fini(&ctx->di.global_bindings); ralloc_free(ctx); } @@ -180,11 +316,20 @@ zink_set_device_reset_callback(struct pipe_context *pctx, const struct pipe_device_reset_callback *cb) { struct zink_context *ctx = zink_context(pctx); + bool had_reset = !!ctx->reset.reset; if (cb) ctx->reset = *cb; else memset(&ctx->reset, 0, sizeof(ctx->reset)); + + bool have_reset = !!ctx->reset.reset; + if (had_reset != have_reset) { + if (have_reset) + p_atomic_inc(&zink_screen(pctx->screen)->robust_ctx_count); + else + p_atomic_dec(&zink_screen(pctx->screen)->robust_ctx_count); + } } static void @@ -192,18 +337,30 @@ zink_set_context_param(struct pipe_context *pctx, enum pipe_context_param param, unsigned value) { struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(ctx->base.screen); switch (param) { - case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE: - util_set_thread_affinity(zink_screen(ctx->base.screen)->flush_queue.threads[0], - util_get_cpu_caps()->L3_affinity_mask[value], - NULL, util_get_cpu_caps()->num_cpu_mask_bits); + case PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING: + if (screen->threaded_submit) + util_thread_sched_apply_policy(screen->flush_queue.threads[0], + UTIL_THREAD_DRIVER_SUBMIT, value, NULL); break; default: break; } } +static void +zink_set_debug_callback(struct pipe_context *pctx, const struct util_debug_callback *cb) +{ + struct zink_context *ctx = 
zink_context(pctx); + + if (cb) + ctx->dbg = *cb; + else + memset(&ctx->dbg, 0, sizeof(ctx->dbg)); +} + static VkSamplerMipmapMode sampler_mipmap_mode(enum pipe_tex_mipfilter filter) { @@ -231,6 +388,18 @@ sampler_address_mode(enum pipe_tex_wrap filter) unreachable("unexpected wrap"); } +/* unnormalizedCoordinates only support CLAMP_TO_EDGE or CLAMP_TO_BORDER */ +static VkSamplerAddressMode +sampler_address_mode_unnormalized(enum pipe_tex_wrap filter) +{ + switch (filter) { + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + default: break; + } + unreachable("unexpected wrap"); +} + static VkCompareOp compare_op(enum pipe_compare_func op) { @@ -281,13 +450,24 @@ zink_create_sampler_state(struct pipe_context *pctx, const struct pipe_sampler_state *state) { struct zink_screen *screen = zink_screen(pctx->screen); + ASSERTED struct zink_context *zink = zink_context(pctx); bool need_custom = false; - + bool need_clamped_border_color = false; VkSamplerCreateInfo sci = {0}; VkSamplerCustomBorderColorCreateInfoEXT cbci = {0}; + VkSamplerCustomBorderColorCreateInfoEXT cbci_clamped = {0}; sci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + if (screen->info.have_EXT_non_seamless_cube_map && !state->seamless_cube_map) + sci.flags |= VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT; + if (state->unnormalized_coords) { + assert(zink->flags & PIPE_CONTEXT_COMPUTE_ONLY); + sci.unnormalizedCoordinates = state->unnormalized_coords; + } sci.magFilter = zink_filter(state->mag_img_filter); - sci.minFilter = zink_filter(state->min_img_filter); + if (sci.unnormalizedCoordinates) + sci.minFilter = sci.magFilter; + else + sci.minFilter = zink_filter(state->min_img_filter); VkSamplerReductionModeCreateInfo rci; rci.sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO; @@ -306,20 +486,31 @@ zink_create_sampler_state(struct pipe_context *pctx, if 
(state->reduction_mode) sci.pNext = &rci; - if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + if (sci.unnormalizedCoordinates) { + sci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + } else if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { sci.mipmapMode = sampler_mipmap_mode(state->min_mip_filter); sci.minLod = state->min_lod; - sci.maxLod = state->max_lod; + sci.maxLod = MAX2(state->max_lod, state->min_lod); } else { sci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sci.minLod = 0; - sci.maxLod = 0.25f; + sci.minLod = CLAMP(state->min_lod, 0.0f, 0.25f); + sci.maxLod = CLAMP(state->max_lod, 0.0f, 0.25f); + } + + if (!sci.unnormalizedCoordinates) { + sci.addressModeU = sampler_address_mode(state->wrap_s); + sci.addressModeV = sampler_address_mode(state->wrap_t); + sci.addressModeW = sampler_address_mode(state->wrap_r); + } else { + sci.addressModeU = sampler_address_mode_unnormalized(state->wrap_s); + sci.addressModeV = sampler_address_mode_unnormalized(state->wrap_t); + sci.addressModeW = sampler_address_mode_unnormalized(state->wrap_r); } - sci.addressModeU = sampler_address_mode(state->wrap_s); - sci.addressModeV = sampler_address_mode(state->wrap_t); - sci.addressModeW = sampler_address_mode(state->wrap_r); - sci.mipLodBias = state->lod_bias; + sci.mipLodBias = CLAMP(state->lod_bias, + -screen->info.props.limits.maxSamplerLodBias, + screen->info.props.limits.maxSamplerLodBias); need_custom |= wrap_needs_border_color(state->wrap_s); need_custom |= wrap_needs_border_color(state->wrap_t); @@ -336,12 +527,58 @@ zink_create_sampler_state(struct pipe_context *pctx, sci.borderColor = get_border_color(&state->border_color, is_integer, need_custom); if (sci.borderColor > VK_BORDER_COLOR_INT_OPAQUE_WHITE && need_custom) { + if (!screen->info.border_color_feats.customBorderColorWithoutFormat && + screen->info.driver_props.driverID != VK_DRIVER_ID_MESA_TURNIP) { + static bool warned = false; + warn_missing_feature(warned, "customBorderColorWithoutFormat"); + 
} if (screen->info.have_EXT_custom_border_color && - screen->info.border_color_feats.customBorderColorWithoutFormat) { + (screen->info.border_color_feats.customBorderColorWithoutFormat || state->border_color_format)) { + if (!screen->info.have_EXT_border_color_swizzle) { + static bool warned = false; + warn_missing_feature(warned, "VK_EXT_border_color_swizzle"); + } + + if (!is_integer && !screen->have_D24_UNORM_S8_UINT) { + union pipe_color_union clamped_border_color; + for (unsigned i = 0; i < 4; ++i) { + /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE + * when the border color is 1.0. */ + clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); + } + if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) != 0) { + need_clamped_border_color = true; + cbci_clamped.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; + cbci_clamped.format = VK_FORMAT_UNDEFINED; + /* these are identical unions */ + memcpy(&cbci_clamped.customBorderColor, &clamped_border_color, sizeof(union pipe_color_union)); + } + } cbci.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; - cbci.format = VK_FORMAT_UNDEFINED; - /* these are identical unions */ - memcpy(&cbci.customBorderColor, &state->border_color, sizeof(union pipe_color_union)); + if (screen->info.border_color_feats.customBorderColorWithoutFormat) { + cbci.format = VK_FORMAT_UNDEFINED; + /* these are identical unions */ + memcpy(&cbci.customBorderColor, &state->border_color, sizeof(union pipe_color_union)); + } else { + if (util_format_is_depth_or_stencil(state->border_color_format)) { + if (is_integer) { + cbci.format = VK_FORMAT_S8_UINT; + for (unsigned i = 0; i < 4; i++) + cbci.customBorderColor.uint32[i] = CLAMP(state->border_color.ui[i], 0, 255); + } else { + cbci.format = zink_get_format(screen, util_format_get_depth_only(state->border_color_format)); + /* these are identical unions */ + memcpy(&cbci.customBorderColor, 
&state->border_color, sizeof(union pipe_color_union)); + } + } else { + cbci.format = zink_get_format(screen, state->border_color_format); + union pipe_color_union color; + for (unsigned i = 0; i < 4; i++) { + zink_format_clamp_channel_srgb(util_format_description(state->border_color_format), &color, &state->border_color, i); + } + zink_convert_color(screen, state->border_color_format, (void*)&cbci.customBorderColor, &color); + } + } cbci.pNext = sci.pNext; sci.pNext = &cbci; UNUSED uint32_t check = p_atomic_inc_return(&screen->cur_custom_border_color_samplers); @@ -350,8 +587,6 @@ zink_create_sampler_state(struct pipe_context *pctx, sci.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; // TODO with custom shader if we're super interested? } - sci.unnormalizedCoordinates = !state->normalized_coords; - if (state->max_anisotropy > 1) { sci.maxAnisotropy = state->max_anisotropy; sci.anisotropyEnable = VK_TRUE; @@ -361,34 +596,37 @@ zink_create_sampler_state(struct pipe_context *pctx, if (!sampler) return NULL; - if (VKSCR(CreateSampler)(screen->dev, &sci, NULL, &sampler->sampler) != VK_SUCCESS) { + VkResult result = VKSCR(CreateSampler)(screen->dev, &sci, NULL, &sampler->sampler); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateSampler failed (%s)", vk_Result_to_str(result)); FREE(sampler); return NULL; } - util_dynarray_init(&sampler->desc_set_refs.refs, NULL); - calc_descriptor_hash_sampler_state(sampler); + if (need_clamped_border_color) { + sci.pNext = &cbci_clamped; + result = VKSCR(CreateSampler)(screen->dev, &sci, NULL, &sampler->sampler_clamped); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateSampler failed (%s)", vk_Result_to_str(result)); + VKSCR(DestroySampler)(screen->dev, sampler->sampler, NULL); + FREE(sampler); + return NULL; + } + } sampler->custom_border_color = need_custom; + if (!screen->info.have_EXT_non_seamless_cube_map) + sampler->emulate_nonseamless = !state->seamless_cube_map; return sampler; } ALWAYS_INLINE static 
VkImageLayout -get_layout_for_binding(struct zink_resource *res, enum zink_descriptor_type type, bool is_compute) +get_layout_for_binding(const struct zink_context *ctx, struct zink_resource *res, enum zink_descriptor_type type, bool is_compute) { if (res->obj->is_buffer) return 0; switch (type) { case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - return res->image_bind_count[is_compute] ? - VK_IMAGE_LAYOUT_GENERAL : - res->aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ? - //Vulkan-Docs#1490 - //(res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL : - //res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL : - (res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) : - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + return zink_descriptor_util_image_layout_eval(ctx, res, is_compute); case ZINK_DESCRIPTOR_TYPE_IMAGE: return VK_IMAGE_LAYOUT_GENERAL; default: @@ -398,12 +636,23 @@ get_layout_for_binding(struct zink_resource *res, enum zink_descriptor_type type } ALWAYS_INLINE static struct zink_surface * -get_imageview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned idx) +get_imageview_for_binding(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned idx) { switch (type) { case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: { struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[stage][idx]); - return sampler_view->base.texture ? 
sampler_view->image_view : NULL; + if (!sampler_view || !sampler_view->base.texture) + return NULL; + /* if this is a non-seamless cube sampler, return the cube array view */ + if (ctx->di.emulate_nonseamless[stage] & ctx->di.cubes[stage] & BITFIELD_BIT(idx)) + return sampler_view->cube_array; + bool needs_zs_shader_swizzle = (ctx->di.zs_swizzle[stage].mask & BITFIELD_BIT(idx)) && + zink_screen(ctx->base.screen)->driver_workarounds.needs_zs_shader_swizzle; + bool needs_shadow_shader_swizzle = (stage == MESA_SHADER_FRAGMENT) && ctx->gfx_stages[MESA_SHADER_FRAGMENT] && + (ctx->di.zs_swizzle[MESA_SHADER_FRAGMENT].mask & ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask & BITFIELD_BIT(idx)); + if (sampler_view->zs_view && (needs_zs_shader_swizzle || needs_shadow_shader_swizzle)) + return sampler_view->zs_view; + return sampler_view->image_view; } case ZINK_DESCRIPTOR_TYPE_IMAGE: { struct zink_image_view *image_view = &ctx->image_views[stage][idx]; @@ -417,7 +666,7 @@ get_imageview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage, } ALWAYS_INLINE static struct zink_buffer_view * -get_bufferview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned idx) +get_bufferview_for_binding(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned idx) { switch (type) { case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: { @@ -436,52 +685,65 @@ get_bufferview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage } ALWAYS_INLINE static struct zink_resource * -update_descriptor_state_ubo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res) +update_descriptor_state_ubo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor; const enum zink_descriptor_type type = 
ZINK_DESCRIPTOR_TYPE_UBO; ctx->di.descriptor_res[type][shader][slot] = res; - ctx->di.ubos[shader][slot].offset = ctx->ubos[shader][slot].buffer_offset; - if (res) { - ctx->di.ubos[shader][slot].buffer = res->obj->buffer; - ctx->di.ubos[shader][slot].range = ctx->ubos[shader][slot].buffer_size; - assert(ctx->di.ubos[shader][slot].range <= screen->info.props.limits.maxUniformBufferRange); - } else { - VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; - ctx->di.ubos[shader][slot].buffer = have_null_descriptors ? VK_NULL_HANDLE : null_buffer; - ctx->di.ubos[shader][slot].range = VK_WHOLE_SIZE; - } - if (!slot) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { if (res) - ctx->di.push_valid |= BITFIELD64_BIT(shader); + ctx->di.db.ubos[shader][slot].address = res->obj->bda + ctx->ubos[shader][slot].buffer_offset; else - ctx->di.push_valid &= ~BITFIELD64_BIT(shader); + ctx->di.db.ubos[shader][slot].address = 0; + ctx->di.db.ubos[shader][slot].range = res ? ctx->ubos[shader][slot].buffer_size : VK_WHOLE_SIZE; + assert(ctx->di.db.ubos[shader][slot].range == VK_WHOLE_SIZE || + ctx->di.db.ubos[shader][slot].range <= screen->info.props.limits.maxUniformBufferRange); + } else { + ctx->di.t.ubos[shader][slot].offset = ctx->ubos[shader][slot].buffer_offset; + if (res) { + ctx->di.t.ubos[shader][slot].buffer = res->obj->buffer; + ctx->di.t.ubos[shader][slot].range = ctx->ubos[shader][slot].buffer_size; + assert(ctx->di.t.ubos[shader][slot].range <= screen->info.props.limits.maxUniformBufferRange); + } else { + VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; + ctx->di.t.ubos[shader][slot].buffer = have_null_descriptors ? 
VK_NULL_HANDLE : null_buffer; + ctx->di.t.ubos[shader][slot].range = VK_WHOLE_SIZE; + } } + return res; } ALWAYS_INLINE static struct zink_resource * -update_descriptor_state_ssbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res) +update_descriptor_state_ssbo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor; const enum zink_descriptor_type type = ZINK_DESCRIPTOR_TYPE_SSBO; ctx->di.descriptor_res[type][shader][slot] = res; - ctx->di.ssbos[shader][slot].offset = ctx->ssbos[shader][slot].buffer_offset; - if (res) { - ctx->di.ssbos[shader][slot].buffer = res->obj->buffer; - ctx->di.ssbos[shader][slot].range = ctx->ssbos[shader][slot].buffer_size; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (res) + ctx->di.db.ssbos[shader][slot].address = res->obj->bda + ctx->ssbos[shader][slot].buffer_offset; + else + ctx->di.db.ssbos[shader][slot].address = 0; + ctx->di.db.ssbos[shader][slot].range = res ? ctx->ssbos[shader][slot].buffer_size : VK_WHOLE_SIZE; } else { - VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; - ctx->di.ssbos[shader][slot].buffer = have_null_descriptors ? VK_NULL_HANDLE : null_buffer; - ctx->di.ssbos[shader][slot].range = VK_WHOLE_SIZE; + ctx->di.t.ssbos[shader][slot].offset = ctx->ssbos[shader][slot].buffer_offset; + if (res) { + ctx->di.t.ssbos[shader][slot].buffer = res->obj->buffer; + ctx->di.t.ssbos[shader][slot].range = ctx->ssbos[shader][slot].buffer_size; + } else { + VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; + ctx->di.t.ssbos[shader][slot].buffer = have_null_descriptors ? 
VK_NULL_HANDLE : null_buffer; + ctx->di.t.ssbos[shader][slot].range = VK_WHOLE_SIZE; + } } return res; } ALWAYS_INLINE static struct zink_resource * -update_descriptor_state_sampler(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res) +update_descriptor_state_sampler(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor; @@ -489,36 +751,62 @@ update_descriptor_state_sampler(struct zink_context *ctx, enum pipe_shader_type ctx->di.descriptor_res[type][shader][slot] = res; if (res) { if (res->obj->is_buffer) { - struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot); - ctx->di.tbos[shader][slot] = bv->buffer_view; - ctx->di.sampler_surfaces[shader][slot].bufferview = bv; - ctx->di.sampler_surfaces[shader][slot].is_buffer = true; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.db.tbos[shader][slot].address = res->obj->bda + ctx->sampler_views[shader][slot]->u.buf.offset; + ctx->di.db.tbos[shader][slot].range = zink_sampler_view(ctx->sampler_views[shader][slot])->tbo_size; + ctx->di.db.tbos[shader][slot].format = zink_get_format(screen, ctx->sampler_views[shader][slot]->format); + } else { + struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot); + ctx->di.t.tbos[shader][slot] = bv->buffer_view; + } } else { struct zink_surface *surface = get_imageview_for_binding(ctx, shader, type, slot); - ctx->di.textures[shader][slot].imageLayout = get_layout_for_binding(res, type, shader == PIPE_SHADER_COMPUTE); + ctx->di.textures[shader][slot].imageLayout = ctx->blitting ? 
res->layout : get_layout_for_binding(ctx, res, type, shader == MESA_SHADER_COMPUTE); ctx->di.textures[shader][slot].imageView = surface->image_view; - ctx->di.sampler_surfaces[shader][slot].surface = surface; - ctx->di.sampler_surfaces[shader][slot].is_buffer = false; + if (!screen->have_D24_UNORM_S8_UINT && + ctx->sampler_states[shader][slot] && ctx->sampler_states[shader][slot]->sampler_clamped) { + struct zink_sampler_state *state = ctx->sampler_states[shader][slot]; + VkSampler sampler = (surface->base.format == PIPE_FORMAT_Z24X8_UNORM && surface->ivci.format == VK_FORMAT_D32_SFLOAT) || + (surface->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT && surface->ivci.format == VK_FORMAT_D32_SFLOAT_S8_UINT) ? + state->sampler_clamped : + state->sampler; + if (ctx->di.textures[shader][slot].sampler != sampler) { + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); + ctx->di.textures[shader][slot].sampler = sampler; + } + } } } else { if (likely(have_null_descriptors)) { ctx->di.textures[shader][slot].imageView = VK_NULL_HANDLE; ctx->di.textures[shader][slot].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - ctx->di.tbos[shader][slot] = VK_NULL_HANDLE; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.db.tbos[shader][slot].address = 0; + ctx->di.db.tbos[shader][slot].range = VK_WHOLE_SIZE; + } else { + ctx->di.t.tbos[shader][slot] = VK_NULL_HANDLE; + } } else { - struct zink_surface *null_surface = zink_csurface(ctx->dummy_surface[0]); + assert(zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB); + struct zink_surface *null_surface = zink_get_dummy_surface(ctx, 0); struct zink_buffer_view *null_bufferview = ctx->dummy_bufferview; ctx->di.textures[shader][slot].imageView = null_surface->image_view; ctx->di.textures[shader][slot].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - ctx->di.tbos[shader][slot] = null_bufferview->buffer_view; + ctx->di.t.tbos[shader][slot] = null_bufferview->buffer_view; } - 
memset(&ctx->di.sampler_surfaces[shader][slot], 0, sizeof(ctx->di.sampler_surfaces[shader][slot])); } return res; } +void +zink_update_shadow_samplerviews(struct zink_context *ctx, unsigned mask) +{ + u_foreach_bit(slot, mask) + update_descriptor_state_sampler(ctx, MESA_SHADER_FRAGMENT, slot, ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][MESA_SHADER_FRAGMENT][slot]); +} + ALWAYS_INLINE static struct zink_resource * -update_descriptor_state_image(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res) +update_descriptor_state_image(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor; @@ -526,62 +814,130 @@ update_descriptor_state_image(struct zink_context *ctx, enum pipe_shader_type sh ctx->di.descriptor_res[type][shader][slot] = res; if (res) { if (res->obj->is_buffer) { - struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot); - ctx->di.texel_images[shader][slot] = bv->buffer_view; - ctx->di.image_surfaces[shader][slot].bufferview = bv; - ctx->di.image_surfaces[shader][slot].is_buffer = true; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.db.texel_images[shader][slot].address = res->obj->bda + ctx->image_views[shader][slot].base.u.buf.offset; + ctx->di.db.texel_images[shader][slot].range = ctx->image_views[shader][slot].base.u.buf.size; + ctx->di.db.texel_images[shader][slot].format = zink_get_format(screen, ctx->image_views[shader][slot].base.format); + } else { + struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot); + ctx->di.t.texel_images[shader][slot] = bv->buffer_view; + } } else { struct zink_surface *surface = get_imageview_for_binding(ctx, shader, type, slot); ctx->di.images[shader][slot].imageLayout = VK_IMAGE_LAYOUT_GENERAL; 
ctx->di.images[shader][slot].imageView = surface->image_view; - ctx->di.image_surfaces[shader][slot].surface = surface; - ctx->di.image_surfaces[shader][slot].is_buffer = false; } } else { if (likely(have_null_descriptors)) { memset(&ctx->di.images[shader][slot], 0, sizeof(ctx->di.images[shader][slot])); - ctx->di.texel_images[shader][slot] = VK_NULL_HANDLE; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.db.texel_images[shader][slot].address = 0; + ctx->di.db.texel_images[shader][slot].range = VK_WHOLE_SIZE; + } else { + ctx->di.t.texel_images[shader][slot] = VK_NULL_HANDLE; + } } else { - struct zink_surface *null_surface = zink_csurface(ctx->dummy_surface[0]); + assert(zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB); + struct zink_surface *null_surface = zink_get_dummy_surface(ctx, 0); struct zink_buffer_view *null_bufferview = ctx->dummy_bufferview; ctx->di.images[shader][slot].imageView = null_surface->image_view; ctx->di.images[shader][slot].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - ctx->di.texel_images[shader][slot] = null_bufferview->buffer_view; + ctx->di.t.texel_images[shader][slot] = null_bufferview->buffer_view; } - memset(&ctx->di.image_surfaces[shader][slot], 0, sizeof(ctx->di.image_surfaces[shader][slot])); } return res; } static void +update_nonseamless_shader_key(struct zink_context *ctx, gl_shader_stage pstage) +{ + const uint32_t new_mask = ctx->di.emulate_nonseamless[pstage] & ctx->di.cubes[pstage]; + if (pstage == MESA_SHADER_COMPUTE) { + if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask != new_mask) + ctx->compute_dirty = true; + ctx->compute_pipeline_state.key.base.nonseamless_cube_mask = new_mask; + } else { + if (zink_get_shader_key_base(ctx, pstage)->nonseamless_cube_mask != new_mask) + zink_set_shader_key_base(ctx, pstage)->nonseamless_cube_mask = new_mask; + } +} + +static void zink_bind_sampler_states(struct pipe_context *pctx, - enum pipe_shader_type shader, + gl_shader_stage shader, unsigned start_slot, 
unsigned num_samplers, void **samplers) { struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); for (unsigned i = 0; i < num_samplers; ++i) { struct zink_sampler_state *state = samplers[i]; - if (ctx->sampler_states[shader][start_slot + i] != state) - zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, 1); + if (samplers[i] == ctx->sampler_states[shader][start_slot + i]) + continue; + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, 1); ctx->sampler_states[shader][start_slot + i] = state; - ctx->di.textures[shader][start_slot + i].sampler = state ? state->sampler : VK_NULL_HANDLE; - if (state) - zink_batch_usage_set(&state->batch_uses, ctx->batch.state); + if (state) { + ctx->di.textures[shader][start_slot + i].sampler = state->sampler; + if (state->sampler_clamped && !screen->have_D24_UNORM_S8_UINT) { + struct zink_surface *surface = get_imageview_for_binding(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot + i); + if (surface && + ((surface->base.format == PIPE_FORMAT_Z24X8_UNORM && surface->ivci.format == VK_FORMAT_D32_SFLOAT) || + (surface->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT && surface->ivci.format == VK_FORMAT_D32_SFLOAT_S8_UINT))) + ctx->di.textures[shader][start_slot + i].sampler = state->sampler_clamped; + } + } else { + ctx->di.textures[shader][start_slot + i].sampler = VK_NULL_HANDLE; + } } ctx->di.num_samplers[shader] = start_slot + num_samplers; } static void +zink_bind_sampler_states_nonseamless(struct pipe_context *pctx, + gl_shader_stage shader, + unsigned start_slot, + unsigned num_samplers, + void **samplers) +{ + struct zink_context *ctx = zink_context(pctx); + uint32_t old_mask = ctx->di.emulate_nonseamless[shader]; + uint32_t mask = BITFIELD_RANGE(start_slot, num_samplers); + ctx->di.emulate_nonseamless[shader] &= ~mask; + for (unsigned i = 0; i < num_samplers; 
++i) { + struct zink_sampler_state *state = samplers[i]; + const uint32_t bit = BITFIELD_BIT(start_slot + i); + if (!state) + continue; + if (state->emulate_nonseamless) + ctx->di.emulate_nonseamless[shader] |= bit; + if (state->emulate_nonseamless != (old_mask & bit) && (ctx->di.cubes[shader] & bit)) { + struct zink_surface *surface = get_imageview_for_binding(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot + i); + if (surface && ctx->di.images[shader][start_slot + i].imageView != surface->image_view) { + ctx->di.images[shader][start_slot + i].imageView = surface->image_view; + update_descriptor_state_sampler(ctx, shader, start_slot + i, zink_resource(surface->base.texture)); + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot + i, 1); + } + } + } + zink_bind_sampler_states(pctx, shader, start_slot, num_samplers, samplers); + update_nonseamless_shader_key(ctx, shader); +} + +static void zink_delete_sampler_state(struct pipe_context *pctx, void *sampler_state) { struct zink_sampler_state *sampler = sampler_state; struct zink_batch *batch = &zink_context(pctx)->batch; - zink_descriptor_set_refs_clear(&sampler->desc_set_refs, sampler_state); - util_dynarray_append(&batch->state->zombie_samplers, VkSampler, - sampler->sampler); + /* may be called if context_create fails */ + if (batch->state) { + util_dynarray_append(&batch->state->zombie_samplers, VkSampler, + sampler->sampler); + if (sampler->sampler_clamped) + util_dynarray_append(&batch->state->zombie_samplers, VkSampler, + sampler->sampler_clamped); + } if (sampler->custom_border_color) p_atomic_dec(&zink_screen(pctx->screen)->cur_custom_border_color_samplers); FREE(sampler); @@ -607,29 +963,57 @@ hash_bufferview(void *bvci) return _mesa_hash_data((char*)bvci + offset, sizeof(VkBufferViewCreateInfo) - offset); } -static struct zink_buffer_view * -get_buffer_view(struct zink_context *ctx, struct zink_resource *res, enum pipe_format format, uint32_t offset, 
uint32_t range) +static VkBufferViewCreateInfo +create_bvci(struct zink_context *ctx, struct zink_resource *res, enum pipe_format format, uint32_t offset, uint32_t range) { struct zink_screen *screen = zink_screen(ctx->base.screen); - struct zink_buffer_view *buffer_view = NULL; - VkBufferViewCreateInfo bvci = {0}; + VkBufferViewCreateInfo bvci; + // Zero whole struct (including alignment holes), so hash_bufferview + // does not access potentially uninitialized data. + memset(&bvci, 0, sizeof(bvci)); bvci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; - bvci.buffer = res->obj->buffer; + bvci.pNext = NULL; + if (screen->format_props[format].bufferFeatures & VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT) + bvci.buffer = res->obj->storage_buffer ? res->obj->storage_buffer : res->obj->buffer; + else + bvci.buffer = res->obj->buffer; bvci.format = zink_get_format(screen, format); assert(bvci.format); bvci.offset = offset; bvci.range = !offset && range == res->base.b.width0 ? VK_WHOLE_SIZE : range; + unsigned blocksize = util_format_get_blocksize(format); + if (bvci.range != VK_WHOLE_SIZE) { + /* clamp out partial texels */ + bvci.range -= bvci.range % blocksize; + if (bvci.offset + bvci.range >= res->base.b.width0) + bvci.range = VK_WHOLE_SIZE; + } + uint64_t clamp = blocksize * screen->info.props.limits.maxTexelBufferElements; + if (bvci.range == VK_WHOLE_SIZE && res->base.b.width0 > clamp) + bvci.range = clamp; + bvci.flags = 0; + return bvci; +} + +static struct zink_buffer_view * +get_buffer_view(struct zink_context *ctx, struct zink_resource *res, VkBufferViewCreateInfo *bvci) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_buffer_view *buffer_view = NULL; - uint32_t hash = hash_bufferview(&bvci); + uint32_t hash = hash_bufferview(bvci); simple_mtx_lock(&res->bufferview_mtx); - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->bufferview_cache, hash, &bvci); + struct hash_entry *he = 
_mesa_hash_table_search_pre_hashed(&res->bufferview_cache, hash, bvci); if (he) { buffer_view = he->data; p_atomic_inc(&buffer_view->reference.count); } else { VkBufferView view; - if (VKSCR(CreateBufferView)(screen->dev, &bvci, NULL, &view) != VK_SUCCESS) + VkResult result = VKSCR(CreateBufferView)(screen->dev, bvci, NULL, &view); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateBufferView failed (%s)", vk_Result_to_str(result)); goto out; + } buffer_view = CALLOC_STRUCT(zink_buffer_view); if (!buffer_view) { VKSCR(DestroyBufferView)(screen->dev, view, NULL); @@ -637,8 +1021,7 @@ get_buffer_view(struct zink_context *ctx, struct zink_resource *res, enum pipe_f } pipe_reference_init(&buffer_view->reference, 1); pipe_resource_reference(&buffer_view->pres, &res->base.b); - util_dynarray_init(&buffer_view->desc_set_refs.refs, NULL); - buffer_view->bvci = bvci; + buffer_view->bvci = *bvci; buffer_view->buffer_view = view; buffer_view->hash = hash; _mesa_hash_table_insert_pre_hashed(&res->bufferview_cache, hash, &buffer_view->bvci, buffer_view); @@ -678,15 +1061,58 @@ clamp_zs_swizzle(enum pipe_swizzle swizzle) return swizzle; } +ALWAYS_INLINE static enum pipe_swizzle +clamp_alpha_swizzle(enum pipe_swizzle swizzle) +{ + if (swizzle == PIPE_SWIZZLE_W) + return PIPE_SWIZZLE_X; + if (swizzle < PIPE_SWIZZLE_W) + return PIPE_SWIZZLE_0; + return swizzle; +} + +ALWAYS_INLINE static enum pipe_swizzle +clamp_luminance_swizzle(enum pipe_swizzle swizzle) +{ + if (swizzle == PIPE_SWIZZLE_W) + return PIPE_SWIZZLE_1; + if (swizzle < PIPE_SWIZZLE_W) + return PIPE_SWIZZLE_X; + return swizzle; +} + +ALWAYS_INLINE static enum pipe_swizzle +clamp_luminance_alpha_swizzle(enum pipe_swizzle swizzle) +{ + if (swizzle == PIPE_SWIZZLE_W) + return PIPE_SWIZZLE_Y; + if (swizzle < PIPE_SWIZZLE_W) + return PIPE_SWIZZLE_X; + return swizzle; +} + +ALWAYS_INLINE static bool +viewtype_is_cube(const VkImageViewCreateInfo *ivci) +{ + return ivci->viewType == VK_IMAGE_VIEW_TYPE_CUBE || + 
ivci->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; +} + static struct pipe_sampler_view * zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres, const struct pipe_sampler_view *state) { struct zink_screen *screen = zink_screen(pctx->screen); struct zink_resource *res = zink_resource(pres); - struct zink_sampler_view *sampler_view = CALLOC_STRUCT(zink_sampler_view); + struct zink_context *ctx = zink_context(pctx); + struct zink_sampler_view *sampler_view = CALLOC_STRUCT_CL(zink_sampler_view); bool err; + if (!sampler_view) { + mesa_loge("ZINK: failed to allocate sampler_view!"); + return NULL; + } + sampler_view->base = *state; sampler_view->base.texture = NULL; pipe_resource_reference(&sampler_view->base.texture, pres); @@ -699,46 +1125,136 @@ zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres, struct pipe_surface templ = {0}; templ.u.tex.level = state->u.tex.first_level; templ.format = state->format; + /* avoid needing mutable for depth/stencil sampling */ + if (util_format_is_depth_and_stencil(pres->format)) + templ.format = pres->format; if (state->target != PIPE_TEXTURE_3D) { templ.u.tex.first_layer = state->u.tex.first_layer; templ.u.tex.last_layer = state->u.tex.last_layer; } + if (zink_is_swapchain(res)) { + if (!zink_kopper_acquire(ctx, res, UINT64_MAX)) { + FREE_CL(sampler_view); + return NULL; + } + } + ivci = create_ivci(screen, res, &templ, state->target); ivci.subresourceRange.levelCount = state->u.tex.last_level - state->u.tex.first_level + 1; ivci.subresourceRange.aspectMask = sampler_aspect_from_format(state->format); + bool red_depth_sampler_view = false; /* samplers for stencil aspects of packed formats need to always use stencil swizzle */ if (ivci.subresourceRange.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { ivci.components.r = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r)); ivci.components.g = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g)); ivci.components.b = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b)); ivci.components.a = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a)); + + /* If we're sampling depth and we might need to do shader rewrites for + * legacy shadow sampling, then set up an extra image view that just + * returns the red (depth) component, so you can always have the shadow + * result available in the red component for the in-shader swizzling. + * (Or if we have PVR's needs_zs_shader_swizzle and are sampling ONE + * value for stencil, which also uses that view). + */ + if (ivci.subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT || + zink_screen(ctx->base.screen)->driver_workarounds.needs_zs_shader_swizzle) { + VkComponentSwizzle *swizzle = (VkComponentSwizzle*)&ivci.components; + for (unsigned i = 0; i < 4; i++) { + if (swizzle[i] == VK_COMPONENT_SWIZZLE_ONE || + (swizzle[i] == VK_COMPONENT_SWIZZLE_ZERO && ivci.subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)) + red_depth_sampler_view = true; + } + /* this is the data that will be used in shader rewrites */ + sampler_view->swizzle.s[0] = clamp_zs_swizzle(sampler_view->base.swizzle_r); + sampler_view->swizzle.s[1] = clamp_zs_swizzle(sampler_view->base.swizzle_g); + sampler_view->swizzle.s[2] = clamp_zs_swizzle(sampler_view->base.swizzle_b); + sampler_view->swizzle.s[3] = clamp_zs_swizzle(sampler_view->base.swizzle_a); + } } else { + enum pipe_swizzle swizzle[4] = { + sampler_view->base.swizzle_r, + sampler_view->base.swizzle_g, + sampler_view->base.swizzle_b, + sampler_view->base.swizzle_a + }; /* if we have e.g., R8G8B8X8, then we have to ignore alpha since we're just emulating * these formats */ - if (zink_format_is_voidable_rgba_variant(state->format)) { - const struct util_format_description *desc = util_format_description(state->format); - sampler_view->base.swizzle_r = zink_clamp_void_swizzle(desc, 
sampler_view->base.swizzle_r); - sampler_view->base.swizzle_g = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_g); - sampler_view->base.swizzle_b = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_b); - sampler_view->base.swizzle_a = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_a); - } - ivci.components.r = zink_component_mapping(sampler_view->base.swizzle_r); - ivci.components.g = zink_component_mapping(sampler_view->base.swizzle_g); - ivci.components.b = zink_component_mapping(sampler_view->base.swizzle_b); - ivci.components.a = zink_component_mapping(sampler_view->base.swizzle_a); + if (zink_format_is_voidable_rgba_variant(state->format)) { + const struct util_format_description *view_desc = util_format_description(state->format); + for (int i = 0; i < 4; ++i) + swizzle[i] = zink_clamp_void_swizzle(view_desc, swizzle[i]); + } else if (util_format_is_alpha(state->format) && res->format != VK_FORMAT_A8_UNORM_KHR) { + for (int i = 0; i < 4; ++i) + swizzle[i] = clamp_alpha_swizzle(swizzle[i]); + } else if (util_format_is_luminance(pres->format) || + util_format_is_luminance_alpha(pres->format)) { + if (util_format_is_luminance(pres->format)) { + for (int i = 0; i < 4; ++i) + swizzle[i] = clamp_luminance_swizzle(swizzle[i]); + } else { + for (int i = 0; i < 4; ++i) + swizzle[i] = clamp_luminance_alpha_swizzle(swizzle[i]); + } + if (state->format != pres->format) { + /* luminance / luminance-alpha formats can be reinterpreted + * as red / red-alpha formats by the state-tracker, and we + * need to whack the green/blue channels here to the + * correct values for that to work. 
+ */ + enum pipe_format linear = util_format_linear(pres->format); + if (state->format == util_format_luminance_to_red(linear)) { + assert(swizzle[1] == PIPE_SWIZZLE_X || + swizzle[1] == PIPE_SWIZZLE_0); + assert(swizzle[2] == PIPE_SWIZZLE_X || + swizzle[2] == PIPE_SWIZZLE_0); + swizzle[1] = swizzle[2] = PIPE_SWIZZLE_0; + } else + assert(state->format == linear); + } + } else if (util_format_is_red_alpha(pres->format)) { + /* RA formats are mapped to RG with adjusted swizzle */ + assert(util_format_is_red_green(vk_format_to_pipe_format(ivci.format))); + swizzle[3] = PIPE_SWIZZLE_Y; + } + + ivci.components.r = zink_component_mapping(swizzle[0]); + ivci.components.g = zink_component_mapping(swizzle[1]); + ivci.components.b = zink_component_mapping(swizzle[2]); + ivci.components.a = zink_component_mapping(swizzle[3]); } assert(ivci.format); - sampler_view->image_view = (struct zink_surface*)zink_get_surface(zink_context(pctx), pres, &templ, &ivci); + sampler_view->image_view = zink_get_surface(ctx, pres, &templ, &ivci); + if (!screen->info.have_EXT_non_seamless_cube_map && viewtype_is_cube(&sampler_view->image_view->ivci)) { + ivci.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + sampler_view->cube_array = zink_get_surface(ctx, pres, &templ, &ivci); + } else if (red_depth_sampler_view) { + /* there is only one component, and real swizzling can't be done here, + * so ensure the shader gets the sampled data + */ + ivci.components.r = VK_COMPONENT_SWIZZLE_R; + ivci.components.g = VK_COMPONENT_SWIZZLE_R; + ivci.components.b = VK_COMPONENT_SWIZZLE_R; + ivci.components.a = VK_COMPONENT_SWIZZLE_R; + sampler_view->zs_view = zink_get_surface(ctx, pres, &templ, &ivci); + } err = !sampler_view->image_view; } else { - sampler_view->buffer_view = get_buffer_view(zink_context(pctx), res, state->format, state->u.buf.offset, state->u.buf.size); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + /* always enforce limit clamping */ + unsigned blocksize = 
util_format_get_blocksize(state->format); + sampler_view->tbo_size = MIN2(state->u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) * blocksize; + return &sampler_view->base; + } + VkBufferViewCreateInfo bvci = create_bvci(ctx, res, state->format, state->u.buf.offset, state->u.buf.size); + sampler_view->buffer_view = get_buffer_view(ctx, res, &bvci); err = !sampler_view->buffer_view; } if (err) { - FREE(sampler_view); + FREE_CL(sampler_view); return NULL; } return &sampler_view->base; @@ -749,13 +1265,19 @@ zink_destroy_buffer_view(struct zink_screen *screen, struct zink_buffer_view *bu { struct zink_resource *res = zink_resource(buffer_view->pres); simple_mtx_lock(&res->bufferview_mtx); + if (buffer_view->reference.count) { + /* got a cache hit during deletion */ + simple_mtx_unlock(&res->bufferview_mtx); + return; + } struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->bufferview_cache, buffer_view->hash, &buffer_view->bvci); assert(he); _mesa_hash_table_remove(&res->bufferview_cache, he); simple_mtx_unlock(&res->bufferview_mtx); + simple_mtx_lock(&res->obj->view_lock); + util_dynarray_append(&res->obj->views, VkBufferView, buffer_view->buffer_view); + simple_mtx_unlock(&res->obj->view_lock); pipe_resource_reference(&buffer_view->pres, NULL); - VKSCR(DestroyBufferView)(screen->dev, buffer_view->buffer_view, NULL); - zink_descriptor_set_refs_clear(&buffer_view->desc_set_refs, buffer_view); FREE(buffer_view); } @@ -764,13 +1286,16 @@ zink_sampler_view_destroy(struct pipe_context *pctx, struct pipe_sampler_view *pview) { struct zink_sampler_view *view = zink_sampler_view(pview); - if (pview->texture->target == PIPE_BUFFER) - zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL); - else { + if (pview->texture->target == PIPE_BUFFER) { + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) + zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL); + } else { 
zink_surface_reference(zink_screen(pctx->screen), &view->image_view, NULL); + zink_surface_reference(zink_screen(pctx->screen), &view->cube_array, NULL); + zink_surface_reference(zink_screen(pctx->screen), &view->zs_view, NULL); } pipe_resource_reference(&pview->texture, NULL); - FREE(view); + FREE_CL(view); } static void @@ -781,68 +1306,7 @@ zink_get_sample_position(struct pipe_context *ctx, { /* TODO: handle this I guess */ assert(zink_screen(ctx->screen)->info.props.limits.standardSampleLocations); - /* from 26.4. Multisampling */ - switch (sample_count) { - case 0: - case 1: { - float pos[][2] = { {0.5,0.5}, }; - out_value[0] = pos[sample_index][0]; - out_value[1] = pos[sample_index][1]; - break; - } - case 2: { - float pos[][2] = { {0.75,0.75}, - {0.25,0.25}, }; - out_value[0] = pos[sample_index][0]; - out_value[1] = pos[sample_index][1]; - break; - } - case 4: { - float pos[][2] = { {0.375, 0.125}, - {0.875, 0.375}, - {0.125, 0.625}, - {0.625, 0.875}, }; - out_value[0] = pos[sample_index][0]; - out_value[1] = pos[sample_index][1]; - break; - } - case 8: { - float pos[][2] = { {0.5625, 0.3125}, - {0.4375, 0.6875}, - {0.8125, 0.5625}, - {0.3125, 0.1875}, - {0.1875, 0.8125}, - {0.0625, 0.4375}, - {0.6875, 0.9375}, - {0.9375, 0.0625}, }; - out_value[0] = pos[sample_index][0]; - out_value[1] = pos[sample_index][1]; - break; - } - case 16: { - float pos[][2] = { {0.5625, 0.5625}, - {0.4375, 0.3125}, - {0.3125, 0.625}, - {0.75, 0.4375}, - {0.1875, 0.375}, - {0.625, 0.8125}, - {0.8125, 0.6875}, - {0.6875, 0.1875}, - {0.375, 0.875}, - {0.5, 0.0625}, - {0.25, 0.125}, - {0.125, 0.75}, - {0.0, 0.5}, - {0.9375, 0.25}, - {0.875, 0.9375}, - {0.0625, 0.0}, }; - out_value[0] = pos[sample_index][0]; - out_value[1] = pos[sample_index][1]; - break; - } - default: - unreachable("unhandled sample count!"); - } + u_default_get_sample_position(ctx, sample_count, sample_index, out_value); } static void @@ -869,90 +1333,61 @@ update_existing_vbo(struct zink_context *ctx, unsigned 
slot) if (!ctx->vertex_buffers[slot].buffer.resource) return; struct zink_resource *res = zink_resource(ctx->vertex_buffers[slot].buffer.resource); + res->vbo_bind_count--; res->vbo_bind_mask &= ~BITFIELD_BIT(slot); - ctx->vbufs[slot] = VK_NULL_HANDLE; - ctx->vbuf_offsets[slot] = 0; - update_res_bind_count(ctx, res, false, true); -} - -ALWAYS_INLINE static struct zink_resource * -set_vertex_buffer_clamped(struct zink_context *ctx, unsigned slot) -{ - const struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[slot]; - struct zink_resource *res = zink_resource(ctx_vb->buffer.resource); - struct zink_screen *screen = zink_screen(ctx->base.screen); - if (ctx_vb->buffer_offset > screen->info.props.limits.maxVertexInputAttributeOffset) { - /* buffer offset exceeds maximum: make a tmp buffer at this offset */ - ctx->vbufs[slot] = zink_resource_tmp_buffer(screen, res, ctx_vb->buffer_offset, 0, &ctx->vbuf_offsets[slot]); - util_dynarray_append(&res->obj->tmp, VkBuffer, ctx->vbufs[slot]); - /* the driver is broken and sets a min alignment that's larger than its max offset: rebind as staging buffer */ - if (unlikely(ctx->vbuf_offsets[slot] > screen->info.props.limits.maxVertexInputAttributeOffset)) { - static bool warned = false; - if (!warned) - debug_printf("zink: this vulkan driver is BROKEN! 
maxVertexInputAttributeOffset < VkMemoryRequirements::alignment\n"); - warned = true; - } - } else { - ctx->vbufs[slot] = res->obj->buffer; - ctx->vbuf_offsets[slot] = ctx_vb->buffer_offset; + if (!res->vbo_bind_count) { + res->gfx_barrier &= ~VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + res->barrier_access[0] &= ~VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; } - assert(ctx->vbufs[slot]); - return res; + update_res_bind_count(ctx, res, false, true); } static void zink_set_vertex_buffers(struct pipe_context *pctx, - unsigned start_slot, unsigned num_buffers, - unsigned unbind_num_trailing_slots, - bool take_ownership, const struct pipe_vertex_buffer *buffers) { struct zink_context *ctx = zink_context(pctx); + const bool have_input_state = zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state; const bool need_state_change = !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state && - !zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state; - uint32_t enabled_buffers = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask; - enabled_buffers |= u_bit_consecutive(start_slot, num_buffers); - enabled_buffers &= ~u_bit_consecutive(start_slot + num_buffers, unbind_num_trailing_slots); - - if (buffers) { - if (need_state_change) - ctx->vertex_state_changed = true; - for (unsigned i = 0; i < num_buffers; ++i) { - const struct pipe_vertex_buffer *vb = buffers + i; - struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[start_slot + i]; - update_existing_vbo(ctx, start_slot + i); - if (!take_ownership) - pipe_resource_reference(&ctx_vb->buffer.resource, vb->buffer.resource); - else { - pipe_resource_reference(&ctx_vb->buffer.resource, NULL); - ctx_vb->buffer.resource = vb->buffer.resource; - } - if (vb->buffer.resource) { - struct zink_resource *res = zink_resource(vb->buffer.resource); - res->vbo_bind_mask |= BITFIELD_BIT(start_slot + i); - update_res_bind_count(ctx, res, false, false); - ctx_vb->stride = vb->stride; - ctx_vb->buffer_offset = 
vb->buffer_offset; - /* always barrier before possible rebind */ - zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); - set_vertex_buffer_clamped(ctx, start_slot + i); - } else - enabled_buffers &= ~BITFIELD_BIT(i); - } - } else { - if (need_state_change) - ctx->vertex_state_changed = true; - for (unsigned i = 0; i < num_buffers; ++i) { - update_existing_vbo(ctx, start_slot + i); - pipe_resource_reference(&ctx->vertex_buffers[start_slot + i].buffer.resource, NULL); + !have_input_state; + unsigned last_count = util_last_bit(ctx->gfx_pipeline_state.vertex_buffers_enabled_mask); + uint32_t enabled_buffers = BITFIELD_MASK(num_buffers); + + assert(!num_buffers || buffers); + + for (unsigned i = 0; i < num_buffers; ++i) { + const struct pipe_vertex_buffer *vb = buffers + i; + struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[i]; + update_existing_vbo(ctx, i); + pipe_resource_reference(&ctx_vb->buffer.resource, NULL); + ctx_vb->buffer.resource = vb->buffer.resource; + + if (vb->buffer.resource) { + struct zink_resource *res = zink_resource(vb->buffer.resource); + res->vbo_bind_mask |= BITFIELD_BIT(i); + res->vbo_bind_count++; + res->gfx_barrier |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + res->barrier_access[0] |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + update_res_bind_count(ctx, res, false, false); + ctx_vb->buffer_offset = vb->buffer_offset; + /* always barrier before possible rebind */ + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); + zink_batch_resource_usage_set(&ctx->batch, res, false, true); + res->obj->unordered_read = false; + } else { + enabled_buffers &= ~BITFIELD_BIT(i); } } - for (unsigned i = 0; i < unbind_num_trailing_slots; i++) { - update_existing_vbo(ctx, start_slot + i); - pipe_resource_reference(&ctx->vertex_buffers[start_slot + i].buffer.resource, NULL); + for (unsigned i = num_buffers; i < last_count; 
i++) { + update_existing_vbo(ctx, i); + pipe_resource_reference(&ctx->vertex_buffers[i].buffer.resource, NULL); } + if (need_state_change) + ctx->vertex_state_changed = true; + else if (!have_input_state && ctx->gfx_pipeline_state.vertex_buffers_enabled_mask != enabled_buffers) + ctx->vertex_state_changed = true; ctx->gfx_pipeline_state.vertex_buffers_enabled_mask = enabled_buffers; ctx->vertex_buffers_dirty = num_buffers > 0; #ifndef NDEBUG @@ -971,14 +1406,9 @@ zink_set_viewport_states(struct pipe_context *pctx, for (unsigned i = 0; i < num_viewports; ++i) ctx->vp_state.viewport_states[start_slot + i] = state[i]; - ctx->vp_state.num_viewports = start_slot + num_viewports; - if (!zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state) { - if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports) - ctx->gfx_pipeline_state.dirty = true; - ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports; - } ctx->vp_state_changed = true; + zink_flush_dgc_if_enabled(ctx); } static void @@ -991,11 +1421,12 @@ zink_set_scissor_states(struct pipe_context *pctx, for (unsigned i = 0; i < num_scissors; i++) ctx->vp_state.scissor_states[start_slot + i] = states[i]; ctx->scissor_changed = true; + zink_flush_dgc_if_enabled(ctx); } static void zink_set_inlinable_constants(struct pipe_context *pctx, - enum pipe_shader_type shader, + gl_shader_stage shader, uint num_values, uint32_t *values) { struct zink_context *ctx = (struct zink_context *)pctx; @@ -1003,50 +1434,75 @@ zink_set_inlinable_constants(struct pipe_context *pctx, uint32_t *inlinable_uniforms; struct zink_shader_key *key = NULL; - if (shader == PIPE_SHADER_COMPUTE) { - inlinable_uniforms = ctx->compute_inlinable_uniforms; + if (shader == MESA_SHADER_COMPUTE) { + key = &ctx->compute_pipeline_state.key; } else { + assert(!zink_screen(pctx->screen)->optimal_keys || + (shader == MESA_SHADER_GEOMETRY && + ctx->gfx_stages[MESA_SHADER_GEOMETRY] && + 
ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)); key = &ctx->gfx_pipeline_state.shader_keys.key[shader]; - inlinable_uniforms = key->base.inlined_uniform_values; } + inlinable_uniforms = key->base.inlined_uniform_values; if (!(ctx->inlinable_uniforms_valid_mask & bit) || memcmp(inlinable_uniforms, values, num_values * 4)) { memcpy(inlinable_uniforms, values, num_values * 4); - ctx->dirty_shader_stages |= bit; + if (shader == MESA_SHADER_COMPUTE) + ctx->compute_dirty = true; + else + ctx->dirty_gfx_stages |= bit; ctx->inlinable_uniforms_valid_mask |= bit; - if (key) - key->inline_uniforms = true; + key->inline_uniforms = true; } } ALWAYS_INLINE static void -unbind_ubo(struct zink_context *ctx, struct zink_resource *res, enum pipe_shader_type pstage, unsigned slot) +unbind_descriptor_stage(struct zink_resource *res, gl_shader_stage pstage) +{ + if (!res->sampler_binds[pstage] && !res->image_binds[pstage] && !res->all_bindless) + res->gfx_barrier &= ~zink_pipeline_flags_from_pipe_stage(pstage); +} + +ALWAYS_INLINE static void +unbind_buffer_descriptor_stage(struct zink_resource *res, gl_shader_stage pstage) +{ + if (!res->ubo_bind_mask[pstage] && !res->ssbo_bind_mask[pstage]) + unbind_descriptor_stage(res, pstage); +} + +ALWAYS_INLINE static void +unbind_ubo(struct zink_context *ctx, struct zink_resource *res, gl_shader_stage pstage, unsigned slot) { if (!res) return; res->ubo_bind_mask[pstage] &= ~BITFIELD_BIT(slot); - res->ubo_bind_count[pstage == PIPE_SHADER_COMPUTE]--; - update_res_bind_count(ctx, res, pstage == PIPE_SHADER_COMPUTE, true); + res->ubo_bind_count[pstage == MESA_SHADER_COMPUTE]--; + unbind_buffer_descriptor_stage(res, pstage); + if (!res->ubo_bind_count[pstage == MESA_SHADER_COMPUTE]) + res->barrier_access[pstage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_UNIFORM_READ_BIT; + update_res_bind_count(ctx, res, pstage == MESA_SHADER_COMPUTE, true); } static void -invalidate_inlined_uniforms(struct zink_context *ctx, enum pipe_shader_type pstage) 
+invalidate_inlined_uniforms(struct zink_context *ctx, gl_shader_stage pstage) { unsigned bit = BITFIELD_BIT(pstage); if (!(ctx->inlinable_uniforms_valid_mask & bit)) return; ctx->inlinable_uniforms_valid_mask &= ~bit; - ctx->dirty_shader_stages |= bit; - if (pstage == PIPE_SHADER_COMPUTE) + if (pstage == MESA_SHADER_COMPUTE) { + ctx->compute_dirty = true; return; - + } + assert(!zink_screen(ctx->base.screen)->optimal_keys || (pstage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound)); + ctx->dirty_gfx_stages |= bit; struct zink_shader_key *key = &ctx->gfx_pipeline_state.shader_keys.key[pstage]; key->inline_uniforms = false; } static void zink_set_constant_buffer(struct pipe_context *pctx, - enum pipe_shader_type shader, uint index, + gl_shader_stage shader, uint index, bool take_ownership, const struct pipe_constant_buffer *cb) { @@ -1067,15 +1523,19 @@ zink_set_constant_buffer(struct pipe_context *pctx, if (new_res) { if (new_res != res) { unbind_ubo(ctx, res, shader, index); - new_res->ubo_bind_count[shader == PIPE_SHADER_COMPUTE]++; + new_res->ubo_bind_count[shader == MESA_SHADER_COMPUTE]++; new_res->ubo_bind_mask[shader] |= BITFIELD_BIT(index); - update_res_bind_count(ctx, new_res, shader == PIPE_SHADER_COMPUTE, false); + new_res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(shader); + new_res->barrier_access[shader == MESA_SHADER_COMPUTE] |= VK_ACCESS_UNIFORM_READ_BIT; + update_res_bind_count(ctx, new_res, shader == MESA_SHADER_COMPUTE, false); } - zink_batch_resource_usage_set(&ctx->batch, new_res, false); - zink_fake_buffer_barrier(new_res, VK_ACCESS_UNIFORM_READ_BIT, - zink_pipeline_flags_from_pipe_stage(shader)); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, new_res, VK_ACCESS_UNIFORM_READ_BIT, + new_res->gfx_barrier); + zink_batch_resource_usage_set(&ctx->batch, new_res, false, true); + if (!ctx->unordered_blitting) + new_res->obj->unordered_read = false; } - update |= ((index || screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) && 
ctx->ubos[shader][index].buffer_offset != offset) || + update |= ctx->ubos[shader][index].buffer_offset != offset || !!res != !!buffer || (res && res->obj->buffer != new_res->obj->buffer) || ctx->ubos[shader][index].buffer_size != cb->buffer_size; @@ -1115,23 +1575,42 @@ zink_set_constant_buffer(struct pipe_context *pctx, } if (update) - zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, index, 1); + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, index, 1); } ALWAYS_INLINE static void -unbind_ssbo(struct zink_context *ctx, struct zink_resource *res, enum pipe_shader_type pstage, unsigned slot, bool writable) +unbind_descriptor_reads(struct zink_resource *res, bool is_compute) +{ + if (!res->sampler_bind_count[is_compute] && !res->image_bind_count[is_compute] && !res->all_bindless) + res->barrier_access[is_compute] &= ~VK_ACCESS_SHADER_READ_BIT; +} + +ALWAYS_INLINE static void +unbind_buffer_descriptor_reads(struct zink_resource *res, bool is_compute) +{ + if (!res->ssbo_bind_count[is_compute] && !res->all_bindless) + unbind_descriptor_reads(res, is_compute); +} + +ALWAYS_INLINE static void +unbind_ssbo(struct zink_context *ctx, struct zink_resource *res, gl_shader_stage pstage, unsigned slot, bool writable) { if (!res) return; res->ssbo_bind_mask[pstage] &= ~BITFIELD_BIT(slot); - update_res_bind_count(ctx, res, pstage == PIPE_SHADER_COMPUTE, true); + res->ssbo_bind_count[pstage == MESA_SHADER_COMPUTE]--; + unbind_buffer_descriptor_stage(res, pstage); + unbind_buffer_descriptor_reads(res, pstage == MESA_SHADER_COMPUTE); + update_res_bind_count(ctx, res, pstage == MESA_SHADER_COMPUTE, true); if (writable) - res->write_bind_count[pstage == PIPE_SHADER_COMPUTE]--; + res->write_bind_count[pstage == MESA_SHADER_COMPUTE]--; + if (!res->write_bind_count[pstage == MESA_SHADER_COMPUTE]) + res->barrier_access[pstage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_SHADER_WRITE_BIT; } static void 
zink_set_shader_buffers(struct pipe_context *pctx, - enum pipe_shader_type p_stage, + gl_shader_stage p_stage, unsigned start_slot, unsigned count, const struct pipe_shader_buffer *buffers, unsigned writable_bitmask) @@ -1142,43 +1621,52 @@ zink_set_shader_buffers(struct pipe_context *pctx, unsigned modified_bits = u_bit_consecutive(start_slot, count); unsigned old_writable_mask = ctx->writable_ssbos[p_stage]; + assert(!ctx->unordered_blitting); ctx->writable_ssbos[p_stage] &= ~modified_bits; ctx->writable_ssbos[p_stage] |= writable_bitmask << start_slot; for (unsigned i = 0; i < count; i++) { - struct pipe_shader_buffer *ssbo = &ctx->ssbos[p_stage][start_slot + i]; + unsigned slot = start_slot + i; + struct pipe_shader_buffer *ssbo = &ctx->ssbos[p_stage][slot]; struct zink_resource *res = ssbo->buffer ? zink_resource(ssbo->buffer) : NULL; - bool was_writable = old_writable_mask & BITFIELD64_BIT(start_slot + i); + bool was_writable = old_writable_mask & BITFIELD64_BIT(slot); if (buffers && buffers[i].buffer) { struct zink_resource *new_res = zink_resource(buffers[i].buffer); if (new_res != res) { - unbind_ssbo(ctx, res, p_stage, i, was_writable); - new_res->ssbo_bind_mask[p_stage] |= BITFIELD_BIT(i); - update_res_bind_count(ctx, new_res, p_stage == PIPE_SHADER_COMPUTE, false); + unbind_ssbo(ctx, res, p_stage, slot, was_writable); + new_res->ssbo_bind_mask[p_stage] |= BITFIELD_BIT(slot); + new_res->ssbo_bind_count[p_stage == MESA_SHADER_COMPUTE]++; + new_res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(p_stage); + update_res_bind_count(ctx, new_res, p_stage == MESA_SHADER_COMPUTE, false); } VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT; - if (ctx->writable_ssbos[p_stage] & BITFIELD64_BIT(start_slot + i)) { - new_res->write_bind_count[p_stage == PIPE_SHADER_COMPUTE]++; + if (ctx->writable_ssbos[p_stage] & BITFIELD64_BIT(slot)) { + new_res->write_bind_count[p_stage == MESA_SHADER_COMPUTE]++; access |= VK_ACCESS_SHADER_WRITE_BIT; } 
pipe_resource_reference(&ssbo->buffer, &new_res->base.b); - zink_batch_resource_usage_set(&ctx->batch, new_res, access & VK_ACCESS_SHADER_WRITE_BIT); + new_res->barrier_access[p_stage == MESA_SHADER_COMPUTE] |= access; ssbo->buffer_offset = buffers[i].buffer_offset; ssbo->buffer_size = MIN2(buffers[i].buffer_size, new_res->base.b.width0 - ssbo->buffer_offset); util_range_add(&new_res->base.b, &new_res->valid_buffer_range, ssbo->buffer_offset, ssbo->buffer_offset + ssbo->buffer_size); - zink_fake_buffer_barrier(new_res, access, - zink_pipeline_flags_from_pipe_stage(p_stage)); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, new_res, access, + new_res->gfx_barrier); + zink_batch_resource_usage_set(&ctx->batch, new_res, access & VK_ACCESS_SHADER_WRITE_BIT, true); update = true; - max_slot = MAX2(max_slot, start_slot + i); - update_descriptor_state_ssbo(ctx, p_stage, start_slot + i, new_res); + max_slot = MAX2(max_slot, slot); + update_descriptor_state_ssbo(ctx, p_stage, slot, new_res); + if (zink_resource_access_is_write(access)) + new_res->obj->unordered_write = false; + new_res->obj->unordered_read = false; } else { - update = !!res; + if (res) + update = true; ssbo->buffer_offset = 0; ssbo->buffer_size = 0; if (res) { - unbind_ssbo(ctx, res, p_stage, i, was_writable); - update_descriptor_state_ssbo(ctx, p_stage, start_slot + i, NULL); + unbind_ssbo(ctx, res, p_stage, slot, was_writable); + update_descriptor_state_ssbo(ctx, p_stage, slot, NULL); } pipe_resource_reference(&ssbo->buffer, NULL); } @@ -1186,26 +1674,26 @@ zink_set_shader_buffers(struct pipe_context *pctx, if (start_slot + count >= ctx->di.num_ssbos[p_stage]) ctx->di.num_ssbos[p_stage] = max_slot + 1; if (update) - zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, p_stage, ZINK_DESCRIPTOR_TYPE_SSBO, start_slot, count); + ctx->invalidate_descriptor_state(ctx, p_stage, ZINK_DESCRIPTOR_TYPE_SSBO, start_slot, count); } static void update_binds_for_samplerviews(struct zink_context *ctx, 
struct zink_resource *res, bool is_compute) { - VkImageLayout layout = get_layout_for_binding(res, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, is_compute); + VkImageLayout layout = get_layout_for_binding(ctx, res, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, is_compute); if (is_compute) { - u_foreach_bit(slot, res->sampler_binds[PIPE_SHADER_COMPUTE]) { - if (ctx->di.textures[PIPE_SHADER_COMPUTE][slot].imageLayout != layout) { - update_descriptor_state_sampler(ctx, PIPE_SHADER_COMPUTE, slot, res); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, PIPE_SHADER_COMPUTE, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); + u_foreach_bit(slot, res->sampler_binds[MESA_SHADER_COMPUTE]) { + if (ctx->di.textures[MESA_SHADER_COMPUTE][slot].imageLayout != layout) { + update_descriptor_state_sampler(ctx, MESA_SHADER_COMPUTE, slot, res); + ctx->invalidate_descriptor_state(ctx, MESA_SHADER_COMPUTE, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); } } } else { - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { u_foreach_bit(slot, res->sampler_binds[i]) { if (ctx->di.textures[i][slot].imageLayout != layout) { update_descriptor_state_sampler(ctx, i, slot, res); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); + ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); } } } @@ -1215,7 +1703,7 @@ update_binds_for_samplerviews(struct zink_context *ctx, struct zink_resource *re static void flush_pending_clears(struct zink_context *ctx, struct zink_resource *res) { - if (res->fb_binds && ctx->clears_enabled) + if (res->fb_bind_count && ctx->clears_enabled) zink_fb_clears_apply(ctx, &res->base.b); } @@ -1231,155 +1719,334 @@ unbind_shader_image_counts(struct zink_context *ctx, struct zink_resource *res, update_binds_for_samplerviews(ctx, res, is_compute); } -ALWAYS_INLINE static void +ALWAYS_INLINE static bool 
check_for_layout_update(struct zink_context *ctx, struct zink_resource *res, bool is_compute) { - VkImageLayout layout = res->bind_count[is_compute] ? zink_descriptor_util_image_layout_eval(res, is_compute) : VK_IMAGE_LAYOUT_UNDEFINED; - VkImageLayout other_layout = res->bind_count[!is_compute] ? zink_descriptor_util_image_layout_eval(res, !is_compute) : VK_IMAGE_LAYOUT_UNDEFINED; - if (res->bind_count[is_compute] && res->layout != layout) - _mesa_set_add(ctx->need_barriers[is_compute], res); - if (res->bind_count[!is_compute] && (layout != other_layout || res->layout != other_layout)) - _mesa_set_add(ctx->need_barriers[!is_compute], res); + VkImageLayout layout = res->bind_count[is_compute] ? zink_descriptor_util_image_layout_eval(ctx, res, is_compute) : VK_IMAGE_LAYOUT_UNDEFINED; + VkImageLayout other_layout = res->bind_count[!is_compute] ? zink_descriptor_util_image_layout_eval(ctx, res, !is_compute) : VK_IMAGE_LAYOUT_UNDEFINED; + bool ret = false; + if (!is_compute && res->fb_binds && !(ctx->feedback_loops & res->fb_binds)) { + /* always double check feedback loops */ + ret = !!_mesa_set_add(ctx->need_barriers[0], res); + } else { + if (res->bind_count[is_compute] && layout && res->layout != layout) + ret = !!_mesa_set_add(ctx->need_barriers[is_compute], res); + if (res->bind_count[!is_compute] && other_layout && (layout != other_layout || res->layout != other_layout)) + ret = !!_mesa_set_add(ctx->need_barriers[!is_compute], res); + } + return ret; } static void -unbind_shader_image(struct zink_context *ctx, enum pipe_shader_type stage, unsigned slot) +unbind_shader_image(struct zink_context *ctx, gl_shader_stage stage, unsigned slot) { struct zink_image_view *image_view = &ctx->image_views[stage][slot]; - bool is_compute = stage == PIPE_SHADER_COMPUTE; + bool is_compute = stage == MESA_SHADER_COMPUTE; if (!image_view->base.resource) return; struct zink_resource *res = zink_resource(image_view->base.resource); + res->image_binds[stage] &= ~BITFIELD_BIT(slot); 
unbind_shader_image_counts(ctx, res, is_compute, image_view->base.access & PIPE_IMAGE_ACCESS_WRITE); - + if (!res->write_bind_count[is_compute]) + res->barrier_access[stage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_SHADER_WRITE_BIT; + if (image_view->base.resource->target == PIPE_BUFFER) { - if (zink_batch_usage_exists(image_view->buffer_view->batch_uses)) - zink_batch_reference_bufferview(&ctx->batch, image_view->buffer_view); + unbind_buffer_descriptor_stage(res, stage); + unbind_buffer_descriptor_reads(res, stage == MESA_SHADER_COMPUTE); zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pipe_resource_reference(&image_view->base.resource, NULL); } else { + unbind_descriptor_stage(res, stage); + unbind_descriptor_reads(res, stage == MESA_SHADER_COMPUTE); if (!res->image_bind_count[is_compute]) check_for_layout_update(ctx, res, is_compute); - if (zink_batch_usage_exists(image_view->surface->batch_uses)) - zink_batch_reference_surface(&ctx->batch, image_view->surface); zink_surface_reference(zink_screen(ctx->base.screen), &image_view->surface, NULL); } - pipe_resource_reference(&image_view->base.resource, NULL); image_view->base.resource = NULL; image_view->surface = NULL; } +static struct zink_buffer_view * +create_image_bufferview(struct zink_context *ctx, const struct pipe_image_view *view) +{ + struct zink_resource *res = zink_resource(view->resource); + VkBufferViewCreateInfo bvci = create_bvci(ctx, res, view->format, view->u.buf.offset, view->u.buf.size); + struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, &bvci); + if (!buffer_view) + return NULL; + util_range_add(&res->base.b, &res->valid_buffer_range, view->u.buf.offset, + view->u.buf.offset + view->u.buf.size); + return buffer_view; +} + +static void +finalize_image_bind(struct zink_context *ctx, struct zink_resource *res, bool is_compute) +{ + /* if this is the first image bind and there are 
sampler binds, the image's sampler layout + * must be updated to GENERAL + */ + if (res->image_bind_count[is_compute] == 1 && + res->bind_count[is_compute] > 1) + update_binds_for_samplerviews(ctx, res, is_compute); + if (!check_for_layout_update(ctx, res, is_compute)) { + /* no deferred barrier: unset unordered usage immediately */ + // TODO: figure out a way to link up layouts between unordered and main cmdbuf + // if (zink_resource_access_is_write(res->barrier_access[is_compute])) + res->obj->unordered_write = false; + res->obj->unordered_read = false; + } +} + +static struct zink_surface * +create_image_surface(struct zink_context *ctx, const struct pipe_image_view *view, bool is_compute) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_resource *res = zink_resource(view->resource); + struct pipe_surface tmpl = {0}; + enum pipe_texture_target target = res->base.b.target; + tmpl.format = view->format; + tmpl.u.tex.level = view->u.tex.level; + tmpl.u.tex.first_layer = view->u.tex.first_layer; + tmpl.u.tex.last_layer = view->u.tex.last_layer; + unsigned depth = 1 + tmpl.u.tex.last_layer - tmpl.u.tex.first_layer; + switch (target) { + case PIPE_TEXTURE_3D: + if (depth < u_minify(res->base.b.depth0, view->u.tex.level)) { + assert(depth == 1); + target = PIPE_TEXTURE_2D; + if (!screen->info.have_EXT_image_2d_view_of_3d || + !screen->info.view2d_feats.image2DViewOf3D) { + static bool warned = false; + warn_missing_feature(warned, "image2DViewOf3D"); + } + } else { + assert(tmpl.u.tex.first_layer == 0); + tmpl.u.tex.last_layer = 0; + } + break; + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + if (depth < res->base.b.array_size && depth == 1) + target = target == PIPE_TEXTURE_2D_ARRAY ? 
PIPE_TEXTURE_2D : PIPE_TEXTURE_1D; + break; + default: break; + } + if (zink_format_needs_mutable(view->resource->format, view->format)) + /* mutable not set by default */ + zink_resource_object_init_mutable(ctx, res); + VkImageViewCreateInfo ivci = create_ivci(screen, res, &tmpl, target); + struct zink_surface *surface = zink_get_surface(ctx, view->resource, &tmpl, &ivci); + if (!surface) + return NULL; + if (is_compute) + flush_pending_clears(ctx, res); + return surface; +} + static void zink_set_shader_images(struct pipe_context *pctx, - enum pipe_shader_type p_stage, + gl_shader_stage shader_type, unsigned start_slot, unsigned count, unsigned unbind_num_trailing_slots, const struct pipe_image_view *images) { struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); bool update = false; + bool is_compute = shader_type == MESA_SHADER_COMPUTE; + assert(!ctx->unordered_blitting); for (unsigned i = 0; i < count; i++) { - struct zink_image_view *image_view = &ctx->image_views[p_stage][start_slot + i]; - if (images && images[i].resource) { - struct zink_resource *res = zink_resource(images[i].resource); - struct zink_resource *old_res = zink_resource(image_view->base.resource); + struct zink_image_view *a = &ctx->image_views[shader_type][start_slot + i]; + const struct pipe_image_view *b = images ? &images[i] : NULL; + struct zink_resource *res = b ? 
zink_resource(b->resource) : NULL; + if (b && b->resource) { if (!zink_resource_object_init_storage(ctx, res)) { debug_printf("couldn't create storage image!"); continue; } - if (res != old_res) { - if (old_res) { - unbind_shader_image_counts(ctx, old_res, p_stage == PIPE_SHADER_COMPUTE, image_view->base.access & PIPE_IMAGE_ACCESS_WRITE); - if (!old_res->obj->is_buffer && !old_res->image_bind_count[p_stage == PIPE_SHADER_COMPUTE]) - check_for_layout_update(ctx, old_res, p_stage == PIPE_SHADER_COMPUTE); - } - update_res_bind_count(ctx, res, p_stage == PIPE_SHADER_COMPUTE, false); - } - util_copy_image_view(&image_view->base, images + i); + VkAccessFlags access = 0; - if (image_view->base.access & PIPE_IMAGE_ACCESS_WRITE) { - zink_resource(image_view->base.resource)->write_bind_count[p_stage == PIPE_SHADER_COMPUTE]++; + if (b->access & PIPE_IMAGE_ACCESS_WRITE) { access |= VK_ACCESS_SHADER_WRITE_BIT; } - if (image_view->base.access & PIPE_IMAGE_ACCESS_READ) { + if (b->access & PIPE_IMAGE_ACCESS_READ) { access |= VK_ACCESS_SHADER_READ_BIT; } - res->image_bind_count[p_stage == PIPE_SHADER_COMPUTE]++; - if (images[i].resource->target == PIPE_BUFFER) { - image_view->buffer_view = get_buffer_view(ctx, res, images[i].format, images[i].u.buf.offset, images[i].u.buf.size); - assert(image_view->buffer_view); - util_range_add(&res->base.b, &res->valid_buffer_range, images[i].u.buf.offset, - images[i].u.buf.offset + images[i].u.buf.size); - zink_batch_usage_set(&image_view->buffer_view->batch_uses, ctx->batch.state); - zink_fake_buffer_barrier(res, access, - zink_pipeline_flags_from_pipe_stage(p_stage)); + + bool changed = false; + if (!a->base.resource || a->base.resource != b->resource) { + /* this needs a full unbind+bind */ + changed = true; + unbind_shader_image(ctx, shader_type, start_slot + i); + update_res_bind_count(ctx, res, is_compute, false); + res->image_bind_count[is_compute]++; + /* always increment write_bind_count on new bind */ + if (b->access & 
PIPE_IMAGE_ACCESS_WRITE) + res->write_bind_count[is_compute]++; + /* db mode refcounts these */ + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && b->resource->target == PIPE_BUFFER) + pipe_resource_reference(&a->base.resource, b->resource); } else { - struct pipe_surface tmpl = {0}; - tmpl.format = images[i].format; - tmpl.nr_samples = 1; - tmpl.u.tex.level = images[i].u.tex.level; - tmpl.u.tex.first_layer = images[i].u.tex.first_layer; - tmpl.u.tex.last_layer = images[i].u.tex.last_layer; - struct pipe_surface *psurf = pctx->create_surface(pctx, &res->base.b, &tmpl); - /* this is actually a zink_ctx_surface, but we just want the inner surface */ - image_view->surface = zink_csurface(psurf); - FREE(psurf); - assert(image_view->surface); - /* if this is the first image bind and there are sampler binds, the image's sampler layout - * must be updated to GENERAL - */ - if (res->image_bind_count[p_stage == PIPE_SHADER_COMPUTE] == 1 && - res->bind_count[p_stage == PIPE_SHADER_COMPUTE] > 1) - update_binds_for_samplerviews(ctx, res, p_stage == PIPE_SHADER_COMPUTE); - check_for_layout_update(ctx, res, p_stage == PIPE_SHADER_COMPUTE); - zink_batch_usage_set(&image_view->surface->batch_uses, ctx->batch.state); - flush_pending_clears(ctx, res); - } - zink_batch_resource_usage_set(&ctx->batch, zink_resource(image_view->base.resource), - zink_resource_access_is_write(access)); - update = true; - update_descriptor_state_image(ctx, p_stage, start_slot + i, res); - } else if (image_view->base.resource) { - update |= !!image_view->base.resource; + /* resource matches: check for write flag change and partial rebind */ + + /* previous bind didn't have write: increment */ + if ((b->access & PIPE_IMAGE_ACCESS_WRITE) && !(a->base.access & PIPE_IMAGE_ACCESS_WRITE)) + res->write_bind_count[is_compute]++; + /* previous bind had write: decrement */ + else if (!(b->access & PIPE_IMAGE_ACCESS_WRITE) && (a->base.access & PIPE_IMAGE_ACCESS_WRITE)) { + res->write_bind_count[is_compute]--; 
+ if (!res->write_bind_count[is_compute]) + res->barrier_access[is_compute] &= ~VK_ACCESS_SHADER_WRITE_BIT; + } + + /* this may need a partial rebind */ + changed = a->base.format != b->format || zink_resource(a->base.resource)->obj != res->obj; + if (!changed) { + if (b->resource->target == PIPE_BUFFER) { + /* db mode has no partial rebind */ + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) + changed = !!memcmp(&a->base.u.buf, &b->u.buf, sizeof(b->u.buf)); + } else { + /* no memcmp, these are bitfields */ + changed = a->base.u.tex.first_layer != b->u.tex.first_layer || + a->base.u.tex.last_layer != b->u.tex.last_layer || + a->base.u.tex.level != b->u.tex.level; + } + } + } + + if (changed) { + /* this is a partial rebind */ + if (b->resource->target == PIPE_BUFFER) { + /* db has no partial rebind */ + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) { + /* bufferview rebind: get updated bufferview and unref old one */ + struct zink_buffer_view *bv = create_image_bufferview(ctx, b); + /* identical rebind was already checked above */ + assert(bv && bv != a->buffer_view); + zink_buffer_view_reference(screen, &a->buffer_view, NULL); + /* ref already added by create */ + a->buffer_view = bv; + } + } else { + /* image rebind: get updated surface and unref old one */ + struct zink_surface *surface = create_image_surface(ctx, b, is_compute); + /* identical rebind was already checked above */ + assert(surface && surface != a->surface); + zink_surface_reference(screen, &a->surface, NULL); + /* ref already added by create */ + a->surface = surface; + } + } - unbind_shader_image(ctx, p_stage, start_slot + i); - update_descriptor_state_image(ctx, p_stage, start_slot + i, NULL); + /* these operations occur regardless of binding/rebinding */ + res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(shader_type); + res->barrier_access[is_compute] |= access; + if (b->resource->target == PIPE_BUFFER) { + screen->buffer_barrier(ctx, res, access, + res->gfx_barrier); + 
zink_batch_resource_usage_set(&ctx->batch, res, + zink_resource_access_is_write(access), true); + if (zink_resource_access_is_write(access)) + res->obj->unordered_write = false; + res->obj->unordered_read = false; + } else { + finalize_image_bind(ctx, res, is_compute); + zink_batch_resource_usage_set(&ctx->batch, res, + zink_resource_access_is_write(access), false); + } + memcpy(&a->base, images + i, sizeof(struct pipe_image_view)); + if (b->resource->target == PIPE_BUFFER) { + /* always enforce limit clamping */ + unsigned blocksize = util_format_get_blocksize(a->base.format); + a->base.u.buf.size = MIN2(a->base.u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) * blocksize; + } + update = true; + res->image_binds[shader_type] |= BITFIELD_BIT(start_slot + i); + } else if (a->base.resource) { + update = true; + unbind_shader_image(ctx, shader_type, start_slot + i); } + update_descriptor_state_image(ctx, shader_type, start_slot + i, res); } for (unsigned i = 0; i < unbind_num_trailing_slots; i++) { - update |= !!ctx->image_views[p_stage][start_slot + count + i].base.resource; - unbind_shader_image(ctx, p_stage, start_slot + count + i); - update_descriptor_state_image(ctx, p_stage, start_slot + count + i, NULL); + update |= !!ctx->image_views[shader_type][start_slot + count + i].base.resource; + unbind_shader_image(ctx, shader_type, start_slot + count + i); + update_descriptor_state_image(ctx, shader_type, start_slot + count + i, NULL); } - ctx->di.num_images[p_stage] = start_slot + count; + ctx->di.num_images[shader_type] = start_slot + count; if (update) - zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, p_stage, ZINK_DESCRIPTOR_TYPE_IMAGE, start_slot, count); + ctx->invalidate_descriptor_state(ctx, shader_type, ZINK_DESCRIPTOR_TYPE_IMAGE, start_slot, count); } -ALWAYS_INLINE static void -check_samplerview_for_batch_ref(struct zink_context *ctx, struct zink_sampler_view *sv) +static void 
+update_feedback_loop_dynamic_state(struct zink_context *ctx) { - const struct zink_resource *res = zink_resource(sv->base.texture); - if ((res->obj->is_buffer && zink_batch_usage_exists(sv->buffer_view->batch_uses)) || - (!res->obj->is_buffer && zink_batch_usage_exists(sv->image_view->batch_uses))) - zink_batch_reference_sampler_view(&ctx->batch, sv); + if (!zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_dynamic_state) + return; + VkImageAspectFlags aspects = 0; + if (ctx->feedback_loops & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS)) + aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + if (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) + aspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->cmdbuf, aspects); +} + +static void +update_feedback_loop_state(struct zink_context *ctx, unsigned idx, unsigned feedback_loops) +{ + if (feedback_loops != ctx->feedback_loops) { + if (idx == PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop_zs) { + if (ctx->gfx_pipeline_state.feedback_loop_zs) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.feedback_loop_zs = false; + } else if (idx < PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop) { + if (ctx->gfx_pipeline_state.feedback_loop) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.feedback_loop = false; + } + update_feedback_loop_dynamic_state(ctx); + } + ctx->feedback_loops = feedback_loops; } ALWAYS_INLINE static void -unbind_samplerview(struct zink_context *ctx, enum pipe_shader_type stage, unsigned slot) +unbind_samplerview(struct zink_context *ctx, gl_shader_stage stage, unsigned slot) { struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[stage][slot]); if (!sv || !sv->base.texture) return; struct zink_resource *res = zink_resource(sv->base.texture); - check_samplerview_for_batch_ref(ctx, sv); - 
update_res_bind_count(ctx, res, stage == PIPE_SHADER_COMPUTE, true); + res->sampler_bind_count[stage == MESA_SHADER_COMPUTE]--; + if (stage != MESA_SHADER_COMPUTE && !res->sampler_bind_count[0] && res->fb_bind_count) { + u_foreach_bit(idx, res->fb_binds) { + if (ctx->feedback_loops & BITFIELD_BIT(idx)) { + ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + ctx->rp_layout_changed = true; + } + update_feedback_loop_state(ctx, idx, ctx->feedback_loops & ~BITFIELD_BIT(idx)); + } + } + update_res_bind_count(ctx, res, stage == MESA_SHADER_COMPUTE, true); res->sampler_binds[stage] &= ~BITFIELD_BIT(slot); + if (res->obj->is_buffer) { + unbind_buffer_descriptor_stage(res, stage); + unbind_buffer_descriptor_reads(res, stage == MESA_SHADER_COMPUTE); + } else { + unbind_descriptor_stage(res, stage); + unbind_descriptor_reads(res, stage == MESA_SHADER_COMPUTE); + if (!res->sampler_bind_count[stage == MESA_SHADER_COMPUTE]) + check_for_layout_update(ctx, res, stage == MESA_SHADER_COMPUTE); + } + assert(slot < 32); + ctx->di.zs_swizzle[stage].mask &= ~BITFIELD_BIT(slot); } static void zink_set_sampler_views(struct pipe_context *pctx, - enum pipe_shader_type shader_type, + gl_shader_stage shader_type, unsigned start_slot, unsigned num_views, unsigned unbind_num_trailing_slots, @@ -1387,81 +2054,518 @@ zink_set_sampler_views(struct pipe_context *pctx, struct pipe_sampler_view **views) { struct zink_context *ctx = zink_context(pctx); - unsigned i; + + const uint32_t mask = BITFIELD_RANGE(start_slot, num_views); + uint32_t shadow_mask = ctx->di.zs_swizzle[shader_type].mask; + ctx->di.cubes[shader_type] &= ~mask; bool update = false; - for (i = 0; i < num_views; ++i) { - struct pipe_sampler_view *pview = views ? views[i] : NULL; - struct zink_sampler_view *a = zink_sampler_view(ctx->sampler_views[shader_type][start_slot + i]); - struct zink_sampler_view *b = zink_sampler_view(pview); - struct zink_resource *res = b ? 
zink_resource(b->base.texture) : NULL; - if (b && b->base.texture) { - if (!a || zink_resource(a->base.texture) != res) { - if (a) - unbind_samplerview(ctx, shader_type, start_slot + i); - update_res_bind_count(ctx, res, shader_type == PIPE_SHADER_COMPUTE, false); - } else if (a != b) { - check_samplerview_for_batch_ref(ctx, a); - } - if (res->base.b.target == PIPE_BUFFER) { - if (b->buffer_view->bvci.buffer != res->obj->buffer) { - /* if this resource has been rebound while it wasn't set here, - * its backing resource will have changed and thus we need to update - * the bufferview - */ - struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, b->base.format, b->base.u.buf.offset, b->base.u.buf.size); - assert(buffer_view != b->buffer_view); - if (zink_batch_usage_exists(b->buffer_view->batch_uses)) - zink_batch_reference_bufferview(&ctx->batch, b->buffer_view); - zink_buffer_view_reference(zink_screen(ctx->base.screen), &b->buffer_view, NULL); - b->buffer_view = buffer_view; - update = true; + bool shadow_update = false; + if (views) { + for (unsigned i = 0; i < num_views; ++i) { + struct pipe_sampler_view *pview = views[i]; + struct zink_sampler_view *a = zink_sampler_view(ctx->sampler_views[shader_type][start_slot + i]); + struct zink_sampler_view *b = zink_sampler_view(pview); + + if (a == b) { + if (take_ownership) { + struct pipe_sampler_view *view = views[i]; + pipe_sampler_view_reference(&view, NULL); } - zink_batch_usage_set(&b->buffer_view->batch_uses, ctx->batch.state); - zink_fake_buffer_barrier(res, VK_ACCESS_SHADER_READ_BIT, - zink_pipeline_flags_from_pipe_stage(shader_type)); - if (!a || a->buffer_view->buffer_view != b->buffer_view->buffer_view) - update = true; - } else if (!res->obj->is_buffer) { - if (res->obj != b->image_view->obj) { - struct pipe_surface *psurf = &b->image_view->base; - VkImageView iv = b->image_view->image_view; - zink_rebind_surface(ctx, &psurf); - b->image_view = zink_surface(psurf); - update |= iv != 
b->image_view->image_view; - } else if (a != b) - update = true; - flush_pending_clears(ctx, res); - check_for_layout_update(ctx, res, shader_type == PIPE_SHADER_COMPUTE); - zink_batch_usage_set(&b->image_view->batch_uses, ctx->batch.state); - if (!a) - update = true; - } - res->sampler_binds[shader_type] |= BITFIELD_BIT(start_slot + i); - zink_batch_resource_usage_set(&ctx->batch, res, false); - } else if (a) { - unbind_samplerview(ctx, shader_type, start_slot + i); - update = true; - } - if (take_ownership) { - pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], NULL); - ctx->sampler_views[shader_type][start_slot + i] = pview; - } else { - pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], pview); + continue; + } + + struct zink_resource *res = b ? zink_resource(b->base.texture) : NULL; + if (b && b->base.texture) { + if (!a || zink_resource(a->base.texture) != res) { + if (a) + unbind_samplerview(ctx, shader_type, start_slot + i); + update_res_bind_count(ctx, res, shader_type == MESA_SHADER_COMPUTE, false); + res->sampler_bind_count[shader_type == MESA_SHADER_COMPUTE]++; + res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(shader_type); + res->barrier_access[shader_type == MESA_SHADER_COMPUTE] |= VK_ACCESS_SHADER_READ_BIT; + } + if (res->base.b.target == PIPE_BUFFER) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (!a || a->base.texture != b->base.texture || zink_resource(a->base.texture)->obj != res->obj || + memcmp(&a->base.u.buf, &b->base.u.buf, sizeof(b->base.u.buf))) + update = true; + } else if (b->buffer_view->bvci.buffer != res->obj->buffer) { + /* if this resource has been rebound while it wasn't set here, + * its backing resource will have changed and thus we need to update + * the bufferview + */ + VkBufferViewCreateInfo bvci = b->buffer_view->bvci; + bvci.buffer = res->obj->buffer; + struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, &bvci); + assert(buffer_view 
!= b->buffer_view); + zink_buffer_view_reference(zink_screen(ctx->base.screen), &b->buffer_view, NULL); + b->buffer_view = buffer_view; + update = true; + } else if (!a || a->buffer_view->buffer_view != b->buffer_view->buffer_view) + update = true; + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT, + res->gfx_barrier); + zink_batch_resource_usage_set(&ctx->batch, res, false, true); + if (!ctx->unordered_blitting) + res->obj->unordered_read = false; + } else { + if (zink_format_needs_mutable(res->base.b.format, b->image_view->base.format)) + /* mutable not set by default */ + zink_resource_object_init_mutable(ctx, res); + if (res->obj != b->image_view->obj) { + struct pipe_surface *psurf = &b->image_view->base; + VkImageView iv = b->image_view->image_view; + zink_rebind_surface(ctx, &psurf); + b->image_view = zink_surface(psurf); + update |= iv != b->image_view->image_view; + } else if (a != b) + update = true; + if (shader_type == MESA_SHADER_COMPUTE) + flush_pending_clears(ctx, res); + if (b->cube_array) { + ctx->di.cubes[shader_type] |= BITFIELD_BIT(start_slot + i); + } + if (!check_for_layout_update(ctx, res, shader_type == MESA_SHADER_COMPUTE) && !ctx->unordered_blitting) { + /* no deferred barrier: unset unordered usage immediately */ + res->obj->unordered_read = false; + // TODO: figure out a way to link up layouts between unordered and main cmdbuf + res->obj->unordered_write = false; + } + if (!a) + update = true; + zink_batch_resource_usage_set(&ctx->batch, res, false, false); + if (b->zs_view) { + assert(start_slot + i < 32); //bitfield size + ctx->di.zs_swizzle[shader_type].mask |= BITFIELD_BIT(start_slot + i); + /* this is already gonna be slow, so don't bother trying to micro-optimize */ + shadow_update |= memcmp(&ctx->di.zs_swizzle[shader_type].swizzle[start_slot + i], + &b->swizzle, sizeof(struct zink_zs_swizzle)); + memcpy(&ctx->di.zs_swizzle[shader_type].swizzle[start_slot + i], &b->swizzle, sizeof(struct 
zink_zs_swizzle)); + } else { + assert(start_slot + i < 32); //bitfield size + ctx->di.zs_swizzle[shader_type].mask &= ~BITFIELD_BIT(start_slot + i); + } + } + res->sampler_binds[shader_type] |= BITFIELD_BIT(start_slot + i); + } else if (a) { + unbind_samplerview(ctx, shader_type, start_slot + i); + update = true; + } + if (take_ownership) { + pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], NULL); + ctx->sampler_views[shader_type][start_slot + i] = pview; + } else { + pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], pview); + } + update_descriptor_state_sampler(ctx, shader_type, start_slot + i, res); } - update_descriptor_state_sampler(ctx, shader_type, start_slot + i, res); + } else { + unbind_num_trailing_slots += num_views; + num_views = 0; } - for (; i < num_views + unbind_num_trailing_slots; ++i) { - update |= !!ctx->sampler_views[shader_type][start_slot + i]; - unbind_samplerview(ctx, shader_type, start_slot + i); + for (unsigned i = 0; i < unbind_num_trailing_slots; ++i) { + unsigned slot = start_slot + num_views + i; + update |= !!ctx->sampler_views[shader_type][slot]; + unbind_samplerview(ctx, shader_type, slot); pipe_sampler_view_reference( - &ctx->sampler_views[shader_type][start_slot + i], + &ctx->sampler_views[shader_type][slot], NULL); - update_descriptor_state_sampler(ctx, shader_type, start_slot + i, NULL); + update_descriptor_state_sampler(ctx, shader_type, slot, NULL); } ctx->di.num_sampler_views[shader_type] = start_slot + num_views; - if (update) - zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, shader_type, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, num_views); + if (update) { + struct zink_screen *screen = zink_screen(pctx->screen); + ctx->invalidate_descriptor_state(ctx, shader_type, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, num_views); + if (!screen->info.have_EXT_non_seamless_cube_map) + update_nonseamless_shader_key(ctx, shader_type); + shadow_update |= 
shadow_mask != ctx->di.zs_swizzle[shader_type].mask; + zink_set_zs_needs_shader_swizzle_key(ctx, shader_type, shadow_update); + } +} + +static uint64_t +zink_create_texture_handle(struct pipe_context *pctx, struct pipe_sampler_view *view, const struct pipe_sampler_state *state) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_resource *res = zink_resource(view->texture); + struct zink_sampler_view *sv = zink_sampler_view(view); + struct zink_bindless_descriptor *bd; + bd = calloc(1, sizeof(struct zink_bindless_descriptor)); + if (!bd) + return 0; + + bd->sampler = pctx->create_sampler_state(pctx, state); + if (!bd->sampler) { + free(bd); + return 0; + } + + bd->ds.is_buffer = res->base.b.target == PIPE_BUFFER; + if (res->base.b.target == PIPE_BUFFER) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + pipe_resource_reference(&bd->ds.db.pres, view->texture); + bd->ds.db.format = view->format; + bd->ds.db.offset = view->u.buf.offset; + bd->ds.db.size = view->u.buf.size; + } else { + zink_buffer_view_reference(zink_screen(pctx->screen), &bd->ds.bufferview, sv->buffer_view); + } + } else { + zink_surface_reference(zink_screen(pctx->screen), &bd->ds.surface, sv->image_view); + } + uint64_t handle = util_idalloc_alloc(&ctx->di.bindless[bd->ds.is_buffer].tex_slots); + if (bd->ds.is_buffer) + handle += ZINK_MAX_BINDLESS_HANDLES; + bd->handle = handle; + _mesa_hash_table_insert(&ctx->di.bindless[bd->ds.is_buffer].tex_handles, (void*)(uintptr_t)handle, bd); + return handle; +} + +static void +zink_delete_texture_handle(struct pipe_context *pctx, uint64_t handle) +{ + struct zink_context *ctx = zink_context(pctx); + bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle); + struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].tex_handles, (void*)(uintptr_t)handle); + assert(he); + struct zink_bindless_descriptor *bd = he->data; + struct zink_descriptor_surface *ds = &bd->ds; + 
_mesa_hash_table_remove(&ctx->di.bindless[is_buffer].tex_handles, he); + uint32_t h = handle; + util_dynarray_append(&ctx->batch.state->bindless_releases[0], uint32_t, h); + + if (ds->is_buffer) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + pipe_resource_reference(&ds->db.pres, NULL); + } else { + zink_buffer_view_reference(zink_screen(pctx->screen), &ds->bufferview, NULL); + } + } else { + zink_surface_reference(zink_screen(pctx->screen), &ds->surface, NULL); + pctx->delete_sampler_state(pctx, bd->sampler); + } + free(ds); +} + +static void +rebind_bindless_bufferview(struct zink_context *ctx, struct zink_resource *res, struct zink_descriptor_surface *ds) +{ + /* descriptor buffer is unaffected by this */ + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + return; + /* if this resource has been rebound while it wasn't set here, + * its backing resource will have changed and thus we need to update + * the bufferview + */ + VkBufferViewCreateInfo bvci = ds->bufferview->bvci; + bvci.buffer = res->obj->buffer; + struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, &bvci); + assert(buffer_view != ds->bufferview); + zink_buffer_view_reference(zink_screen(ctx->base.screen), &ds->bufferview, NULL); + ds->bufferview = buffer_view; +} + +static void +zero_bindless_descriptor(struct zink_context *ctx, uint32_t handle, bool is_buffer, bool is_image) +{ + if (likely(zink_screen(ctx->base.screen)->info.rb2_feats.nullDescriptor)) { + if (is_buffer) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.bindless[is_image].db.buffer_infos[handle].address = 0; + ctx->di.bindless[is_image].db.buffer_infos[handle].range = 0; + } else { + VkBufferView *bv = &ctx->di.bindless[is_image].t.buffer_infos[handle]; + *bv = VK_NULL_HANDLE; + } + } else { + VkDescriptorImageInfo *ii = &ctx->di.bindless[is_image].img_infos[handle]; + memset(ii, 0, sizeof(*ii)); + } + } else { + if (is_buffer) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) 
{ + ctx->di.bindless[is_image].db.buffer_infos[handle].address = zink_resource(ctx->dummy_bufferview->pres)->obj->bda; + ctx->di.bindless[is_image].db.buffer_infos[handle].range = 1; + } else { + VkBufferView *bv = &ctx->di.bindless[is_image].t.buffer_infos[handle]; + struct zink_buffer_view *null_bufferview = ctx->dummy_bufferview; + *bv = null_bufferview->buffer_view; + } + } else { + struct zink_surface *null_surface = zink_get_dummy_surface(ctx, 0); + VkDescriptorImageInfo *ii = &ctx->di.bindless[is_image].img_infos[handle]; + ii->sampler = VK_NULL_HANDLE; + ii->imageView = null_surface->image_view; + ii->imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + } +} + +static void +unbind_bindless_descriptor(struct zink_context *ctx, struct zink_resource *res) +{ + if (!res->bindless[1]) { + /* check to remove write access */ + for (unsigned i = 0; i < 2; i++) { + if (!res->write_bind_count[i]) + res->barrier_access[i] &= ~VK_ACCESS_SHADER_WRITE_BIT; + } + } + bool is_buffer = res->base.b.target == PIPE_BUFFER; + if (!res->all_bindless) { + /* check to remove read access */ + if (is_buffer) { + for (unsigned i = 0; i < 2; i++) + unbind_buffer_descriptor_reads(res, i); + } else { + for (unsigned i = 0; i < 2; i++) + unbind_descriptor_reads(res, i); + } + } + for (unsigned i = 0; i < 2; i++) { + if (!res->image_bind_count[i]) + check_for_layout_update(ctx, res, i); + } +} + +static void +zink_make_texture_handle_resident(struct pipe_context *pctx, uint64_t handle, bool resident) +{ + struct zink_context *ctx = zink_context(pctx); + bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle); + struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].tex_handles, (void*)(uintptr_t)handle); + assert(he); + struct zink_bindless_descriptor *bd = he->data; + struct zink_descriptor_surface *ds = &bd->ds; + struct zink_resource *res = zink_descriptor_surface_resource(ds); + if (is_buffer) + handle -= ZINK_MAX_BINDLESS_HANDLES; + if (resident) { + update_res_bind_count(ctx, 
res, false, false); + update_res_bind_count(ctx, res, true, false); + res->bindless[0]++; + if (is_buffer) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.bindless[0].db.buffer_infos[handle].address = res->obj->bda + ds->db.offset; + ctx->di.bindless[0].db.buffer_infos[handle].range = ds->db.size; + ctx->di.bindless[0].db.buffer_infos[handle].format = zink_get_format(zink_screen(ctx->base.screen), ds->db.format); + } else { + if (ds->bufferview->bvci.buffer != res->obj->buffer) + rebind_bindless_bufferview(ctx, res, ds); + VkBufferView *bv = &ctx->di.bindless[0].t.buffer_infos[handle]; + *bv = ds->bufferview->buffer_view; + } + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + zink_batch_resource_usage_set(&ctx->batch, res, false, true); + res->obj->unordered_read = false; + } else { + VkDescriptorImageInfo *ii = &ctx->di.bindless[0].img_infos[handle]; + ii->sampler = bd->sampler->sampler; + ii->imageView = ds->surface->image_view; + ii->imageLayout = zink_descriptor_util_image_layout_eval(ctx, res, false); + flush_pending_clears(ctx, res); + if (!check_for_layout_update(ctx, res, false)) { + res->obj->unordered_read = false; + // TODO: figure out a way to link up layouts between unordered and main cmdbuf + res->obj->unordered_write = false; + } + if (!check_for_layout_update(ctx, res, true)) { + res->obj->unordered_read = false; + // TODO: figure out a way to link up layouts between unordered and main cmdbuf + res->obj->unordered_write = false; + } + zink_batch_resource_usage_set(&ctx->batch, res, false, false); + res->obj->unordered_write = false; + } + res->gfx_barrier |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + res->barrier_access[0] |= VK_ACCESS_SHADER_READ_BIT; + res->barrier_access[1] |= VK_ACCESS_SHADER_READ_BIT; + util_dynarray_append(&ctx->di.bindless[0].resident, struct 
zink_bindless_descriptor *, bd); + uint32_t h = is_buffer ? handle + ZINK_MAX_BINDLESS_HANDLES : handle; + util_dynarray_append(&ctx->di.bindless[0].updates, uint32_t, h); + } else { + zero_bindless_descriptor(ctx, handle, is_buffer, false); + util_dynarray_delete_unordered(&ctx->di.bindless[0].resident, struct zink_bindless_descriptor *, bd); + update_res_bind_count(ctx, res, false, true); + update_res_bind_count(ctx, res, true, true); + res->bindless[0]--; + unbind_bindless_descriptor(ctx, res); + } + ctx->di.bindless_dirty[0] = true; +} + +static uint64_t +zink_create_image_handle(struct pipe_context *pctx, const struct pipe_image_view *view) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_resource *res = zink_resource(view->resource); + struct zink_bindless_descriptor *bd; + if (!zink_resource_object_init_storage(ctx, res)) { + debug_printf("couldn't create storage image!"); + return 0; + } + bd = calloc(1, sizeof(struct zink_bindless_descriptor)); + if (!bd) + return 0; + bd->sampler = NULL; + + bd->ds.is_buffer = res->base.b.target == PIPE_BUFFER; + if (res->base.b.target == PIPE_BUFFER) + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + pipe_resource_reference(&bd->ds.db.pres, view->resource); + bd->ds.db.format = view->format; + bd->ds.db.offset = view->u.buf.offset; + bd->ds.db.size = view->u.buf.size; + } else { + bd->ds.bufferview = create_image_bufferview(ctx, view); + } + else + bd->ds.surface = create_image_surface(ctx, view, false); + uint64_t handle = util_idalloc_alloc(&ctx->di.bindless[bd->ds.is_buffer].img_slots); + if (bd->ds.is_buffer) + handle += ZINK_MAX_BINDLESS_HANDLES; + bd->handle = handle; + _mesa_hash_table_insert(&ctx->di.bindless[bd->ds.is_buffer].img_handles, (void*)(uintptr_t)handle, bd); + return handle; +} + +static void +zink_delete_image_handle(struct pipe_context *pctx, uint64_t handle) +{ + struct zink_context *ctx = zink_context(pctx); + bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle); + struct 
hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].img_handles, (void*)(uintptr_t)handle); + assert(he); + struct zink_descriptor_surface *ds = he->data; + _mesa_hash_table_remove(&ctx->di.bindless[is_buffer].img_handles, he); + uint32_t h = handle; + util_dynarray_append(&ctx->batch.state->bindless_releases[1], uint32_t, h); + + if (ds->is_buffer) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + pipe_resource_reference(&ds->db.pres, NULL); + } else { + zink_buffer_view_reference(zink_screen(pctx->screen), &ds->bufferview, NULL); + } + } else { + zink_surface_reference(zink_screen(pctx->screen), &ds->surface, NULL); + } + free(ds); +} + +static void +zink_make_image_handle_resident(struct pipe_context *pctx, uint64_t handle, unsigned paccess, bool resident) +{ + struct zink_context *ctx = zink_context(pctx); + bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle); + struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].img_handles, (void*)(uintptr_t)handle); + assert(he); + struct zink_bindless_descriptor *bd = he->data; + struct zink_descriptor_surface *ds = &bd->ds; + bd->access = paccess; + struct zink_resource *res = zink_descriptor_surface_resource(ds); + VkAccessFlags access = 0; + if (paccess & PIPE_IMAGE_ACCESS_WRITE) { + if (resident) { + res->write_bind_count[0]++; + res->write_bind_count[1]++; + } else { + res->write_bind_count[0]--; + res->write_bind_count[1]--; + } + access |= VK_ACCESS_SHADER_WRITE_BIT; + } + if (paccess & PIPE_IMAGE_ACCESS_READ) { + access |= VK_ACCESS_SHADER_READ_BIT; + } + if (is_buffer) + handle -= ZINK_MAX_BINDLESS_HANDLES; + if (resident) { + update_res_bind_count(ctx, res, false, false); + update_res_bind_count(ctx, res, true, false); + res->image_bind_count[0]++; + res->image_bind_count[1]++; + res->bindless[1]++; + if (is_buffer) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.bindless[0].db.buffer_infos[handle].address = res->obj->bda + ds->db.offset; + 
ctx->di.bindless[0].db.buffer_infos[handle].range = ds->db.size; + ctx->di.bindless[0].db.buffer_infos[handle].format = zink_get_format(zink_screen(ctx->base.screen), ds->db.format); + } else { + if (ds->bufferview->bvci.buffer != res->obj->buffer) + rebind_bindless_bufferview(ctx, res, ds); + VkBufferView *bv = &ctx->di.bindless[1].t.buffer_infos[handle]; + *bv = ds->bufferview->buffer_view; + } + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + zink_batch_resource_usage_set(&ctx->batch, res, zink_resource_access_is_write(access), true); + if (zink_resource_access_is_write(access)) + res->obj->unordered_write = false; + res->obj->unordered_read = false; + } else { + VkDescriptorImageInfo *ii = &ctx->di.bindless[1].img_infos[handle]; + ii->sampler = VK_NULL_HANDLE; + ii->imageView = ds->surface->image_view; + ii->imageLayout = VK_IMAGE_LAYOUT_GENERAL; + finalize_image_bind(ctx, res, false); + finalize_image_bind(ctx, res, true); + zink_batch_resource_usage_set(&ctx->batch, res, zink_resource_access_is_write(access), false); + res->obj->unordered_write = false; + } + res->gfx_barrier |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + res->barrier_access[0] |= access; + res->barrier_access[1] |= access; + util_dynarray_append(&ctx->di.bindless[1].resident, struct zink_bindless_descriptor *, bd); + uint32_t h = is_buffer ? 
handle + ZINK_MAX_BINDLESS_HANDLES : handle; + util_dynarray_append(&ctx->di.bindless[1].updates, uint32_t, h); + } else { + zero_bindless_descriptor(ctx, handle, is_buffer, true); + util_dynarray_delete_unordered(&ctx->di.bindless[1].resident, struct zink_bindless_descriptor *, bd); + unbind_shader_image_counts(ctx, res, false, false); + unbind_shader_image_counts(ctx, res, true, false); + res->bindless[1]--; + unbind_bindless_descriptor(ctx, res); + } + ctx->di.bindless_dirty[1] = true; +} + +static void +zink_set_global_binding(struct pipe_context *pctx, + unsigned first, unsigned count, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct zink_context *ctx = zink_context(pctx); + + size_t size = ctx->di.global_bindings.capacity; + if (!util_dynarray_resize(&ctx->di.global_bindings, struct pipe_resource*, first + count + 8)) + unreachable("zink: out of memory somehow"); + if (size != ctx->di.global_bindings.capacity) { + uint8_t *data = ctx->di.global_bindings.data; + memset(data + size, 0, ctx->di.global_bindings.capacity - size); + } + + struct pipe_resource **globals = ctx->di.global_bindings.data; + for (unsigned i = 0; i < count; i++) { + if (resources && resources[i]) { + struct zink_resource *res = zink_resource(resources[i]); + + util_range_add(&res->base.b, &res->valid_buffer_range, 0, res->base.b.width0); + pipe_resource_reference(&globals[first + i], resources[i]); + + uint64_t addr = 0; + memcpy(&addr, handles[i], sizeof(addr)); + addr += zink_resource_get_address(zink_screen(pctx->screen), res); + memcpy(handles[i], &addr, sizeof(addr)); + zink_resource_usage_set(res, ctx->batch.state, true); + res->obj->unordered_read = res->obj->unordered_write = false; + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + } else if (globals[i]) { + zink_batch_reference_resource(&ctx->batch, zink_resource(globals[first + i])); + 
pipe_resource_reference(&globals[first + i], NULL); + } + } } static void @@ -1493,316 +2597,619 @@ static void zink_set_patch_vertices(struct pipe_context *pctx, uint8_t patch_vertices) { struct zink_context *ctx = zink_context(pctx); - ctx->gfx_pipeline_state.patch_vertices = patch_vertices; + if (zink_set_tcs_key_patches(ctx, patch_vertices)) { + ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch = patch_vertices; + if (zink_screen(ctx->base.screen)->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, patch_vertices); + else + ctx->gfx_pipeline_state.dirty = true; + zink_flush_dgc_if_enabled(ctx); + } } -void +static void +init_null_fbfetch(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + ctx->di.null_fbfetch_init = true; + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) + return; + VkDescriptorGetInfoEXT info; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; + info.pNext = NULL; + info.type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + info.data.pInputAttachmentImage = &ctx->di.fbfetch; + if (screen->info.db_props.inputAttachmentDescriptorSize) + VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.inputAttachmentDescriptorSize, ctx->di.fbfetch_db); +} + +bool zink_update_fbfetch(struct zink_context *ctx) { const bool had_fbfetch = ctx->di.fbfetch.imageLayout == VK_IMAGE_LAYOUT_GENERAL; - if (!ctx->gfx_stages[PIPE_SHADER_FRAGMENT] || - !ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) { + if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT] || + !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_fbfetch_output) { if (!had_fbfetch) - return; + return false; + zink_batch_no_rp(ctx); ctx->di.fbfetch.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; ctx->di.fbfetch.imageView = zink_screen(ctx->base.screen)->info.rb2_feats.nullDescriptor ? 
VK_NULL_HANDLE : - zink_csurface(ctx->dummy_surface[0])->image_view; - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, PIPE_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1); - return; + zink_get_dummy_surface(ctx, 0)->image_view; + ctx->invalidate_descriptor_state(ctx, MESA_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1); + return true; } bool changed = !had_fbfetch; if (ctx->fb_state.cbufs[0]) { VkImageView fbfetch = zink_csurface(ctx->fb_state.cbufs[0])->image_view; + if (!fbfetch) + /* swapchain image: retry later */ + return false; changed |= fbfetch != ctx->di.fbfetch.imageView; ctx->di.fbfetch.imageView = zink_csurface(ctx->fb_state.cbufs[0])->image_view; + + bool fbfetch_ms = ctx->fb_state.cbufs[0]->texture->nr_samples > 1; + if (zink_get_fs_base_key(ctx)->fbfetch_ms != fbfetch_ms) + zink_set_fs_base_key(ctx)->fbfetch_ms = fbfetch_ms; + } else { + ctx->di.fbfetch.imageView = zink_screen(ctx->base.screen)->info.rb2_feats.nullDescriptor ? + VK_NULL_HANDLE : + zink_get_dummy_surface(ctx, 0)->image_view; } + bool ret = false; ctx->di.fbfetch.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - if (changed) - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, PIPE_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1); + if (changed) { + ctx->invalidate_descriptor_state(ctx, MESA_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1); + if (!had_fbfetch) { + ret = true; + zink_batch_no_rp(ctx); + } + } + return ret; } -static size_t -rp_state_size(const struct zink_render_pass_pipeline_state *pstate) +void +zink_update_vk_sample_locations(struct zink_context *ctx) { - return offsetof(struct zink_render_pass_pipeline_state, attachments) + - sizeof(pstate->attachments[0]) * pstate->num_attachments; + if (ctx->gfx_pipeline_state.sample_locations_enabled && ctx->sample_locations_changed) { + unsigned samples = ctx->gfx_pipeline_state.rast_samples + 1; + unsigned idx = util_logbase2_ceil(MAX2(samples, 1)); + VkExtent2D grid_size = 
zink_screen(ctx->base.screen)->maxSampleLocationGridSize[idx]; + + for (unsigned pixel = 0; pixel < grid_size.width * grid_size.height; pixel++) { + for (unsigned sample = 0; sample < samples; sample++) { + unsigned pixel_x = pixel % grid_size.width; + unsigned pixel_y = pixel / grid_size.width; + unsigned wi = pixel * samples + sample; + unsigned ri = (pixel_y * grid_size.width + pixel_x % grid_size.width); + ri = ri * samples + sample; + ctx->vk_sample_locations[wi].x = (ctx->sample_locations[ri] & 0xf) / 16.0f; + ctx->vk_sample_locations[wi].y = (16 - (ctx->sample_locations[ri] >> 4)) / 16.0f; + } + } + } } -static uint32_t -hash_rp_state(const void *key) -{ - const struct zink_render_pass_pipeline_state *s = key; - return _mesa_hash_data(key, rp_state_size(s)); +static unsigned +find_rp_state(struct zink_context *ctx) +{ + bool found = false; + /* calc the state idx using the samples to account for msrtss */ + unsigned idx = zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ? + util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples + 1) : 0; + struct set_entry *he = _mesa_set_search_or_add(&ctx->rendering_state_cache[idx], &ctx->gfx_pipeline_state.rendering_info, &found); + struct zink_rendering_info *info; + if (found) { + info = (void*)he->key; + return info->id; + } + info = ralloc(ctx, struct zink_rendering_info); + memcpy(info, &ctx->gfx_pipeline_state.rendering_info, sizeof(VkPipelineRenderingCreateInfo)); + info->id = ctx->rendering_state_cache[idx].entries; + he->key = info; + return info->id; } -static bool -equals_rp_state(const void *a, const void *b) +unsigned +zink_update_rendering_info(struct zink_context *ctx) { - return !memcmp(a, b, rp_state_size(a)); + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); + ctx->gfx_pipeline_state.rendering_formats[i] = surf ? 
surf->info.format[0] : VK_FORMAT_UNDEFINED; + } + ctx->gfx_pipeline_state.rendering_info.depthAttachmentFormat = VK_FORMAT_UNDEFINED; + ctx->gfx_pipeline_state.rendering_info.stencilAttachmentFormat = VK_FORMAT_UNDEFINED; + if (ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx)) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + bool has_depth = util_format_has_depth(util_format_description(ctx->fb_state.zsbuf->format)); + bool has_stencil = util_format_has_stencil(util_format_description(ctx->fb_state.zsbuf->format)); + + if (has_depth) + ctx->gfx_pipeline_state.rendering_info.depthAttachmentFormat = surf->info.format[0]; + if (has_stencil) + ctx->gfx_pipeline_state.rendering_info.stencilAttachmentFormat = surf->info.format[0]; + } + return find_rp_state(ctx); } -static uint32_t -hash_render_pass_state(const void *key) +static unsigned +calc_max_dummy_fbo_size(struct zink_context *ctx) { - struct zink_render_pass_state* s = (struct zink_render_pass_state*)key; - return _mesa_hash_data(key, offsetof(struct zink_render_pass_state, rts) + sizeof(s->rts[0]) * s->num_rts); + unsigned size = MAX2(ctx->fb_state.width, ctx->fb_state.height); + return size ? 
size : MIN2(256, zink_screen(ctx->base.screen)->info.props.limits.maxImageDimension2D); } -static bool -equals_render_pass_state(const void *a, const void *b) +static unsigned +begin_rendering(struct zink_context *ctx) { - const struct zink_render_pass_state *s_a = a, *s_b = b; - if (s_a->num_rts != s_b->num_rts) - return false; - return memcmp(a, b, offsetof(struct zink_render_pass_state, rts) + sizeof(s_a->rts[0]) * s_a->num_rts) == 0; -} + unsigned clear_buffers = 0; + ctx->gfx_pipeline_state.render_pass = NULL; + zink_update_vk_sample_locations(ctx); + bool has_swapchain = zink_render_update_swapchain(ctx); + if (has_swapchain) + zink_render_fixup_swapchain(ctx); + bool has_depth = false; + bool has_stencil = false; + bool changed_layout = false; + bool changed_size = false; + bool zsbuf_used = zink_is_zsbuf_used(ctx); + bool use_tc_info = !ctx->blitting && ctx->track_renderpasses; + if (ctx->rp_changed || ctx->rp_layout_changed || (!ctx->batch.in_rp && ctx->rp_loadop_changed)) { + /* init imageviews, base loadOp, formats */ + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); + if (!surf) + continue; -static struct zink_render_pass * -get_render_pass(struct zink_context *ctx) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - const struct pipe_framebuffer_state *fb = &ctx->fb_state; - struct zink_render_pass_state state = {0}; - uint32_t clears = 0; - state.swapchain_init = ctx->new_swapchain; - state.samples = fb->samples > 0; + if (!zink_resource(surf->base.texture)->valid) + ctx->dynamic_fb.attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + else + ctx->dynamic_fb.attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + if (use_tc_info) { + if (ctx->dynamic_fb.tc_info.cbuf_invalidate & BITFIELD_BIT(i)) + ctx->dynamic_fb.attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + else + ctx->dynamic_fb.attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + } + + 
/* unset depth and stencil info: reset below */ + VkImageLayout zlayout = ctx->dynamic_fb.info.pDepthAttachment ? ctx->dynamic_fb.info.pDepthAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED; + VkImageLayout slayout = ctx->dynamic_fb.info.pStencilAttachment ? ctx->dynamic_fb.info.pStencilAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED; + ctx->dynamic_fb.info.pDepthAttachment = NULL; + ctx->dynamic_fb.info.pStencilAttachment = NULL; + + if (ctx->fb_state.zsbuf && zsbuf_used) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + has_depth = util_format_has_depth(util_format_description(ctx->fb_state.zsbuf->format)); + has_stencil = util_format_has_stencil(util_format_description(ctx->fb_state.zsbuf->format)); + + /* depth may or may not be used but init it anyway */ + if (zink_resource(surf->base.texture)->valid) + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + else + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + + if (use_tc_info) { + if (ctx->dynamic_fb.tc_info.zsbuf_invalidate) + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + else + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + + /* stencil may or may not be used but init it anyway */ + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].loadOp = ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp; + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].storeOp = ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].storeOp; + + if (has_depth) { + ctx->dynamic_fb.info.pDepthAttachment = &ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS]; + /* stencil info only set for clears below */ + } + if (has_stencil) { + /* must be stencil-only */ + ctx->dynamic_fb.info.pStencilAttachment = &ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS + 1]; + } + } else { + ctx->dynamic_fb.info.pDepthAttachment = NULL; + } + if (zlayout != 
(ctx->dynamic_fb.info.pDepthAttachment ? ctx->dynamic_fb.info.pDepthAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED)) + changed_layout = true; + if (slayout != (ctx->dynamic_fb.info.pStencilAttachment ? ctx->dynamic_fb.info.pStencilAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED)) + changed_layout = true; - u_foreach_bit(i, ctx->fbfetch_outputs) - state.rts[i].fbfetch = true; + /* similar to begin_render_pass(), but just filling in VkRenderingInfo */ + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + /* these are no-ops */ + if (!ctx->fb_state.cbufs[i] || !zink_fb_clear_enabled(ctx, i)) + continue; + /* these need actual clear calls inside the rp */ + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0); + if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) { + clear_buffers |= (PIPE_CLEAR_COLOR0 << i); + if (zink_fb_clear_count(&ctx->fb_clears[i]) < 2 || + zink_fb_clear_element_needs_explicit(clear)) + continue; + } + /* we now know there's one clear that can be done here */ + memcpy(&ctx->dynamic_fb.attachments[i].clearValue, &clear->color, sizeof(float) * 4); + ctx->dynamic_fb.attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + } + if (ctx->fb_state.zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) { + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); + if (!zink_fb_clear_element_needs_explicit(clear)) { + /* base zs clear info */ + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].clearValue.depthStencil.depth = clear->zs.depth; + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].clearValue.depthStencil.stencil = clear->zs.stencil; + /* always init separate stencil attachment */ + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].clearValue.depthStencil.stencil = clear->zs.stencil; + if ((zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH)) + /* initiate a depth clear */ + 
ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + if ((zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL)) { + /* use a stencil clear, also set stencil attachment */ + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + } + } + } + if (changed_size || changed_layout) + ctx->rp_changed = true; + ctx->rp_loadop_changed = false; + ctx->rp_layout_changed = false; + } + /* always assemble clear_buffers mask: + * if a scissored clear must be triggered during glFlush, + * the renderpass metadata may be unchanged (e.g., LOAD from previous rp), + * but the buffer mask must still be returned + */ + if (ctx->clears_enabled) { + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + /* these are no-ops */ + if (!ctx->fb_state.cbufs[i] || !zink_fb_clear_enabled(ctx, i)) + continue; + /* these need actual clear calls inside the rp */ + if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) + clear_buffers |= (PIPE_CLEAR_COLOR0 << i); + } + if (ctx->fb_state.zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) { + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); + if (zink_fb_clear_needs_explicit(fb_clear)) { + for (int j = !zink_fb_clear_element_needs_explicit(clear); + (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear); + j++) + clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits; + } + } + } + + if (!ctx->rp_changed && ctx->batch.in_rp) + return 0; + ctx->rp_changed = false; - for (int i = 0; i < fb->nr_cbufs; i++) { - struct pipe_surface *surf = fb->cbufs[i]; + /* update pipeline info id for compatibility VUs */ + unsigned rp_state = zink_update_rendering_info(ctx); + /* validate zs VUs: attachment must be null or format must be valid */ + assert(!ctx->dynamic_fb.info.pDepthAttachment || 
ctx->gfx_pipeline_state.rendering_info.depthAttachmentFormat); + assert(!ctx->dynamic_fb.info.pStencilAttachment || ctx->gfx_pipeline_state.rendering_info.stencilAttachmentFormat); + bool rp_changed = ctx->gfx_pipeline_state.rp_state != rp_state; + if (!rp_changed && ctx->batch.in_rp) + return 0; + + zink_batch_no_rp(ctx); + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + VkImageView iv = VK_NULL_HANDLE; + struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); if (surf) { - state.rts[i].format = zink_get_format(screen, surf->format); - state.rts[i].samples = surf->texture->nr_samples > 0 ? surf->texture->nr_samples : - VK_SAMPLE_COUNT_1_BIT; - state.rts[i].clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]); - clears |= !!state.rts[i].clear_color ? PIPE_CLEAR_COLOR0 << i : 0; - state.rts[i].swapchain = surf->texture->bind & PIPE_BIND_SCANOUT; + iv = zink_prep_fb_attachment(ctx, surf, i); + if (!iv) + /* dead swapchain */ + return 0; + ctx->dynamic_fb.attachments[i].imageLayout = zink_resource(surf->base.texture)->layout; + } + ctx->dynamic_fb.attachments[i].imageView = iv; + } + if (has_swapchain) { + ASSERTED struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture); + zink_render_fixup_swapchain(ctx); + if (res->use_damage) + ctx->dynamic_fb.info.renderArea = res->damage; + /* clamp for late swapchain resize */ + if (res->base.b.width0 < ctx->dynamic_fb.info.renderArea.extent.width) + ctx->dynamic_fb.info.renderArea.extent.width = res->base.b.width0; + if (res->base.b.height0 < ctx->dynamic_fb.info.renderArea.extent.height) + ctx->dynamic_fb.info.renderArea.extent.height = res->base.b.height0; + } + if (ctx->fb_state.zsbuf && zsbuf_used) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + VkImageView iv = zink_prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs); + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].imageView = iv; + 
ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].imageLayout = zink_resource(surf->base.texture)->layout; + assert(ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].imageLayout != VK_IMAGE_LAYOUT_UNDEFINED); + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].imageView = iv; + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].imageLayout = zink_resource(surf->base.texture)->layout; + assert(ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].imageLayout != VK_IMAGE_LAYOUT_UNDEFINED); + if (ctx->transient_attachments & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) { + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS + 1].resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; } else { - state.rts[i].format = VK_FORMAT_R8_UINT; - state.rts[i].samples = fb->samples; - } - state.num_rts++; - } - state.num_cbufs = fb->nr_cbufs; - - if (fb->zsbuf) { - struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture); - struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; - state.rts[fb->nr_cbufs].format = zsbuf->format; - state.rts[fb->nr_cbufs].samples = zsbuf->base.b.nr_samples > 0 ? zsbuf->base.b.nr_samples : VK_SAMPLE_COUNT_1_BIT; - state.rts[fb->nr_cbufs].clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && - !zink_fb_clear_first_needs_explicit(fb_clear) && - (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH); - state.rts[fb->nr_cbufs].clear_stencil = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && - !zink_fb_clear_first_needs_explicit(fb_clear) && - (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL); - if (state.rts[fb->nr_cbufs].clear_color) - clears |= PIPE_CLEAR_DEPTH; - if (state.rts[fb->nr_cbufs].clear_stencil) - clears |= PIPE_CLEAR_STENCIL; - const uint64_t outputs_written = ctx->gfx_stages[PIPE_SHADER_FRAGMENT] ? 
- ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.outputs_written : 0; - bool needs_write = (ctx->dsa_state && ctx->dsa_state->hw_state.depth_write) || - outputs_written & (BITFIELD64_BIT(FRAG_RESULT_DEPTH) | BITFIELD64_BIT(FRAG_RESULT_STENCIL)); - state.rts[fb->nr_cbufs].needs_write = needs_write || state.rts[fb->nr_cbufs].clear_color || state.rts[fb->nr_cbufs].clear_stencil; - state.num_rts++; - } - state.have_zsbuf = fb->zsbuf != NULL; - assert(clears == ctx->rp_clears_enabled); - state.clears = clears; - uint32_t hash = hash_render_pass_state(&state); - struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->render_pass_cache, hash, - &state); - struct zink_render_pass *rp; - if (entry) { - rp = entry->data; - assert(rp->state.clears == clears); - } else { - struct zink_render_pass_pipeline_state pstate; - pstate.samples = state.samples; - rp = zink_create_render_pass(screen, &state, &pstate); - if (!_mesa_hash_table_insert_pre_hashed(ctx->render_pass_cache, hash, &rp->state, rp)) - return NULL; - bool found = false; - struct set_entry *entry = _mesa_set_search_or_add(&ctx->render_pass_state_cache, &pstate, &found); - struct zink_render_pass_pipeline_state *ppstate; - if (!found) { - entry->key = ralloc(ctx, struct zink_render_pass_pipeline_state); - ppstate = (void*)entry->key; - memcpy(ppstate, &pstate, rp_state_size(&pstate)); - ppstate->id = ctx->render_pass_state_cache.entries; + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].resolveMode = 0; + ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS + 1].resolveMode = 0; } - ppstate = (void*)entry->key; - rp->pipeline_state = ppstate->id; } - return rp; + ctx->zsbuf_unused = !zsbuf_used; + assert(ctx->fb_state.width >= ctx->dynamic_fb.info.renderArea.extent.width); + assert(ctx->fb_state.height >= ctx->dynamic_fb.info.renderArea.extent.height); + ctx->gfx_pipeline_state.dirty |= rp_changed; + ctx->gfx_pipeline_state.rp_state = rp_state; + + VkMultisampledRenderToSingleSampledInfoEXT msrtss = { + 
VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + NULL, + VK_TRUE, + ctx->gfx_pipeline_state.rast_samples + 1, + }; + + if (zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled) + ctx->dynamic_fb.info.pNext = ctx->transient_attachments ? &msrtss : NULL; + assert(!ctx->transient_attachments || msrtss.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT); + VKCTX(CmdBeginRendering)(ctx->batch.state->cmdbuf, &ctx->dynamic_fb.info); + ctx->batch.in_rp = true; + return clear_buffers; } -static uint32_t -hash_framebuffer_imageless(const void *key) +ALWAYS_INLINE static void +update_layered_rendering_state(struct zink_context *ctx) { - struct zink_framebuffer_state* s = (struct zink_framebuffer_state*)key; - return _mesa_hash_data(key, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments); + if (!zink_screen(ctx->base.screen)->driver_workarounds.needs_sanitised_layer) + return; + unsigned framebffer_is_layered = zink_framebuffer_get_num_layers(&ctx->fb_state) > 1; + VKCTX(CmdPushConstants)( + ctx->batch.state->cmdbuf, + zink_screen(ctx->base.screen)->gfx_push_constant_layout, + VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, framebuffer_is_layered), sizeof(unsigned), + &framebffer_is_layered); } -static bool -equals_framebuffer_imageless(const void *a, const void *b) +ALWAYS_INLINE static void +batch_ref_fb_surface(struct zink_context *ctx, struct pipe_surface *psurf) { - struct zink_framebuffer_state *s = (struct zink_framebuffer_state*)a; - return memcmp(a, b, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments) == 0; + if (!psurf) + return; + zink_batch_reference_resource(&ctx->batch, zink_resource(psurf->texture)); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) + zink_batch_reference_resource(&ctx->batch, zink_resource(transient->base.texture)); } -static void -setup_framebuffer(struct 
zink_context *ctx) +void +zink_batch_rp(struct zink_context *ctx) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass; - - if (ctx->gfx_pipeline_state.sample_locations_enabled && ctx->sample_locations_changed) { - unsigned samples = ctx->gfx_pipeline_state.rast_samples + 1; - unsigned idx = util_logbase2_ceil(MAX2(samples, 1)); - VkExtent2D grid_size = screen->maxSampleLocationGridSize[idx]; - - for (unsigned pixel = 0; pixel < grid_size.width * grid_size.height; pixel++) { - for (unsigned sample = 0; sample < samples; sample++) { - unsigned pixel_x = pixel % grid_size.width; - unsigned pixel_y = pixel / grid_size.width; - unsigned wi = pixel * samples + sample; - unsigned ri = (pixel_y * grid_size.width + pixel_x % grid_size.width); - ri = ri * samples + sample; - ctx->vk_sample_locations[wi].x = (ctx->sample_locations[ri] & 0xf) / 16.0f; - ctx->vk_sample_locations[wi].y = (16 - (ctx->sample_locations[ri] >> 4)) / 16.0f; - } - } + assert(!(ctx->batch.in_rp && ctx->rp_changed)); + if (!ctx->track_renderpasses && !ctx->blitting) { + if (ctx->rp_tc_info_updated) + zink_parse_tc_info(ctx); + } + if (ctx->batch.in_rp && !ctx->rp_layout_changed) + return; + bool in_rp = ctx->batch.in_rp; + if (!in_rp && ctx->void_clears) { + union pipe_color_union color; + color.f[0] = color.f[1] = color.f[2] = 0; + color.f[3] = 1.0; + ctx->base.clear(&ctx->base, ctx->void_clears, NULL, &color, 0, 0); + ctx->void_clears = 0; + } + if (!ctx->blitting) { + if (ctx->rp_tc_info_updated) + update_tc_info(ctx); + ctx->rp_tc_info_updated = false; } + bool maybe_has_query_ends = !ctx->track_renderpasses || ctx->dynamic_fb.tc_info.has_query_ends; + ctx->queries_in_rp = maybe_has_query_ends; + /* if possible, out-of-renderpass resume any queries that were stopped when previous rp ended */ + if (!ctx->queries_disabled && !maybe_has_query_ends) { + zink_resume_queries(ctx, &ctx->batch); + 
zink_query_update_gs_states(ctx); + } + unsigned clear_buffers; + /* use renderpass for multisample-to-singlesample or fbfetch: + * - msrtss is TODO + * - dynamic rendering doesn't have input attachments + */ + if (!zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering || + (ctx->transient_attachments && !zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled) || + (ctx->fbfetch_outputs && !zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering_local_read)) + clear_buffers = zink_begin_render_pass(ctx); + else + clear_buffers = begin_rendering(ctx); + assert(!ctx->rp_changed); - if (rp) - ctx->rp_changed |= ctx->rp_clears_enabled != rp->state.clears; - if (ctx->rp_changed) - rp = get_render_pass(ctx); + /* update the render-passes HUD query */ + ctx->hud.render_passes++; - ctx->fb_changed |= rp != ctx->gfx_pipeline_state.render_pass; - if (rp->pipeline_state != ctx->gfx_pipeline_state.rp_state) { - ctx->gfx_pipeline_state.rp_state = rp->pipeline_state; - ctx->gfx_pipeline_state.dirty = true; + if (!in_rp && ctx->batch.in_rp) { + /* only hit this for valid swapchain and new renderpass */ + if (ctx->render_condition.query) + zink_start_conditional_render(ctx); + zink_clear_framebuffer(ctx, clear_buffers); + if (ctx->pipeline_changed[0]) { + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) + batch_ref_fb_surface(ctx, ctx->fb_state.cbufs[i]); + batch_ref_fb_surface(ctx, ctx->fb_state.zsbuf); + } } + /* unable to previously determine that queries didn't split renderpasses: ensure queries start inside renderpass */ + if (!ctx->queries_disabled && maybe_has_query_ends) { + zink_resume_queries(ctx, &ctx->batch); + zink_query_update_gs_states(ctx); + } +} - ctx->rp_changed = false; +void +zink_batch_no_rp_safe(struct zink_context *ctx) +{ + if (!ctx->batch.in_rp) + return; + zink_flush_dgc_if_enabled(ctx); + if (ctx->render_condition.query) + zink_stop_conditional_render(ctx); + /* suspend all queries that were started in a 
renderpass + * they can then be resumed upon beginning a new renderpass + */ + if (!ctx->queries_disabled) + zink_query_renderpass_suspend(ctx); + if (ctx->gfx_pipeline_state.render_pass) + zink_end_render_pass(ctx); + else { + VKCTX(CmdEndRendering)(ctx->batch.state->cmdbuf); + ctx->batch.in_rp = false; + } + assert(!ctx->batch.in_rp); +} - if (!ctx->fb_changed) +void +zink_batch_no_rp(struct zink_context *ctx) +{ + if (!ctx->batch.in_rp) return; + if (ctx->track_renderpasses && !ctx->blitting) + tc_renderpass_info_reset(&ctx->dynamic_fb.tc_info); + zink_batch_no_rp_safe(ctx); +} - ctx->init_framebuffer(screen, ctx->framebuffer, rp); - ctx->fb_changed = false; - ctx->gfx_pipeline_state.render_pass = rp; +ALWAYS_INLINE static void +update_res_sampler_layouts(struct zink_context *ctx, struct zink_resource *res) +{ + unsigned find = res->sampler_bind_count[0]; + for (unsigned i = 0; find && i < MESA_SHADER_COMPUTE; i++) { + u_foreach_bit(slot, res->sampler_binds[i]) { + /* only set layout, skip rest of update */ + if (ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i][slot] == res) + ctx->di.textures[i][slot].imageLayout = zink_descriptor_util_image_layout_eval(ctx, res, false); + find--; + if (!find) break; + } + } } -static VkImageView -prep_fb_attachment(struct zink_context *ctx, struct pipe_surface *psurf, unsigned i) +VkImageView +zink_prep_fb_attachment(struct zink_context *ctx, struct zink_surface *surf, unsigned i) { - if (!psurf) - return zink_csurface(ctx->dummy_surface[util_logbase2_ceil(ctx->fb_state.samples)])->image_view; - - struct zink_surface *surf = zink_csurface(psurf); - zink_batch_resource_usage_set(&ctx->batch, zink_resource(surf->base.texture), true); - zink_batch_usage_set(&surf->batch_uses, ctx->batch.state); + struct zink_resource *res; + if (!surf) { + surf = zink_get_dummy_surface(ctx, util_logbase2_ceil(ctx->fb_state.samples)); + res = zink_resource(surf->base.texture); + } else { + res = zink_resource(surf->base.texture); + 
zink_batch_resource_usage_set(&ctx->batch, res, true, false); + } - struct zink_resource *res = zink_resource(surf->base.texture); VkAccessFlags access; VkPipelineStageFlags pipeline; - VkImageLayout layout = zink_render_pass_attachment_get_barrier_info(ctx->gfx_pipeline_state.render_pass, - i, &pipeline, &access); - zink_resource_image_barrier(ctx, res, layout, access, pipeline); + if (zink_is_swapchain(res)) { + if (!zink_kopper_acquire(ctx, res, UINT64_MAX)) + return VK_NULL_HANDLE; + zink_surface_swapchain_update(ctx, surf); + if (!i) + zink_update_fbfetch(ctx); + } + if (ctx->blitting) + return surf->image_view; + VkImageLayout layout; + /* depth attachment is stored as the last attachment, but bitfields always use PIPE_MAX_COLOR_BUFS */ + int idx = i == ctx->fb_state.nr_cbufs ? PIPE_MAX_COLOR_BUFS : i; + if (ctx->feedback_loops & BITFIELD_BIT(idx)) { + /* reevaluate feedback loop in case layout change eliminates the loop */ + if (!res->sampler_bind_count[0] || (idx == PIPE_MAX_COLOR_BUFS && !zink_is_zsbuf_write(ctx))) + update_feedback_loop_state(ctx, i, ctx->feedback_loops & ~BITFIELD_BIT(idx)); + } + if (ctx->track_renderpasses) { + layout = zink_tc_renderpass_info_parse(ctx, &ctx->dynamic_fb.tc_info, idx, &pipeline, &access); + assert(i < ctx->fb_state.nr_cbufs || layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL || !zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)); + if (i == ctx->fb_state.nr_cbufs && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) + assert(ctx->dynamic_fb.tc_info.zsbuf_clear || ctx->dynamic_fb.tc_info.zsbuf_clear_partial || ctx->dynamic_fb.tc_info.zsbuf_load); + } else { + if (ctx->gfx_pipeline_state.render_pass) { + layout = zink_render_pass_attachment_get_barrier_info(&ctx->gfx_pipeline_state.render_pass->state.rts[i], + i < ctx->fb_state.nr_cbufs, &pipeline, &access); + } else { + struct zink_rt_attrib rt; + if (i < ctx->fb_state.nr_cbufs) + zink_init_color_attachment(ctx, i, &rt); + else + zink_init_zs_attachment(ctx, &rt); 
+ layout = zink_render_pass_attachment_get_barrier_info(&rt, i < ctx->fb_state.nr_cbufs, &pipeline, &access); + /* avoid unnecessary read-only layout change */ + if (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL && + res->layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + !res->bind_count[0]) + layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } + } + /* + The image subresources for a storage image must be in the VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR or + VK_IMAGE_LAYOUT_GENERAL layout in order to access its data in a shader. + - 14.1.1. Storage Image + */ + if (res->image_bind_count[0]) + layout = VK_IMAGE_LAYOUT_GENERAL; + else if (!zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout && + layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) + layout = VK_IMAGE_LAYOUT_GENERAL; + if (res->valid || res->layout != layout) + zink_screen(ctx->base.screen)->image_barrier(ctx, res, layout, access, pipeline); + if (!(res->aspect & VK_IMAGE_ASPECT_COLOR_BIT)) + ctx->zsbuf_readonly = res->layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + res->obj->unordered_read = res->obj->unordered_write = false; + if (i == ctx->fb_state.nr_cbufs && res->sampler_bind_count[0]) + update_res_sampler_layouts(ctx, res); return surf->image_view; } -static unsigned -begin_render_pass(struct zink_context *ctx) -{ - struct zink_batch *batch = &ctx->batch; - struct pipe_framebuffer_state *fb_state = &ctx->fb_state; +static uint32_t +hash_rendering_state(const void *key) +{ + const VkPipelineRenderingCreateInfo *info = key; + uint32_t hash = 0; + /* + uint32_t colorAttachmentCount; + const VkFormat* pColorAttachmentFormats; + VkFormat depthAttachmentFormat; + VkFormat stencilAttachmentFormat; + * this data is not optimally arranged, so it must be manually hashed + */ + hash = XXH32(&info->colorAttachmentCount, sizeof(uint32_t), hash); + hash = XXH32(&info->depthAttachmentFormat, sizeof(uint32_t), hash); + hash = 
XXH32(&info->stencilAttachmentFormat, sizeof(VkFormat), hash); + return XXH32(info->pColorAttachmentFormats, sizeof(VkFormat) * info->colorAttachmentCount, hash); +} - VkRenderPassBeginInfo rpbi = {0}; - rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rpbi.renderPass = ctx->gfx_pipeline_state.render_pass->render_pass; - rpbi.renderArea.offset.x = 0; - rpbi.renderArea.offset.y = 0; - rpbi.renderArea.extent.width = fb_state->width; - rpbi.renderArea.extent.height = fb_state->height; +static bool +equals_rendering_state(const void *a, const void *b) +{ + const VkPipelineRenderingCreateInfo *ai = a; + const VkPipelineRenderingCreateInfo *bi = b; + return ai->colorAttachmentCount == bi->colorAttachmentCount && + ai->depthAttachmentFormat == bi->depthAttachmentFormat && + ai->stencilAttachmentFormat == bi->stencilAttachmentFormat && + !memcmp(ai->pColorAttachmentFormats, bi->pColorAttachmentFormats, sizeof(VkFormat) * ai->colorAttachmentCount); +} - VkClearValue clears[PIPE_MAX_COLOR_BUFS + 1] = {0}; - unsigned clear_buffers = 0; - uint32_t clear_validate = 0; - for (int i = 0; i < fb_state->nr_cbufs; i++) { - /* these are no-ops */ - if (!fb_state->cbufs[i] || !zink_fb_clear_enabled(ctx, i)) - continue; - /* these need actual clear calls inside the rp */ - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0); - if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) { - clear_buffers |= (PIPE_CLEAR_COLOR0 << i); - if (zink_fb_clear_count(&ctx->fb_clears[i]) < 2 || - zink_fb_clear_element_needs_explicit(clear)) - continue; - } - /* we now know there's one clear that can be done here */ - zink_fb_clear_util_unpack_clear_color(clear, fb_state->cbufs[i]->format, (void*)&clears[i].color); - rpbi.clearValueCount = i + 1; - clear_validate |= PIPE_CLEAR_COLOR0 << i; - assert(ctx->framebuffer->rp->state.clears); - } - if (fb_state->zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) { - struct zink_framebuffer_clear *fb_clear = 
&ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); - if (!zink_fb_clear_element_needs_explicit(clear)) { - clears[fb_state->nr_cbufs].depthStencil.depth = clear->zs.depth; - clears[fb_state->nr_cbufs].depthStencil.stencil = clear->zs.stencil; - rpbi.clearValueCount = fb_state->nr_cbufs + 1; - clear_validate |= clear->zs.bits; - assert(ctx->framebuffer->rp->state.clears); - } - if (zink_fb_clear_needs_explicit(fb_clear)) { - for (int j = !zink_fb_clear_element_needs_explicit(clear); - (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear); - j++) - clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits; - } - } - assert(clear_validate == ctx->framebuffer->rp->state.clears); - rpbi.pClearValues = &clears[0]; - rpbi.framebuffer = ctx->framebuffer->fb; - - assert(ctx->gfx_pipeline_state.render_pass && ctx->framebuffer); - - VkRenderPassAttachmentBeginInfo infos; - VkImageView att[PIPE_MAX_COLOR_BUFS + 1]; - infos.sType = VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO; - infos.pNext = NULL; - infos.attachmentCount = ctx->framebuffer->state.num_attachments; - infos.pAttachments = att; - for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) - att[i] = prep_fb_attachment(ctx, ctx->fb_state.cbufs[i], i); - att[ctx->fb_state.nr_cbufs] = prep_fb_attachment(ctx, ctx->fb_state.zsbuf, ctx->fb_state.nr_cbufs); - if (zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer) { -#ifndef NDEBUG - for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { - assert(!ctx->fb_state.cbufs[i] || zink_resource(ctx->fb_state.cbufs[i]->texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage); - assert(!ctx->fb_state.cbufs[i] || zink_resource(ctx->fb_state.cbufs[i]->texture)->obj->vkflags == ctx->framebuffer->state.infos[i].flags); - } - assert(!ctx->fb_state.zsbuf || zink_resource(ctx->fb_state.zsbuf->texture)->obj->vkusage == 
ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage); - assert(!ctx->fb_state.zsbuf || zink_resource(ctx->fb_state.zsbuf->texture)->obj->vkflags == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].flags); -#endif - rpbi.pNext = &infos; - } +static uint32_t +hash_framebuffer_imageless(const void *key) +{ + struct zink_framebuffer_state* s = (struct zink_framebuffer_state*)key; + return _mesa_hash_data(key, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments); +} - VKCTX(CmdBeginRenderPass)(batch->state->cmdbuf, &rpbi, VK_SUBPASS_CONTENTS_INLINE); - batch->in_rp = true; - ctx->new_swapchain = false; - return clear_buffers; +static bool +equals_framebuffer_imageless(const void *a, const void *b) +{ + struct zink_framebuffer_state *s = (struct zink_framebuffer_state*)a; + return memcmp(a, b, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments) == 0; } void @@ -1832,38 +3239,15 @@ zink_evaluate_depth_buffer(struct pipe_context *pctx) zink_batch_no_rp(ctx); } -void -zink_begin_render_pass(struct zink_context *ctx) -{ - setup_framebuffer(ctx); - assert(ctx->gfx_pipeline_state.render_pass); - unsigned clear_buffers = begin_render_pass(ctx); - - if (ctx->render_condition.query) - zink_start_conditional_render(ctx); - zink_clear_framebuffer(ctx, clear_buffers); -} - -void -zink_end_render_pass(struct zink_context *ctx) -{ - if (ctx->batch.in_rp) { - if (ctx->render_condition.query) - zink_stop_conditional_render(ctx); - VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf); - } - ctx->batch.in_rp = false; -} - static void sync_flush(struct zink_context *ctx, struct zink_batch_state *bs) { - if (zink_screen(ctx->base.screen)->threaded) + if (zink_screen(ctx->base.screen)->threaded_submit) util_queue_fence_wait(&bs->flush_completed); } static inline VkAccessFlags -get_access_flags_for_binding(struct zink_context *ctx, enum zink_descriptor_type type, enum pipe_shader_type stage, unsigned idx) 
+get_access_flags_for_binding(struct zink_context *ctx, enum zink_descriptor_type type, gl_shader_stage stage, unsigned idx) { VkAccessFlags flags = 0; switch (type) { @@ -1893,7 +3277,7 @@ get_access_flags_for_binding(struct zink_context *ctx, enum zink_descriptor_type } static void -update_resource_refs_for_stage(struct zink_context *ctx, enum pipe_shader_type stage) +update_resource_refs_for_stage(struct zink_context *ctx, gl_shader_stage stage) { struct zink_batch *batch = &ctx->batch; unsigned max_slot[] = { @@ -1902,32 +3286,25 @@ update_resource_refs_for_stage(struct zink_context *ctx, enum pipe_shader_type s [ZINK_DESCRIPTOR_TYPE_SSBO] = ctx->di.num_ssbos[stage], [ZINK_DESCRIPTOR_TYPE_IMAGE] = ctx->di.num_images[stage] }; - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { for (unsigned j = 0; j < max_slot[i]; j++) { if (ctx->di.descriptor_res[i][stage][j]) { struct zink_resource *res = ctx->di.descriptor_res[i][stage][j]; if (!res) continue; + bool is_buffer = res->obj->is_buffer; bool is_write = zink_resource_access_is_write(get_access_flags_for_binding(ctx, i, stage, j)); - zink_batch_resource_usage_set(batch, res, is_write); - - struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[stage][j]); - struct zink_sampler_state *sampler_state = ctx->sampler_states[stage][j]; - struct zink_image_view *iv = &ctx->image_views[stage][j]; - if (sampler_state && i == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW && j <= ctx->di.num_samplers[stage]) - zink_batch_usage_set(&sampler_state->batch_uses, ctx->batch.state); - if (sv && i == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW && j <= ctx->di.num_sampler_views[stage]) { - if (res->obj->is_buffer) - zink_batch_usage_set(&sv->buffer_view->batch_uses, ctx->batch.state); - else - zink_batch_usage_set(&sv->image_view->batch_uses, ctx->batch.state); - zink_batch_reference_sampler_view(batch, sv); - } else if (i == ZINK_DESCRIPTOR_TYPE_IMAGE && j <= 
ctx->di.num_images[stage]) { - if (res->obj->is_buffer) - zink_batch_usage_set(&iv->buffer_view->batch_uses, ctx->batch.state); + if (zink_is_swapchain(res)) { + if (!zink_kopper_acquire(ctx, res, UINT64_MAX)) + /* technically this is a failure condition, but there's no safe way out */ + continue; + } + zink_batch_resource_usage_set(batch, res, is_write, is_buffer); + if (!ctx->unordered_blitting) { + if (is_write || !res->obj->is_buffer) + res->obj->unordered_read = res->obj->unordered_write = false; else - zink_batch_usage_set(&iv->surface->batch_uses, ctx->batch.state); - zink_batch_reference_image_view(batch, iv); + res->obj->unordered_read = false; } } } @@ -1939,39 +3316,106 @@ zink_update_descriptor_refs(struct zink_context *ctx, bool compute) { struct zink_batch *batch = &ctx->batch; if (compute) { - update_resource_refs_for_stage(ctx, PIPE_SHADER_COMPUTE); + update_resource_refs_for_stage(ctx, MESA_SHADER_COMPUTE); if (ctx->curr_compute) zink_batch_reference_program(batch, &ctx->curr_compute->base); } else { - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) update_resource_refs_for_stage(ctx, i); unsigned vertex_buffers_enabled_mask = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask; unsigned last_vbo = util_last_bit(vertex_buffers_enabled_mask); for (unsigned i = 0; i < last_vbo + 1; i++) { - if (ctx->vertex_buffers[i].buffer.resource) - zink_batch_resource_usage_set(batch, zink_resource(ctx->vertex_buffers[i].buffer.resource), false); + struct zink_resource *res = zink_resource(ctx->vertex_buffers[i].buffer.resource); + if (res) { + zink_batch_resource_usage_set(batch, res, false, true); + if (!ctx->unordered_blitting) + res->obj->unordered_read = false; + } } if (ctx->curr_program) zink_batch_reference_program(batch, &ctx->curr_program->base); } + if (ctx->di.bindless_refs_dirty) { + ctx->di.bindless_refs_dirty = false; + for (unsigned i = 0; i < 2; i++) { + 
util_dynarray_foreach(&ctx->di.bindless[i].resident, struct zink_bindless_descriptor*, bd) { + struct zink_resource *res = zink_descriptor_surface_resource(&(*bd)->ds); + zink_batch_resource_usage_set(&ctx->batch, res, (*bd)->access & PIPE_IMAGE_ACCESS_WRITE, res->obj->is_buffer); + if (!ctx->unordered_blitting) { + if ((*bd)->access & PIPE_IMAGE_ACCESS_WRITE || !res->obj->is_buffer) + res->obj->unordered_read = res->obj->unordered_write = false; + else + res->obj->unordered_read = false; + } + } + } + } + + unsigned global_count = util_dynarray_num_elements(&ctx->di.global_bindings, struct zink_resource*); + struct zink_resource **globals = ctx->di.global_bindings.data; + for (unsigned i = 0; i < global_count; i++) { + struct zink_resource *res = globals[i]; + if (!res) + continue; + zink_batch_resource_usage_set(batch, res, true, true); + res->obj->unordered_read = res->obj->unordered_write = false; + } +} + +static void +reapply_color_write(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + assert(screen->info.have_EXT_color_write_enable); + const VkBool32 enables[PIPE_MAX_COLOR_BUFS] = {1, 1, 1, 1, 1, 1, 1, 1}; + const VkBool32 disables[PIPE_MAX_COLOR_BUFS] = {0}; + const unsigned max_att = MIN2(PIPE_MAX_COLOR_BUFS, screen->info.props.limits.maxColorAttachments); + VKCTX(CmdSetColorWriteEnableEXT)(ctx->batch.state->cmdbuf, max_att, ctx->disable_color_writes ? disables : enables); + VKCTX(CmdSetColorWriteEnableEXT)(ctx->batch.state->reordered_cmdbuf, max_att, enables); + assert(screen->info.have_EXT_extended_dynamic_state); + if (ctx->dsa_state) + VKCTX(CmdSetDepthWriteEnable)(ctx->batch.state->cmdbuf, ctx->disable_color_writes ? 
VK_FALSE : ctx->dsa_state->hw_state.depth_write); } static void stall(struct zink_context *ctx) { - sync_flush(ctx, zink_batch_state(ctx->last_fence)); - zink_vkfence_wait(zink_screen(ctx->base.screen), ctx->last_fence, PIPE_TIMEOUT_INFINITE); + struct zink_screen *screen = zink_screen(ctx->base.screen); + sync_flush(ctx, ctx->last_batch_state); + zink_screen_timeline_wait(screen, ctx->last_batch_state->fence.batch_id, OS_TIMEOUT_INFINITE); zink_batch_reset_all(ctx); } +void +zink_reset_ds3_states(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (!screen->info.have_EXT_extended_dynamic_state3) + return; + if (screen->have_full_ds3) + ctx->ds3_states = UINT32_MAX; + else + ctx->ds3_states = BITFIELD_MASK(ZINK_DS3_BLEND_A2C); + if (!screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToOneEnable) + ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_BLEND_A21); + if (!screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable) + ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON); + if (screen->driver_workarounds.no_linestipple) + ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE); +} + static void flush_batch(struct zink_context *ctx, bool sync) { struct zink_batch *batch = &ctx->batch; + assert(!ctx->unordered_blitting); if (ctx->clears_enabled) /* start rp to do all the clears */ - zink_begin_render_pass(ctx); - zink_end_render_pass(ctx); + zink_batch_rp(ctx); + zink_batch_no_rp_safe(ctx); + + util_queue_fence_wait(&ctx->unsync_fence); + util_queue_fence_reset(&ctx->flush_fence); zink_end_batch(ctx, batch); ctx->deferred_fence = NULL; @@ -1981,8 +3425,9 @@ flush_batch(struct zink_context *ctx, bool sync) if (ctx->batch.state->is_device_lost) { check_device_lost(ctx); } else { + struct zink_screen *screen = zink_screen(ctx->base.screen); zink_start_batch(ctx, batch); - if (zink_screen(ctx->base.screen)->info.have_EXT_transform_feedback && ctx->num_so_targets) + if 
(screen->info.have_EXT_transform_feedback && ctx->num_so_targets) ctx->dirty_so_targets = true; ctx->pipeline_changed[0] = ctx->pipeline_changed[1] = true; zink_select_draw_vbo(ctx); @@ -1990,9 +3435,25 @@ flush_batch(struct zink_context *ctx, bool sync) if (ctx->oom_stall) stall(ctx); + zink_reset_ds3_states(ctx); + ctx->oom_flush = false; ctx->oom_stall = false; + ctx->dd.bindless_bound = false; + ctx->di.bindless_refs_dirty = true; + ctx->sample_locations_changed = ctx->gfx_pipeline_state.sample_locations_enabled; + if (zink_screen(ctx->base.screen)->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) { + VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch); + VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->reordered_cmdbuf, 1); + } + update_feedback_loop_dynamic_state(ctx); + if (screen->info.have_EXT_color_write_enable) + reapply_color_write(ctx); + update_layered_rendering_state(ctx); + tc_renderpass_info_reset(&ctx->dynamic_fb.tc_info); + ctx->rp_tc_info_updated = true; } + util_queue_fence_signal(&ctx->flush_fence); } void @@ -2016,30 +3477,124 @@ static bool rebind_fb_state(struct zink_context *ctx, struct zink_resource *match_res, bool from_set_fb) { bool rebind = false; - for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) rebind |= rebind_fb_surface(ctx, &ctx->fb_state.cbufs[i], match_res); - if (from_set_fb && ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture->bind & PIPE_BIND_SCANOUT) - ctx->new_swapchain = true; - } rebind |= rebind_fb_surface(ctx, &ctx->fb_state.zsbuf, match_res); return rebind; } static void -unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, bool changed) +unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, unsigned idx, bool changed) { + ctx->dynamic_fb.attachments[idx].imageView = VK_NULL_HANDLE; if (!surf) return; + struct zink_resource *res = 
zink_resource(surf->texture); if (changed) { - zink_fb_clears_apply(ctx, surf->texture); - if (zink_batch_usage_exists(zink_csurface(surf)->batch_uses)) - zink_batch_reference_surface(&ctx->batch, zink_csurface(surf)); ctx->rp_changed = true; } - struct zink_resource *res = zink_resource(surf->texture); - res->fb_binds--; - if (!res->fb_binds) - check_resource_for_batch_ref(ctx, res); + res->fb_bind_count--; + if (!res->fb_bind_count && !res->bind_count[0]) + _mesa_set_remove_key(ctx->need_barriers[0], res); + unsigned feedback_loops = ctx->feedback_loops; + if (ctx->feedback_loops & BITFIELD_BIT(idx)) { + ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + ctx->rp_layout_changed = true; + } + ctx->feedback_loops &= ~BITFIELD_BIT(idx); + if (feedback_loops != ctx->feedback_loops) { + if (idx == PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop_zs) { + if (ctx->gfx_pipeline_state.feedback_loop_zs) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.feedback_loop_zs = false; + } else if (idx < PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop) { + if (ctx->gfx_pipeline_state.feedback_loop) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.feedback_loop = false; + } + } + res->fb_binds &= ~BITFIELD_BIT(idx); + /* this is called just before the resource loses a reference, so a refcount==1 means the resource will be destroyed */ + if (!res->fb_bind_count && res->base.b.reference.count > 1) { + if (ctx->track_renderpasses && !ctx->blitting) { + if (!(res->base.b.bind & PIPE_BIND_DISPLAY_TARGET) && util_format_is_depth_or_stencil(surf->format)) + /* assume that all depth buffers which are not swapchain images will be used for sampling to avoid splitting renderpasses */ + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, 
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + if (!zink_is_swapchain(res) && !util_format_is_depth_or_stencil(surf->format)) + /* assume that all color buffers which are not swapchain images will be used for sampling to avoid splitting renderpasses */ + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } + if (res->sampler_bind_count[0]) { + update_res_sampler_layouts(ctx, res); + if (res->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && !ctx->blitting) + _mesa_set_add(ctx->need_barriers[0], res); + } + } +} + +void +zink_set_null_fs(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + bool prev_disable_fs = ctx->disable_fs; + ctx->disable_fs = ctx->rast_state && ctx->rast_state->base.rasterizer_discard && + (ctx->primitives_generated_active || (!ctx->queries_disabled && ctx->primitives_generated_suspended)); + struct zink_shader *zs = ctx->gfx_stages[MESA_SHADER_FRAGMENT]; + unsigned compact = screen->compact_descriptors ? 
ZINK_DESCRIPTOR_COMPACT : 0; + /* can't use CWE if side effects */ + bool no_cwe = (zs && (zs->ssbos_used || zs->bindless || zs->num_bindings[ZINK_DESCRIPTOR_TYPE_IMAGE - compact])) || + ctx->fs_query_active || ctx->occlusion_query_active || !screen->info.have_EXT_color_write_enable; + bool prev_disable_color_writes = ctx->disable_color_writes; + ctx->disable_color_writes = ctx->disable_fs && !no_cwe; + + if (ctx->disable_fs == prev_disable_fs) { + /* if this is a true no-op then return */ + if (!ctx->disable_fs || ctx->disable_color_writes == !no_cwe) + return; + /* else changing disable modes */ + } + + /* either of these cases requires removing the previous mode */ + if (!ctx->disable_fs || (prev_disable_fs && prev_disable_color_writes != !no_cwe)) { + if (prev_disable_color_writes) + reapply_color_write(ctx); + else + ctx->base.bind_fs_state(&ctx->base, ctx->saved_fs); + ctx->saved_fs = NULL; + /* fs/CWE reenabled, fs active, done */ + if (!ctx->disable_fs) + return; + } + + /* always use CWE when possible */ + if (!no_cwe) { + reapply_color_write(ctx); + return; + } + /* otherwise need to bind a null fs */ + if (!ctx->null_fs) { + nir_shader *nir = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, &screen->nir_options, "null_fs").shader; + nir->info.separate_shader = true; + ctx->null_fs = pipe_shader_from_nir(&ctx->base, nir); + } + ctx->saved_fs = ctx->gfx_stages[MESA_SHADER_FRAGMENT]; + ctx->base.bind_fs_state(&ctx->base, ctx->null_fs); +} + +static void +check_framebuffer_surface_mutable(struct pipe_context *pctx, struct pipe_surface *psurf) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurf; + if (!csurf->needs_mutable) + return; + zink_resource_object_init_mutable(ctx, zink_resource(psurf->texture)); + struct pipe_surface *psurf2 = pctx->create_surface(pctx, psurf->texture, psurf); + pipe_resource_reference(&psurf2->texture, NULL); + struct zink_ctx_surface *csurf2 = (struct 
zink_ctx_surface *)psurf2; + zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, csurf2->surf); + pctx->surface_destroy(pctx, psurf2); + csurf->needs_mutable = false; } static void @@ -2047,99 +3602,188 @@ zink_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *state) { struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); + unsigned samples = state->nr_cbufs || state->zsbuf ? 0 : state->samples; + unsigned w = ctx->fb_state.width; + unsigned h = ctx->fb_state.height; + unsigned layers = MAX2(zink_framebuffer_get_num_layers(state), 1); + bool flush_clears = ctx->clears_enabled && + (ctx->dynamic_fb.info.layerCount != layers || + state->width != w || state->height != h); + if (ctx->clears_enabled && !flush_clears) { + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + if (i >= state->nr_cbufs || !ctx->fb_state.cbufs[i] || !state->cbufs[i]) + flush_clears |= zink_fb_clear_enabled(ctx, i); + else if (zink_fb_clear_enabled(ctx, i) && ctx->fb_state.cbufs[i] != state->cbufs[i]) { + struct zink_surface *a = zink_csurface(ctx->fb_state.cbufs[i]); + struct zink_surface *b = zink_csurface(state->cbufs[i]); + if (a == b) + continue; + if (!a || !b || memcmp(&a->base.u.tex, &b->base.u.tex, sizeof(b->base.u.tex)) || + a->base.texture != b->base.texture) + flush_clears = true; + else if (a->base.format != b->base.format) + zink_fb_clear_rewrite(ctx, i, a->base.format, b->base.format); + } + } + } + if (ctx->fb_state.zsbuf != state->zsbuf) + flush_clears |= zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS); + if (flush_clears) { + bool queries_disabled = ctx->queries_disabled; + ctx->queries_disabled = true; + zink_batch_rp(ctx); + ctx->queries_disabled = queries_disabled; + } + /* need to ensure we start a new rp on next draw */ + zink_batch_no_rp_safe(ctx); for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { - struct pipe_surface *surf = ctx->fb_state.cbufs[i]; - 
unbind_fb_surface(ctx, surf, i >= state->nr_cbufs || surf != state->cbufs[i]); + struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; + if (i < state->nr_cbufs) + ctx->rp_changed |= !!zink_transient_surface(psurf) != !!zink_transient_surface(state->cbufs[i]); + unbind_fb_surface(ctx, psurf, i, i >= state->nr_cbufs || psurf != state->cbufs[i]); + if (psurf && ctx->needs_present == zink_resource(psurf->texture)) + ctx->needs_present = NULL; } if (ctx->fb_state.zsbuf) { - struct pipe_surface *surf = ctx->fb_state.zsbuf; - struct zink_resource *res = zink_resource(surf->texture); - bool changed = surf != state->zsbuf; - unbind_fb_surface(ctx, surf, changed); + struct pipe_surface *psurf = ctx->fb_state.zsbuf; + struct zink_resource *res = zink_resource(psurf->texture); + bool changed = psurf != state->zsbuf; + unbind_fb_surface(ctx, psurf, PIPE_MAX_COLOR_BUFS, changed); + if (!changed) + ctx->rp_changed |= !!zink_transient_surface(psurf) != !!zink_transient_surface(state->zsbuf); if (changed && unlikely(res->obj->needs_zs_evaluate)) /* have to flush zs eval while the sample location data still exists, * so just throw some random barrier */ - zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); } /* renderpass changes if the number or types of attachments change */ ctx->rp_changed |= ctx->fb_state.nr_cbufs != state->nr_cbufs; ctx->rp_changed |= !!ctx->fb_state.zsbuf != !!state->zsbuf; - - unsigned w = ctx->fb_state.width; - unsigned h = ctx->fb_state.height; + if (ctx->fb_state.nr_cbufs != state->nr_cbufs) { + ctx->blend_state_changed |= screen->have_full_ds3; + if (state->nr_cbufs && screen->have_full_ds3) + ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_ON) | BITFIELD_BIT(ZINK_DS3_BLEND_WRITE) | BITFIELD_BIT(ZINK_DS3_BLEND_EQ); + } util_copy_framebuffer_state(&ctx->fb_state, 
state); - zink_update_fbfetch(ctx); - unsigned prev_void_alpha_attachments = ctx->gfx_pipeline_state.void_alpha_attachments; - ctx->gfx_pipeline_state.void_alpha_attachments = 0; + ctx->rp_changed |= zink_update_fbfetch(ctx); + ctx->transient_attachments = 0; + ctx->fb_layer_mismatch = 0; + + ctx->dynamic_fb.info.renderArea.offset.x = 0; + ctx->dynamic_fb.info.renderArea.offset.y = 0; + ctx->dynamic_fb.info.renderArea.extent.width = state->width; + ctx->dynamic_fb.info.renderArea.extent.height = state->height; + ctx->dynamic_fb.info.colorAttachmentCount = ctx->fb_state.nr_cbufs; + ctx->rp_changed |= ctx->dynamic_fb.info.layerCount != layers; + ctx->dynamic_fb.info.layerCount = layers; + ctx->gfx_pipeline_state.rendering_info.colorAttachmentCount = ctx->fb_state.nr_cbufs; + + ctx->void_clears = 0; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { - struct pipe_surface *surf = ctx->fb_state.cbufs[i]; - if (surf) { - zink_resource(surf->texture)->fb_binds++; - ctx->gfx_pipeline_state.void_alpha_attachments |= util_format_has_alpha1(surf->format) ? BITFIELD_BIT(i) : 0; + struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; + if (psurf) { + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient || psurf->nr_samples) + ctx->transient_attachments |= BITFIELD_BIT(i); + if (!samples) + samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? 
psurf->nr_samples : 1); + struct zink_resource *res = zink_resource(psurf->texture); + check_framebuffer_surface_mutable(pctx, psurf); + if (zink_csurface(psurf)->info.layerCount > layers) + ctx->fb_layer_mismatch |= BITFIELD_BIT(i); + if (res->modifiers) { + assert(!ctx->needs_present || ctx->needs_present == res); + ctx->needs_present = res; + } + if (res->obj->dt) { + /* #6274 */ + if (!zink_screen(ctx->base.screen)->info.have_KHR_swapchain_mutable_format && + psurf->format != res->base.b.format) { + static bool warned = false; + if (!warned) { + mesa_loge("zink: SRGB framebuffer unsupported without KHR_swapchain_mutable_format"); + warned = true; + } + } + } + res->fb_bind_count++; + res->fb_binds |= BITFIELD_BIT(i); + batch_ref_fb_surface(ctx, ctx->fb_state.cbufs[i]); + if (util_format_has_alpha1(psurf->format)) { + if (!res->valid && !zink_fb_clear_full_exists(ctx, i)) + ctx->void_clears |= (PIPE_CLEAR_COLOR0 << i); + } } } - if (ctx->gfx_pipeline_state.void_alpha_attachments != prev_void_alpha_attachments) - ctx->gfx_pipeline_state.dirty = true; + unsigned depth_bias_scale_factor = ctx->depth_bias_scale_factor; if (ctx->fb_state.zsbuf) { - struct pipe_surface *surf = ctx->fb_state.zsbuf; - zink_resource(surf->texture)->fb_binds++; + struct pipe_surface *psurf = ctx->fb_state.zsbuf; + struct zink_surface *transient = zink_transient_surface(psurf); + check_framebuffer_surface_mutable(pctx, psurf); + batch_ref_fb_surface(ctx, ctx->fb_state.zsbuf); + if (transient || psurf->nr_samples) + ctx->transient_attachments |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); + if (!samples) + samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? 
psurf->nr_samples : 1); + if (zink_csurface(psurf)->info.layerCount > layers) + ctx->fb_layer_mismatch |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); + zink_resource(psurf->texture)->fb_bind_count++; + zink_resource(psurf->texture)->fb_binds |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); + switch (psurf->format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z16_UNORM_S8_UINT: + ctx->depth_bias_scale_factor = zink_screen(ctx->base.screen)->driver_workarounds.z16_unscaled_bias; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X24S8_UINT: + case PIPE_FORMAT_X8Z24_UNORM: + ctx->depth_bias_scale_factor = zink_screen(ctx->base.screen)->driver_workarounds.z24_unscaled_bias; + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_UNORM: + ctx->depth_bias_scale_factor = 1<<23; + break; + default: + ctx->depth_bias_scale_factor = 0; + } + } else { + ctx->depth_bias_scale_factor = 0; } + if (depth_bias_scale_factor != ctx->depth_bias_scale_factor && + ctx->rast_state && ctx->rast_state->base.offset_units_unscaled) + ctx->rast_state_changed = true; + rebind_fb_state(ctx, NULL, true); + ctx->fb_state.samples = MAX2(samples, 1); + zink_update_framebuffer_state(ctx); if (ctx->fb_state.width != w || ctx->fb_state.height != h) ctx->scissor_changed = true; - rebind_fb_state(ctx, NULL, true); - ctx->fb_state.samples = util_framebuffer_get_num_samples(state); - /* get_framebuffer adds a ref if the fb is reused or created; - * always do get_framebuffer first to avoid deleting the same fb - * we're about to use - */ - struct zink_framebuffer *fb = ctx->get_framebuffer(ctx); - struct zink_screen *screen = zink_screen(ctx->base.screen); - if (ctx->framebuffer && !screen->info.have_KHR_imageless_framebuffer) { - simple_mtx_lock(&screen->framebuffer_mtx); - struct hash_entry *he = _mesa_hash_table_search(&screen->framebuffer_cache, &ctx->framebuffer->state); - if (ctx->framebuffer && 
!ctx->framebuffer->state.num_attachments) { - /* if this has no attachments then its lifetime has ended */ - _mesa_hash_table_remove(&screen->framebuffer_cache, he); - he = NULL; - /* ensure an unflushed fb doesn't get destroyed by deferring it */ - util_dynarray_append(&ctx->batch.state->dead_framebuffers, struct zink_framebuffer*, ctx->framebuffer); - ctx->framebuffer = NULL; - } - /* a framebuffer loses 1 ref every time we unset it; - * we do NOT add refs here, as the ref has already been added in - * get_framebuffer() - */ - if (zink_framebuffer_reference(screen, &ctx->framebuffer, NULL) && he) - _mesa_hash_table_remove(&screen->framebuffer_cache, he); - simple_mtx_unlock(&screen->framebuffer_mtx); - } - ctx->fb_changed |= ctx->framebuffer != fb; - ctx->framebuffer = fb; uint8_t rast_samples = ctx->fb_state.samples - 1; - /* update the shader key if applicable: - * if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count: - * in GL, rast_samples==1 means ignore gl_SampleMask[] - * in VK, gl_SampleMask[] is never ignored - */ - if (rast_samples != ctx->gfx_pipeline_state.rast_samples && - (!ctx->gfx_stages[PIPE_SHADER_FRAGMENT] || - ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK))) - zink_set_fs_key(ctx)->samples = ctx->fb_state.samples > 0; + if (rast_samples != ctx->gfx_pipeline_state.rast_samples) + zink_update_fs_key_samples(ctx); if (ctx->gfx_pipeline_state.rast_samples != rast_samples) { ctx->sample_locations_changed |= ctx->gfx_pipeline_state.sample_locations_enabled; - ctx->gfx_pipeline_state.dirty = true; + zink_flush_dgc_if_enabled(ctx); + if (screen->have_full_ds3) + ctx->sample_mask_changed = true; + else + ctx->gfx_pipeline_state.dirty = true; } ctx->gfx_pipeline_state.rast_samples = rast_samples; - /* need to ensure we start a new rp on next draw */ - zink_batch_no_rp(ctx); /* this is an ideal time to oom flush since it won't split a renderpass */ - if 
(ctx->oom_flush) + if (ctx->oom_flush && !ctx->unordered_blitting) flush_batch(ctx, false); + else + update_layered_rendering_state(ctx); + + ctx->rp_tc_info_updated = !ctx->blitting; } static void @@ -2148,14 +3792,32 @@ zink_set_blend_color(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); memcpy(ctx->blend_constants, color->color, sizeof(float) * 4); + + ctx->blend_color_changed = true; + zink_flush_dgc_if_enabled(ctx); } static void zink_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) { struct zink_context *ctx = zink_context(pctx); + if (ctx->gfx_pipeline_state.sample_mask == sample_mask) + return; ctx->gfx_pipeline_state.sample_mask = sample_mask; + zink_flush_dgc_if_enabled(ctx); + if (zink_screen(pctx->screen)->have_full_ds3) + ctx->sample_mask_changed = true; + else + ctx->gfx_pipeline_state.dirty = true; +} + +static void +zink_set_min_samples(struct pipe_context *pctx, unsigned min_samples) +{ + struct zink_context *ctx = zink_context(pctx); + ctx->gfx_pipeline_state.min_samples = min_samples - 1; ctx->gfx_pipeline_state.dirty = true; + zink_flush_dgc_if_enabled(ctx); } static void @@ -2170,402 +3832,7 @@ zink_set_sample_locations(struct pipe_context *pctx, size_t size, const uint8_t if (locations) memcpy(ctx->sample_locations, locations, size); -} - -static VkAccessFlags -access_src_flags(VkImageLayout layout) -{ - switch (layout) { - case VK_IMAGE_LAYOUT_UNDEFINED: - return 0; - - case VK_IMAGE_LAYOUT_GENERAL: - return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return VK_ACCESS_SHADER_READ_BIT; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return VK_ACCESS_TRANSFER_READ_BIT; - - 
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return VK_ACCESS_TRANSFER_WRITE_BIT; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - return VK_ACCESS_HOST_WRITE_BIT; - - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - return 0; - - default: - unreachable("unexpected layout"); - } -} - -static VkAccessFlags -access_dst_flags(VkImageLayout layout) -{ - switch (layout) { - case VK_IMAGE_LAYOUT_UNDEFINED: - return 0; - - case VK_IMAGE_LAYOUT_GENERAL: - return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return VK_ACCESS_SHADER_READ_BIT; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return VK_ACCESS_TRANSFER_READ_BIT; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: - return VK_ACCESS_SHADER_READ_BIT; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return VK_ACCESS_TRANSFER_WRITE_BIT; - - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - return 0; - - default: - unreachable("unexpected layout"); - } -} - -static VkPipelineStageFlags -pipeline_dst_stage(VkImageLayout layout) -{ - switch (layout) { - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return VK_PIPELINE_STAGE_TRANSFER_BIT; - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return VK_PIPELINE_STAGE_TRANSFER_BIT; - - case VK_IMAGE_LAYOUT_GENERAL: - return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: - return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - - default: - return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } -} - -#define ALL_READ_ACCESS_FLAGS \ - 
(VK_ACCESS_INDIRECT_COMMAND_READ_BIT | \ - VK_ACCESS_INDEX_READ_BIT | \ - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | \ - VK_ACCESS_UNIFORM_READ_BIT | \ - VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | \ - VK_ACCESS_SHADER_READ_BIT | \ - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | \ - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | \ - VK_ACCESS_TRANSFER_READ_BIT |\ - VK_ACCESS_HOST_READ_BIT |\ - VK_ACCESS_MEMORY_READ_BIT |\ - VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |\ - VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT |\ - VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT |\ - VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR |\ - VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV |\ - VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT |\ - VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV |\ - VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV |\ - VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV) - - -bool -zink_resource_access_is_write(VkAccessFlags flags) -{ - return (flags & ALL_READ_ACCESS_FLAGS) != flags; -} - -bool -zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - if (!pipeline) - pipeline = pipeline_dst_stage(new_layout); - if (!flags) - flags = access_dst_flags(new_layout); - return res->layout != new_layout || (res->obj->access_stage & pipeline) != pipeline || - (res->obj->access & flags) != flags || - zink_resource_access_is_write(res->obj->access) || - zink_resource_access_is_write(flags); -} - -bool -zink_resource_image_barrier_init(VkImageMemoryBarrier *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - if (!pipeline) - pipeline = pipeline_dst_stage(new_layout); - if (!flags) - flags = access_dst_flags(new_layout); - - VkImageSubresourceRange isr = { - res->aspect, - 0, VK_REMAINING_MIP_LEVELS, - 0, VK_REMAINING_ARRAY_LAYERS - }; - *imb = (VkImageMemoryBarrier){ - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - NULL, - res->obj->access ? 
res->obj->access : access_src_flags(res->layout), - flags, - res->layout, - new_layout, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - res->obj->image, - isr - }; - return res->obj->needs_zs_evaluate || zink_resource_image_needs_barrier(res, new_layout, flags, pipeline); -} - -static inline bool -is_shader_pipline_stage(VkPipelineStageFlags pipeline) -{ - return pipeline & (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); -} - -static void -resource_check_defer_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkPipelineStageFlags pipeline) -{ - assert(res->obj->is_buffer); - if (res->bind_count[0] - res->so_bind_count > 0) { - if ((res->obj->is_buffer && res->vbo_bind_mask && !(pipeline & VK_PIPELINE_STAGE_VERTEX_INPUT_BIT)) || - ((!res->obj->is_buffer || util_bitcount(res->vbo_bind_mask) != res->bind_count[0]) && !is_shader_pipline_stage(pipeline))) - /* gfx rebind */ - _mesa_set_add(ctx->need_barriers[0], res); - } - if (res->bind_count[1] && !(pipeline & VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) - /* compute rebind */ - _mesa_set_add(ctx->need_barriers[1], res); -} - -static inline VkCommandBuffer -get_cmdbuf(struct zink_context *ctx, struct zink_resource *res) -{ - if ((res->obj->access && !res->obj->unordered_barrier) || !ctx->batch.in_rp) { - zink_batch_no_rp(ctx); - res->obj->unordered_barrier = false; - return ctx->batch.state->cmdbuf; - } - res->obj->unordered_barrier = true; - ctx->batch.state->has_barriers = true; - return ctx->batch.state->barrier_cmdbuf; -} - -static void -resource_check_defer_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout layout, VkPipelineStageFlags pipeline) -{ - assert(!res->obj->is_buffer); - - bool is_compute = pipeline == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - /* if this is a non-shader 
barrier and there are binds, always queue a shader barrier */ - bool is_shader = is_shader_pipline_stage(pipeline); - if ((is_shader || !res->bind_count[is_compute]) && - /* if no layout change is needed between gfx and compute, do nothing */ - !res->bind_count[!is_compute] && (!is_compute || !res->fb_binds)) - return; - - if (res->bind_count[!is_compute] && is_shader) { - /* if the layout is the same between gfx and compute, do nothing */ - if (layout == zink_descriptor_util_image_layout_eval(res, !is_compute)) - return; - } - /* queue a layout change if a layout change will be needed */ - if (res->bind_count[!is_compute]) - _mesa_set_add(ctx->need_barriers[!is_compute], res); - /* also queue a layout change if this is a non-shader layout */ - if (res->bind_count[is_compute] && !is_shader) - _mesa_set_add(ctx->need_barriers[is_compute], res); -} - -void -zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, - VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - VkImageMemoryBarrier imb; - if (!pipeline) - pipeline = pipeline_dst_stage(new_layout); - - if (!zink_resource_image_barrier_init(&imb, res, new_layout, flags, pipeline)) - return; - /* only barrier if we're changing layout or doing something besides read -> read */ - VkCommandBuffer cmdbuf = get_cmdbuf(ctx, res); - assert(new_layout); - if (!res->obj->access_stage) - imb.srcAccessMask = 0; - if (res->obj->needs_zs_evaluate) - imb.pNext = &res->obj->zs_evaluate; - res->obj->needs_zs_evaluate = false; - if (res->dmabuf_acquire) { - imb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_FOREIGN_EXT; - imb.dstQueueFamilyIndex = zink_screen(ctx->base.screen)->gfx_queue; - res->dmabuf_acquire = false; - } - VKCTX(CmdPipelineBarrier)( - cmdbuf, - res->obj->access_stage ? 
res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - pipeline, - 0, - 0, NULL, - 0, NULL, - 1, &imb - ); - - resource_check_defer_image_barrier(ctx, res, new_layout, pipeline); - - if (res->obj->unordered_barrier) { - res->obj->access |= imb.dstAccessMask; - res->obj->access_stage |= pipeline; - } else { - res->obj->access = imb.dstAccessMask; - res->obj->access_stage = pipeline; - } - res->layout = new_layout; -} - - -VkPipelineStageFlags -zink_pipeline_flags_from_stage(VkShaderStageFlagBits stage) -{ - switch (stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; - case VK_SHADER_STAGE_FRAGMENT_BIT: - return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - case VK_SHADER_STAGE_GEOMETRY_BIT: - return VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: - return VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: - return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; - case VK_SHADER_STAGE_COMPUTE_BIT: - return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - default: - unreachable("unknown shader stage bit"); - } -} - -ALWAYS_INLINE static VkPipelineStageFlags -pipeline_access_stage(VkAccessFlags flags) -{ - if (flags & (VK_ACCESS_UNIFORM_READ_BIT | - VK_ACCESS_SHADER_READ_BIT | - VK_ACCESS_SHADER_WRITE_BIT)) - return VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV | - VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV | - VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - return VK_PIPELINE_STAGE_TRANSFER_BIT; -} - -ALWAYS_INLINE static bool -zink_resource_buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - if (!res->obj->access || 
!res->obj->access_stage) - return true; - if (!pipeline) - pipeline = pipeline_access_stage(flags); - return zink_resource_access_is_write(res->obj->access) || - zink_resource_access_is_write(flags) || - ((res->obj->access_stage & pipeline) != pipeline && !(res->obj->access_stage & (pipeline - 1))) || - (res->obj->access & flags) != flags; -} - -void -zink_fake_buffer_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - res->obj->access = flags; - res->obj->access_stage = pipeline; -} - -void -zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - VkMemoryBarrier bmb; - if (!pipeline) - pipeline = pipeline_access_stage(flags); - if (!zink_resource_buffer_needs_barrier(res, flags, pipeline)) - return; - - bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - bmb.pNext = NULL; - bmb.srcAccessMask = res->obj->access; - bmb.dstAccessMask = flags; - if (!res->obj->access_stage) - bmb.srcAccessMask = 0; - VkCommandBuffer cmdbuf = get_cmdbuf(ctx, res); - /* only barrier if we're changing layout or doing something besides read -> read */ - VKCTX(CmdPipelineBarrier)( - cmdbuf, - res->obj->access_stage ? 
res->obj->access_stage : pipeline_access_stage(res->obj->access), - pipeline, - 0, - 1, &bmb, - 0, NULL, - 0, NULL - ); - - resource_check_defer_buffer_barrier(ctx, res, pipeline); - - if (res->obj->unordered_barrier) { - res->obj->access |= bmb.dstAccessMask; - res->obj->access_stage |= pipeline; - } else { - res->obj->access = bmb.dstAccessMask; - res->obj->access_stage = pipeline; - } -} - -bool -zink_resource_needs_barrier(struct zink_resource *res, VkImageLayout layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) -{ - if (res->base.b.target == PIPE_BUFFER) - return zink_resource_buffer_needs_barrier(res, flags, pipeline); - return zink_resource_image_needs_barrier(res, layout, flags, pipeline); -} - -VkShaderStageFlagBits -zink_shader_stage(enum pipe_shader_type type) -{ - VkShaderStageFlagBits stages[] = { - [PIPE_SHADER_VERTEX] = VK_SHADER_STAGE_VERTEX_BIT, - [PIPE_SHADER_FRAGMENT] = VK_SHADER_STAGE_FRAGMENT_BIT, - [PIPE_SHADER_GEOMETRY] = VK_SHADER_STAGE_GEOMETRY_BIT, - [PIPE_SHADER_TESS_CTRL] = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, - [PIPE_SHADER_TESS_EVAL] = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, - [PIPE_SHADER_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, - }; - return stages[type]; + zink_flush_dgc_if_enabled(ctx); } static void @@ -2577,32 +3844,85 @@ zink_flush(struct pipe_context *pctx, bool deferred = flags & PIPE_FLUSH_DEFERRED; bool deferred_fence = false; struct zink_batch *batch = &ctx->batch; - struct zink_fence *fence = NULL; + struct zink_batch_state *bs = NULL; struct zink_screen *screen = zink_screen(ctx->base.screen); - unsigned submit_count = 0; + VkSemaphore export_sem = VK_NULL_HANDLE; /* triggering clears will force has_work */ - if (!deferred && ctx->clears_enabled) + if (!deferred && ctx->clears_enabled) { + /* if fbfetch outputs are active, disable them when flushing clears */ + unsigned fbfetch_outputs = ctx->fbfetch_outputs; + if (fbfetch_outputs) { + ctx->fbfetch_outputs = 0; + ctx->rp_changed = true; + } + if 
(ctx->fb_state.zsbuf) + zink_blit_barriers(ctx, NULL, zink_resource(ctx->fb_state.zsbuf->texture), false); + + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) { + if (ctx->fb_state.cbufs[i]) + zink_blit_barriers(ctx, NULL, zink_resource(ctx->fb_state.cbufs[i]->texture), false); + } + ctx->blitting = true; /* start rp to do all the clears */ - zink_begin_render_pass(ctx); + zink_batch_rp(ctx); + ctx->blitting = false; + ctx->fbfetch_outputs = fbfetch_outputs; + ctx->rp_changed |= fbfetch_outputs > 0; + } + + if (flags & PIPE_FLUSH_END_OF_FRAME) { +#ifdef HAVE_RENDERDOC_APP_H + p_atomic_inc(&screen->renderdoc_frame); +#endif + if (ctx->needs_present && ctx->needs_present->obj->dt_idx != UINT32_MAX && + zink_is_swapchain(ctx->needs_present)) { + zink_kopper_readback_update(ctx, ctx->needs_present); + screen->image_barrier(ctx, ctx->needs_present, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + } + ctx->needs_present = NULL; + } + + if (flags & PIPE_FLUSH_FENCE_FD) { + assert(!deferred && pfence); + const VkExportSemaphoreCreateInfo esci = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + const VkSemaphoreCreateInfo sci = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &esci, + }; + VkResult result = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &export_sem); + if (zink_screen_handle_vkresult(screen, result)) { + assert(!batch->state->signal_semaphore); + batch->state->signal_semaphore = export_sem; + batch->has_work = true; + } else { + mesa_loge("ZINK: vkCreateSemaphore failed (%s)", vk_Result_to_str(result)); + + /* let flush proceed and ensure a null sem for fence_get_fd to return -1 */ + export_sem = VK_NULL_HANDLE; + } + } if (!batch->has_work) { if (pfence) { /* reuse last fence */ - fence = ctx->last_fence; + bs = ctx->last_batch_state; } if (!deferred) { - struct zink_batch_state *last = zink_batch_state(ctx->last_fence); + 
struct zink_batch_state *last = ctx->last_batch_state; if (last) { sync_flush(ctx, last); if (last->is_device_lost) check_device_lost(ctx); } } - tc_driver_internal_flush_notify(ctx->tc); + if (ctx->tc && !ctx->track_renderpasses) + tc_driver_internal_flush_notify(ctx->tc); } else { - fence = &batch->state->fence; - submit_count = batch->state->submit_count; + bs = batch->state; if (deferred && !(flags & PIPE_FLUSH_FENCE_FD) && pfence) deferred_fence = true; else @@ -2622,35 +3942,33 @@ zink_flush(struct pipe_context *pctx, *pfence = (struct pipe_fence_handle *)mfence; } - mfence->fence = fence; - if (fence) - mfence->submit_count = submit_count; + assert(!mfence->fence); + mfence->fence = &bs->fence; + mfence->sem = export_sem; + if (bs) { + mfence->submit_count = bs->usage.submit_count; + util_dynarray_append(&bs->fence.mfences, struct zink_tc_fence *, mfence); + } + if (export_sem) { + pipe_reference(NULL, &mfence->reference); + util_dynarray_append(&ctx->batch.state->fences, struct zink_tc_fence*, mfence); + } if (deferred_fence) { - assert(fence); + assert(bs); mfence->deferred_ctx = pctx; - assert(!ctx->deferred_fence || ctx->deferred_fence == fence); - ctx->deferred_fence = fence; + assert(!ctx->deferred_fence || ctx->deferred_fence == &bs->fence); + ctx->deferred_fence = &bs->fence; } - if (!fence || flags & TC_FLUSH_ASYNC) { + if (!bs || flags & TC_FLUSH_ASYNC) { if (!util_queue_fence_is_signalled(&mfence->ready)) util_queue_fence_signal(&mfence->ready); } } - if (fence) { + if (bs) { if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) - sync_flush(ctx, zink_batch_state(fence)); - - if (flags & PIPE_FLUSH_END_OF_FRAME && !(flags & TC_FLUSH_ASYNC) && !deferred) { - /* if the first frame has not yet occurred, we need an explicit fence here - * in some cases in order to correctly draw the first frame, though it's - * unknown at this time why this is the case - */ - if (!ctx->first_frame_done) - zink_vkfence_wait(screen, fence, PIPE_TIMEOUT_INFINITE); - 
ctx->first_frame_done = true; - } + sync_flush(ctx, bs); } } @@ -2661,51 +3979,28 @@ zink_fence_wait(struct pipe_context *pctx) if (ctx->batch.has_work) pctx->flush(pctx, NULL, PIPE_FLUSH_HINT_FINISH); - if (ctx->last_fence) + if (ctx->last_batch_state) stall(ctx); } void -zink_wait_on_batch(struct zink_context *ctx, uint32_t batch_id) +zink_wait_on_batch(struct zink_context *ctx, uint64_t batch_id) { - struct zink_batch_state *bs = ctx->batch.state; - assert(bs); - if (!batch_id || bs->fence.batch_id == batch_id) + struct zink_batch_state *bs; + if (!batch_id) { /* not submitted yet */ flush_batch(ctx, true); - if (ctx->have_timelines) { - if (!zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, UINT64_MAX)) - check_device_lost(ctx); - return; + bs = ctx->last_batch_state; + assert(bs); + batch_id = bs->fence.batch_id; } - simple_mtx_lock(&ctx->batch_mtx); - struct zink_fence *fence; - - assert(batch_id || ctx->last_fence); - if (ctx->last_fence && (!batch_id || batch_id == zink_batch_state(ctx->last_fence)->fence.batch_id)) - fence = ctx->last_fence; - else { - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->batch_states, batch_id, (void*)(uintptr_t)batch_id); - if (!he) { - simple_mtx_unlock(&ctx->batch_mtx); - /* if we can't find it, it either must have finished already or is on a different context */ - if (!zink_screen_check_last_finished(zink_screen(ctx->base.screen), batch_id)) { - /* if it hasn't finished, it's on another context, so force a flush so there's something to wait on */ - ctx->batch.has_work = true; - zink_fence_wait(&ctx->base); - } - return; - } - fence = he->data; - } - simple_mtx_unlock(&ctx->batch_mtx); - assert(fence); - sync_flush(ctx, zink_batch_state(fence)); - zink_vkfence_wait(zink_screen(ctx->base.screen), fence, PIPE_TIMEOUT_INFINITE); + assert(batch_id); + if (!zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, UINT64_MAX)) + check_device_lost(ctx); } bool 
-zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id, bool have_lock) +zink_check_batch_completion(struct zink_context *ctx, uint64_t batch_id) { assert(ctx->batch.state); if (!batch_id) @@ -2715,76 +4010,60 @@ zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id, bool ha if (zink_screen_check_last_finished(zink_screen(ctx->base.screen), batch_id)) return true; - if (ctx->have_timelines) { - bool success = zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, 0); - if (!success) - check_device_lost(ctx); - return success; - } - struct zink_fence *fence; - - if (!have_lock) - simple_mtx_lock(&ctx->batch_mtx); - - if (ctx->last_fence && batch_id == zink_batch_state(ctx->last_fence)->fence.batch_id) - fence = ctx->last_fence; - else { - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->batch_states, batch_id, (void*)(uintptr_t)batch_id); - /* if we can't find it, it either must have finished already or is on a different context */ - if (!he) { - if (!have_lock) - simple_mtx_unlock(&ctx->batch_mtx); - /* return compare against last_finished, since this has info from all contexts */ - return zink_screen_check_last_finished(zink_screen(ctx->base.screen), batch_id); - } - fence = he->data; - } - if (!have_lock) - simple_mtx_unlock(&ctx->batch_mtx); - assert(fence); - if (zink_screen(ctx->base.screen)->threaded && - !util_queue_fence_is_signalled(&zink_batch_state(fence)->flush_completed)) - return false; - return zink_vkfence_wait(zink_screen(ctx->base.screen), fence, 0); + bool success = zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, 0); + if (!success) + check_device_lost(ctx); + return success; } static void zink_texture_barrier(struct pipe_context *pctx, unsigned flags) { struct zink_context *ctx = zink_context(pctx); + VkAccessFlags dst = flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER ? 
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT : + VK_ACCESS_SHADER_READ_BIT; + if (!ctx->framebuffer || !ctx->framebuffer->state.num_attachments) return; - VkMemoryBarrier bmb; - bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - bmb.pNext = NULL; - bmb.srcAccessMask = 0; - bmb.dstAccessMask = 0; - zink_batch_no_rp(ctx); - if (ctx->fb_state.zsbuf) { - VkMemoryBarrier dmb; - dmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + /* if this is a fb barrier, flush all pending clears */ + if (ctx->rp_clears_enabled && dst == VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) + zink_batch_rp(ctx); + + /* this is not an in-renderpass barrier */ + if (!ctx->fbfetch_outputs) + zink_batch_no_rp(ctx); + + if (zink_screen(ctx->base.screen)->info.have_KHR_synchronization2) { + VkDependencyInfo dep = {0}; + dep.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; + dep.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + dep.memoryBarrierCount = 1; + + VkMemoryBarrier2 dmb = {0}; + dmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; dmb.pNext = NULL; - dmb.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dmb.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - VKCTX(CmdPipelineBarrier)( - ctx->batch.state->cmdbuf, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, - 1, &dmb, - 0, NULL, - 0, NULL - ); + dmb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dmb.dstAccessMask = dst; + dmb.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dmb.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; + dep.pMemoryBarriers = &dmb; + + /* if zs fbfetch is a thing? 
+ if (ctx->fb_state.zsbuf) { + const VkPipelineStageFlagBits2 depth_flags = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT; + dmb.dstAccessMask |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + dmb.srcStageMask |= depth_flags; + dmb.dstStageMask |= depth_flags; + } + */ + VKCTX(CmdPipelineBarrier2)(ctx->batch.state->cmdbuf, &dep); } else { - bmb.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - bmb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; - } - if (ctx->fb_state.nr_cbufs > 0) { - bmb.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - bmb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; - } - if (bmb.srcAccessMask) + VkMemoryBarrier bmb = {0}; + bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + bmb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + bmb.dstAccessMask = dst; VKCTX(CmdPipelineBarrier)( ctx->batch.state->cmdbuf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, @@ -2794,6 +4073,7 @@ zink_texture_barrier(struct pipe_context *pctx, unsigned flags) 0, NULL, 0, NULL ); + } } static inline void @@ -2805,7 +4085,7 @@ mem_barrier(struct zink_context *ctx, VkPipelineStageFlags src_stage, VkPipeline mb.pNext = NULL; mb.srcAccessMask = src; mb.dstAccessMask = dst; - zink_end_render_pass(ctx); + zink_batch_no_rp(ctx); VKCTX(CmdPipelineBarrier)(batch->state->cmdbuf, src_stage, dst_stage, 0, 1, &mb, 0, NULL, 0, NULL); } @@ -2829,11 +4109,11 @@ zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute) VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT); + if (ctx->memory_barrier & PIPE_BARRIER_INDIRECT_BUFFER) + mem_barrier(ctx, src, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT); if (!is_compute) { - if (ctx->memory_barrier & PIPE_BARRIER_INDIRECT_BUFFER) - mem_barrier(ctx, src, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_ACCESS_INDIRECT_COMMAND_READ_BIT); if (ctx->memory_barrier & PIPE_BARRIER_VERTEX_BUFFER) 
mem_barrier(ctx, gfx_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_SHADER_WRITE_BIT, @@ -2878,227 +4158,19 @@ zink_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres) { struct zink_context *ctx = zink_context(pctx); - /* TODO: this is not futureproof and should be updated once proper - * WSI support is added - */ - if (pres->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) - pipe_resource_reference(&ctx->batch.state->flush_res, pres); -} - -void -zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, - unsigned dst_offset, unsigned src_offset, unsigned size) -{ - VkBufferCopy region; - region.srcOffset = src_offset; - region.dstOffset = dst_offset; - region.size = size; - - struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - zink_batch_reference_resource_rw(batch, src, false); - zink_batch_reference_resource_rw(batch, dst, true); - util_range_add(&dst->base.b, &dst->valid_buffer_range, dst_offset, dst_offset + size); - zink_resource_buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0); - zink_resource_buffer_barrier(ctx, dst, VK_ACCESS_TRANSFER_WRITE_BIT, 0); - VKCTX(CmdCopyBuffer)(batch->state->cmdbuf, src->obj->buffer, dst->obj->buffer, 1, ®ion); -} - -void -zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, - unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, - unsigned src_level, const struct pipe_box *src_box, enum pipe_map_flags map_flags) -{ - struct zink_resource *img = dst->base.b.target == PIPE_BUFFER ? src : dst; - struct zink_resource *buf = dst->base.b.target == PIPE_BUFFER ? 
dst : src; - struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - - bool buf2img = buf == src; - - if (buf2img) { - zink_resource_image_barrier(ctx, img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0); - zink_resource_buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - } else { - zink_resource_image_barrier(ctx, img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0); - zink_resource_buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - util_range_add(&dst->base.b, &dst->valid_buffer_range, dstx, dstx + src_box->width); - } - - VkBufferImageCopy region = {0}; - region.bufferOffset = buf2img ? src_box->x : dstx; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource.mipLevel = buf2img ? dst_level : src_level; - switch (img->base.b.target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_1D_ARRAY: - /* these use layer */ - region.imageSubresource.baseArrayLayer = buf2img ? dstz : src_box->z; - region.imageSubresource.layerCount = src_box->depth; - region.imageOffset.z = 0; - region.imageExtent.depth = 1; - break; - case PIPE_TEXTURE_3D: - /* this uses depth */ - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset.z = buf2img ? dstz : src_box->z; - region.imageExtent.depth = src_box->depth; - break; - default: - /* these must only copy one layer */ - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset.z = 0; - region.imageExtent.depth = 1; - } - region.imageOffset.x = buf2img ? dstx : src_box->x; - region.imageOffset.y = buf2img ? 
dsty : src_box->y; - - region.imageExtent.width = src_box->width; - region.imageExtent.height = src_box->height; - - zink_batch_reference_resource_rw(batch, img, buf2img); - zink_batch_reference_resource_rw(batch, buf, !buf2img); - - /* we're using u_transfer_helper_deinterleave, which means we'll be getting PIPE_MAP_* usage - * to indicate whether to copy either the depth or stencil aspects - */ - unsigned aspects = 0; - if (map_flags) { - assert((map_flags & (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)) != - (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)); - if (map_flags & PIPE_MAP_DEPTH_ONLY) - aspects = VK_IMAGE_ASPECT_DEPTH_BIT; - else if (map_flags & PIPE_MAP_STENCIL_ONLY) - aspects = VK_IMAGE_ASPECT_STENCIL_BIT; - } - if (!aspects) - aspects = img->aspect; - while (aspects) { - int aspect = 1 << u_bit_scan(&aspects); - region.imageSubresource.aspectMask = aspect; - - /* this may or may not work with multisampled depth/stencil buffers depending on the driver implementation: - * - * srcImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT - * - vkCmdCopyImageToBuffer spec - * - * dstImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT - * - vkCmdCopyBufferToImage spec - */ - if (buf2img) - VKCTX(CmdCopyBufferToImage)(batch->state->cmdbuf, buf->obj->buffer, img->obj->image, img->layout, 1, ®ion); - else - VKCTX(CmdCopyImageToBuffer)(batch->state->cmdbuf, img->obj->image, img->layout, buf->obj->buffer, 1, ®ion); - } -} - -static void -zink_resource_copy_region(struct pipe_context *pctx, - struct pipe_resource *pdst, - unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *psrc, - unsigned src_level, const struct pipe_box *src_box) -{ - struct zink_resource *dst = zink_resource(pdst); - struct zink_resource *src = zink_resource(psrc); - struct zink_context *ctx = zink_context(pctx); - if (dst->base.b.target != PIPE_BUFFER && src->base.b.target != PIPE_BUFFER) { - VkImageCopy region = {0}; - if 
(util_format_get_num_planes(src->base.b.format) == 1 && - util_format_get_num_planes(dst->base.b.format) == 1) { - /* If neither the calling command’s srcImage nor the calling command’s dstImage - * has a multi-planar image format then the aspectMask member of srcSubresource - * and dstSubresource must match - * - * -VkImageCopy spec - */ - assert(src->aspect == dst->aspect); - } else - unreachable("planar formats not yet handled"); - - zink_fb_clears_apply_or_discard(ctx, pdst, (struct u_rect){dstx, dstx + src_box->width, dsty, dsty + src_box->height}, false); - zink_fb_clears_apply_region(ctx, psrc, zink_rect_from_box(src_box)); - - region.srcSubresource.aspectMask = src->aspect; - region.srcSubresource.mipLevel = src_level; - switch (src->base.b.target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_1D_ARRAY: - /* these use layer */ - region.srcSubresource.baseArrayLayer = src_box->z; - region.srcSubresource.layerCount = src_box->depth; - region.srcOffset.z = 0; - region.extent.depth = 1; - break; - case PIPE_TEXTURE_3D: - /* this uses depth */ - region.srcSubresource.baseArrayLayer = 0; - region.srcSubresource.layerCount = 1; - region.srcOffset.z = src_box->z; - region.extent.depth = src_box->depth; - break; - default: - /* these must only copy one layer */ - region.srcSubresource.baseArrayLayer = 0; - region.srcSubresource.layerCount = 1; - region.srcOffset.z = 0; - region.extent.depth = 1; - } - - region.srcOffset.x = src_box->x; - region.srcOffset.y = src_box->y; - - region.dstSubresource.aspectMask = dst->aspect; - region.dstSubresource.mipLevel = dst_level; - switch (dst->base.b.target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_1D_ARRAY: - /* these use layer */ - region.dstSubresource.baseArrayLayer = dstz; - region.dstSubresource.layerCount = src_box->depth; - region.dstOffset.z = 0; - break; - case PIPE_TEXTURE_3D: - /* this 
uses depth */ - region.dstSubresource.baseArrayLayer = 0; - region.dstSubresource.layerCount = 1; - region.dstOffset.z = dstz; - break; - default: - /* these must only copy one layer */ - region.dstSubresource.baseArrayLayer = 0; - region.dstSubresource.layerCount = 1; - region.dstOffset.z = 0; + struct zink_resource *res = zink_resource(pres); + if (res->obj->dt) { + if (zink_kopper_acquired(res->obj->dt, res->obj->dt_idx) && (!ctx->clears_enabled || !res->fb_bind_count)) { + zink_batch_no_rp_safe(ctx); + zink_kopper_readback_update(ctx, res); + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + zink_batch_reference_resource_rw(&ctx->batch, res, true); + } else { + ctx->needs_present = res; } - - region.dstOffset.x = dstx; - region.dstOffset.y = dsty; - region.extent.width = src_box->width; - region.extent.height = src_box->height; - - struct zink_batch *batch = &ctx->batch; - zink_batch_no_rp(ctx); - zink_batch_reference_resource_rw(batch, src, false); - zink_batch_reference_resource_rw(batch, dst, true); - - zink_resource_setup_transfer_layouts(ctx, src, dst); - VKCTX(CmdCopyImage)(batch->state->cmdbuf, src->obj->image, src->layout, - dst->obj->image, dst->layout, - 1, ®ion); - } else if (dst->base.b.target == PIPE_BUFFER && - src->base.b.target == PIPE_BUFFER) { - zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width); - } else - zink_copy_image_buffer(ctx, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0); + ctx->batch.swapchain = res; + } else if (res->dmabuf) + res->queue = VK_QUEUE_FAMILY_FOREIGN_EXT; } static struct pipe_stream_output_target * @@ -3112,11 +4184,7 @@ zink_create_stream_output_target(struct pipe_context *pctx, if (!t) return NULL; - /* using PIPE_BIND_CUSTOM here lets us create a custom pipe buffer resource, - * which allows us to differentiate and use VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT - * as we must for this case - 
*/ - t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, 4); + t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT, PIPE_USAGE_DEFAULT, 4); if (!t->counter_buffer) { FREE(t); return NULL; @@ -3151,6 +4219,11 @@ zink_set_stream_output_targets(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); + /* always set counter_buffer_valid=false on unbind: + * - on resume (indicated by offset==-1), set counter_buffer_valid=true + * - otherwise the counter buffer is invalidated + */ + if (num_targets == 0) { for (unsigned i = 0; i < ctx->num_so_targets; i++) { if (ctx->so_targets[i]) { @@ -3169,15 +4242,8 @@ zink_set_stream_output_targets(struct pipe_context *pctx, pipe_so_target_reference(&ctx->so_targets[i], targets[i]); if (!t) continue; - struct zink_resource *res = zink_resource(t->counter_buffer); - if (offsets[0] == (unsigned)-1) - ctx->xfb_barrier |= zink_resource_buffer_needs_barrier(res, - VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); - else - ctx->xfb_barrier |= zink_resource_buffer_needs_barrier(res, - VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); + if (offsets[0] != (unsigned)-1) + t->counter_buffer_valid = false; struct zink_resource *so = zink_resource(ctx->so_targets[i]->buffer); if (so) { so->so_bind_count++; @@ -3199,6 +4265,7 @@ zink_set_stream_output_targets(struct pipe_context *pctx, /* TODO: possibly avoid rebinding on resume if resuming from same buffers? 
*/ ctx->dirty_so_targets = true; } + zink_flush_dgc_if_enabled(ctx); } void @@ -3228,24 +4295,26 @@ zink_rebind_framebuffer(struct zink_context *ctx, struct zink_resource *res) return; zink_batch_no_rp(ctx); - if (zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer) { - struct zink_framebuffer *fb = ctx->get_framebuffer(ctx); - ctx->fb_changed |= ctx->framebuffer != fb; - ctx->framebuffer = fb; - } + struct zink_framebuffer *fb = zink_get_framebuffer(ctx); + ctx->fb_changed |= ctx->framebuffer != fb; + ctx->framebuffer = fb; } ALWAYS_INLINE static struct zink_resource * -rebind_ubo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot) +rebind_ubo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot) { struct zink_resource *res = update_descriptor_state_ubo(ctx, shader, slot, ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_UBO][shader][slot]); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, slot, 1); + if (res) { + res->obj->unordered_read = false; + res->obj->access |= VK_ACCESS_SHADER_READ_BIT; + } + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, slot, 1); return res; } ALWAYS_INLINE static struct zink_resource * -rebind_ssbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot) +rebind_ssbo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot) { const struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][slot]; struct zink_resource *res = zink_resource(ssbo->buffer); @@ -3254,58 +4323,86 @@ rebind_ssbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slo util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset, ssbo->buffer_offset + ssbo->buffer_size); update_descriptor_state_ssbo(ctx, shader, slot, res); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SSBO, slot, 1); + if (res) { + res->obj->unordered_read = false; + 
res->obj->access |= VK_ACCESS_SHADER_READ_BIT; + if (ctx->writable_ssbos[shader] & BITFIELD_BIT(slot)) { + res->obj->unordered_write = false; + res->obj->access |= VK_ACCESS_SHADER_WRITE_BIT; + } + } + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SSBO, slot, 1); return res; } ALWAYS_INLINE static struct zink_resource * -rebind_tbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot) +rebind_tbo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot) { struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[shader][slot]); if (!sampler_view || sampler_view->base.texture->target != PIPE_BUFFER) return NULL; struct zink_resource *res = zink_resource(sampler_view->base.texture); - if (zink_batch_usage_exists(sampler_view->buffer_view->batch_uses)) - zink_batch_reference_bufferview(&ctx->batch, sampler_view->buffer_view); - zink_buffer_view_reference(zink_screen(ctx->base.screen), &sampler_view->buffer_view, NULL); - sampler_view->buffer_view = get_buffer_view(ctx, res, sampler_view->base.format, - sampler_view->base.u.buf.offset, sampler_view->base.u.buf.size); + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) { + VkBufferViewCreateInfo bvci = sampler_view->buffer_view->bvci; + bvci.buffer = res->obj->buffer; + zink_buffer_view_reference(zink_screen(ctx->base.screen), &sampler_view->buffer_view, NULL); + sampler_view->buffer_view = get_buffer_view(ctx, res, &bvci); + } update_descriptor_state_sampler(ctx, shader, slot, res); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); + if (res) { + res->obj->unordered_read = false; + res->obj->access |= VK_ACCESS_SHADER_READ_BIT; + } + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1); return res; } ALWAYS_INLINE static struct zink_resource * -rebind_ibo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot) 
+rebind_ibo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot) { struct zink_image_view *image_view = &ctx->image_views[shader][slot]; struct zink_resource *res = zink_resource(image_view->base.resource); if (!res || res->base.b.target != PIPE_BUFFER) return NULL; - zink_descriptor_set_refs_clear(&image_view->buffer_view->desc_set_refs, image_view->buffer_view); - if (zink_batch_usage_exists(image_view->buffer_view->batch_uses)) - zink_batch_reference_bufferview(&ctx->batch, image_view->buffer_view); - zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL); + VkBufferViewCreateInfo bvci; + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) { + bvci = image_view->buffer_view->bvci; + bvci.buffer = res->obj->buffer; + zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL); + } if (!zink_resource_object_init_storage(ctx, res)) { debug_printf("couldn't create storage image!"); return NULL; } - image_view->buffer_view = get_buffer_view(ctx, res, image_view->base.format, - image_view->base.u.buf.offset, image_view->base.u.buf.size); - assert(image_view->buffer_view); + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) { + image_view->buffer_view = get_buffer_view(ctx, res, &bvci); + assert(image_view->buffer_view); + } + if (res) { + res->obj->unordered_read = false; + res->obj->access |= VK_ACCESS_SHADER_READ_BIT; + if (image_view->base.access & PIPE_IMAGE_ACCESS_WRITE) { + res->obj->unordered_write = false; + res->obj->access |= VK_ACCESS_SHADER_WRITE_BIT; + } + } util_range_add(&res->base.b, &res->valid_buffer_range, image_view->base.u.buf.offset, image_view->base.u.buf.offset + image_view->base.u.buf.size); update_descriptor_state_image(ctx, shader, slot, res); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_IMAGE, slot, 1); + ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_IMAGE, slot, 1); return res; } 
static unsigned -rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_t rebind_mask, const unsigned expected_num_rebinds) +rebind_buffer(struct zink_context *ctx, struct zink_resource *res, uint32_t rebind_mask, const unsigned expected_num_rebinds) { unsigned num_rebinds = 0; bool has_write = false; + if (!zink_resource_has_binds(res)) + return 0; + + assert(!res->bindless[1]); //TODO if ((rebind_mask & BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER)) || (!rebind_mask && res->so_bind_count && ctx->num_so_targets)) { for (unsigned i = 0; i < ctx->num_so_targets; i++) { if (ctx->so_targets[i]) { @@ -3316,25 +4413,29 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_ } } } + rebind_mask &= ~BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER); } - if (num_rebinds && expected_num_rebinds == num_rebinds) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; if ((rebind_mask & BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER)) || (!rebind_mask && res->vbo_bind_mask)) { u_foreach_bit(slot, res->vbo_bind_mask) { if (ctx->vertex_buffers[slot].buffer.resource != &res->base.b) //wrong context goto end; - set_vertex_buffer_clamped(ctx, slot); + res->obj->access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + res->obj->access_stage |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + res->obj->unordered_read = false; num_rebinds++; } + rebind_mask &= ~BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER); ctx->vertex_buffers_dirty = true; } - if (num_rebinds && expected_num_rebinds == num_rebinds) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const uint32_t ubo_mask = rebind_mask ? - rebind_mask & BITFIELD_RANGE(TC_BINDING_UBO_VS, PIPE_SHADER_TYPES) : - ((res->ubo_bind_count[0] ? BITFIELD_RANGE(TC_BINDING_UBO_VS, (PIPE_SHADER_TYPES - 1)) : 0) | + rebind_mask & BITFIELD_RANGE(TC_BINDING_UBO_VS, MESA_SHADER_STAGES) : + ((res->ubo_bind_count[0] ? 
BITFIELD_RANGE(TC_BINDING_UBO_VS, (MESA_SHADER_STAGES - 1)) : 0) | (res->ubo_bind_count[1] ? BITFIELD_BIT(TC_BINDING_UBO_CS) : 0)); u_foreach_bit(shader, ubo_mask >> TC_BINDING_UBO_VS) { u_foreach_bit(slot, res->ubo_bind_mask[shader]) { @@ -3344,12 +4445,13 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_ num_rebinds++; } } - if (num_rebinds && expected_num_rebinds == num_rebinds) + rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_UBO_VS, MESA_SHADER_STAGES); + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const unsigned ssbo_mask = rebind_mask ? - rebind_mask & BITFIELD_RANGE(TC_BINDING_SSBO_VS, PIPE_SHADER_TYPES) : - BITFIELD_RANGE(TC_BINDING_SSBO_VS, PIPE_SHADER_TYPES); + rebind_mask & BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES) : + BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES); u_foreach_bit(shader, ssbo_mask >> TC_BINDING_SSBO_VS) { u_foreach_bit(slot, res->ssbo_bind_mask[shader]) { struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][slot]; @@ -3360,11 +4462,12 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_ num_rebinds++; } } - if (num_rebinds && expected_num_rebinds == num_rebinds) + rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES); + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const unsigned sampler_mask = rebind_mask ? 
- rebind_mask & BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, PIPE_SHADER_TYPES) : - BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, PIPE_SHADER_TYPES); + rebind_mask & BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES) : + BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES); u_foreach_bit(shader, sampler_mask >> TC_BINDING_SAMPLERVIEW_VS) { u_foreach_bit(slot, res->sampler_binds[shader]) { struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[shader][slot]); @@ -3374,12 +4477,13 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_ num_rebinds++; } } - if (num_rebinds && expected_num_rebinds == num_rebinds) + rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES); + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const unsigned image_mask = rebind_mask ? - rebind_mask & BITFIELD_RANGE(TC_BINDING_IMAGE_VS, PIPE_SHADER_TYPES) : - BITFIELD_RANGE(TC_BINDING_IMAGE_VS, PIPE_SHADER_TYPES); + rebind_mask & BITFIELD_RANGE(TC_BINDING_IMAGE_VS, MESA_SHADER_STAGES) : + BITFIELD_RANGE(TC_BINDING_IMAGE_VS, MESA_SHADER_STAGES); unsigned num_image_rebinds_remaining = rebind_mask ? 
expected_num_rebinds - num_rebinds : res->image_bind_count[0] + res->image_bind_count[1]; u_foreach_bit(shader, image_mask >> TC_BINDING_IMAGE_VS) { for (unsigned slot = 0; num_image_rebinds_remaining && slot < ctx->di.num_images[shader]; slot++) { @@ -3395,24 +4499,372 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_ } } end: - zink_batch_resource_usage_set(&ctx->batch, res, has_write); + if (num_rebinds) + zink_batch_resource_usage_set(&ctx->batch, res, has_write, true); return num_rebinds; } +void +zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, + unsigned dst_offset, unsigned src_offset, unsigned size) +{ + VkBufferCopy region; + region.srcOffset = src_offset; + region.dstOffset = dst_offset; + region.size = size; + + struct zink_batch *batch = &ctx->batch; + + struct pipe_box box; + u_box_3d((int)src_offset, 0, 0, (int)size, 0, 0, &box); + /* must barrier if something wrote the valid buffer range */ + bool valid_write = zink_check_valid_buffer_src_access(ctx, src, src_offset, size); + bool unordered_src = !valid_write && !zink_check_unordered_transfer_access(src, 0, &box); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0); + bool unordered_dst = zink_resource_buffer_transfer_dst_barrier(ctx, dst, dst_offset, size); + bool can_unorder = unordered_dst && unordered_src && !ctx->no_reorder; + VkCommandBuffer cmdbuf = can_unorder ? 
ctx->batch.state->reordered_cmdbuf : zink_get_cmdbuf(ctx, src, dst); + ctx->batch.state->has_barriers |= can_unorder; + zink_batch_reference_resource_rw(batch, src, false); + zink_batch_reference_resource_rw(batch, dst, true); + if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + VKCTX(CmdPipelineBarrier)(cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_buffer(%d)", size); + VKCTX(CmdCopyBuffer)(cmdbuf, src->obj->buffer, dst->obj->buffer, 1, ®ion); + zink_cmd_debug_marker_end(ctx, cmdbuf, marker); +} + +void +zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, + unsigned src_level, const struct pipe_box *src_box, enum pipe_map_flags map_flags) +{ + struct zink_resource *img = dst->base.b.target == PIPE_BUFFER ? src : dst; + struct zink_resource *use_img = img; + struct zink_resource *buf = dst->base.b.target == PIPE_BUFFER ? 
dst : src; + struct zink_batch *batch = &ctx->batch; + bool needs_present_readback = false; + + bool buf2img = buf == src; + bool unsync = !!(map_flags & PIPE_MAP_UNSYNCHRONIZED); + if (unsync) { + util_queue_fence_wait(&ctx->flush_fence); + util_queue_fence_reset(&ctx->unsync_fence); + } + + if (buf2img) { + if (zink_is_swapchain(img)) { + if (!zink_kopper_acquire(ctx, img, UINT64_MAX)) + return; + } + struct pipe_box box = *src_box; + box.x = dstx; + box.y = dsty; + box.z = dstz; + zink_resource_image_transfer_dst_barrier(ctx, img, dst_level, &box, unsync); + if (!unsync) + zink_screen(ctx->base.screen)->buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + } else { + assert(!(map_flags & PIPE_MAP_UNSYNCHRONIZED)); + if (zink_is_swapchain(img)) + needs_present_readback = zink_kopper_acquire_readback(ctx, img, &use_img); + zink_screen(ctx->base.screen)->image_barrier(ctx, use_img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0); + zink_resource_buffer_transfer_dst_barrier(ctx, buf, dstx, src_box->width); + } + + VkBufferImageCopy region = {0}; + region.bufferOffset = buf2img ? src_box->x : dstx; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.mipLevel = buf2img ? dst_level : src_level; + enum pipe_texture_target img_target = img->base.b.target; + if (img->need_2D) + img_target = img_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY; + switch (img_target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + /* these use layer */ + region.imageSubresource.baseArrayLayer = buf2img ? dstz : src_box->z; + region.imageSubresource.layerCount = src_box->depth; + region.imageOffset.z = 0; + region.imageExtent.depth = 1; + break; + case PIPE_TEXTURE_3D: + /* this uses depth */ + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset.z = buf2img ? 
dstz : src_box->z; + region.imageExtent.depth = src_box->depth; + break; + default: + /* these must only copy one layer */ + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset.z = 0; + region.imageExtent.depth = 1; + } + region.imageOffset.x = buf2img ? dstx : src_box->x; + region.imageOffset.y = buf2img ? dsty : src_box->y; + + region.imageExtent.width = src_box->width; + region.imageExtent.height = src_box->height; + + VkCommandBuffer cmdbuf = unsync ? + ctx->batch.state->unsynchronized_cmdbuf : + /* never promote to unordered if swapchain was acquired */ + needs_present_readback ? + ctx->batch.state->cmdbuf : + buf2img ? zink_get_cmdbuf(ctx, buf, use_img) : zink_get_cmdbuf(ctx, use_img, buf); + zink_batch_reference_resource_rw(batch, use_img, buf2img); + zink_batch_reference_resource_rw(batch, buf, !buf2img); + if (unsync) { + ctx->batch.state->has_unsync = true; + use_img->obj->unsync_access = true; + } + + /* we're using u_transfer_helper_deinterleave, which means we'll be getting PIPE_MAP_* usage + * to indicate whether to copy either the depth or stencil aspects + */ + unsigned aspects = 0; + if (map_flags) { + assert((map_flags & (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)) != + (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)); + if (map_flags & PIPE_MAP_DEPTH_ONLY) + aspects = VK_IMAGE_ASPECT_DEPTH_BIT; + else if (map_flags & PIPE_MAP_STENCIL_ONLY) + aspects = VK_IMAGE_ASPECT_STENCIL_BIT; + } + if (!aspects) + aspects = img->aspect; + if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + VKCTX(CmdPipelineBarrier)(cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + while (aspects) { + int aspect = 1 << u_bit_scan(&aspects); + 
region.imageSubresource.aspectMask = aspect; + + /* MSAA transfers should have already been handled by U_TRANSFER_HELPER_MSAA_MAP, since + * there's no way to resolve using this interface: + * + * srcImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT + * - vkCmdCopyImageToBuffer spec + * + * dstImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT + * - vkCmdCopyBufferToImage spec + */ + assert(img->base.b.nr_samples <= 1); + bool marker; + if (buf2img) { + marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_buffer2image(%s, %dx%dx%d)", + util_format_short_name(dst->base.b.format), + region.imageExtent.width, + region.imageExtent.height, + MAX2(region.imageSubresource.layerCount, region.imageExtent.depth)); + VKCTX(CmdCopyBufferToImage)(cmdbuf, buf->obj->buffer, use_img->obj->image, use_img->layout, 1, ®ion); + } else { + marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_image2buffer(%s, %dx%dx%d)", + util_format_short_name(src->base.b.format), + region.imageExtent.width, + region.imageExtent.height, + MAX2(region.imageSubresource.layerCount, region.imageExtent.depth)); + VKCTX(CmdCopyImageToBuffer)(cmdbuf, use_img->obj->image, use_img->layout, buf->obj->buffer, 1, ®ion); + } + zink_cmd_debug_marker_end(ctx, cmdbuf, marker); + } + if (unsync) + util_queue_fence_signal(&ctx->unsync_fence); + if (needs_present_readback) { + assert(!unsync); + if (buf2img) { + img->obj->unordered_write = false; + buf->obj->unordered_read = false; + } else { + img->obj->unordered_read = false; + buf->obj->unordered_write = false; + } + zink_kopper_present_readback(ctx, img); + } + + if (ctx->oom_flush && !ctx->batch.in_rp && !ctx->unordered_blitting) + flush_batch(ctx, false); +} + +static void +zink_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *pdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *psrc, + unsigned src_level, const struct pipe_box *src_box) +{ + struct zink_resource *dst = 
zink_resource(pdst); + struct zink_resource *src = zink_resource(psrc); + struct zink_context *ctx = zink_context(pctx); + if (dst->base.b.target != PIPE_BUFFER && src->base.b.target != PIPE_BUFFER) { + VkImageCopy region; + /* fill struct holes */ + memset(®ion, 0, sizeof(region)); + if (util_format_get_num_planes(src->base.b.format) == 1 && + util_format_get_num_planes(dst->base.b.format) == 1) { + /* If neither the calling command’s srcImage nor the calling command’s dstImage + * has a multi-planar image format then the aspectMask member of srcSubresource + * and dstSubresource must match + * + * -VkImageCopy spec + */ + assert(src->aspect == dst->aspect); + } else + unreachable("planar formats not yet handled"); + + + region.srcSubresource.aspectMask = src->aspect; + region.srcSubresource.mipLevel = src_level; + enum pipe_texture_target src_target = src->base.b.target; + if (src->need_2D) + src_target = src_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY; + switch (src_target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + /* these use layer */ + region.srcSubresource.baseArrayLayer = src_box->z; + region.srcSubresource.layerCount = src_box->depth; + region.srcOffset.z = 0; + region.extent.depth = 1; + break; + case PIPE_TEXTURE_3D: + /* this uses depth */ + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + region.srcOffset.z = src_box->z; + region.extent.depth = src_box->depth; + break; + default: + /* these must only copy one layer */ + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + region.srcOffset.z = 0; + region.extent.depth = 1; + } + + region.srcOffset.x = src_box->x; + region.srcOffset.y = src_box->y; + + region.dstSubresource.aspectMask = dst->aspect; + region.dstSubresource.mipLevel = dst_level; + enum pipe_texture_target dst_target = dst->base.b.target; + if (dst->need_2D) + dst_target = 
dst_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY; + switch (dst_target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + /* these use layer */ + region.dstSubresource.baseArrayLayer = dstz; + region.dstSubresource.layerCount = src_box->depth; + region.dstOffset.z = 0; + break; + case PIPE_TEXTURE_3D: + /* this uses depth */ + region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + region.dstOffset.z = dstz; + break; + default: + /* these must only copy one layer */ + region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + region.dstOffset.z = 0; + } + + region.dstOffset.x = dstx; + region.dstOffset.y = dsty; + region.extent.width = src_box->width; + region.extent.height = src_box->height; + + /* ignore no-op copies */ + if (src == dst && + !memcmp(®ion.dstOffset, ®ion.srcOffset, sizeof(region.srcOffset)) && + !memcmp(®ion.dstSubresource, ®ion.srcSubresource, sizeof(region.srcSubresource))) + return; + + zink_fb_clears_apply_or_discard(ctx, pdst, (struct u_rect){dstx, dstx + src_box->width, dsty, dsty + src_box->height}, false); + zink_fb_clears_apply_region(ctx, psrc, zink_rect_from_box(src_box)); + + struct zink_batch *batch = &ctx->batch; + zink_resource_setup_transfer_layouts(ctx, src, dst); + VkCommandBuffer cmdbuf = zink_get_cmdbuf(ctx, src, dst); + zink_batch_reference_resource_rw(batch, src, false); + zink_batch_reference_resource_rw(batch, dst, true); + + if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + VKCTX(CmdPipelineBarrier)(cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_image(%s->%s, 
%dx%dx%d)", + util_format_short_name(psrc->format), + util_format_short_name(pdst->format), + region.extent.width, + region.extent.height, + MAX2(region.srcSubresource.layerCount, region.extent.depth)); + VKCTX(CmdCopyImage)(cmdbuf, src->obj->image, src->layout, + dst->obj->image, dst->layout, + 1, ®ion); + zink_cmd_debug_marker_end(ctx, cmdbuf, marker); + } else if (dst->base.b.target == PIPE_BUFFER && + src->base.b.target == PIPE_BUFFER) { + zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width); + } else + zink_copy_image_buffer(ctx, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0); + if (ctx->oom_flush && !ctx->batch.in_rp && !ctx->unordered_blitting) + flush_batch(ctx, false); +} + static bool zink_resource_commit(struct pipe_context *pctx, struct pipe_resource *pres, unsigned level, struct pipe_box *box, bool commit) { struct zink_context *ctx = zink_context(pctx); struct zink_resource *res = zink_resource(pres); - struct zink_screen *screen = zink_screen(pctx->screen); /* if any current usage exists, flush the queue */ if (zink_resource_has_unflushed_usage(res)) zink_flush_queue(ctx); - bool ret = zink_bo_commit(screen, res, box->x, box->width, commit); - if (!ret) + VkSemaphore sem = VK_NULL_HANDLE; + bool ret = zink_bo_commit(ctx, res, level, box, commit, &sem); + if (ret) { + if (sem) + zink_batch_add_wait_semaphore(&ctx->batch, sem); + } else { check_device_lost(ctx); + } return ret; } @@ -3420,43 +4872,44 @@ zink_resource_commit(struct pipe_context *pctx, struct pipe_resource *pres, unsi static void rebind_image(struct zink_context *ctx, struct zink_resource *res) { - zink_rebind_framebuffer(ctx, res); - if (!zink_resource_has_binds(res)) - return; - for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) { - if (res->sampler_binds[i]) { - for (unsigned j = 0; j < ctx->di.num_sampler_views[i]; j++) { - struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]); - if (sv && sv->base.texture == &res->base.b) { - struct 
pipe_surface *psurf = &sv->image_view->base; - zink_rebind_surface(ctx, &psurf); - sv->image_view = zink_surface(psurf); - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, j, 1); - update_descriptor_state_sampler(ctx, i, j, res); - } - } - } - if (!res->image_bind_count[i == PIPE_SHADER_COMPUTE]) - continue; - for (unsigned j = 0; j < ctx->di.num_images[i]; j++) { - if (zink_resource(ctx->image_views[i][j].base.resource) == res) { - zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_IMAGE, j, 1); - update_descriptor_state_sampler(ctx, i, j, res); - _mesa_set_add(ctx->need_barriers[i == PIPE_SHADER_COMPUTE], res); - } - } - } + assert(!ctx->blitting); + if (res->fb_binds) + zink_rebind_framebuffer(ctx, res); + if (!zink_resource_has_binds(res)) + return; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (res->sampler_binds[i]) { + for (unsigned j = 0; j < ctx->di.num_sampler_views[i]; j++) { + struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]); + if (sv && sv->base.texture == &res->base.b) { + struct pipe_surface *psurf = &sv->image_view->base; + zink_rebind_surface(ctx, &psurf); + sv->image_view = zink_surface(psurf); + ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, j, 1); + update_descriptor_state_sampler(ctx, i, j, res); + } + } + } + if (!res->image_bind_count[i == MESA_SHADER_COMPUTE]) + continue; + for (unsigned j = 0; j < ctx->di.num_images[i]; j++) { + if (zink_resource(ctx->image_views[i][j].base.resource) == res) { + ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_IMAGE, j, 1); + update_descriptor_state_image(ctx, i, j, res); + _mesa_set_add(ctx->need_barriers[i == MESA_SHADER_COMPUTE], res); + } + } + } } bool zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res) { - /* force counter buffer reset */ - res->so_valid = false; - if 
(!zink_resource_has_binds(res)) - return true; - if (res->base.b.target == PIPE_BUFFER) + if (res->base.b.target == PIPE_BUFFER) { + /* force counter buffer reset */ + res->so_valid = false; return rebind_buffer(ctx, res, 0, 0) == res->bind_count[0] + res->bind_count[1]; + } rebind_image(ctx, res); return false; } @@ -3465,33 +4918,66 @@ void zink_rebind_all_buffers(struct zink_context *ctx) { struct zink_batch *batch = &ctx->batch; - u_foreach_bit(slot, ctx->gfx_pipeline_state.vertex_buffers_enabled_mask) - set_vertex_buffer_clamped(ctx, slot); ctx->vertex_buffers_dirty = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask > 0; ctx->dirty_so_targets = ctx->num_so_targets > 0; if (ctx->num_so_targets) - zink_resource_buffer_barrier(ctx, zink_resource(ctx->dummy_xfb_buffer), + zink_screen(ctx->base.screen)->buffer_barrier(ctx, zink_resource(ctx->dummy_xfb_buffer), VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); - for (unsigned shader = PIPE_SHADER_VERTEX; shader < PIPE_SHADER_TYPES; shader++) { + for (unsigned shader = MESA_SHADER_VERTEX; shader < MESA_SHADER_STAGES; shader++) { for (unsigned slot = 0; slot < ctx->di.num_ubos[shader]; slot++) { struct zink_resource *res = rebind_ubo(ctx, shader, slot); if (res) - zink_batch_resource_usage_set(batch, res, false); + zink_batch_resource_usage_set(batch, res, false, true); } for (unsigned slot = 0; slot < ctx->di.num_sampler_views[shader]; slot++) { struct zink_resource *res = rebind_tbo(ctx, shader, slot); if (res) - zink_batch_resource_usage_set(batch, res, false); + zink_batch_resource_usage_set(batch, res, false, true); } for (unsigned slot = 0; slot < ctx->di.num_ssbos[shader]; slot++) { struct zink_resource *res = rebind_ssbo(ctx, shader, slot); if (res) - zink_batch_resource_usage_set(batch, res, (ctx->writable_ssbos[shader] & BITFIELD64_BIT(slot)) != 0); + zink_batch_resource_usage_set(batch, res, (ctx->writable_ssbos[shader] & BITFIELD64_BIT(slot)) != 0, true); } for 
(unsigned slot = 0; slot < ctx->di.num_images[shader]; slot++) { struct zink_resource *res = rebind_ibo(ctx, shader, slot); if (res) - zink_batch_resource_usage_set(batch, res, (ctx->image_views[shader][slot].base.access & PIPE_IMAGE_ACCESS_WRITE) != 0); + zink_batch_resource_usage_set(batch, res, (ctx->image_views[shader][slot].base.access & PIPE_IMAGE_ACCESS_WRITE) != 0, true); + } + } +} + +void +zink_rebind_all_images(struct zink_context *ctx) +{ + assert(!ctx->blitting); + rebind_fb_state(ctx, NULL, false); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + for (unsigned j = 0; j < ctx->di.num_sampler_views[i]; j++) { + struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]); + if (!sv || !sv->image_view || sv->image_view->base.texture->target == PIPE_BUFFER) + continue; + struct zink_resource *res = zink_resource(sv->image_view->base.texture); + if (res->obj != sv->image_view->obj) { + struct pipe_surface *psurf = &sv->image_view->base; + zink_rebind_surface(ctx, &psurf); + sv->image_view = zink_surface(psurf); + ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, j, 1); + update_descriptor_state_sampler(ctx, i, j, res); + } + } + for (unsigned j = 0; j < ctx->di.num_images[i]; j++) { + struct zink_image_view *image_view = &ctx->image_views[i][j]; + struct zink_resource *res = zink_resource(image_view->base.resource); + if (!res || res->base.b.target == PIPE_BUFFER) + continue; + if (ctx->image_views[i][j].surface->obj != res->obj) { + zink_surface_reference(zink_screen(ctx->base.screen), &image_view->surface, NULL); + image_view->surface = create_image_surface(ctx, &image_view->base, i == MESA_SHADER_COMPUTE); + ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_IMAGE, j, 1); + update_descriptor_state_image(ctx, i, j, res); + _mesa_set_add(ctx->need_barriers[i == MESA_SHADER_COMPUTE], res); + } } } } @@ -3510,16 +4996,19 @@ zink_context_replace_buffer_storage(struct pipe_context *pctx, struct 
pipe_resou assert(d->obj); assert(s->obj); util_idalloc_mt_free(&screen->buffer_ids, delete_buffer_id); - zink_resource_object_reference(screen, NULL, s->obj); - if (zink_resource_has_unflushed_usage(d) || - (zink_resource_has_usage(d) && zink_resource_has_binds(d))) - zink_batch_reference_resource_move(&ctx->batch, d); - else - zink_resource_object_reference(screen, &d->obj, NULL); - d->obj = s->obj; + zink_batch_reference_resource(&ctx->batch, d); + /* don't be too creative */ + zink_resource_object_reference(screen, &d->obj, s->obj); + d->valid_buffer_range = s->valid_buffer_range; + zink_resource_copies_reset(d); /* force counter buffer reset */ d->so_valid = false; - if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) != num_rebinds) + /* FIXME: tc buffer sharedness tracking */ + if (!num_rebinds) { + num_rebinds = d->bind_count[0] + d->bind_count[1]; + rebind_mask = 0; + } + if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) < num_rebinds) ctx->buffer_rebind_counter = p_atomic_inc_return(&screen->buffer_rebind_counter); } @@ -3529,6 +5018,8 @@ zink_context_is_resource_busy(struct pipe_screen *pscreen, struct pipe_resource struct zink_screen *screen = zink_screen(pscreen); struct zink_resource *res = zink_resource(pres); uint32_t check_usage = 0; + if (usage & PIPE_MAP_UNSYNCHRONIZED && (!res->obj->unsync_access || zink_is_swapchain(res))) + return true; if (usage & PIPE_MAP_READ) check_usage |= ZINK_RESOURCE_ACCESS_WRITE; if (usage & PIPE_MAP_WRITE) @@ -3561,20 +5052,291 @@ zink_emit_string_marker(struct pipe_context *pctx, free(temp); } +VkIndirectCommandsLayoutTokenNV * +zink_dgc_add_token(struct zink_context *ctx, VkIndirectCommandsTokenTypeNV type, void **mem) +{ + size_t size = 0; + struct zink_screen *screen = zink_screen(ctx->base.screen); + VkIndirectCommandsLayoutTokenNV *ret = util_dynarray_grow(&ctx->dgc.tokens, VkIndirectCommandsLayoutTokenNV, 1); + ret->sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV; + 
ret->pNext = NULL; + ret->tokenType = type; + ret->vertexDynamicStride = ctx->gfx_pipeline_state.uses_dynamic_stride; + ret->indirectStateFlags = 0; + ret->indexTypeCount = 0; + switch (type) { + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: + ret->stream = ZINK_DGC_VBO; + size = sizeof(VkBindVertexBufferIndirectCommandNV); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: + ret->stream = ZINK_DGC_IB; + size = sizeof(VkBindIndexBufferIndirectCommandNV); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV: + ret->stream = ZINK_DGC_PSO; + size = sizeof(VkBindShaderGroupIndirectCommandNV); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: + ret->stream = ZINK_DGC_PUSH; + ret->pushconstantPipelineLayout = ctx->dgc.last_prog->base.layout; + ret->pushconstantShaderStageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + size = sizeof(float) * 6; //size for full tess level upload every time + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: + ret->stream = ZINK_DGC_DRAW; + size = sizeof(VkDrawIndirectCommand); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: + ret->stream = ZINK_DGC_DRAW; + size = sizeof(VkDrawIndexedIndirectCommand); + break; + default: + unreachable("ack"); + } + struct zink_resource *old = NULL; + unsigned stream_count = screen->info.nv_dgc_props.maxIndirectCommandsStreamCount >= ZINK_DGC_MAX ? 
ZINK_DGC_MAX : 1; + if (stream_count == 1) + ret->stream = 0; + unsigned stream = ret->stream; + bool max_exceeded = !ctx->dgc.max_size[stream]; + ret->offset = ctx->dgc.cur_offsets[stream]; + if (ctx->dgc.buffers[stream]) { + /* detect end of buffer */ + if (ctx->dgc.bind_offsets[stream] + ctx->dgc.cur_offsets[stream] + size > ctx->dgc.buffers[stream]->base.b.width0) { + old = ctx->dgc.buffers[stream]; + ctx->dgc.buffers[stream] = NULL; + max_exceeded = true; + } + } + if (!ctx->dgc.buffers[stream]) { + if (max_exceeded) + ctx->dgc.max_size[stream] += size * 5; + uint8_t *ptr; + unsigned offset; + u_upload_alloc(ctx->dgc.upload[stream], 0, ctx->dgc.max_size[stream], + screen->info.props.limits.minMemoryMapAlignment, &offset, + (struct pipe_resource **)&ctx->dgc.buffers[stream], (void **)&ptr); + size_t cur_size = old ? (ctx->dgc.cur_offsets[stream] - ctx->dgc.bind_offsets[stream]) : 0; + if (old) { + struct pipe_resource *pold = &old->base.b; + /* copy and delete old buffer */ + zink_batch_reference_resource_rw(&ctx->batch, old, true); + memcpy(ptr + offset, ctx->dgc.maps[stream] + ctx->dgc.bind_offsets[stream], cur_size); + pipe_resource_reference(&pold, NULL); + } + ctx->dgc.maps[stream] = ptr; + ctx->dgc.bind_offsets[stream] = offset; + ctx->dgc.cur_offsets[stream] = cur_size; + } + *mem = ctx->dgc.maps[stream] + ctx->dgc.cur_offsets[stream]; + ctx->dgc.cur_offsets[stream] += size; + return ret; +} + +void +zink_flush_dgc(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_batch_state *bs = ctx->batch.state; + if (!ctx->dgc.valid) + return; + + /* tokens should be created as they are used */ + unsigned num_cmds = util_dynarray_num_elements(&ctx->dgc.tokens, VkIndirectCommandsLayoutTokenNV); + assert(num_cmds); + VkIndirectCommandsLayoutTokenNV *cmds = ctx->dgc.tokens.data; + uint32_t strides[ZINK_DGC_MAX] = {0}; + + unsigned stream_count = screen->info.nv_dgc_props.maxIndirectCommandsStreamCount >= 
ZINK_DGC_MAX ? ZINK_DGC_MAX : 1; + VkIndirectCommandsStreamNV streams[ZINK_DGC_MAX]; + for (unsigned i = 0; i < stream_count; i++) { + if (ctx->dgc.buffers[i]) { + streams[i].buffer = ctx->dgc.buffers[i]->obj->buffer; + streams[i].offset = ctx->dgc.bind_offsets[i]; + } else { + streams[i].buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; + streams[i].offset = 0; + } + } + /* this is a stupid pipeline that will never actually be used as anything but a container */ + VkPipeline pipeline = VK_NULL_HANDLE; + if (screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1) { + /* RADV doesn't support shader pipeline binds, so use this hacky path */ + pipeline = ctx->gfx_pipeline_state.pipeline; + } else { + VkPrimitiveTopology vkmode = zink_primitive_topology(ctx->gfx_pipeline_state.gfx_prim_mode); + pipeline = zink_create_gfx_pipeline(screen, ctx->dgc.last_prog, ctx->dgc.last_prog->objs, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map, vkmode, false, &ctx->dgc.pipelines); + assert(pipeline); + util_dynarray_append(&bs->dgc.pipelines, VkPipeline, pipeline); + VKCTX(CmdBindPipelineShaderGroupNV)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline, 0); + } + unsigned remaining = num_cmds; + for (unsigned i = 0; i < num_cmds; i += screen->info.nv_dgc_props.maxIndirectCommandsTokenCount, remaining -= screen->info.nv_dgc_props.maxIndirectCommandsTokenCount) { + VkIndirectCommandsLayoutCreateInfoNV lci = { + VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV, + NULL, + 0, + VK_PIPELINE_BIND_POINT_GRAPHICS, + MIN2(remaining, screen->info.nv_dgc_props.maxIndirectCommandsTokenCount), + cmds + i, + stream_count, + strides + }; + VkIndirectCommandsLayoutNV iclayout; + ASSERTED VkResult res = VKSCR(CreateIndirectCommandsLayoutNV)(screen->dev, &lci, NULL, &iclayout); + assert(res == VK_SUCCESS); + util_dynarray_append(&bs->dgc.layouts, VkIndirectCommandsLayoutNV, iclayout); + + /* a lot of hacks to set up a preprocess buffer */ + 
VkGeneratedCommandsMemoryRequirementsInfoNV info = { + VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV, + NULL, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline, + iclayout, + 1 + }; + VkMemoryRequirements2 reqs = { + VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2 + }; + VKSCR(GetGeneratedCommandsMemoryRequirementsNV)(screen->dev, &info, &reqs); + struct pipe_resource templ = {0}; + templ.target = PIPE_BUFFER; + templ.format = PIPE_FORMAT_R8_UNORM; + templ.bind = 0; + templ.usage = PIPE_USAGE_IMMUTABLE; + templ.flags = 0; + templ.width0 = reqs.memoryRequirements.size; + templ.height0 = 1; + templ.depth0 = 1; + templ.array_size = 1; + uint64_t params[] = {reqs.memoryRequirements.size, reqs.memoryRequirements.alignment, reqs.memoryRequirements.memoryTypeBits}; + struct pipe_resource *pres = screen->base.resource_create_with_modifiers(&screen->base, &templ, params, 3); + assert(pres); + zink_batch_reference_resource_rw(&ctx->batch, zink_resource(pres), true); + + VkGeneratedCommandsInfoNV gen = { + VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_NV, + NULL, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline, + iclayout, + stream_count, + streams, + 1, + zink_resource(pres)->obj->buffer, + 0, + pres->width0, + VK_NULL_HANDLE, + 0, + VK_NULL_HANDLE, + 0 + }; + VKCTX(CmdExecuteGeneratedCommandsNV)(ctx->batch.state->cmdbuf, VK_FALSE, &gen); + + pipe_resource_reference(&pres, NULL); + } + util_dynarray_clear(&ctx->dgc.pipelines); + util_dynarray_clear(&ctx->dgc.tokens); + ctx->dgc.valid = false; + ctx->pipeline_changed[0] = true; + zink_select_draw_vbo(ctx); +} + +struct pipe_surface * +zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index) +{ + unsigned size = calc_max_dummy_fbo_size(ctx); + bool needs_null_init = false; + if (ctx->dummy_surface[samples_index]) { + /* delete old surface if ETOOSMALL */ + struct zink_resource *res = zink_resource(ctx->dummy_surface[samples_index]->texture); + if (res->base.b.width0 > size || res->base.b.height0 > size) { + 
pipe_surface_release(&ctx->base, &ctx->dummy_surface[samples_index]); + needs_null_init = !samples_index && ctx->di.null_fbfetch_init; + if (!samples_index) + ctx->di.null_fbfetch_init = false; + } + } + if (!ctx->dummy_surface[samples_index]) { + ctx->dummy_surface[samples_index] = zink_surface_create_null(ctx, PIPE_TEXTURE_2D, size, size, BITFIELD_BIT(samples_index)); + assert(ctx->dummy_surface[samples_index]); + /* This is possibly used with imageLoad which according to GL spec must return 0 */ + if (!samples_index) { + union pipe_color_union color = {0}; + struct pipe_box box; + u_box_2d(0, 0, size, size, &box); + ctx->base.clear_texture(&ctx->base, ctx->dummy_surface[samples_index]->texture, 0, &box, &color); + } + } + if (needs_null_init) + init_null_fbfetch(ctx); + return ctx->dummy_surface[samples_index]; +} + +struct zink_surface * +zink_get_dummy_surface(struct zink_context *ctx, int samples_index) +{ + return zink_csurface(zink_get_dummy_pipe_surface(ctx, samples_index)); + +} + +static void +zink_tc_parse_dsa(void *state, struct tc_renderpass_info *info) +{ + struct zink_depth_stencil_alpha_state *cso = state; + info->zsbuf_write_dsa |= (cso->hw_state.depth_write || cso->hw_state.stencil_test); + info->zsbuf_read_dsa |= (cso->hw_state.depth_test || cso->hw_state.stencil_test); + /* TODO: if zsbuf fbfetch is ever supported */ +} + +static void +zink_tc_parse_fs(void *state, struct tc_renderpass_info *info) +{ + struct zink_shader *zs = state; + info->zsbuf_write_fs |= zs->info.outputs_written & (BITFIELD64_BIT(FRAG_RESULT_DEPTH) | BITFIELD64_BIT(FRAG_RESULT_STENCIL)); + /* TODO: if >1 fbfetch attachment is ever supported */ + info->cbuf_fbfetch |= zs->info.fs.uses_fbfetch_output ? 
BITFIELD_BIT(0) : 0; +} + +void +zink_parse_tc_info(struct zink_context *ctx) +{ + struct tc_renderpass_info *info = &ctx->dynamic_fb.tc_info; + /* reset cso info first */ + info->data16[2] = 0; + if (ctx->gfx_stages[MESA_SHADER_FRAGMENT]) + zink_tc_parse_fs(ctx->gfx_stages[MESA_SHADER_FRAGMENT], info); + if (ctx->dsa_state) + zink_tc_parse_dsa(ctx->dsa_state, info); + if (ctx->zsbuf_unused == zink_is_zsbuf_used(ctx)) + ctx->rp_layout_changed = true; +} + struct pipe_context * zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct zink_screen *screen = zink_screen(pscreen); struct zink_context *ctx = rzalloc(NULL, struct zink_context); + bool is_copy_only = (flags & ZINK_CONTEXT_COPY_ONLY) > 0; + bool is_compute_only = (flags & PIPE_CONTEXT_COMPUTE_ONLY) > 0; + bool is_robust = (flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS) > 0; if (!ctx) goto fail; - ctx->have_timelines = screen->info.have_KHR_timeline_semaphore; + ctx->flags = flags; ctx->pipeline_changed[0] = ctx->pipeline_changed[1] = true; ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch = 1; + ctx->gfx_pipeline_state.uses_dynamic_stride = screen->info.have_EXT_extended_dynamic_state || + screen->info.have_EXT_vertex_input_dynamic_state; ctx->compute_pipeline_state.dirty = true; ctx->fb_changed = ctx->rp_changed = true; - ctx->gfx_pipeline_state.gfx_prim_mode = PIPE_PRIM_MAX; + ctx->sample_mask_changed = true; + ctx->gfx_pipeline_state.gfx_prim_mode = MESA_PRIM_COUNT; + ctx->gfx_pipeline_state.shader_rast_prim = MESA_PRIM_COUNT; + ctx->gfx_pipeline_state.rast_prim = MESA_PRIM_COUNT; zink_init_draw_functions(ctx, screen); zink_init_grid_functions(ctx); @@ -3582,22 +5344,15 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->base.screen = pscreen; ctx->base.priv = priv; - if (screen->info.have_KHR_imageless_framebuffer) { - ctx->get_framebuffer = zink_get_framebuffer_imageless; - ctx->init_framebuffer = 
zink_init_framebuffer_imageless; - } else { - ctx->get_framebuffer = zink_get_framebuffer; - ctx->init_framebuffer = zink_init_framebuffer; - } - ctx->base.destroy = zink_context_destroy; + ctx->base.set_debug_callback = zink_set_debug_callback; ctx->base.get_device_reset_status = zink_get_device_reset_status; ctx->base.set_device_reset_callback = zink_set_device_reset_callback; zink_context_state_init(&ctx->base); ctx->base.create_sampler_state = zink_create_sampler_state; - ctx->base.bind_sampler_states = zink_bind_sampler_states; + ctx->base.bind_sampler_states = screen->info.have_EXT_non_seamless_cube_map ? zink_bind_sampler_states : zink_bind_sampler_states_nonseamless; ctx->base.delete_sampler_state = zink_delete_sampler_state; ctx->base.create_sampler_view = zink_create_sampler_view; @@ -3623,15 +5378,20 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->base.set_tess_state = zink_set_tess_state; ctx->base.set_patch_vertices = zink_set_patch_vertices; + ctx->base.set_min_samples = zink_set_min_samples; + ctx->gfx_pipeline_state.min_samples = 0; ctx->base.set_sample_mask = zink_set_sample_mask; + ctx->gfx_pipeline_state.sample_mask = UINT32_MAX; ctx->base.clear = zink_clear; - ctx->base.clear_texture = zink_clear_texture; + ctx->base.clear_texture = screen->info.have_KHR_dynamic_rendering ? 
zink_clear_texture_dynamic : zink_clear_texture; ctx->base.clear_buffer = zink_clear_buffer; ctx->base.clear_render_target = zink_clear_render_target; ctx->base.clear_depth_stencil = zink_clear_depth_stencil; + ctx->base.create_fence_fd = zink_create_fence_fd; ctx->base.fence_server_sync = zink_fence_server_sync; + ctx->base.fence_server_signal = zink_fence_server_signal; ctx->base.flush = zink_flush; ctx->base.memory_barrier = zink_memory_barrier; ctx->base.texture_barrier = zink_texture_barrier; @@ -3645,6 +5405,8 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->base.set_stream_output_targets = zink_set_stream_output_targets; ctx->base.flush_resource = zink_flush_resource; + if (screen->info.have_KHR_buffer_device_address) + ctx->base.set_global_binding = zink_set_global_binding; ctx->base.emit_string_marker = zink_emit_string_marker; @@ -3652,6 +5414,10 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) zink_context_resource_init(&ctx->base); zink_context_query_init(&ctx->base); + util_queue_fence_init(&ctx->flush_fence); + util_queue_fence_init(&ctx->unsync_fence); + + list_inithead(&ctx->query_pools); _mesa_set_init(&ctx->update_barriers[0][0], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); _mesa_set_init(&ctx->update_barriers[1][0], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); _mesa_set_init(&ctx->update_barriers[0][1], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -3659,12 +5425,6 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->need_barriers[0] = &ctx->update_barriers[0][0]; ctx->need_barriers[1] = &ctx->update_barriers[1][0]; - util_dynarray_init(&ctx->free_batch_states, ctx); - _mesa_hash_table_init(&ctx->batch_states, ctx, NULL, _mesa_key_pointer_equal); - - ctx->gfx_pipeline_state.have_EXT_extended_dynamic_state = screen->info.have_EXT_extended_dynamic_state; - ctx->gfx_pipeline_state.have_EXT_extended_dynamic_state2 = 
screen->info.have_EXT_extended_dynamic_state2; - slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool); @@ -3673,89 +5433,228 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) for (int i = 0; i < ARRAY_SIZE(ctx->fb_clears); i++) util_dynarray_init(&ctx->fb_clears[i].clears, ctx); - ctx->blitter = util_blitter_create(&ctx->base); - if (!ctx->blitter) - goto fail; + if (zink_debug & ZINK_DEBUG_DGC) { + util_dynarray_init(&ctx->dgc.pipelines, ctx); + util_dynarray_init(&ctx->dgc.tokens, ctx); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.upload); i++) + ctx->dgc.upload[i] = u_upload_create_default(&ctx->base); + } + + if (!is_copy_only) { + ctx->blitter = util_blitter_create(&ctx->base); + if (!ctx->blitter) + goto fail; + } + zink_set_last_vertex_key(ctx)->last_vertex_stage = true; ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base.last_vertex_stage = true; - ctx->last_vertex_stage_dirty = true; - ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX].size = sizeof(struct zink_vs_key_base); - ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_TESS_EVAL].size = sizeof(struct zink_vs_key_base); - ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_GEOMETRY].size = sizeof(struct zink_vs_key_base); - ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_FRAGMENT].size = sizeof(struct zink_fs_key); - _mesa_hash_table_init(&ctx->compute_program_cache, ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); + zink_set_tcs_key_patches(ctx, 1); + if (!screen->optimal_keys) { + ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].size = sizeof(struct zink_vs_key_base); + ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_EVAL].size = sizeof(struct zink_vs_key_base); + ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_CTRL].size = sizeof(struct zink_tcs_key); + ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_GEOMETRY].size = 
sizeof(struct zink_gs_key); + ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].size = sizeof(struct zink_fs_key); + + /* this condition must be updated if new fields are added to zink_cs_key */ + if (screen->driver_workarounds.lower_robustImageAccess2) + ctx->compute_pipeline_state.key.size = sizeof(struct zink_cs_key); + + if (is_robust && screen->driver_workarounds.lower_robustImageAccess2) { + ctx->compute_pipeline_state.key.key.cs.robust_access = true; + for (gl_shader_stage pstage = MESA_SHADER_VERTEX; pstage < MESA_SHADER_FRAGMENT; pstage++) + ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base.robust_access = true; + ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.robust_access = true; + } + } _mesa_hash_table_init(&ctx->framebuffer_cache, ctx, hash_framebuffer_imageless, equals_framebuffer_imageless); - _mesa_set_init(&ctx->render_pass_state_cache, ctx, hash_rp_state, equals_rp_state); - ctx->render_pass_cache = _mesa_hash_table_create(NULL, - hash_render_pass_state, - equals_render_pass_state); - if (!ctx->render_pass_cache) + if (!zink_init_render_pass(ctx)) goto fail; - - const uint8_t data[] = {0}; - ctx->dummy_vertex_buffer = pipe_buffer_create(&screen->base, - PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SHADER_IMAGE, PIPE_USAGE_IMMUTABLE, sizeof(data)); - if (!ctx->dummy_vertex_buffer) - goto fail; - ctx->dummy_xfb_buffer = pipe_buffer_create(&screen->base, - PIPE_BIND_STREAM_OUTPUT, PIPE_USAGE_DEFAULT, sizeof(data)); - if (!ctx->dummy_xfb_buffer) - goto fail; - for (unsigned i = 0; i < ARRAY_SIZE(ctx->dummy_surface); i++) { - if (!(screen->info.props.limits.framebufferDepthSampleCounts & BITFIELD_BIT(i))) - continue; - ctx->dummy_surface[i] = zink_surface_create_null(ctx, PIPE_TEXTURE_2D, 1024, 1024, BITFIELD_BIT(i)); - if (!ctx->dummy_surface[i]) + for (unsigned i = 0; i < ARRAY_SIZE(ctx->rendering_state_cache); i++) + _mesa_set_init(&ctx->rendering_state_cache[i], ctx, hash_rendering_state, equals_rendering_state); 
+ ctx->dynamic_fb.info.pColorAttachments = ctx->dynamic_fb.attachments; + ctx->dynamic_fb.info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO; + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dynamic_fb.attachments); i++) { + VkRenderingAttachmentInfo *att = &ctx->dynamic_fb.attachments[i]; + att->sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO; + att->imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + ctx->gfx_pipeline_state.rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO; + ctx->gfx_pipeline_state.rendering_info.pColorAttachmentFormats = ctx->gfx_pipeline_state.rendering_formats; + ctx->gfx_pipeline_state.feedback_loop = screen->driver_workarounds.always_feedback_loop; + ctx->gfx_pipeline_state.feedback_loop_zs = screen->driver_workarounds.always_feedback_loop_zs; + + const uint32_t data[] = {0}; + if (!is_copy_only) { + ctx->dummy_vertex_buffer = pipe_buffer_create(&screen->base, + PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SHADER_IMAGE, PIPE_USAGE_IMMUTABLE, sizeof(data)); + if (!ctx->dummy_vertex_buffer) + goto fail; + ctx->dummy_xfb_buffer = pipe_buffer_create(&screen->base, + PIPE_BIND_STREAM_OUTPUT, PIPE_USAGE_IMMUTABLE, sizeof(data)); + if (!ctx->dummy_xfb_buffer) goto fail; } - ctx->dummy_bufferview = get_buffer_view(ctx, zink_resource(ctx->dummy_vertex_buffer), PIPE_FORMAT_R8_UNORM, 0, sizeof(data)); - if (!ctx->dummy_bufferview) - goto fail; - - if (!zink_descriptor_layouts_init(ctx)) - goto fail; + if (!is_copy_only) { + VkBufferViewCreateInfo bvci = create_bvci(ctx, zink_resource(ctx->dummy_vertex_buffer), PIPE_FORMAT_R8G8B8A8_UNORM, 0, sizeof(data)); + ctx->dummy_bufferview = get_buffer_view(ctx, zink_resource(ctx->dummy_vertex_buffer), &bvci); + if (!ctx->dummy_bufferview) + goto fail; - if (!screen->descriptors_init(ctx)) { - zink_screen_init_descriptor_funcs(screen, true); - if (!screen->descriptors_init(ctx)) + if (!zink_descriptors_init(ctx)) goto fail; } - ctx->have_timelines = 
screen->info.have_KHR_timeline_semaphore; - simple_mtx_init(&ctx->batch_mtx, mtx_plain); + if (!is_copy_only && !is_compute_only) { + ctx->base.create_texture_handle = zink_create_texture_handle; + ctx->base.delete_texture_handle = zink_delete_texture_handle; + ctx->base.make_texture_handle_resident = zink_make_texture_handle_resident; + ctx->base.create_image_handle = zink_create_image_handle; + ctx->base.delete_image_handle = zink_delete_image_handle; + ctx->base.make_image_handle_resident = zink_make_image_handle_resident; + for (unsigned i = 0; i < 2; i++) { + _mesa_hash_table_init(&ctx->di.bindless[i].img_handles, ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); + _mesa_hash_table_init(&ctx->di.bindless[i].tex_handles, ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); + + /* allocate 1024 slots and reserve slot 0 */ + util_idalloc_init(&ctx->di.bindless[i].tex_slots, ZINK_MAX_BINDLESS_HANDLES); + util_idalloc_alloc(&ctx->di.bindless[i].tex_slots); + util_idalloc_init(&ctx->di.bindless[i].img_slots, ZINK_MAX_BINDLESS_HANDLES); + util_idalloc_alloc(&ctx->di.bindless[i].img_slots); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + ctx->di.bindless[i].db.buffer_infos = malloc(sizeof(VkDescriptorAddressInfoEXT) * ZINK_MAX_BINDLESS_HANDLES); + if (!ctx->di.bindless[i].db.buffer_infos) { + mesa_loge("ZINK: failed to allocate ctx->di.bindless[%d].db.buffer_infos!",i); + goto fail; + } + for (unsigned j = 0; j < ZINK_MAX_BINDLESS_HANDLES; j++) { + ctx->di.bindless[i].db.buffer_infos[j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; + ctx->di.bindless[i].db.buffer_infos[j].pNext = NULL; + } + } else { + ctx->di.bindless[i].t.buffer_infos = malloc(sizeof(VkBufferView) * ZINK_MAX_BINDLESS_HANDLES); + if (!ctx->di.bindless[i].t.buffer_infos) { + mesa_loge("ZINK: failed to allocate ctx->di.bindless[%d].t.buffer_infos!",i); + goto fail; + } + } + ctx->di.bindless[i].img_infos = malloc(sizeof(VkDescriptorImageInfo) * ZINK_MAX_BINDLESS_HANDLES); + if 
(!ctx->di.bindless[i].img_infos) { + mesa_loge("ZINK: failed to allocate ctx->di.bindless[%d].img_infos!",i); + goto fail; + } + util_dynarray_init(&ctx->di.bindless[i].updates, NULL); + util_dynarray_init(&ctx->di.bindless[i].resident, NULL); + } + } + + simple_mtx_init(&ctx->batch.ref_lock, mtx_plain); zink_start_batch(ctx, &ctx->batch); if (!ctx->batch.state) goto fail; - pipe_buffer_write(&ctx->base, ctx->dummy_vertex_buffer, 0, sizeof(data), data); - pipe_buffer_write(&ctx->base, ctx->dummy_xfb_buffer, 0, sizeof(data), data); - - for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) { - /* need to update these based on screen config for null descriptors */ - for (unsigned j = 0; j < 32; j++) { - update_descriptor_state_ubo(ctx, i, j, NULL); - update_descriptor_state_sampler(ctx, i, j, NULL); - update_descriptor_state_ssbo(ctx, i, j, NULL); - update_descriptor_state_image(ctx, i, j, NULL); + if (screen->compact_descriptors) + ctx->invalidate_descriptor_state = zink_context_invalidate_descriptor_state_compact; + else + ctx->invalidate_descriptor_state = zink_context_invalidate_descriptor_state; + if (!is_copy_only && !is_compute_only) { + pipe_buffer_write_nooverlap(&ctx->base, ctx->dummy_vertex_buffer, 0, sizeof(data), data); + pipe_buffer_write_nooverlap(&ctx->base, ctx->dummy_xfb_buffer, 0, sizeof(data), data); + if (screen->info.have_EXT_color_write_enable) + reapply_color_write(ctx); + + /* set on startup just to avoid validation errors if a draw comes through without + * a tess shader later + */ + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) { + VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, 1); + VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->reordered_cmdbuf, 1); + } + } + if (!is_copy_only) { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + /* need to update these based on screen config for null descriptors */ + for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.t.ubos[i]); j++) { + 
update_descriptor_state_ubo(ctx, i, j, NULL); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + ctx->di.db.ubos[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; + } + for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.textures[i]); j++) { + update_descriptor_state_sampler(ctx, i, j, NULL); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + ctx->di.db.tbos[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; + } + for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.t.ssbos[i]); j++) { + update_descriptor_state_ssbo(ctx, i, j, NULL); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + ctx->di.db.ssbos[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; + } + for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.images[i]); j++) { + update_descriptor_state_image(ctx, i, j, NULL); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + ctx->di.db.texel_images[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; + } } + + p_atomic_inc(&screen->base.num_contexts); } - if (!screen->info.rb2_feats.nullDescriptor) - ctx->di.fbfetch.imageView = zink_csurface(ctx->dummy_surface[0])->image_view; - p_atomic_inc(&screen->base.num_contexts); zink_select_draw_vbo(ctx); zink_select_launch_grid(ctx); + if (!is_copy_only && zink_debug & ZINK_DEBUG_SHADERDB) { + if (!screen->info.have_EXT_vertex_input_dynamic_state) { + struct pipe_vertex_element velems[32] = {0}; + for (unsigned i = 0; i < ARRAY_SIZE(velems); i++) + velems[i].src_format = PIPE_FORMAT_R8G8B8_UNORM; + void *state = ctx->base.create_vertex_elements_state(&ctx->base, ARRAY_SIZE(velems), velems); + ctx->base.bind_vertex_elements_state(&ctx->base, state); + } + ctx->gfx_pipeline_state.sample_mask = BITFIELD_MASK(32); + struct pipe_framebuffer_state fb = {0}; + fb.cbufs[0] = zink_get_dummy_pipe_surface(ctx, 0); + fb.nr_cbufs = 1; + fb.width = fb.height = 256; + ctx->base.set_framebuffer_state(&ctx->base, &fb); + ctx->disable_fs = true; + struct pipe_depth_stencil_alpha_state dsa = {0}; + void 
*state = ctx->base.create_depth_stencil_alpha_state(&ctx->base, &dsa); + ctx->base.bind_depth_stencil_alpha_state(&ctx->base, state); + + struct pipe_blend_state blend = { + .rt[0].colormask = 0xF + }; + + void *blend_state = ctx->base.create_blend_state(&ctx->base, &blend); + ctx->base.bind_blend_state(&ctx->base, blend_state); + + zink_batch_rp(ctx); + } + + if (!is_compute_only && zink_debug & ZINK_DEBUG_NOREORDER) + ctx->no_reorder = true; + if (!(flags & PIPE_CONTEXT_PREFER_THREADED) || flags & PIPE_CONTEXT_COMPUTE_ONLY) { return &ctx->base; } struct threaded_context *tc = (struct threaded_context*)threaded_context_create(&ctx->base, &screen->transfer_pool, zink_context_replace_buffer_storage, - zink_create_tc_fence_for_tc, - zink_context_is_resource_busy, true, &ctx->tc); + &(struct threaded_context_options){ + .create_fence = zink_create_tc_fence_for_tc, + .is_resource_busy = zink_context_is_resource_busy, + .driver_calls_flush_notify = !screen->driver_workarounds.track_renderpasses, + .unsynchronized_get_device_reset_status = true, + .unsynchronized_texture_subdata = true, + .parse_renderpass_info = screen->driver_workarounds.track_renderpasses, + .dsa_parse = zink_tc_parse_dsa, + .fs_parse = zink_tc_parse_fs, + }, + &ctx->tc); if (tc && (struct zink_context*)tc != ctx) { + ctx->track_renderpasses = screen->driver_workarounds.track_renderpasses; threaded_context_init_bytes_mapped_limit(tc, 4); ctx->base.set_context_param = zink_set_context_param; } @@ -3767,3 +5666,143 @@ fail: zink_context_destroy(&ctx->base); return NULL; } + +struct zink_context * +zink_tc_context_unwrap(struct pipe_context *pctx, bool threaded) +{ + /* need to get the actual zink_context, not the threaded context */ + if (threaded) + pctx = threaded_context_unwrap_sync(pctx); + pctx = trace_get_possibly_threaded_context(pctx); + return zink_context(pctx); +} + + +static bool +add_implicit_feedback_loop(struct zink_context *ctx, struct zink_resource *res) +{ + /* can only feedback loop 
with fb+sampler bind; image bind must be GENERAL */ + if (!res->fb_bind_count || !res->sampler_bind_count[0] || res->image_bind_count[0]) + return false; + if (!(res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) && !zink_is_zsbuf_write(ctx)) + /* if zsbuf isn't used then it effectively has no fb binds */ + /* if zsbuf isn't written to then it'll be fine with read-only access */ + return false; + bool is_feedback = false; + /* avoid false positives when a texture is bound but not used */ + u_foreach_bit(vkstage, res->gfx_barrier) { + VkPipelineStageFlags vkstagebit = BITFIELD_BIT(vkstage); + if (vkstagebit < VK_PIPELINE_STAGE_VERTEX_SHADER_BIT || vkstagebit > VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) + continue; + /* in-range VkPipelineStageFlagBits can be converted to VkShaderStageFlags with a bitshift */ + gl_shader_stage stage = vk_to_mesa_shader_stage((VkShaderStageFlagBits)(vkstagebit >> 3)); + /* check shader texture usage against resource's sampler binds */ + if ((ctx->gfx_stages[stage] && (res->sampler_binds[stage] & ctx->gfx_stages[stage]->info.textures_used[0]))) + is_feedback = true; + } + if (!is_feedback) + return false; + if (ctx->feedback_loops & res->fb_binds) + /* already added */ + return true; + /* new feedback loop detected */ + if (res->aspect == VK_IMAGE_ASPECT_COLOR_BIT) { + if (!ctx->gfx_pipeline_state.feedback_loop) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.feedback_loop = true; + } else { + if (!ctx->gfx_pipeline_state.feedback_loop_zs) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.feedback_loop_zs = true; + } + ctx->rp_layout_changed = true; + ctx->feedback_loops |= res->fb_binds; + u_foreach_bit(idx, res->fb_binds) { + if (zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout) + ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + else + ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + 
update_feedback_loop_dynamic_state(ctx); + return true; +} + +void +zink_update_barriers(struct zink_context *ctx, bool is_compute, + struct pipe_resource *index, struct pipe_resource *indirect, struct pipe_resource *indirect_draw_count) +{ + assert(!ctx->blitting); + if (!ctx->need_barriers[is_compute]->entries) + return; + struct set *need_barriers = ctx->need_barriers[is_compute]; + ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute]; + ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]]; + ASSERTED bool check_rp = ctx->batch.in_rp && ctx->dynamic_fb.tc_info.zsbuf_invalidate; + set_foreach(need_barriers, he) { + struct zink_resource *res = (struct zink_resource *)he->key; + if (res->bind_count[is_compute]) { + VkPipelineStageFlagBits pipeline = is_compute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : res->gfx_barrier; + if (res->base.b.target == PIPE_BUFFER) + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, res->barrier_access[is_compute], pipeline); + else { + bool is_feedback = is_compute ? 
false : add_implicit_feedback_loop(ctx, res); + VkImageLayout layout = zink_descriptor_util_image_layout_eval(ctx, res, is_compute); + /* GENERAL is only used for feedback loops and storage image binds */ + if (is_feedback || layout != VK_IMAGE_LAYOUT_GENERAL || res->image_bind_count[is_compute]) + zink_screen(ctx->base.screen)->image_barrier(ctx, res, layout, res->barrier_access[is_compute], pipeline); + assert(!check_rp || check_rp == ctx->batch.in_rp); + if (is_feedback) + update_res_sampler_layouts(ctx, res); + } + if (zink_resource_access_is_write(res->barrier_access[is_compute]) || + // TODO: figure out a way to link up layouts between unordered and main cmdbuf + res->base.b.target != PIPE_BUFFER) + res->obj->unordered_write = false; + res->obj->unordered_read = false; + /* always barrier on draw if this resource has either multiple image write binds or + * image write binds and image read binds + */ + if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1) + _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res); + } + _mesa_set_remove(need_barriers, he); + if (!need_barriers->entries) + break; + } +} + +/** + * Emits a debug marker in the cmd stream to be captured by perfetto during + * execution on the GPU. + */ +bool +zink_cmd_debug_marker_begin(struct zink_context *ctx, VkCommandBuffer cmdbuf, const char *fmt, ...) +{ + if (!zink_tracing) + return false; + + char *name; + va_list va; + va_start(va, fmt); + int ret = vasprintf(&name, fmt, va); + va_end(va); + + if (ret == -1) + return false; + + VkDebugUtilsLabelEXT info = { 0 }; + info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + info.pLabelName = name; + + VKCTX(CmdBeginDebugUtilsLabelEXT)(cmdbuf ? 
cmdbuf : ctx->batch.state->cmdbuf, &info); + + free(name); + return true; +} + +void +zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf, bool emitted) +{ + if (emitted) + VKCTX(CmdEndDebugUtilsLabelEXT)(cmdbuf); +} diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index abfe0199c15..79b6c087bff 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -24,29 +24,17 @@ #ifndef ZINK_CONTEXT_H #define ZINK_CONTEXT_H -#define ZINK_FBFETCH_BINDING 6 //COMPUTE+1 -#define ZINK_SHADER_COUNT (PIPE_SHADER_TYPES - 1) - -#define ZINK_DEFAULT_MAX_DESCS 5000 -#define ZINK_DEFAULT_DESC_CLAMP (ZINK_DEFAULT_MAX_DESCS * 0.9) - -#include "zink_clear.h" -#include "zink_pipeline.h" -#include "zink_batch.h" -#include "zink_compiler.h" -#include "zink_descriptors.h" -#include "zink_surface.h" - -#include "pipe/p_context.h" -#include "pipe/p_state.h" #include "util/u_rect.h" -#include "util/u_threaded_context.h" +#include "zink_types.h" +#include "vk_enum_to_str.h" -#include "util/slab.h" -#include "util/list.h" -#include "util/u_dynarray.h" +#define GFX_SHADER_BITS (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) -#include <vulkan/vulkan.h> +#define pipe_buffer_write "use tc_buffer_write to avoid breaking threaded context" #ifdef __cplusplus extern "C" { @@ -62,285 +50,17 @@ struct zink_rasterizer_state; struct zink_resource; struct zink_vertex_elements_state; -enum zink_blit_flags { - ZINK_BLIT_NORMAL = 1 << 0, - ZINK_BLIT_SAVE_FS = 1 << 1, - ZINK_BLIT_SAVE_FB = 1 << 2, - ZINK_BLIT_SAVE_TEXTURES = 1 << 3, - ZINK_BLIT_NO_COND_RENDER = 1 << 4, -}; - -struct zink_sampler_state { - VkSampler sampler; - uint32_t hash; - struct zink_descriptor_refs desc_set_refs; - struct zink_batch_usage 
*batch_uses; - bool custom_border_color; -}; - -struct zink_buffer_view { - struct pipe_reference reference; - struct pipe_resource *pres; - VkBufferViewCreateInfo bvci; - VkBufferView buffer_view; - uint32_t hash; - struct zink_batch_usage *batch_uses; - struct zink_descriptor_refs desc_set_refs; -}; - -struct zink_sampler_view { - struct pipe_sampler_view base; - union { - struct zink_surface *image_view; - struct zink_buffer_view *buffer_view; - }; -}; - -struct zink_image_view { - struct pipe_image_view base; - union { - struct zink_surface *surface; - struct zink_buffer_view *buffer_view; - }; -}; - -static inline struct zink_sampler_view * -zink_sampler_view(struct pipe_sampler_view *pview) -{ - return (struct zink_sampler_view *)pview; -} +#define perf_debug(ctx, ...) do { \ + util_debug_message(&ctx->dbg, PERF_INFO, __VA_ARGS__); \ +} while(0) -struct zink_so_target { - struct pipe_stream_output_target base; - struct pipe_resource *counter_buffer; - VkDeviceSize counter_buffer_offset; - uint32_t stride; - bool counter_buffer_valid; -}; -static inline struct zink_so_target * -zink_so_target(struct pipe_stream_output_target *so_target) +static inline struct zink_resource * +zink_descriptor_surface_resource(struct zink_descriptor_surface *ds) { - return (struct zink_so_target *)so_target; -} - -struct zink_viewport_state { - struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS]; - struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS]; - uint8_t num_viewports; -}; - - -struct zink_descriptor_surface { - union { - struct zink_surface *surface; - struct zink_buffer_view *bufferview; - }; - bool is_buffer; -}; - -typedef void (*pipe_draw_vbo_func)(struct pipe_context *pipe, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws); - -typedef void (*pipe_launch_grid_func)(struct pipe_context *pipe, const struct 
pipe_grid_info *info); - -typedef enum { - ZINK_NO_MULTIDRAW, - ZINK_MULTIDRAW, -} zink_multidraw; - -typedef enum { - ZINK_NO_DYNAMIC_STATE, - ZINK_DYNAMIC_STATE, -} zink_dynamic_state; - -typedef enum { - ZINK_NO_DYNAMIC_STATE2, - ZINK_DYNAMIC_STATE2, -} zink_dynamic_state2; - -typedef enum { - ZINK_NO_DYNAMIC_VERTEX_INPUT, - ZINK_DYNAMIC_VERTEX_INPUT, -} zink_dynamic_vertex_input; - -struct zink_context { - struct pipe_context base; - struct threaded_context *tc; - struct slab_child_pool transfer_pool; - struct slab_child_pool transfer_pool_unsync; - struct blitter_context *blitter; - - pipe_draw_vbo_func draw_vbo[2]; //batch changed - pipe_launch_grid_func launch_grid[2]; //batch changed - - struct pipe_device_reset_callback reset; - - uint32_t curr_batch; //the current batch id - - simple_mtx_t batch_mtx; - struct zink_fence *deferred_fence; - struct zink_fence *last_fence; //the last command buffer submitted - struct hash_table batch_states; //submitted batch states - struct util_dynarray free_batch_states; //unused batch states - bool oom_flush; - bool oom_stall; - struct zink_batch batch; - - unsigned shader_has_inlinable_uniforms_mask; - unsigned inlinable_uniforms_valid_mask; - uint32_t compute_inlinable_uniforms[MAX_INLINABLE_UNIFORMS]; - - struct pipe_constant_buffer ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; - struct pipe_shader_buffer ssbos[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; - uint32_t writable_ssbos[PIPE_SHADER_TYPES]; - struct zink_image_view image_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; - - struct pipe_framebuffer_state fb_state; - struct zink_framebuffer *(*get_framebuffer)(struct zink_context*); - void (*init_framebuffer)(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp); - struct hash_table framebuffer_cache; - - struct zink_vertex_elements_state *element_state; - struct zink_rasterizer_state *rast_state; - struct zink_depth_stencil_alpha_state *dsa_state; - - struct hash_table 
desc_set_layouts[ZINK_DESCRIPTOR_TYPES]; - bool pipeline_changed[2]; //gfx, compute - - struct zink_shader *gfx_stages[ZINK_SHADER_COUNT]; - struct zink_shader *last_vertex_stage; - bool shader_reads_drawid; - bool shader_reads_basevertex; - struct zink_gfx_pipeline_state gfx_pipeline_state; - /* there are 5 gfx stages, but VS and FS are assumed to be always present, - * thus only 3 stages need to be considered, giving 2^3 = 8 program caches. - */ - struct hash_table program_cache[8]; - uint32_t gfx_hash; - struct zink_gfx_program *curr_program; - - struct zink_descriptor_data *dd; - - struct zink_shader *compute_stage; - struct zink_compute_pipeline_state compute_pipeline_state; - struct hash_table compute_program_cache; - struct zink_compute_program *curr_compute; - - unsigned shader_stages : ZINK_SHADER_COUNT; /* mask of bound gfx shader stages */ - unsigned dirty_shader_stages : 6; /* mask of changed shader stages */ - bool last_vertex_stage_dirty; - - struct set render_pass_state_cache; - struct hash_table *render_pass_cache; - bool new_swapchain; - bool fb_changed; - bool rp_changed; - - struct zink_framebuffer *framebuffer; - struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1]; - uint16_t clears_enabled; - uint16_t rp_clears_enabled; - uint16_t fbfetch_outputs; - - VkBuffer vbufs[PIPE_MAX_ATTRIBS]; - unsigned vbuf_offsets[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - bool vertex_buffers_dirty; - - void *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - - struct zink_viewport_state vp_state; - bool vp_state_changed; - bool scissor_changed; - - float blend_constants[4]; - - bool sample_locations_changed; - VkSampleLocationEXT vk_sample_locations[PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE * PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE]; - uint8_t sample_locations[2 * 4 * 8 * 16]; - - struct pipe_stencil_ref stencil_ref; - - union { - struct { - 
float default_inner_level[2]; - float default_outer_level[4]; - }; - float tess_levels[6]; - }; - - struct list_head suspended_queries; - struct list_head primitives_generated_queries; - bool queries_disabled, render_condition_active; - struct { - struct zink_query *query; - bool inverted; - } render_condition; - - struct pipe_resource *dummy_vertex_buffer; - struct pipe_resource *dummy_xfb_buffer; - struct pipe_surface *dummy_surface[7]; - struct zink_buffer_view *dummy_bufferview; - - unsigned buffer_rebind_counter; - - struct { - /* descriptor info */ - VkDescriptorBufferInfo ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; - uint32_t push_valid; - uint8_t num_ubos[PIPE_SHADER_TYPES]; - - VkDescriptorBufferInfo ssbos[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; - uint8_t num_ssbos[PIPE_SHADER_TYPES]; - - VkDescriptorImageInfo textures[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - VkBufferView tbos[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - uint8_t num_samplers[PIPE_SHADER_TYPES]; - uint8_t num_sampler_views[PIPE_SHADER_TYPES]; - - VkDescriptorImageInfo images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; - VkBufferView texel_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; - uint8_t num_images[PIPE_SHADER_TYPES]; - - VkDescriptorImageInfo fbfetch; - - struct zink_resource *descriptor_res[ZINK_DESCRIPTOR_TYPES][PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - struct zink_descriptor_surface sampler_surfaces[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - struct zink_descriptor_surface image_surfaces[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; - } di; - struct set *need_barriers[2]; //gfx, compute - struct set update_barriers[2][2]; //[gfx, compute][current, next] - uint8_t barrier_set_idx[2]; - unsigned memory_barrier; - - uint32_t num_so_targets; - struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_OUTPUTS]; - bool dirty_so_targets; - bool xfb_barrier; - bool first_frame_done; - bool have_timelines; - - bool gfx_dirty; - - bool is_device_lost; - bool primitive_restart; - bool 
vertex_state_changed : 1; - bool blend_state_changed : 1; - bool rast_state_changed : 1; - bool dsa_state_changed : 1; - bool stencil_ref_changed : 1; -}; - -static inline struct zink_context * -zink_context(struct pipe_context *context) -{ - return (struct zink_context *)context; + return ds->is_buffer ? + zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB ? zink_resource(ds->db.pres) : zink_resource(ds->bufferview->pres) : + (struct zink_resource*)ds->surface->base.texture; } static inline bool @@ -351,18 +71,45 @@ zink_fb_clear_enabled(const struct zink_context *ctx, unsigned idx) return ctx->clears_enabled & (PIPE_CLEAR_COLOR0 << idx); } +static inline uint32_t +zink_program_cache_stages(uint32_t stages_present) +{ + return (stages_present & ((1 << MESA_SHADER_TESS_CTRL) | + (1 << MESA_SHADER_TESS_EVAL) | + (1 << MESA_SHADER_GEOMETRY))) >> 1; +} + +static ALWAYS_INLINE bool +zink_is_zsbuf_used(const struct zink_context *ctx) +{ + return ctx->blitting || tc_renderpass_info_is_zsbuf_used(&ctx->dynamic_fb.tc_info); +} + +static ALWAYS_INLINE bool +zink_is_zsbuf_write(const struct zink_context *ctx) +{ + if (!zink_is_zsbuf_used(ctx)) + return false; + return ctx->dynamic_fb.tc_info.zsbuf_write_fs || ctx->dynamic_fb.tc_info.zsbuf_write_dsa || + ctx->dynamic_fb.tc_info.zsbuf_clear || ctx->dynamic_fb.tc_info.zsbuf_clear_partial; +} + void zink_fence_wait(struct pipe_context *ctx); void -zink_wait_on_batch(struct zink_context *ctx, uint32_t batch_id); - +zink_wait_on_batch(struct zink_context *ctx, uint64_t batch_id); +void +zink_reset_ds3_states(struct zink_context *ctx); bool -zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id, bool have_lock); - +zink_check_batch_completion(struct zink_context *ctx, uint64_t batch_id); +VkCommandBuffer +zink_get_cmdbuf(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst); +unsigned +zink_update_rendering_info(struct zink_context *ctx); void zink_flush_queue(struct zink_context *ctx); 
-void +bool zink_update_fbfetch(struct zink_context *ctx); bool zink_resource_access_is_write(VkAccessFlags flags); @@ -370,56 +117,59 @@ zink_resource_access_is_write(VkAccessFlags flags); void zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); void -zink_fake_buffer_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); -bool -zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); -bool +zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); +void zink_resource_image_barrier_init(VkImageMemoryBarrier *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); void +zink_resource_image_barrier2_init(VkImageMemoryBarrier2 *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); +void zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); - +void +zink_resource_image_barrier2(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); +bool +zink_check_unordered_transfer_access(struct zink_resource *res, unsigned level, const struct pipe_box *box); bool -zink_resource_needs_barrier(struct zink_resource *res, VkImageLayout layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); +zink_check_valid_buffer_src_access(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size); +void +zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box, bool unsync); +bool 
+zink_resource_buffer_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size); +void +zink_synchronization_init(struct zink_screen *screen); void zink_update_descriptor_refs(struct zink_context *ctx, bool compute); void zink_init_vk_sample_locations(struct zink_context *ctx, VkSampleLocationsInfoEXT *loc); void -zink_begin_render_pass(struct zink_context *ctx); -void -zink_end_render_pass(struct zink_context *ctx); +zink_batch_rp(struct zink_context *ctx); -static inline void -zink_batch_rp(struct zink_context *ctx) -{ - if (!ctx->batch.in_rp) - zink_begin_render_pass(ctx); -} +void +zink_batch_no_rp(struct zink_context *ctx); +void +zink_batch_no_rp_safe(struct zink_context *ctx); -static inline void -zink_batch_no_rp(struct zink_context *ctx) -{ - zink_end_render_pass(ctx); - assert(!ctx->batch.in_rp); -} +VkImageView +zink_prep_fb_attachment(struct zink_context *ctx, struct zink_surface *surf, unsigned i); +void +zink_update_vk_sample_locations(struct zink_context *ctx); static inline VkPipelineStageFlags -zink_pipeline_flags_from_pipe_stage(enum pipe_shader_type pstage) +zink_pipeline_flags_from_pipe_stage(gl_shader_stage pstage) { switch (pstage) { - case PIPE_SHADER_VERTEX: + case MESA_SHADER_VERTEX: return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; - case PIPE_SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - case PIPE_SHADER_GEOMETRY: + case MESA_SHADER_GEOMETRY: return VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; - case PIPE_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_CTRL: return VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT; - case PIPE_SHADER_TESS_EVAL: + case MESA_SHADER_TESS_EVAL: return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; - case PIPE_SHADER_COMPUTE: + case MESA_SHADER_COMPUTE: return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; default: unreachable("unknown shader stage"); @@ -428,13 +178,44 @@ zink_pipeline_flags_from_pipe_stage(enum pipe_shader_type 
pstage) void zink_rebind_all_buffers(struct zink_context *ctx); +void +zink_rebind_all_images(struct zink_context *ctx); void +zink_parse_tc_info(struct zink_context *ctx); +void zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute); void zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen); void zink_init_grid_functions(struct zink_context *ctx); +struct zink_context * +zink_tc_context_unwrap(struct pipe_context *pctx, bool threaded); + +void +zink_update_barriers(struct zink_context *ctx, bool is_compute, + struct pipe_resource *index, struct pipe_resource *indirect, struct pipe_resource *indirect_draw_count); + + +bool +zink_cmd_debug_marker_begin(struct zink_context *ctx, VkCommandBuffer cmdbuf, const char *fmt, ...); +void +zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf,bool emitted); +void +zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, + unsigned dst_offset, unsigned src_offset, unsigned size); + +VkIndirectCommandsLayoutTokenNV * +zink_dgc_add_token(struct zink_context *ctx, VkIndirectCommandsTokenTypeNV type, void **mem); +void +zink_flush_dgc(struct zink_context *ctx); + +static ALWAYS_INLINE void +zink_flush_dgc_if_enabled(struct zink_context *ctx) +{ + if (unlikely(zink_debug & ZINK_DEBUG_DGC)) + zink_flush_dgc(ctx); +} #ifdef __cplusplus } @@ -444,9 +225,6 @@ zink_init_grid_functions(struct zink_context *ctx); VkPipelineStageFlags zink_pipeline_flags_from_stage(VkShaderStageFlagBits stage); -VkShaderStageFlagBits -zink_shader_stage(enum pipe_shader_type type); - struct pipe_context * zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); @@ -455,6 +233,8 @@ zink_context_query_init(struct pipe_context *ctx); void zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags); +void +zink_blit_barriers(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst, bool whole_dst); void 
zink_blit(struct pipe_context *pctx, @@ -482,12 +262,14 @@ zink_component_mapping(enum pipe_swizzle swizzle) case PIPE_SWIZZLE_W: return VK_COMPONENT_SWIZZLE_A; case PIPE_SWIZZLE_0: return VK_COMPONENT_SWIZZLE_ZERO; case PIPE_SWIZZLE_1: return VK_COMPONENT_SWIZZLE_ONE; - case PIPE_SWIZZLE_NONE: return VK_COMPONENT_SWIZZLE_IDENTITY; // ??? default: unreachable("unexpected swizzle"); } } +void +zink_update_shadow_samplerviews(struct zink_context *ctx, unsigned mask); + enum pipe_swizzle zink_clamp_void_swizzle(const struct util_format_description *desc, enum pipe_swizzle swizzle); @@ -496,10 +278,8 @@ zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res); void zink_rebind_framebuffer(struct zink_context *ctx, struct zink_resource *res); - void -zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, - unsigned dst_offset, unsigned src_offset, unsigned size); +zink_set_null_fs(struct zink_context *ctx); void zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, @@ -509,6 +289,11 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, stru void zink_destroy_buffer_view(struct zink_screen *screen, struct zink_buffer_view *buffer_view); +struct pipe_surface * +zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index); +struct zink_surface * +zink_get_dummy_surface(struct zink_context *ctx, int samples_index); + void debug_describe_zink_buffer_view(char *buf, const struct zink_buffer_view *ptr); diff --git a/src/gallium/drivers/zink/zink_descriptors.c b/src/gallium/drivers/zink/zink_descriptors.c index 9a0025ce25f..790daa30330 100644 --- a/src/gallium/drivers/zink/zink_descriptors.c +++ b/src/gallium/drivers/zink/zink_descriptors.c @@ -1,5 +1,6 @@ /* * Copyright © 2020 Mike Blumenkrantz + * Copyright © 2022 Valve Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and 
associated documentation files (the "Software"), @@ -24,316 +25,17 @@ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> */ -#include "tgsi/tgsi_from_mesa.h" - - - #include "zink_context.h" +#include "zink_compiler.h" #include "zink_descriptors.h" #include "zink_program.h" +#include "zink_render_pass.h" #include "zink_resource.h" #include "zink_screen.h" #define XXH_INLINE_ALL #include "util/xxhash.h" - -struct zink_descriptor_pool { - struct pipe_reference reference; - enum zink_descriptor_type type; - struct hash_table *desc_sets; - struct hash_table *free_desc_sets; - struct util_dynarray alloc_desc_sets; - VkDescriptorPool descpool; - struct zink_descriptor_pool_key key; - unsigned num_resources; - unsigned num_sets_allocated; - simple_mtx_t mtx; -}; - -struct zink_descriptor_set { - struct zink_descriptor_pool *pool; - struct pipe_reference reference; //incremented for batch usage - VkDescriptorSet desc_set; - uint32_t hash; - bool invalid; - bool punted; - bool recycled; - struct zink_descriptor_state_key key; - struct zink_batch_usage *batch_uses; -#ifndef NDEBUG - /* for extra debug asserts */ - unsigned num_resources; -#endif - union { - struct zink_resource_object **res_objs; - struct { - struct zink_descriptor_surface *surfaces; - struct zink_sampler_state **sampler_states; - }; - }; -}; - -union zink_program_descriptor_refs { - struct zink_resource **res; - struct zink_descriptor_surface *dsurf; - struct { - struct zink_descriptor_surface *dsurf; - struct zink_sampler_state **sampler_state; - } sampler; -}; - -struct zink_program_descriptor_data_cached { - struct zink_program_descriptor_data base; - struct zink_descriptor_pool *pool[ZINK_DESCRIPTOR_TYPES]; - struct zink_descriptor_set *last_set[ZINK_DESCRIPTOR_TYPES]; - unsigned num_refs[ZINK_DESCRIPTOR_TYPES]; - union zink_program_descriptor_refs *refs[ZINK_DESCRIPTOR_TYPES]; - unsigned cache_misses[ZINK_DESCRIPTOR_TYPES]; -}; - - -static inline struct zink_program_descriptor_data_cached * 
-pdd_cached(struct zink_program *pg) -{ - return (struct zink_program_descriptor_data_cached*)pg->dd; -} - -static bool -batch_add_desc_set(struct zink_batch *batch, struct zink_descriptor_set *zds) -{ - if (zink_batch_usage_matches(zds->batch_uses, batch->state) || - !batch_ptr_add_usage(batch, batch->state->dd->desc_sets, zds)) - return false; - pipe_reference(NULL, &zds->reference); - zink_batch_usage_set(&zds->batch_uses, batch->state); - return true; -} - -static void -debug_describe_zink_descriptor_pool(char *buf, const struct zink_descriptor_pool *ptr) -{ - sprintf(buf, "zink_descriptor_pool"); -} - -static inline uint32_t -get_sampler_view_hash(const struct zink_sampler_view *sampler_view) -{ - if (!sampler_view) - return 0; - return sampler_view->base.target == PIPE_BUFFER ? - sampler_view->buffer_view->hash : sampler_view->image_view->hash; -} - -static inline uint32_t -get_image_view_hash(const struct zink_image_view *image_view) -{ - if (!image_view || !image_view->base.resource) - return 0; - return image_view->base.resource->target == PIPE_BUFFER ? - image_view->buffer_view->hash : image_view->surface->hash; -} - -uint32_t -zink_get_sampler_view_hash(struct zink_context *ctx, struct zink_sampler_view *sampler_view, bool is_buffer) -{ - return get_sampler_view_hash(sampler_view) ? get_sampler_view_hash(sampler_view) : - (is_buffer ? zink_screen(ctx->base.screen)->null_descriptor_hashes.buffer_view : - zink_screen(ctx->base.screen)->null_descriptor_hashes.image_view); -} - -uint32_t -zink_get_image_view_hash(struct zink_context *ctx, struct zink_image_view *image_view, bool is_buffer) -{ - return get_image_view_hash(image_view) ? get_image_view_hash(image_view) : - (is_buffer ? 
zink_screen(ctx->base.screen)->null_descriptor_hashes.buffer_view : - zink_screen(ctx->base.screen)->null_descriptor_hashes.image_view); -} - -#ifndef NDEBUG -static uint32_t -get_descriptor_surface_hash(struct zink_context *ctx, struct zink_descriptor_surface *dsurf) -{ - return dsurf->is_buffer ? (dsurf->bufferview ? dsurf->bufferview->hash : zink_screen(ctx->base.screen)->null_descriptor_hashes.buffer_view) : - (dsurf->surface ? dsurf->surface->hash : zink_screen(ctx->base.screen)->null_descriptor_hashes.image_view); -} -#endif - -static bool -desc_state_equal(const void *a, const void *b) -{ - const struct zink_descriptor_state_key *a_k = (void*)a; - const struct zink_descriptor_state_key *b_k = (void*)b; - - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - if (a_k->exists[i] != b_k->exists[i]) - return false; - if (a_k->exists[i] && b_k->exists[i] && - a_k->state[i] != b_k->state[i]) - return false; - } - return true; -} - -static uint32_t -desc_state_hash(const void *key) -{ - const struct zink_descriptor_state_key *d_key = (void*)key; - uint32_t hash = 0; - bool first = true; - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - if (d_key->exists[i]) { - if (!first) - hash = XXH32(&d_key->state[i], sizeof(uint32_t), hash); - else - hash = d_key->state[i]; - first = false; - } - } - return hash; -} - -static void -pop_desc_set_ref(struct zink_descriptor_set *zds, struct util_dynarray *refs) -{ - size_t size = sizeof(struct zink_descriptor_reference); - unsigned num_elements = refs->size / size; - for (unsigned i = 0; i < num_elements; i++) { - struct zink_descriptor_reference *ref = util_dynarray_element(refs, struct zink_descriptor_reference, i); - if (&zds->invalid == ref->invalid) { - memcpy(util_dynarray_element(refs, struct zink_descriptor_reference, i), - util_dynarray_pop_ptr(refs, struct zink_descriptor_reference), size); - break; - } - } -} - -static void -descriptor_set_invalidate(struct zink_descriptor_set *zds) -{ - zds->invalid = true; - for 
(unsigned i = 0; i < zds->pool->key.layout->num_descriptors; i++) { - switch (zds->pool->type) { - case ZINK_DESCRIPTOR_TYPE_UBO: - case ZINK_DESCRIPTOR_TYPE_SSBO: - if (zds->res_objs[i]) - pop_desc_set_ref(zds, &zds->res_objs[i]->desc_set_refs.refs); - zds->res_objs[i] = NULL; - break; - case ZINK_DESCRIPTOR_TYPE_IMAGE: - if (zds->surfaces[i].is_buffer) { - if (zds->surfaces[i].bufferview) - pop_desc_set_ref(zds, &zds->surfaces[i].bufferview->desc_set_refs.refs); - zds->surfaces[i].bufferview = NULL; - } else { - if (zds->surfaces[i].surface) - pop_desc_set_ref(zds, &zds->surfaces[i].surface->desc_set_refs.refs); - zds->surfaces[i].surface = NULL; - } - break; - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - if (zds->surfaces[i].is_buffer) { - if (zds->surfaces[i].bufferview) - pop_desc_set_ref(zds, &zds->surfaces[i].bufferview->desc_set_refs.refs); - zds->surfaces[i].bufferview = NULL; - } else { - if (zds->surfaces[i].surface) - pop_desc_set_ref(zds, &zds->surfaces[i].surface->desc_set_refs.refs); - zds->surfaces[i].surface = NULL; - } - if (zds->sampler_states[i]) - pop_desc_set_ref(zds, &zds->sampler_states[i]->desc_set_refs.refs); - zds->sampler_states[i] = NULL; - break; - default: - break; - } - } -} - -#ifndef NDEBUG -static void -descriptor_pool_clear(struct hash_table *ht) -{ - _mesa_hash_table_clear(ht, NULL); -} -#endif - -static void -descriptor_pool_free(struct zink_screen *screen, struct zink_descriptor_pool *pool) -{ - if (!pool) - return; - if (pool->descpool) - VKSCR(DestroyDescriptorPool)(screen->dev, pool->descpool, NULL); - - simple_mtx_lock(&pool->mtx); -#ifndef NDEBUG - if (pool->desc_sets) - descriptor_pool_clear(pool->desc_sets); - if (pool->free_desc_sets) - descriptor_pool_clear(pool->free_desc_sets); -#endif - if (pool->desc_sets) - _mesa_hash_table_destroy(pool->desc_sets, NULL); - if (pool->free_desc_sets) - _mesa_hash_table_destroy(pool->free_desc_sets, NULL); - - simple_mtx_unlock(&pool->mtx); - 
util_dynarray_fini(&pool->alloc_desc_sets); - simple_mtx_destroy(&pool->mtx); - ralloc_free(pool); -} - -static struct zink_descriptor_pool * -descriptor_pool_create(struct zink_screen *screen, enum zink_descriptor_type type, - struct zink_descriptor_layout_key *layout_key, VkDescriptorPoolSize *sizes, unsigned num_type_sizes) -{ - struct zink_descriptor_pool *pool = rzalloc(NULL, struct zink_descriptor_pool); - if (!pool) - return NULL; - pipe_reference_init(&pool->reference, 1); - pool->type = type; - pool->key.layout = layout_key; - pool->key.num_type_sizes = num_type_sizes; - size_t types_size = num_type_sizes * sizeof(VkDescriptorPoolSize); - pool->key.sizes = ralloc_size(pool, types_size); - if (!pool->key.sizes) { - ralloc_free(pool); - return NULL; - } - memcpy(pool->key.sizes, sizes, types_size); - simple_mtx_init(&pool->mtx, mtx_plain); - for (unsigned i = 0; i < layout_key->num_descriptors; i++) { - pool->num_resources += layout_key->bindings[i].descriptorCount; - } - pool->desc_sets = _mesa_hash_table_create(NULL, desc_state_hash, desc_state_equal); - if (!pool->desc_sets) - goto fail; - - pool->free_desc_sets = _mesa_hash_table_create(NULL, desc_state_hash, desc_state_equal); - if (!pool->free_desc_sets) - goto fail; - - util_dynarray_init(&pool->alloc_desc_sets, NULL); - - VkDescriptorPoolCreateInfo dpci = {0}; - dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - dpci.pPoolSizes = sizes; - dpci.poolSizeCount = num_type_sizes; - dpci.flags = 0; - dpci.maxSets = ZINK_DEFAULT_MAX_DESCS; - if (VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &pool->descpool) != VK_SUCCESS) { - debug_printf("vkCreateDescriptorPool failed\n"); - goto fail; - } - - return pool; -fail: - descriptor_pool_free(screen, pool); - return NULL; -} - static VkDescriptorSetLayout descriptor_layout_create(struct zink_screen *screen, enum zink_descriptor_type t, VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings) { @@ -343,16 +45,17 @@ 
descriptor_layout_create(struct zink_screen *screen, enum zink_descriptor_type t dcslci.pNext = NULL; VkDescriptorSetLayoutBindingFlagsCreateInfo fci = {0}; VkDescriptorBindingFlags flags[ZINK_MAX_DESCRIPTORS_PER_TYPE]; - if (screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) { - dcslci.pNext = &fci; - if (t == ZINK_DESCRIPTOR_TYPES) - dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; - fci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; - fci.bindingCount = num_bindings; - fci.pBindingFlags = flags; - for (unsigned i = 0; i < num_bindings; i++) { - flags[i] = 0; - } + dcslci.pNext = &fci; + /* TODO bindless */ + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && t != ZINK_DESCRIPTOR_BINDLESS) + dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + else if (t == ZINK_DESCRIPTOR_TYPE_UNIFORMS) + dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; + fci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; + fci.bindingCount = num_bindings; + fci.pBindingFlags = flags; + for (unsigned i = 0; i < num_bindings; i++) { + flags[i] = 0; } dcslci.bindingCount = num_bindings; dcslci.pBindings = bindings; @@ -367,8 +70,9 @@ descriptor_layout_create(struct zink_screen *screen, enum zink_descriptor_type t return VK_NULL_HANDLE; } } - if (VKSCR(CreateDescriptorSetLayout)(screen->dev, &dcslci, 0, &dsl) != VK_SUCCESS) - debug_printf("vkCreateDescriptorSetLayout failed\n"); + VkResult result = VKSCR(CreateDescriptorSetLayout)(screen->dev, &dcslci, 0, &dsl); + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkCreateDescriptorSetLayout failed (%s)", vk_Result_to_str(result)); return dsl; } @@ -377,8 +81,10 @@ hash_descriptor_layout(const void *key) { uint32_t hash = 0; const struct zink_descriptor_layout_key *k = key; - hash = XXH32(&k->num_descriptors, sizeof(unsigned), hash); - hash = XXH32(k->bindings, k->num_descriptors * sizeof(VkDescriptorSetLayoutBinding), 
hash); + hash = XXH32(&k->num_bindings, sizeof(unsigned), hash); + /* only hash first 3 members: no holes and the rest are always constant */ + for (unsigned i = 0; i < k->num_bindings; i++) + hash = XXH32(&k->bindings[i], offsetof(VkDescriptorSetLayoutBinding, stageFlags), hash); return hash; } @@ -388,113 +94,158 @@ equals_descriptor_layout(const void *a, const void *b) { const struct zink_descriptor_layout_key *a_k = a; const struct zink_descriptor_layout_key *b_k = b; - return a_k->num_descriptors == b_k->num_descriptors && - !memcmp(a_k->bindings, b_k->bindings, a_k->num_descriptors * sizeof(VkDescriptorSetLayoutBinding)); + return a_k->num_bindings == b_k->num_bindings && + (!a_k->num_bindings || !memcmp(a_k->bindings, b_k->bindings, a_k->num_bindings * sizeof(VkDescriptorSetLayoutBinding))); } static struct zink_descriptor_layout * -create_layout(struct zink_context *ctx, enum zink_descriptor_type type, +create_layout(struct zink_screen *screen, enum zink_descriptor_type type, VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings, struct zink_descriptor_layout_key **layout_key) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - VkDescriptorSetLayout dsl = descriptor_layout_create(screen, type, bindings, MAX2(num_bindings, 1)); + VkDescriptorSetLayout dsl = descriptor_layout_create(screen, type, bindings, num_bindings); if (!dsl) return NULL; - struct zink_descriptor_layout_key *k = ralloc(ctx, struct zink_descriptor_layout_key); - k->use_count = 0; - k->num_descriptors = num_bindings; - size_t bindings_size = MAX2(num_bindings, 1) * sizeof(VkDescriptorSetLayoutBinding); - k->bindings = ralloc_size(k, bindings_size); - if (!k->bindings) { - ralloc_free(k); - VKSCR(DestroyDescriptorSetLayout)(screen->dev, dsl, NULL); - return NULL; + size_t bindings_size = num_bindings * sizeof(VkDescriptorSetLayoutBinding); + struct zink_descriptor_layout_key *k = ralloc_size(screen, sizeof(struct zink_descriptor_layout_key) + bindings_size); + 
k->num_bindings = num_bindings; + if (num_bindings) { + k->bindings = (void *)(k + 1); + memcpy(k->bindings, bindings, bindings_size); } - memcpy(k->bindings, bindings, bindings_size); - struct zink_descriptor_layout *layout = rzalloc(ctx, struct zink_descriptor_layout); + struct zink_descriptor_layout *layout = rzalloc(screen, struct zink_descriptor_layout); layout->layout = dsl; *layout_key = k; return layout; } -struct zink_descriptor_layout * -zink_descriptor_util_layout_get(struct zink_context *ctx, enum zink_descriptor_type type, +static struct zink_descriptor_layout * +descriptor_util_layout_get(struct zink_screen *screen, enum zink_descriptor_type type, VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings, struct zink_descriptor_layout_key **layout_key) { uint32_t hash = 0; struct zink_descriptor_layout_key key = { - .num_descriptors = num_bindings, + .num_bindings = num_bindings, .bindings = bindings, }; - VkDescriptorSetLayoutBinding null_binding; - if (!bindings) { - null_binding.binding = 0; - null_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - null_binding.descriptorCount = 1; - null_binding.pImmutableSamplers = NULL; - null_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_COMPUTE_BIT; - key.bindings = &null_binding; - } - - if (type != ZINK_DESCRIPTOR_TYPES) { + /* push descriptor layouts are unique and can't be reused */ + if (type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) { hash = hash_descriptor_layout(&key); - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->desc_set_layouts[type], hash, &key); + simple_mtx_lock(&screen->desc_set_layouts_lock); + struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&screen->desc_set_layouts[type], hash, &key); + simple_mtx_unlock(&screen->desc_set_layouts_lock); if (he) { *layout_key = (void*)he->key; return 
he->data; } } - struct zink_descriptor_layout *layout = create_layout(ctx, type, bindings ? bindings : &null_binding, num_bindings, layout_key); - if (layout && type != ZINK_DESCRIPTOR_TYPES) { - _mesa_hash_table_insert_pre_hashed(&ctx->desc_set_layouts[type], hash, *layout_key, layout); + struct zink_descriptor_layout *layout = create_layout(screen, type, bindings, num_bindings, layout_key); + if (layout && type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) { + simple_mtx_lock(&screen->desc_set_layouts_lock); + _mesa_hash_table_insert_pre_hashed(&screen->desc_set_layouts[type], hash, *layout_key, layout); + simple_mtx_unlock(&screen->desc_set_layouts_lock); } return layout; } + +static uint32_t +hash_descriptor_pool_key(const void *key) +{ + uint32_t hash = 0; + const struct zink_descriptor_pool_key *k = key; + hash = XXH32(&k->layout, sizeof(void*), hash); + for (unsigned i = 0; i < k->num_type_sizes; i++) + hash = XXH32(&k->sizes[i], sizeof(VkDescriptorPoolSize), hash); + + return hash; +} + +static bool +equals_descriptor_pool_key(const void *a, const void *b) +{ + const struct zink_descriptor_pool_key *a_k = a; + const struct zink_descriptor_pool_key *b_k = b; + const unsigned a_num_type_sizes = a_k->num_type_sizes; + const unsigned b_num_type_sizes = b_k->num_type_sizes; + return a_k->layout == b_k->layout && + a_num_type_sizes == b_num_type_sizes && + !memcmp(a_k->sizes, b_k->sizes, b_num_type_sizes * sizeof(VkDescriptorPoolSize)); +} + +static struct zink_descriptor_pool_key * +descriptor_util_pool_key_get(struct zink_screen *screen, enum zink_descriptor_type type, + struct zink_descriptor_layout_key *layout_key, + VkDescriptorPoolSize *sizes, unsigned num_type_sizes) +{ + uint32_t hash = 0; + struct zink_descriptor_pool_key key; + key.num_type_sizes = num_type_sizes; + /* push descriptor pools can't be shared/reused by other types */ + if (type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) { + key.layout = layout_key; + memcpy(key.sizes, sizes, num_type_sizes * 
sizeof(VkDescriptorPoolSize)); + hash = hash_descriptor_pool_key(&key); + simple_mtx_lock(&screen->desc_pool_keys_lock); + struct set_entry *he = _mesa_set_search_pre_hashed(&screen->desc_pool_keys[type], hash, &key); + simple_mtx_unlock(&screen->desc_pool_keys_lock); + if (he) + return (void*)he->key; + } + + struct zink_descriptor_pool_key *pool_key = rzalloc(screen, struct zink_descriptor_pool_key); + pool_key->layout = layout_key; + pool_key->num_type_sizes = num_type_sizes; + assert(pool_key->num_type_sizes); + memcpy(pool_key->sizes, sizes, num_type_sizes * sizeof(VkDescriptorPoolSize)); + if (type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) { + simple_mtx_lock(&screen->desc_pool_keys_lock); + _mesa_set_add_pre_hashed(&screen->desc_pool_keys[type], hash, pool_key); + pool_key->id = screen->desc_pool_keys[type].entries - 1; + simple_mtx_unlock(&screen->desc_pool_keys_lock); + } + return pool_key; +} + static void init_push_binding(VkDescriptorSetLayoutBinding *binding, unsigned i, VkDescriptorType type) { - binding->binding = tgsi_processor_to_shader_stage(i); + binding->binding = i; binding->descriptorType = type; binding->descriptorCount = 1; - binding->stageFlags = zink_shader_stage(i); + binding->stageFlags = mesa_to_vk_shader_stage(i); binding->pImmutableSamplers = NULL; } static VkDescriptorType get_push_types(struct zink_screen *screen, enum zink_descriptor_type *dsl_type) { - *dsl_type = screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY && - screen->info.have_KHR_push_descriptor ? ZINK_DESCRIPTOR_TYPES : ZINK_DESCRIPTOR_TYPE_UBO; - return screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY ? - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + *dsl_type = screen->info.have_KHR_push_descriptor ? 
ZINK_DESCRIPTOR_TYPE_UNIFORMS : ZINK_DESCRIPTOR_TYPE_UBO; + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; } static struct zink_descriptor_layout * create_gfx_layout(struct zink_context *ctx, struct zink_descriptor_layout_key **layout_key, bool fbfetch) { struct zink_screen *screen = zink_screen(ctx->base.screen); - VkDescriptorSetLayoutBinding bindings[PIPE_SHADER_TYPES]; + VkDescriptorSetLayoutBinding bindings[MESA_SHADER_STAGES]; enum zink_descriptor_type dsl_type; VkDescriptorType vktype = get_push_types(screen, &dsl_type); - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) init_push_binding(&bindings[i], i, vktype); if (fbfetch) { - bindings[ZINK_SHADER_COUNT].binding = ZINK_FBFETCH_BINDING; - bindings[ZINK_SHADER_COUNT].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - bindings[ZINK_SHADER_COUNT].descriptorCount = 1; - bindings[ZINK_SHADER_COUNT].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[ZINK_SHADER_COUNT].pImmutableSamplers = NULL; + bindings[ZINK_GFX_SHADER_COUNT].binding = ZINK_FBFETCH_BINDING; + bindings[ZINK_GFX_SHADER_COUNT].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + bindings[ZINK_GFX_SHADER_COUNT].descriptorCount = 1; + bindings[ZINK_GFX_SHADER_COUNT].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[ZINK_GFX_SHADER_COUNT].pImmutableSamplers = NULL; } - return create_layout(ctx, dsl_type, bindings, fbfetch ? ARRAY_SIZE(bindings) : ARRAY_SIZE(bindings) - 1, layout_key); + return create_layout(screen, dsl_type, bindings, fbfetch ? 
ARRAY_SIZE(bindings) : ARRAY_SIZE(bindings) - 1, layout_key); } bool @@ -504,106 +255,42 @@ zink_descriptor_util_push_layouts_get(struct zink_context *ctx, struct zink_desc VkDescriptorSetLayoutBinding compute_binding; enum zink_descriptor_type dsl_type; VkDescriptorType vktype = get_push_types(screen, &dsl_type); - init_push_binding(&compute_binding, PIPE_SHADER_COMPUTE, vktype); + init_push_binding(&compute_binding, MESA_SHADER_COMPUTE, vktype); dsls[0] = create_gfx_layout(ctx, &layout_keys[0], false); - dsls[1] = create_layout(ctx, dsl_type, &compute_binding, 1, &layout_keys[1]); + dsls[1] = create_layout(screen, dsl_type, &compute_binding, 1, &layout_keys[1]); return dsls[0] && dsls[1]; } -void -zink_descriptor_util_init_null_set(struct zink_context *ctx, VkDescriptorSet desc_set) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - VkDescriptorBufferInfo push_info; - VkWriteDescriptorSet push_wd; - push_wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - push_wd.pNext = NULL; - push_wd.dstBinding = 0; - push_wd.dstArrayElement = 0; - push_wd.descriptorCount = 1; - push_wd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - push_wd.dstSet = desc_set; - push_wd.pBufferInfo = &push_info; - push_info.buffer = screen->info.rb2_feats.nullDescriptor ? - VK_NULL_HANDLE : - zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; - push_info.offset = 0; - push_info.range = VK_WHOLE_SIZE; - VKSCR(UpdateDescriptorSets)(screen->dev, 1, &push_wd, 0, NULL); -} - VkImageLayout -zink_descriptor_util_image_layout_eval(const struct zink_resource *res, bool is_compute) -{ - return res->image_bind_count[is_compute] ? VK_IMAGE_LAYOUT_GENERAL : - res->aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ? - //Vulkan-Docs#1490 - //(res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL : - //res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL : - (res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) : - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -} - -static uint32_t -hash_descriptor_pool(const void *key) -{ - uint32_t hash = 0; - const struct zink_descriptor_pool_key *k = key; - hash = XXH32(&k->num_type_sizes, sizeof(unsigned), hash); - hash = XXH32(&k->layout, sizeof(k->layout), hash); - hash = XXH32(k->sizes, k->num_type_sizes * sizeof(VkDescriptorPoolSize), hash); - - return hash; -} - -static bool -equals_descriptor_pool(const void *a, const void *b) -{ - const struct zink_descriptor_pool_key *a_k = a; - const struct zink_descriptor_pool_key *b_k = b; - return a_k->num_type_sizes == b_k->num_type_sizes && - a_k->layout == b_k->layout && - !memcmp(a_k->sizes, b_k->sizes, a_k->num_type_sizes * sizeof(VkDescriptorPoolSize)); -} - -static struct zink_descriptor_pool * -descriptor_pool_get(struct zink_context *ctx, enum zink_descriptor_type type, - struct zink_descriptor_layout_key *layout_key, VkDescriptorPoolSize *sizes, unsigned num_type_sizes) +zink_descriptor_util_image_layout_eval(const struct zink_context *ctx, const struct zink_resource *res, bool is_compute) { - uint32_t hash = 0; - if (type != ZINK_DESCRIPTOR_TYPES) { - struct zink_descriptor_pool_key key = { - .layout = layout_key, - .num_type_sizes = num_type_sizes, - .sizes = sizes, - }; - - hash = hash_descriptor_pool(&key); - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(ctx->dd->descriptor_pools[type], hash, &key); - if (he) - return (void*)he->data; + if (res->bindless[0] || res->bindless[1]) { + /* bindless needs most permissive layout */ + if (res->image_bind_count[0] || res->image_bind_count[1]) + return VK_IMAGE_LAYOUT_GENERAL; + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } - struct zink_descriptor_pool *pool = descriptor_pool_create(zink_screen(ctx->base.screen), type, layout_key, sizes, 
num_type_sizes); - if (type != ZINK_DESCRIPTOR_TYPES) - _mesa_hash_table_insert_pre_hashed(ctx->dd->descriptor_pools[type], hash, &pool->key, pool); - return pool; -} - -static bool -get_invalidated_desc_set(struct zink_descriptor_set *zds) -{ - if (!zds->invalid) - return false; - return p_atomic_read(&zds->reference.count) == 1; + if (res->image_bind_count[is_compute]) + return VK_IMAGE_LAYOUT_GENERAL; + if (!is_compute && res->fb_bind_count && res->sampler_bind_count[0]) { + /* feedback loop */ + if (!(res->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || zink_is_zsbuf_write(ctx)) { + if (zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout) + return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + return VK_IMAGE_LAYOUT_GENERAL; + } + } + if (res->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } bool zink_descriptor_util_alloc_sets(struct zink_screen *screen, VkDescriptorSetLayout dsl, VkDescriptorPool pool, VkDescriptorSet *sets, unsigned num_sets) { VkDescriptorSetAllocateInfo dsai; - VkDescriptorSetLayout *layouts = alloca(sizeof(*layouts) * num_sets); + VkDescriptorSetLayout layouts[100]; + assert(num_sets <= ARRAY_SIZE(layouts)); memset((void *)&dsai, 0, sizeof(dsai)); dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; dsai.pNext = NULL; @@ -613,1142 +300,1559 @@ zink_descriptor_util_alloc_sets(struct zink_screen *screen, VkDescriptorSetLayou layouts[i] = dsl; dsai.pSetLayouts = layouts; - if (VKSCR(AllocateDescriptorSets)(screen->dev, &dsai, sets) != VK_SUCCESS) { - debug_printf("ZINK: %" PRIu64 " failed to allocate descriptor set :/\n", (uint64_t)dsl); + VkResult result = VKSCR(AllocateDescriptorSets)(screen->dev, &dsai, sets); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: %" PRIu64 " failed to allocate descriptor set :/ (%s)", (uint64_t)dsl, vk_Result_to_str(result)); return 
false; } return true; } -unsigned -zink_descriptor_program_num_sizes(struct zink_program *pg, enum zink_descriptor_type type) +static void +init_db_template_entry(struct zink_screen *screen, struct zink_shader *shader, enum zink_descriptor_type type, + unsigned idx, struct zink_descriptor_template *entry, unsigned *entry_idx) +{ + int index = shader->bindings[type][idx].index; + gl_shader_stage stage = clamp_stage(&shader->info); + entry->count = shader->bindings[type][idx].size; + + switch (shader->bindings[type][idx].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + entry->offset = offsetof(struct zink_context, di.db.ubos[stage][index]); + entry->stride = sizeof(VkDescriptorAddressInfoEXT); + entry->db_size = screen->info.db_props.robustUniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + entry->offset = offsetof(struct zink_context, di.textures[stage][index]); + entry->stride = sizeof(VkDescriptorImageInfo); + entry->db_size = screen->info.db_props.combinedImageSamplerDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + entry->offset = offsetof(struct zink_context, di.textures[stage][index]); + entry->stride = sizeof(VkDescriptorImageInfo); + entry->db_size = screen->info.db_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + entry->offset = offsetof(struct zink_context, di.textures[stage][index]); + entry->stride = sizeof(VkDescriptorImageInfo); + entry->db_size = screen->info.db_props.samplerDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + entry->offset = offsetof(struct zink_context, di.db.tbos[stage][index]); + entry->stride = sizeof(VkDescriptorAddressInfoEXT); + entry->db_size = screen->info.db_props.robustUniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + entry->offset = offsetof(struct zink_context, di.db.ssbos[stage][index]); + entry->stride = sizeof(VkDescriptorAddressInfoEXT); + entry->db_size = 
screen->info.db_props.robustStorageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + entry->offset = offsetof(struct zink_context, di.images[stage][index]); + entry->stride = sizeof(VkDescriptorImageInfo); + entry->db_size = screen->info.db_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + entry->offset = offsetof(struct zink_context, di.db.texel_images[stage][index]); + entry->stride = sizeof(VkDescriptorAddressInfoEXT); + entry->db_size = screen->info.db_props.robustStorageTexelBufferDescriptorSize; + break; + default: + unreachable("unknown type"); + } + (*entry_idx)++; +} + +static void +init_template_entry(struct zink_shader *shader, enum zink_descriptor_type type, + unsigned idx, VkDescriptorUpdateTemplateEntry *entry, unsigned *entry_idx) +{ + int index = shader->bindings[type][idx].index; + gl_shader_stage stage = clamp_stage(&shader->info); + entry->dstArrayElement = 0; + entry->dstBinding = shader->bindings[type][idx].binding; + entry->descriptorCount = shader->bindings[type][idx].size; + if (shader->bindings[type][idx].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) + /* filter out DYNAMIC type here since this is just the uniform set */ + entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + else + entry->descriptorType = shader->bindings[type][idx].type; + switch (shader->bindings[type][idx].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + entry->offset = offsetof(struct zink_context, di.t.ubos[stage][index]); + entry->stride = sizeof(VkDescriptorBufferInfo); + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + entry->offset = offsetof(struct zink_context, di.textures[stage][index]); + entry->stride = sizeof(VkDescriptorImageInfo); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + entry->offset = offsetof(struct zink_context, 
di.t.tbos[stage][index]); + entry->stride = sizeof(VkBufferView); + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + entry->offset = offsetof(struct zink_context, di.t.ssbos[stage][index]); + entry->stride = sizeof(VkDescriptorBufferInfo); + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + entry->offset = offsetof(struct zink_context, di.images[stage][index]); + entry->stride = sizeof(VkDescriptorImageInfo); + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + entry->offset = offsetof(struct zink_context, di.t.texel_images[stage][index]); + entry->stride = sizeof(VkBufferView); + break; + default: + unreachable("unknown type"); + } + (*entry_idx)++; +} + +static void +init_program_db(struct zink_screen *screen, struct zink_program *pg, enum zink_descriptor_type type, VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings, VkDescriptorSetLayout dsl) +{ + VkDeviceSize val; + VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, dsl, &val); + pg->dd.db_size[type] = val; + pg->dd.db_offset[type] = rzalloc_array(pg, uint32_t, num_bindings); + for (unsigned i = 0; i < num_bindings; i++) { + VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, dsl, bindings[i].binding, &val); + pg->dd.db_offset[type][i] = val; + } +} + +static uint16_t +descriptor_program_num_sizes(VkDescriptorPoolSize *sizes, enum zink_descriptor_type type) { switch (type) { case ZINK_DESCRIPTOR_TYPE_UBO: - return 1; + return !!sizes[ZDS_INDEX_UBO].descriptorCount; case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - return !!pg->dd->sizes[ZDS_INDEX_COMBINED_SAMPLER].descriptorCount + - !!pg->dd->sizes[ZDS_INDEX_UNIFORM_TEXELS].descriptorCount; + return !!sizes[ZDS_INDEX_COMBINED_SAMPLER].descriptorCount + + !!sizes[ZDS_INDEX_UNIFORM_TEXELS].descriptorCount + + !!sizes[ZDS_INDEX_SAMPLER].descriptorCount; case ZINK_DESCRIPTOR_TYPE_SSBO: - return 1; + return !!sizes[ZDS_INDEX_STORAGE_BUFFER].descriptorCount; case ZINK_DESCRIPTOR_TYPE_IMAGE: - return 
!!pg->dd->sizes[ZDS_INDEX_STORAGE_IMAGE].descriptorCount + - !!pg->dd->sizes[ZDS_INDEX_STORAGE_TEXELS].descriptorCount; + return !!sizes[ZDS_INDEX_STORAGE_IMAGE].descriptorCount + + !!sizes[ZDS_INDEX_STORAGE_TEXELS].descriptorCount; default: break; } unreachable("unknown type"); } -static struct zink_descriptor_set * -allocate_desc_set(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, unsigned descs_used, bool is_compute) +static uint16_t +descriptor_program_num_sizes_compact(VkDescriptorPoolSize *sizes, unsigned desc_set) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - bool push_set = type == ZINK_DESCRIPTOR_TYPES; - struct zink_descriptor_pool *pool = push_set ? ctx->dd->push_pool[is_compute] : pdd_cached(pg)->pool[type]; -#define DESC_BUCKET_FACTOR 10 - unsigned bucket_size = pool->key.layout->num_descriptors ? DESC_BUCKET_FACTOR : 1; - if (pool->key.layout->num_descriptors) { - for (unsigned desc_factor = DESC_BUCKET_FACTOR; desc_factor < descs_used; desc_factor *= DESC_BUCKET_FACTOR) - bucket_size = desc_factor; - } - /* never grow more than this many at a time */ - bucket_size = MIN2(bucket_size, ZINK_DEFAULT_MAX_DESCS); - VkDescriptorSet *desc_set = alloca(sizeof(*desc_set) * bucket_size); - if (!zink_descriptor_util_alloc_sets(screen, push_set ? 
ctx->dd->push_dsl[is_compute]->layout : pg->dsl[type + 1], pool->descpool, desc_set, bucket_size)) - return VK_NULL_HANDLE; - - struct zink_descriptor_set *alloc = ralloc_array(pool, struct zink_descriptor_set, bucket_size); - assert(alloc); - unsigned num_resources = pool->num_resources; - struct zink_resource_object **res_objs = NULL; - void **samplers = NULL; - struct zink_descriptor_surface *surfaces = NULL; - switch (type) { + switch (desc_set) { + case ZINK_DESCRIPTOR_TYPE_UBO: + return !!sizes[ZDS_INDEX_COMP_UBO].descriptorCount + !!sizes[ZDS_INDEX_COMP_STORAGE_BUFFER].descriptorCount; case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - samplers = rzalloc_array(pool, void*, num_resources * bucket_size); - assert(samplers); - FALLTHROUGH; + return !!sizes[ZDS_INDEX_COMP_COMBINED_SAMPLER].descriptorCount + + !!sizes[ZDS_INDEX_COMP_UNIFORM_TEXELS].descriptorCount + + !!sizes[ZDS_INDEX_COMP_SAMPLER].descriptorCount + + !!sizes[ZDS_INDEX_COMP_STORAGE_IMAGE].descriptorCount + + !!sizes[ZDS_INDEX_COMP_STORAGE_TEXELS].descriptorCount; + case ZINK_DESCRIPTOR_TYPE_SSBO: case ZINK_DESCRIPTOR_TYPE_IMAGE: - surfaces = rzalloc_array(pool, struct zink_descriptor_surface, num_resources * bucket_size); - assert(surfaces); - break; - default: - res_objs = rzalloc_array(pool, struct zink_resource_object*, num_resources * bucket_size); - assert(res_objs); - break; - } - for (unsigned i = 0; i < bucket_size; i ++) { - struct zink_descriptor_set *zds = &alloc[i]; - pipe_reference_init(&zds->reference, 1); - zds->pool = pool; - zds->hash = 0; - zds->batch_uses = NULL; - zds->invalid = true; - zds->punted = zds->recycled = false; -#ifndef NDEBUG - zds->num_resources = num_resources; -#endif - switch (type) { - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - zds->sampler_states = (struct zink_sampler_state**)&samplers[i * pool->key.layout->num_descriptors]; - FALLTHROUGH; - case ZINK_DESCRIPTOR_TYPE_IMAGE: - zds->surfaces = &surfaces[i * pool->key.layout->num_descriptors]; - break; - default: - 
zds->res_objs = (struct zink_resource_object**)&res_objs[i * pool->key.layout->num_descriptors]; - break; - } - zds->desc_set = desc_set[i]; - if (i > 0) - util_dynarray_append(&pool->alloc_desc_sets, struct zink_descriptor_set *, zds); + default: break; } - pool->num_sets_allocated += bucket_size; - return alloc; + unreachable("unknown type"); } -static void -populate_zds_key(struct zink_context *ctx, enum zink_descriptor_type type, bool is_compute, - struct zink_descriptor_state_key *key, uint32_t push_usage) +/* create all the descriptor objects for a program: + * called during program creation + * may be called from threads (no unsafe ctx use!) + */ +bool +zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg) { - if (is_compute) { - for (unsigned i = 1; i < ZINK_SHADER_COUNT; i++) - key->exists[i] = false; - key->exists[0] = true; - if (type == ZINK_DESCRIPTOR_TYPES) - key->state[0] = ctx->dd->push_state[is_compute]; - else - key->state[0] = ctx->dd->descriptor_states[is_compute].state[type]; - } else if (type == ZINK_DESCRIPTOR_TYPES) { - /* gfx only */ - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - if (push_usage & BITFIELD_BIT(i)) { - key->exists[i] = true; - key->state[i] = ctx->dd->gfx_push_state[i]; - } else - key->exists[i] = false; + struct zink_screen *screen = zink_screen(ctx->base.screen); + VkDescriptorSetLayoutBinding bindings[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES * 64]; + VkDescriptorUpdateTemplateEntry entries[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES * 64]; + unsigned num_bindings[ZINK_DESCRIPTOR_BASE_TYPES] = {0}; + uint8_t has_bindings = 0; + unsigned push_count = 0; + uint16_t num_type_sizes[ZINK_DESCRIPTOR_BASE_TYPES]; + VkDescriptorPoolSize sizes[ZDS_INDEX_MAX] = {0}; //zink_descriptor_size_index + + struct zink_shader **stages; + if (pg->is_compute) + stages = &((struct zink_compute_program*)pg)->shader; + else + stages = ((struct zink_gfx_program*)pg)->shaders; + + if (!pg->is_compute && 
stages[MESA_SHADER_FRAGMENT]->info.fs.uses_fbfetch_output) { + push_count = 1; + pg->dd.fbfetch = true; + } + + unsigned entry_idx[ZINK_DESCRIPTOR_BASE_TYPES] = {0}; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + unsigned desc_set_size[ZINK_DESCRIPTOR_BASE_TYPES]; + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) + desc_set_size[i] = zink_program_num_bindings_typed(pg, i); + if (screen->compact_descriptors) { + desc_set_size[ZINK_DESCRIPTOR_TYPE_UBO] += desc_set_size[ZINK_DESCRIPTOR_TYPE_SSBO]; + desc_set_size[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] += desc_set_size[ZINK_DESCRIPTOR_TYPE_IMAGE]; + desc_set_size[ZINK_DESCRIPTOR_TYPE_SSBO] = 0; + desc_set_size[ZINK_DESCRIPTOR_TYPE_IMAGE] = 0; } - } else { - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - key->exists[i] = ctx->dd->gfx_descriptor_states[i].valid[type]; - key->state[i] = ctx->dd->gfx_descriptor_states[i].state[type]; + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + if (desc_set_size[i]) + pg->dd.db_template[i] = rzalloc_array(pg, struct zink_descriptor_template, desc_set_size[i]); } } -} -static void -punt_invalid_set(struct zink_descriptor_set *zds, struct hash_entry *he) -{ - /* this is no longer usable, so we punt it for now until it gets recycled */ - assert(!zds->recycled); - if (!he) - he = _mesa_hash_table_search_pre_hashed(zds->pool->desc_sets, zds->hash, &zds->key); - _mesa_hash_table_remove(zds->pool->desc_sets, he); - zds->punted = true; -} + unsigned num_shaders = pg->is_compute ? 
1 : ZINK_GFX_SHADER_COUNT; + bool have_push = screen->info.have_KHR_push_descriptor; + /* iterate over the shaders and generate binding/layout/template structs */ + for (int i = 0; i < num_shaders; i++) { + struct zink_shader *shader = stages[i]; + if (!shader) + continue; -static struct zink_descriptor_set * -zink_descriptor_set_get(struct zink_context *ctx, - enum zink_descriptor_type type, - bool is_compute, - bool *cache_hit) -{ - *cache_hit = false; - struct zink_descriptor_set *zds; - struct zink_program *pg = is_compute ? (struct zink_program *)ctx->curr_compute : (struct zink_program *)ctx->curr_program; - struct zink_batch *batch = &ctx->batch; - bool push_set = type == ZINK_DESCRIPTOR_TYPES; - struct zink_descriptor_pool *pool = push_set ? ctx->dd->push_pool[is_compute] : pdd_cached(pg)->pool[type]; - unsigned descs_used = 1; - assert(type <= ZINK_DESCRIPTOR_TYPES); - - assert(pool->key.layout->num_descriptors); - uint32_t hash = push_set ? ctx->dd->push_state[is_compute] : - ctx->dd->descriptor_states[is_compute].state[type]; - - struct zink_descriptor_set *last_set = push_set ? 
ctx->dd->last_set[is_compute] : pdd_cached(pg)->last_set[type]; - /* if the current state hasn't changed since the last time it was used, - * it's impossible for this set to not be valid, which means that an - * early return here can be done safely and with no locking - */ - if (last_set && ((push_set && !ctx->dd->changed[is_compute][ZINK_DESCRIPTOR_TYPES]) || - (!push_set && !ctx->dd->changed[is_compute][type]))) { - *cache_hit = true; - return last_set; - } - - struct zink_descriptor_state_key key; - populate_zds_key(ctx, type, is_compute, &key, pg->dd->push_usage); - - simple_mtx_lock(&pool->mtx); - if (last_set && last_set->hash == hash && desc_state_equal(&last_set->key, &key)) { - zds = last_set; - *cache_hit = !zds->invalid; - if (zds->recycled) { - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->free_desc_sets, hash, &key); - if (he) - _mesa_hash_table_remove(pool->free_desc_sets, he); - zds->recycled = false; + gl_shader_stage stage = clamp_stage(&shader->info); + VkShaderStageFlagBits stage_flags = mesa_to_vk_shader_stage(stage); + /* uniform ubos handled in push */ + if (shader->has_uniforms) { + pg->dd.push_usage |= BITFIELD64_BIT(stage); + push_count++; } - if (zds->invalid) { - if (zink_batch_usage_exists(zds->batch_uses)) - punt_invalid_set(zds, NULL); - else - /* this set is guaranteed to be in pool->alloc_desc_sets */ - goto skip_hash_tables; - zds = NULL; + for (int j = 0; j < ZINK_DESCRIPTOR_BASE_TYPES; j++) { + unsigned desc_type = screen->desc_set_id[j] - 1; + for (int k = 0; k < shader->num_bindings[j]; k++) { + assert(num_bindings[desc_type] < ARRAY_SIZE(bindings[desc_type])); + VkDescriptorSetLayoutBinding *binding = &bindings[desc_type][num_bindings[desc_type]]; + binding->binding = shader->bindings[j][k].binding; + binding->descriptorType = shader->bindings[j][k].type; + binding->descriptorCount = shader->bindings[j][k].size; + binding->stageFlags = stage_flags; + binding->pImmutableSamplers = NULL; + + unsigned idx = 
screen->compact_descriptors ? zink_vktype_to_size_idx_comp(shader->bindings[j][k].type) : + zink_vktype_to_size_idx(shader->bindings[j][k].type); + sizes[idx].descriptorCount += shader->bindings[j][k].size; + sizes[idx].type = shader->bindings[j][k].type; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + init_db_template_entry(screen, shader, j, k, &pg->dd.db_template[desc_type][entry_idx[desc_type]], &entry_idx[desc_type]); + else + init_template_entry(shader, j, k, &entries[desc_type][entry_idx[desc_type]], &entry_idx[desc_type]); + num_bindings[desc_type]++; + has_bindings |= BITFIELD_BIT(desc_type); + } + num_type_sizes[desc_type] = screen->compact_descriptors ? + descriptor_program_num_sizes_compact(sizes, desc_type) : + descriptor_program_num_sizes(sizes, j); } - if (zds) - goto out; - } - - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->desc_sets, hash, &key); - bool recycled = false, punted = false; - if (he) { - zds = (void*)he->data; - if (zds->invalid && zink_batch_usage_exists(zds->batch_uses)) { - punt_invalid_set(zds, he); - zds = NULL; - punted = true; - } + pg->dd.bindless |= shader->bindless; + } + pg->dd.binding_usage = has_bindings; + if (!has_bindings && !push_count && !pg->dd.bindless) { + pg->layout = zink_pipeline_layout_create(screen, pg->dsl, pg->num_dsl, pg->is_compute, 0); + if (pg->layout) + pg->compat_id = _mesa_hash_data(pg->dsl, pg->num_dsl * sizeof(pg->dsl[0])); + return !!pg->layout; } - if (!he) { - he = _mesa_hash_table_search_pre_hashed(pool->free_desc_sets, hash, &key); - recycled = true; - } - if (he && !punted) { - zds = (void*)he->data; - *cache_hit = !zds->invalid; - if (recycled) { - /* need to migrate this entry back to the in-use hash */ - _mesa_hash_table_remove(pool->free_desc_sets, he); - goto out; + + pg->dsl[pg->num_dsl++] = push_count ? 
ctx->dd.push_dsl[pg->is_compute]->layout : ctx->dd.dummy_dsl->layout; + /* iterate over the found descriptor types and create layouts / pool keys */ + if (has_bindings) { + for (unsigned i = 0; i < ARRAY_SIZE(sizes); i++) + sizes[i].descriptorCount *= MAX_LAZY_DESCRIPTORS; + u_foreach_bit(desc_type, has_bindings) { + /* descriptor sets must be bound contiguously, so add null sets for any that are "missing" */ + for (unsigned i = 0; i < desc_type; i++) { + /* push set is always 0 */ + if (!pg->dsl[i + 1]) { + /* inject a null dsl */ + pg->dsl[pg->num_dsl++] = ctx->dd.dummy_dsl->layout; + pg->dd.binding_usage |= BITFIELD_BIT(i); + } + } + struct zink_descriptor_layout_key *key; + pg->dd.layouts[pg->num_dsl] = descriptor_util_layout_get(screen, desc_type, bindings[desc_type], num_bindings[desc_type], &key); + unsigned idx = screen->compact_descriptors ? zink_descriptor_type_to_size_idx_comp(desc_type) : + zink_descriptor_type_to_size_idx(desc_type); + /* some sets can have multiple descriptor types: ensure the size arrays for these types are contiguous for creating the pool key */ + VkDescriptorPoolSize *sz = &sizes[idx]; + VkDescriptorPoolSize sz2[5]; + if (screen->compact_descriptors || (pg->is_compute && stages[0]->info.stage == MESA_SHADER_KERNEL)) { + unsigned found = 0; + while (found < num_type_sizes[desc_type]) { + if (sz->descriptorCount) { + memcpy(&sz2[found], sz, sizeof(VkDescriptorPoolSize)); + found++; + } + sz++; + } + sz = sz2; + } else { + if (!sz->descriptorCount) + sz++; + } + pg->dd.pool_key[desc_type] = descriptor_util_pool_key_get(screen, desc_type, key, sz, num_type_sizes[desc_type]); + pg->dd.pool_key[desc_type]->use_count++; + pg->dsl[pg->num_dsl] = pg->dd.layouts[pg->num_dsl]->layout; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + init_program_db(screen, pg, desc_type, bindings[desc_type], num_bindings[desc_type], pg->dsl[pg->num_dsl]); + pg->num_dsl++; } - goto quick_out; - } -skip_hash_tables: - if 
(util_dynarray_num_elements(&pool->alloc_desc_sets, struct zink_descriptor_set *)) { - /* grab one off the allocated array */ - zds = util_dynarray_pop(&pool->alloc_desc_sets, struct zink_descriptor_set *); - goto out; - } - - if (_mesa_hash_table_num_entries(pool->free_desc_sets)) { - /* try for an invalidated set first */ - unsigned count = 0; - hash_table_foreach(pool->free_desc_sets, he) { - struct zink_descriptor_set *tmp = he->data; - if ((count++ >= 100 && tmp->reference.count == 1) || get_invalidated_desc_set(he->data)) { - zds = tmp; - assert(p_atomic_read(&zds->reference.count) == 1); - descriptor_set_invalidate(zds); - _mesa_hash_table_remove(pool->free_desc_sets, he); - goto out; + } + /* TODO: make this dynamic so that bindless set id can be 0 if no other descriptors are used? */ + if (pg->dd.bindless) { + unsigned desc_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]; + pg->num_dsl = desc_set + 1; + pg->dsl[desc_set] = screen->bindless_layout; + /* separate handling for null set injection when only bindless descriptors are used */ + for (unsigned i = 0; i < desc_set; i++) { + if (!pg->dsl[i]) { + /* inject a null dsl */ + pg->dsl[i] = ctx->dd.dummy_dsl->layout; + if (i != screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UNIFORMS]) + pg->dd.binding_usage |= BITFIELD_BIT(i); } } + /* all lower id sets are guaranteed to be used */ + pg->dd.binding_usage |= BITFIELD_MASK(ZINK_DESCRIPTOR_BASE_TYPES); } - assert(pool->num_sets_allocated < ZINK_DEFAULT_MAX_DESCS); - - zds = allocate_desc_set(ctx, pg, type, descs_used, is_compute); -out: - if (unlikely(pool->num_sets_allocated >= ZINK_DEFAULT_DESC_CLAMP && - _mesa_hash_table_num_entries(pool->free_desc_sets) < ZINK_DEFAULT_MAX_DESCS - ZINK_DEFAULT_DESC_CLAMP)) - ctx->oom_flush = ctx->oom_stall = true; - zds->hash = hash; - populate_zds_key(ctx, type, is_compute, &zds->key, pg->dd->push_usage); - zds->recycled = false; - _mesa_hash_table_insert_pre_hashed(pool->desc_sets, hash, &zds->key, zds); -quick_out: - 
zds->punted = zds->invalid = false; - batch_add_desc_set(batch, zds); - if (push_set) - ctx->dd->last_set[is_compute] = zds; - else - pdd_cached(pg)->last_set[type] = zds; - simple_mtx_unlock(&pool->mtx); + pg->layout = zink_pipeline_layout_create(screen, pg->dsl, pg->num_dsl, pg->is_compute, 0); + if (!pg->layout) + return false; + pg->compat_id = _mesa_hash_data(pg->dsl, pg->num_dsl * sizeof(pg->dsl[0])); + + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + return true; - return zds; + VkDescriptorUpdateTemplateCreateInfo template[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES] = {0}; + /* type of template */ + VkDescriptorUpdateTemplateType types[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES] = {VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET}; + if (have_push) + types[0] = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR; + + /* number of descriptors in template */ + unsigned wd_count[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; + if (push_count) + wd_count[0] = pg->is_compute ? 1 : (ZINK_GFX_SHADER_COUNT + !!ctx->dd.has_fbfetch); + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) + wd_count[i + 1] = pg->dd.pool_key[i] ? pg->dd.pool_key[i]->layout->num_bindings : 0; + + VkDescriptorUpdateTemplateEntry *push_entries[2] = { + ctx->dd.push_entries, + &ctx->dd.compute_push_entry, + }; + for (unsigned i = 0; i < pg->num_dsl; i++) { + bool is_push = i == 0; + /* no need for empty templates */ + if (pg->dsl[i] == ctx->dd.dummy_dsl->layout || + pg->dsl[i] == screen->bindless_layout || + (!is_push && pg->dd.templates[i])) + continue; + template[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; + assert(wd_count[i]); + template[i].descriptorUpdateEntryCount = wd_count[i]; + if (is_push) + template[i].pDescriptorUpdateEntries = push_entries[pg->is_compute]; + else + template[i].pDescriptorUpdateEntries = entries[i - 1]; + template[i].templateType = types[i]; + template[i].descriptorSetLayout = pg->dsl[i]; + template[i].pipelineBindPoint = pg->is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; + template[i].pipelineLayout = pg->layout; + template[i].set = i; + VkDescriptorUpdateTemplate t; + if (VKSCR(CreateDescriptorUpdateTemplate)(screen->dev, &template[i], NULL, &t) != VK_SUCCESS) + return false; + pg->dd.templates[i] = t; + } + return true; } void -zink_descriptor_set_recycle(struct zink_descriptor_set *zds) +zink_descriptor_shader_get_binding_offsets(const struct zink_shader *shader, unsigned *offsets) { - struct zink_descriptor_pool *pool = zds->pool; - /* if desc set is still in use by a batch, don't recache */ - uint32_t refcount = p_atomic_read(&zds->reference.count); - if (refcount != 1) - return; - /* this is a null set */ - if (!pool->key.layout->num_descriptors) - return; - simple_mtx_lock(&pool->mtx); - if (zds->punted) - zds->invalid = true; - else { - /* if we've previously punted this set, then it won't have a hash or be in either of the tables */ - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->desc_sets, zds->hash, &zds->key); - if (!he) { - /* desc sets can be used multiple times in the same batch */ - simple_mtx_unlock(&pool->mtx); - return; + offsets[ZINK_DESCRIPTOR_TYPE_UBO] = 0; + offsets[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = (shader->num_bindings[ZINK_DESCRIPTOR_TYPE_UBO] ? + shader->bindings[ZINK_DESCRIPTOR_TYPE_UBO][shader->num_bindings[ZINK_DESCRIPTOR_TYPE_UBO] - 1].binding + 1 : + 1); + offsets[ZINK_DESCRIPTOR_TYPE_SSBO] = offsets[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] + (shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] ? + shader->bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] - 1].binding + 1 : + 1); + offsets[ZINK_DESCRIPTOR_TYPE_IMAGE] = offsets[ZINK_DESCRIPTOR_TYPE_SSBO] + (shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SSBO] ? 
+ shader->bindings[ZINK_DESCRIPTOR_TYPE_SSBO][shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SSBO] - 1].binding + 1 : + 1); +} + +void +zink_descriptor_shader_init(struct zink_screen *screen, struct zink_shader *shader) +{ + VkDescriptorSetLayoutBinding bindings[ZINK_DESCRIPTOR_BASE_TYPES * ZINK_MAX_DESCRIPTORS_PER_TYPE]; + unsigned num_bindings = 0; + VkShaderStageFlagBits stage_flags = mesa_to_vk_shader_stage(clamp_stage(&shader->info)); + + unsigned desc_set_size = shader->has_uniforms; + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) + desc_set_size += shader->num_bindings[i]; + if (desc_set_size) + shader->precompile.db_template = rzalloc_array(shader, struct zink_descriptor_template, desc_set_size); + + if (shader->has_uniforms) { + VkDescriptorSetLayoutBinding *binding = &bindings[num_bindings]; + binding->binding = 0; + binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + binding->descriptorCount = 1; + binding->stageFlags = stage_flags; + binding->pImmutableSamplers = NULL; + struct zink_descriptor_template *entry = &shader->precompile.db_template[num_bindings]; + entry->count = 1; + entry->offset = offsetof(struct zink_context, di.db.ubos[clamp_stage(&shader->info)][0]); + entry->stride = sizeof(VkDescriptorAddressInfoEXT); + entry->db_size = screen->info.db_props.robustUniformBufferDescriptorSize; + num_bindings++; + } + /* sync with zink_shader_compile_separate() */ + unsigned offsets[4]; + zink_descriptor_shader_get_binding_offsets(shader, offsets); + for (int j = 0; j < ZINK_DESCRIPTOR_BASE_TYPES; j++) { + for (int k = 0; k < shader->num_bindings[j]; k++) { + VkDescriptorSetLayoutBinding *binding = &bindings[num_bindings]; + if (j == ZINK_DESCRIPTOR_TYPE_UBO) + binding->binding = 1; + else + binding->binding = shader->bindings[j][k].binding + offsets[j]; + binding->descriptorType = shader->bindings[j][k].type; + binding->descriptorCount = shader->bindings[j][k].size; + binding->stageFlags = stage_flags; + binding->pImmutableSamplers = 
NULL; + + unsigned temp = 0; + init_db_template_entry(screen, shader, j, k, &shader->precompile.db_template[num_bindings], &temp); + num_bindings++; } - _mesa_hash_table_remove(pool->desc_sets, he); } - - if (zds->invalid) { - descriptor_set_invalidate(zds); - util_dynarray_append(&pool->alloc_desc_sets, struct zink_descriptor_set *, zds); - } else { - zds->recycled = true; - _mesa_hash_table_insert_pre_hashed(pool->free_desc_sets, zds->hash, &zds->key, zds); + if (num_bindings) { + shader->precompile.dsl = descriptor_layout_create(screen, 0, bindings, num_bindings); + shader->precompile.bindings = mem_dup(bindings, num_bindings * sizeof(VkDescriptorSetLayoutBinding)); + shader->precompile.num_bindings = num_bindings; + VkDeviceSize val; + VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, shader->precompile.dsl, &val); + shader->precompile.db_size = val; + shader->precompile.db_offset = rzalloc_array(shader, uint32_t, num_bindings); + for (unsigned i = 0; i < num_bindings; i++) { + VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, shader->precompile.dsl, bindings[i].binding, &val); + shader->precompile.db_offset[i] = val; + } + } + if (screen->info.have_EXT_shader_object) + return; + VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_ALL_TYPES] = {0}; + unsigned num_dsl = num_bindings ? 2 : 0; + if (shader->bindless) + num_dsl = screen->compact_descriptors ? 
ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES; + if (num_bindings || shader->bindless) { + dsl[shader->info.stage == MESA_SHADER_FRAGMENT] = shader->precompile.dsl; + if (shader->bindless) + dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout; } - simple_mtx_unlock(&pool->mtx); + shader->precompile.layout = zink_pipeline_layout_create(screen, dsl, num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); } - -static void -desc_set_ref_add(struct zink_descriptor_set *zds, struct zink_descriptor_refs *refs, void **ref_ptr, void *ptr) +void +zink_descriptor_shader_deinit(struct zink_screen *screen, struct zink_shader *shader) { - struct zink_descriptor_reference ref = {ref_ptr, &zds->invalid}; - *ref_ptr = ptr; - if (ptr) - util_dynarray_append(&refs->refs, struct zink_descriptor_reference, ref); + if (shader->precompile.dsl) + VKSCR(DestroyDescriptorSetLayout)(screen->dev, shader->precompile.dsl, NULL); + if (shader->precompile.layout) + VKSCR(DestroyPipelineLayout)(screen->dev, shader->precompile.layout, NULL); } -static void -zink_descriptor_surface_desc_set_add(struct zink_descriptor_surface *dsurf, struct zink_descriptor_set *zds, unsigned idx) +/* called during program destroy */ +void +zink_descriptor_program_deinit(struct zink_screen *screen, struct zink_program *pg) { - assert(idx < zds->num_resources); - zds->surfaces[idx].is_buffer = dsurf->is_buffer; - if (dsurf->is_buffer) - desc_set_ref_add(zds, &dsurf->bufferview->desc_set_refs, (void**)&zds->surfaces[idx].bufferview, dsurf->bufferview); - else - desc_set_ref_add(zds, &dsurf->surface->desc_set_refs, (void**)&zds->surfaces[idx].surface, dsurf->surface); + for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + if (pg->dd.pool_key[i]) { + pg->dd.pool_key[i]->use_count--; + pg->dd.pool_key[i] = NULL; + } + } + for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_NON_BINDLESS_TYPES; i++) { + if 
(zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY && pg->dd.templates[i]) { + VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, pg->dd.templates[i], NULL); + pg->dd.templates[i] = VK_NULL_HANDLE; + } + } } static void -zink_image_view_desc_set_add(struct zink_image_view *image_view, struct zink_descriptor_set *zds, unsigned idx, bool is_buffer) +pool_destroy(struct zink_screen *screen, struct zink_descriptor_pool *pool) { - assert(idx < zds->num_resources); - if (is_buffer) - desc_set_ref_add(zds, &image_view->buffer_view->desc_set_refs, (void**)&zds->surfaces[idx].bufferview, image_view->buffer_view); - else - desc_set_ref_add(zds, &image_view->surface->desc_set_refs, (void**)&zds->surfaces[idx].surface, image_view->surface); + VKSCR(DestroyDescriptorPool)(screen->dev, pool->pool, NULL); + FREE(pool); } static void -zink_sampler_state_desc_set_add(struct zink_sampler_state *sampler_state, struct zink_descriptor_set *zds, unsigned idx) +multi_pool_destroy(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool) { - assert(idx < zds->num_resources); - if (sampler_state) - desc_set_ref_add(zds, &sampler_state->desc_set_refs, (void**)&zds->sampler_states[idx], sampler_state); - else - zds->sampler_states[idx] = NULL; + if (mpool->pool) + pool_destroy(screen, mpool->pool); + FREE(mpool); } -static void -zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx) +static bool +clear_multi_pool_overflow(struct zink_screen *screen, struct util_dynarray *overflowed_pools) { - assert(idx < zds->num_resources); - desc_set_ref_add(zds, res ? &res->obj->desc_set_refs : NULL, (void**)&zds->res_objs[idx], res ? 
res->obj : NULL); + bool found = false; + while (util_dynarray_num_elements(overflowed_pools, struct zink_descriptor_pool*)) { + struct zink_descriptor_pool *pool = util_dynarray_pop(overflowed_pools, struct zink_descriptor_pool*); + pool_destroy(screen, pool); + found = true; + } + return found; } -void -zink_descriptor_set_refs_clear(struct zink_descriptor_refs *refs, void *ptr) +static VkDescriptorPool +create_pool(struct zink_screen *screen, unsigned num_type_sizes, const VkDescriptorPoolSize *sizes, unsigned flags) { - util_dynarray_foreach(&refs->refs, struct zink_descriptor_reference, ref) { - if (*ref->ref == ptr) { - *ref->invalid = true; - *ref->ref = NULL; + VkDescriptorPool pool; + VkDescriptorPoolCreateInfo dpci = {0}; + dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dpci.pPoolSizes = sizes; + dpci.poolSizeCount = num_type_sizes; + dpci.flags = flags; + dpci.maxSets = MAX_LAZY_DESCRIPTORS; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &pool), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateDescriptorPool failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; } - } - util_dynarray_fini(&refs->refs); + ); + return pool; } -static inline void -zink_descriptor_pool_reference(struct zink_screen *screen, - struct zink_descriptor_pool **dst, - struct zink_descriptor_pool *src) -{ - struct zink_descriptor_pool *old_dst = dst ? *dst : NULL; +static struct zink_descriptor_pool * +get_descriptor_pool(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_state *bs, bool is_compute); - if (pipe_reference_described(old_dst ? 
&old_dst->reference : NULL, &src->reference, - (debug_reference_descriptor)debug_describe_zink_descriptor_pool)) - descriptor_pool_free(screen, old_dst); - if (dst) *dst = src; +/* set a multi-pool to its zink_descriptor_pool_key::id-indexed array element on a given batch state */ +static bool +set_pool(struct zink_batch_state *bs, struct zink_program *pg, struct zink_descriptor_pool_multi *mpool, enum zink_descriptor_type type) +{ + /* push descriptors should never reach this */ + assert(type != ZINK_DESCRIPTOR_TYPE_UNIFORMS); + assert(mpool); + const struct zink_descriptor_pool_key *pool_key = pg->dd.pool_key[type]; + size_t size = bs->dd.pools[type].capacity; + /* ensure the pool array is big enough to have an element for this key */ + if (!util_dynarray_resize(&bs->dd.pools[type], struct zink_descriptor_pool_multi*, pool_key->id + 1)) + return false; + if (size != bs->dd.pools[type].capacity) { + /* when resizing, always zero the new data to avoid garbage */ + uint8_t *data = bs->dd.pools[type].data; + memset(data + size, 0, bs->dd.pools[type].capacity - size); + } + /* dynarray can't track sparse array sizing, so the array size must be manually tracked */ + bs->dd.pool_size[type] = MAX2(bs->dd.pool_size[type], pool_key->id + 1); + struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[type], struct zink_descriptor_pool_multi*, pool_key->id); + *mppool = mpool; + return true; } -static void -create_descriptor_ref_template(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type) +static struct zink_descriptor_pool * +alloc_new_pool(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool) { - struct zink_shader **stages; - if (pg->is_compute) - stages = &((struct zink_compute_program*)pg)->shader; - else - stages = ((struct zink_gfx_program*)pg)->shaders; - unsigned num_shaders = pg->is_compute ? 
1 : ZINK_SHADER_COUNT; - - for (int i = 0; i < num_shaders; i++) { - struct zink_shader *shader = stages[i]; - if (!shader) - continue; - - for (int j = 0; j < shader->num_bindings[type]; j++) { - int index = shader->bindings[type][j].index; - if (type == ZINK_DESCRIPTOR_TYPE_UBO && !index) - continue; - pdd_cached(pg)->num_refs[type] += shader->bindings[type][j].size; - } + struct zink_descriptor_pool *pool = CALLOC_STRUCT(zink_descriptor_pool); + if (!pool) + return NULL; + const unsigned num_type_sizes = mpool->pool_key->sizes[1].descriptorCount ? 2 : 1; + pool->pool = create_pool(screen, num_type_sizes, mpool->pool_key->sizes, 0); + if (!pool->pool) { + FREE(pool); + return NULL; } + return pool; +} - pdd_cached(pg)->refs[type] = ralloc_array(pg->dd, union zink_program_descriptor_refs, pdd_cached(pg)->num_refs[type]); - if (!pdd_cached(pg)->refs[type]) - return; - - unsigned ref_idx = 0; - for (int i = 0; i < num_shaders; i++) { - struct zink_shader *shader = stages[i]; - if (!shader) - continue; - - enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage); - for (int j = 0; j < shader->num_bindings[type]; j++) { - int index = shader->bindings[type][j].index; - for (unsigned k = 0; k < shader->bindings[type][j].size; k++) { - switch (type) { - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - pdd_cached(pg)->refs[type][ref_idx].sampler.sampler_state = (struct zink_sampler_state**)&ctx->sampler_states[stage][index + k]; - pdd_cached(pg)->refs[type][ref_idx].sampler.dsurf = &ctx->di.sampler_surfaces[stage][index + k]; - break; - case ZINK_DESCRIPTOR_TYPE_IMAGE: - pdd_cached(pg)->refs[type][ref_idx].dsurf = &ctx->di.image_surfaces[stage][index + k]; - break; - case ZINK_DESCRIPTOR_TYPE_UBO: - if (!index) - continue; - FALLTHROUGH; - default: - pdd_cached(pg)->refs[type][ref_idx].res = &ctx->di.descriptor_res[type][stage][index + k]; - break; - } - assert(ref_idx < pdd_cached(pg)->num_refs[type]); - ref_idx++; +/* strictly for finding a usable pool 
in oom scenarios */ +static void +find_pool(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_descriptor_pool_multi *mpool, bool both) +{ + bool found = false; + /* worst case: iterate all the pools for the batch until something can be recycled */ + for (unsigned type = 0; type < ZINK_DESCRIPTOR_BASE_TYPES; type++) { + for (unsigned i = 0; i < bs->dd.pool_size[type]; i++) { + struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[type], struct zink_descriptor_pool_multi *, i); + if (mppool && *mppool && *mppool != mpool) { + unsigned idx[] = {!(*mppool)->overflow_idx, (*mppool)->overflow_idx}; + for (unsigned j = 0; j < 1 + !!both; j++) + found |= clear_multi_pool_overflow(screen, &(*mppool)->overflowed_pools[idx[j]]); } } } + if (found) + mpool->pool = alloc_new_pool(screen, mpool); } -bool -zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg) +static struct zink_descriptor_pool * +check_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool_multi *mpool, struct zink_program *pg, + enum zink_descriptor_type type, struct zink_batch_state *bs, bool is_compute) { struct zink_screen *screen = zink_screen(ctx->base.screen); - - pg->dd = (void*)rzalloc(pg, struct zink_program_descriptor_data_cached); - if (!pg->dd) - return false; - - if (!zink_descriptor_program_init_lazy(ctx, pg)) - return false; - - /* no descriptors */ - if (!pg->dd) - return true; - - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - if (!pg->dd->layout_key[i]) - continue; - - unsigned idx = zink_descriptor_type_to_size_idx(i); - VkDescriptorPoolSize *size = &pg->dd->sizes[idx]; - /* this is a sampler/image set with no images only texels */ - if (!size->descriptorCount) - size++; - unsigned num_sizes = zink_descriptor_program_num_sizes(pg, i); - struct zink_descriptor_pool *pool = descriptor_pool_get(ctx, i, pg->dd->layout_key[i], size, num_sizes); - if (!pool) - return false; - 
zink_descriptor_pool_reference(screen, &pdd_cached(pg)->pool[i], pool); - - if (screen->info.have_KHR_descriptor_update_template && - screen->descriptor_mode != ZINK_DESCRIPTOR_MODE_NOTEMPLATES) - create_descriptor_ref_template(ctx, pg, i); + assert(mpool->pool_key == pg->dd.pool_key[type]); + /* a current pool may not exist */ + if (!mpool->pool) { + /* first, try to recycle a pool from the idle overflowed sets */ + if (util_dynarray_contains(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*)) + mpool->pool = util_dynarray_pop(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*); + else + /* if none exist, try to create a new one */ + mpool->pool = alloc_new_pool(screen, mpool); + /* OOM: force pool recycling from overflows */ + if (!mpool->pool) { + find_pool(screen, bs, mpool, false); + if (!mpool->pool) { + /* bad case: iterate unused batches and recycle */ + for (struct zink_batch_state *state = ctx->free_batch_states; state; state = state->next) + find_pool(screen, state, mpool, true); + if (!mpool->pool) { + /* worst case: iterate in-use batches and recycle (very safe) */ + for (struct zink_batch_state *state = ctx->batch_states; state; state = state->next) + find_pool(screen, state, mpool, false); + } + } + } + if (!mpool->pool) + unreachable("out of descriptor memory!"); } - - return true; + struct zink_descriptor_pool *pool = mpool->pool; + /* allocate up to $current * 10, e.g., 10 -> 100; + * never allocate more than 100 at a time to minimize unused descriptor sets + */ + if (pool->set_idx == pool->sets_alloc) { + unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100); + if (!sets_to_alloc) { + /* overflowed pool: store for reuse */ + pool->set_idx = 0; + util_dynarray_append(&mpool->overflowed_pools[mpool->overflow_idx], struct zink_descriptor_pool*, pool); + mpool->pool = NULL; + /* call recursively to get recycle/oom handling */ + return 
get_descriptor_pool(ctx, pg, type, bs, is_compute); + } + if (!zink_descriptor_util_alloc_sets(screen, pg->dsl[type + 1], + pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc)) + return NULL; + pool->sets_alloc += sets_to_alloc; + } + return pool; } -void -zink_descriptor_program_deinit(struct zink_screen *screen, struct zink_program *pg) +static struct zink_descriptor_pool * +create_push_pool(struct zink_screen *screen, struct zink_batch_state *bs, bool is_compute, bool has_fbfetch) { - if (!pg->dd) - return; - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) - zink_descriptor_pool_reference(screen, &pdd_cached(pg)->pool[i], NULL); - - zink_descriptor_program_deinit_lazy(screen, pg); + struct zink_descriptor_pool *pool = CALLOC_STRUCT(zink_descriptor_pool); + VkDescriptorPoolSize sizes[2]; + sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + if (is_compute) + sizes[0].descriptorCount = MAX_LAZY_DESCRIPTORS; + else { + sizes[0].descriptorCount = ZINK_GFX_SHADER_COUNT * MAX_LAZY_DESCRIPTORS; + sizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + sizes[1].descriptorCount = MAX_LAZY_DESCRIPTORS; + } + pool->pool = create_pool(screen, !is_compute && has_fbfetch ? 
2 : 1, sizes, 0); + return pool; } -static void -zink_descriptor_pool_deinit(struct zink_context *ctx) +static struct zink_descriptor_pool * +check_push_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool_multi *mpool, struct zink_batch_state *bs, bool is_compute) { struct zink_screen *screen = zink_screen(ctx->base.screen); - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - hash_table_foreach(ctx->dd->descriptor_pools[i], entry) { - struct zink_descriptor_pool *pool = (void*)entry->data; - zink_descriptor_pool_reference(screen, &pool, NULL); + struct zink_descriptor_pool *pool = mpool->pool; + /* allocate up to $current * 10, e.g., 10 -> 100 or 100 -> 1000 */ + if (pool->set_idx == pool->sets_alloc || unlikely(ctx->dd.has_fbfetch != bs->dd.has_fbfetch)) { + unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100); + if (!sets_to_alloc || unlikely(ctx->dd.has_fbfetch != bs->dd.has_fbfetch)) { + /* overflowed pool: store for reuse */ + pool->set_idx = 0; + util_dynarray_append(&mpool->overflowed_pools[mpool->overflow_idx], struct zink_descriptor_pool*, pool); + if (util_dynarray_contains(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*)) + bs->dd.push_pool[is_compute].pool = util_dynarray_pop(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*); + else + bs->dd.push_pool[is_compute].pool = create_push_pool(screen, bs, is_compute, ctx->dd.has_fbfetch); + if (unlikely(ctx->dd.has_fbfetch != bs->dd.has_fbfetch)) + mpool->reinit_overflow = true; + bs->dd.has_fbfetch = ctx->dd.has_fbfetch; + return check_push_pool_alloc(ctx, &bs->dd.push_pool[is_compute], bs, is_compute); } - _mesa_hash_table_destroy(ctx->dd->descriptor_pools[i], NULL); + if (!zink_descriptor_util_alloc_sets(screen, ctx->dd.push_dsl[is_compute]->layout, + pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc)) { + mesa_loge("ZINK: failed to allocate push set!"); + return 
NULL; + } + pool->sets_alloc += sets_to_alloc; } + return pool; } -static bool -zink_descriptor_pool_init(struct zink_context *ctx) +static struct zink_descriptor_pool * +get_descriptor_pool(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_state *bs, bool is_compute) { - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - ctx->dd->descriptor_pools[i] = _mesa_hash_table_create(ctx, hash_descriptor_pool, equals_descriptor_pool); - if (!ctx->dd->descriptor_pools[i]) - return false; - } struct zink_screen *screen = zink_screen(ctx->base.screen); - VkDescriptorPoolSize sizes[2]; - sizes[0].type = screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - sizes[0].descriptorCount = ZINK_SHADER_COUNT * ZINK_DEFAULT_MAX_DESCS; - sizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - sizes[1].descriptorCount = ZINK_DEFAULT_MAX_DESCS; - ctx->dd->push_pool[0] = descriptor_pool_get(ctx, 0, ctx->dd->push_layout_keys[0], sizes, ctx->dd->has_fbfetch ? 2 : 1); - sizes[0].descriptorCount = ZINK_DEFAULT_MAX_DESCS; - ctx->dd->push_pool[1] = descriptor_pool_get(ctx, 0, ctx->dd->push_layout_keys[1], sizes, 1); - return ctx->dd->push_pool[0] && ctx->dd->push_pool[1]; + const struct zink_descriptor_pool_key *pool_key = pg->dd.pool_key[type]; + struct zink_descriptor_pool_multi **mppool = bs->dd.pool_size[type] > pool_key->id ? 
+ util_dynarray_element(&bs->dd.pools[type], struct zink_descriptor_pool_multi *, pool_key->id) : + NULL; + if (mppool && *mppool) + return check_pool_alloc(ctx, *mppool, pg, type, bs, is_compute); + struct zink_descriptor_pool_multi *mpool = CALLOC_STRUCT(zink_descriptor_pool_multi); + if (!mpool) + return NULL; + util_dynarray_init(&mpool->overflowed_pools[0], NULL); + util_dynarray_init(&mpool->overflowed_pools[1], NULL); + mpool->pool_key = pool_key; + if (!set_pool(bs, pg, mpool, type)) { + multi_pool_destroy(screen, mpool); + return NULL; + } + assert(pool_key->id < bs->dd.pool_size[type]); + return check_pool_alloc(ctx, mpool, pg, type, bs, is_compute); } - -static void -desc_set_res_add(struct zink_descriptor_set *zds, struct zink_resource *res, unsigned int i, bool cache_hit) +ALWAYS_INLINE static VkDescriptorSet +get_descriptor_set(struct zink_descriptor_pool *pool) { - /* if we got a cache hit, we have to verify that the cached set is still valid; - * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a - * hash table on every resource with the associated descriptor sets that then needs to be iterated through - * whenever a resource is destroyed - */ - assert(!cache_hit || zds->res_objs[i] == (res ? 
res->obj : NULL)); - if (!cache_hit) - zink_resource_desc_set_add(res, zds, i); + if (!pool) + return VK_NULL_HANDLE; + + assert(pool->set_idx < pool->sets_alloc); + return pool->sets[pool->set_idx++]; } -static void -desc_set_sampler_add(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_descriptor_surface *dsurf, - struct zink_sampler_state *state, unsigned int i, bool cache_hit) -{ - /* if we got a cache hit, we have to verify that the cached set is still valid; - * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a - * hash table on every resource with the associated descriptor sets that then needs to be iterated through - * whenever a resource is destroyed - */ -#ifndef NDEBUG - uint32_t cur_hash = get_descriptor_surface_hash(ctx, &zds->surfaces[i]); - uint32_t new_hash = get_descriptor_surface_hash(ctx, dsurf); -#endif - assert(!cache_hit || cur_hash == new_hash); - assert(!cache_hit || zds->sampler_states[i] == state); - if (!cache_hit) { - zink_descriptor_surface_desc_set_add(dsurf, zds, i); - zink_sampler_state_desc_set_add(state, zds, i); +static bool +populate_sets(struct zink_context *ctx, struct zink_batch_state *bs, + struct zink_program *pg, uint8_t changed_sets, VkDescriptorSet *sets) +{ + u_foreach_bit(type, changed_sets) { + if (pg->dd.pool_key[type]) { + struct zink_descriptor_pool *pool = get_descriptor_pool(ctx, pg, type, bs, pg->is_compute); + sets[type] = get_descriptor_set(pool); + if (!sets[type]) + return false; + } else + sets[type] = VK_NULL_HANDLE; } + return true; } static void -desc_set_image_add(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_image_view *image_view, - unsigned int i, bool is_buffer, bool cache_hit) +reinit_db(struct zink_screen *screen, struct zink_batch_state *bs) { - /* if we got a cache hit, we have to verify that the cached set is still valid; - * we store the vk resource to the set here to avoid a more complex and 
costly mechanism of maintaining a - * hash table on every resource with the associated descriptor sets that then needs to be iterated through - * whenever a resource is destroyed - */ -#ifndef NDEBUG - uint32_t cur_hash = get_descriptor_surface_hash(ctx, &zds->surfaces[i]); - uint32_t new_hash = zink_get_image_view_hash(ctx, image_view, is_buffer); -#endif - assert(!cache_hit || cur_hash == new_hash); - if (!cache_hit) - zink_image_view_desc_set_add(image_view, zds, i, is_buffer); + zink_batch_descriptor_deinit(screen, bs); + zink_batch_descriptor_init(screen, bs); } static void -desc_set_descriptor_surface_add(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_descriptor_surface *dsurf, - unsigned int i, bool cache_hit) +enlarge_db(struct zink_context *ctx) { - /* if we got a cache hit, we have to verify that the cached set is still valid; - * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a - * hash table on every resource with the associated descriptor sets that then needs to be iterated through - * whenever a resource is destroyed - */ -#ifndef NDEBUG - uint32_t cur_hash = get_descriptor_surface_hash(ctx, &zds->surfaces[i]); - uint32_t new_hash = get_descriptor_surface_hash(ctx, dsurf); -#endif - assert(!cache_hit || cur_hash == new_hash); - if (!cache_hit) - zink_descriptor_surface_desc_set_add(dsurf, zds, i); -} - -static unsigned -init_write_descriptor(struct zink_shader *shader, struct zink_descriptor_set *zds, enum zink_descriptor_type type, int idx, VkWriteDescriptorSet *wd, unsigned num_wds) -{ - wd->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wd->pNext = NULL; - wd->dstBinding = shader ? shader->bindings[type][idx].binding : idx; - wd->dstArrayElement = 0; - wd->descriptorCount = shader ? shader->bindings[type][idx].size : 1; - wd->descriptorType = shader ? shader->bindings[type][idx].type : - idx == ZINK_FBFETCH_BINDING ? 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - wd->dstSet = zds->desc_set; - return num_wds + 1; + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_batch_state *bs = ctx->batch.state; + /* ensure current db surives */ + zink_batch_reference_resource(&ctx->batch, bs->dd.db); + /* rebinding a db mid-batch is extremely costly: if we start with a factor + * 16 and then half the factor with each new allocation. It shouldn't need to + * do this more than twice. */ + ctx->dd.db.max_db_size *= ctx->dd.db.size_enlarge_scale; + ctx->dd.db.size_enlarge_scale = MAX2(ctx->dd.db.size_enlarge_scale >> 1, 4); + reinit_db(screen, bs); } -static unsigned -update_push_ubo_descriptors(struct zink_context *ctx, struct zink_descriptor_set *zds, - bool is_compute, bool cache_hit, uint32_t *dynamic_offsets) +static void +update_separable(struct zink_context *ctx, struct zink_program *pg) { struct zink_screen *screen = zink_screen(ctx->base.screen); - VkWriteDescriptorSet wds[ZINK_SHADER_COUNT + 1]; - VkDescriptorBufferInfo buffer_infos[ZINK_SHADER_COUNT]; - struct zink_shader **stages; - bool fbfetch = false; + struct zink_batch_state *bs = ctx->batch.state; + + unsigned use_buffer = 0; + VkDescriptorGetInfoEXT info; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; + info.pNext = NULL; + struct zink_gfx_program *prog = (struct zink_gfx_program *)pg; + size_t db_size = 0; + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + if (prog->shaders[i]) + db_size += prog->shaders[i]->precompile.db_size; + } - unsigned num_stages = is_compute ? 1 : ZINK_SHADER_COUNT; - struct zink_program *pg = is_compute ? 
&ctx->curr_compute->base : &ctx->curr_program->base; - if (is_compute) - stages = &ctx->curr_compute->shader; - else - stages = &ctx->gfx_stages[0]; + if (bs->dd.db_offset + db_size >= bs->dd.db->base.b.width0) + enlarge_db(ctx); - for (int i = 0; i < num_stages; i++) { - struct zink_shader *shader = stages[i]; - enum pipe_shader_type pstage = shader ? pipe_shader_type_from_mesa(shader->nir->info.stage) : i; - VkDescriptorBufferInfo *info = &ctx->di.ubos[pstage][0]; - unsigned dynamic_idx = is_compute ? 0 : tgsi_processor_to_shader_stage(pstage); - - /* Values are taken from pDynamicOffsets in an order such that all entries for set N come before set N+1; - * within a set, entries are ordered by the binding numbers in the descriptor set layouts - * - vkCmdBindDescriptorSets spec - * - * because of this, we have to populate the dynamic offsets by their shader stage to ensure they - * match what the driver expects - */ - const bool used = (pg->dd->push_usage & BITFIELD_BIT(pstage)) == BITFIELD_BIT(pstage); - dynamic_offsets[dynamic_idx] = used ? 
info->offset : 0; - if (!cache_hit) { - init_write_descriptor(NULL, zds, ZINK_DESCRIPTOR_TYPE_UBO, tgsi_processor_to_shader_stage(pstage), &wds[i], 0); - if (used) { - desc_set_res_add(zds, ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_UBO][pstage][0], i, cache_hit); - buffer_infos[i].buffer = info->buffer; - buffer_infos[i].range = info->range; + if (!bs->dd.db_bound) + zink_batch_bind_db(ctx); + + for (unsigned j = 0; j < ZINK_GFX_SHADER_COUNT; j++) { + struct zink_shader *zs = prog->shaders[j]; + if (!zs || !zs->precompile.dsl) + continue; + uint64_t offset = bs->dd.db_offset; + assert(bs->dd.db->base.b.width0 > bs->dd.db_offset + zs->precompile.db_size); + for (unsigned i = 0; i < zs->precompile.num_bindings; i++) { + info.type = zs->precompile.bindings[i].descriptorType; + uint64_t desc_offset = offset + zs->precompile.db_offset[i]; + if (screen->info.db_props.combinedImageSamplerDescriptorSingleArray || + zs->precompile.bindings[i].descriptorType != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || + zs->precompile.bindings[i].descriptorCount == 1) { + for (unsigned k = 0; k < zs->precompile.bindings[i].descriptorCount; k++) { + /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */ + info.data.pSampler = (void*)(((uint8_t*)ctx) + zs->precompile.db_template[i].offset + k * zs->precompile.db_template[i].stride); + VKSCR(GetDescriptorEXT)(screen->dev, &info, zs->precompile.db_template[i].db_size, bs->dd.db_map + desc_offset + k * zs->precompile.db_template[i].db_size); + } } else { - desc_set_res_add(zds, NULL, i, cache_hit); - if (unlikely(!screen->info.rb2_feats.nullDescriptor)) - buffer_infos[i].buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; - else - buffer_infos[i].buffer = VK_NULL_HANDLE; - buffer_infos[i].range = VK_WHOLE_SIZE; + assert(zs->precompile.bindings[i].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + char buf[1024]; + uint8_t *db = bs->dd.db_map + desc_offset; + uint8_t *samplers = db + 
zs->precompile.bindings[i].descriptorCount * screen->info.db_props.sampledImageDescriptorSize; + for (unsigned k = 0; k < zs->precompile.bindings[i].descriptorCount; k++) { + /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */ + info.data.pSampler = (void*)(((uint8_t*)ctx) + zs->precompile.db_template[i].offset + + k * zs->precompile.db_template[i].stride); + VKSCR(GetDescriptorEXT)(screen->dev, &info, zs->precompile.db_template[i].db_size, buf); + /* drivers that don't support combinedImageSamplerDescriptorSingleArray must have sampler arrays written in memory as + * + * | array_of_samplers[] | array_of_sampled_images[] | + * + * which means each descriptor's data must be split + */ + memcpy(db, buf, screen->info.db_props.samplerDescriptorSize); + memcpy(samplers, &buf[screen->info.db_props.samplerDescriptorSize], screen->info.db_props.sampledImageDescriptorSize); + db += screen->info.db_props.sampledImageDescriptorSize; + samplers += screen->info.db_props.samplerDescriptorSize; + } } - /* these are dynamic UBO descriptors, so we have to always set 0 as the descriptor offset */ - buffer_infos[i].offset = 0; - wds[i].pBufferInfo = &buffer_infos[i]; } + bs->dd.cur_db_offset[use_buffer] = bs->dd.db_offset; + bs->dd.db_offset += zs->precompile.db_size; + /* TODO: maybe compile multiple variants for different set counts for compact mode? */ + int set_idx = screen->info.have_EXT_shader_object ? 
j : j == MESA_SHADER_FRAGMENT; + VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pg->layout, set_idx, 1, &use_buffer, &offset); } - if (unlikely(!cache_hit && !is_compute && ctx->fbfetch_outputs)) { - struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture); - init_write_descriptor(NULL, zds, 0, MESA_SHADER_STAGES, &wds[ZINK_SHADER_COUNT], 0); - desc_set_res_add(zds, res, ZINK_SHADER_COUNT, cache_hit); - wds[ZINK_SHADER_COUNT].pImageInfo = &ctx->di.fbfetch; - fbfetch = true; - } - - if (!cache_hit) - VKSCR(UpdateDescriptorSets)(screen->dev, num_stages + !!fbfetch, wds, 0, NULL); - return num_stages; } +/* updates the mask of changed_sets and binds the mask of bind_sets */ static void -set_descriptor_set_refs(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_program *pg, bool cache_hit) +zink_descriptors_update_masked_buffer(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, uint8_t bind_sets) { - enum zink_descriptor_type type = zds->pool->type; - for (unsigned i = 0; i < pdd_cached(pg)->num_refs[type]; i++) { - switch (type) { - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - desc_set_sampler_add(ctx, zds, pdd_cached(pg)->refs[type][i].sampler.dsurf, - *pdd_cached(pg)->refs[type][i].sampler.sampler_state, i, cache_hit); - break; - case ZINK_DESCRIPTOR_TYPE_IMAGE: - desc_set_descriptor_surface_add(ctx, zds, pdd_cached(pg)->refs[type][i].dsurf, i, cache_hit); - break; - default: - desc_set_res_add(zds, *pdd_cached(pg)->refs[type][i].res, i, cache_hit); - break; + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_batch_state *bs = ctx->batch.state; + struct zink_program *pg = is_compute ? 
&ctx->curr_compute->base : &ctx->curr_program->base; + + /* skip if no descriptors are updated */ + if (!pg->dd.binding_usage || (!changed_sets && !bind_sets)) + return; + + unsigned use_buffer = 0; + u_foreach_bit(type, changed_sets | bind_sets) { + if (!pg->dd.pool_key[type]) + continue; + assert(type + 1 < pg->num_dsl); + assert(type < ZINK_DESCRIPTOR_BASE_TYPES); + bool changed = (changed_sets & BITFIELD_BIT(type)) > 0; + uint64_t offset = changed ? bs->dd.db_offset : bs->dd.cur_db_offset[type]; + if (pg->dd.db_template[type] && changed) { + const struct zink_descriptor_layout_key *key = pg->dd.pool_key[type]->layout; + VkDescriptorGetInfoEXT info; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; + info.pNext = NULL; + assert(bs->dd.db->base.b.width0 > bs->dd.db_offset + pg->dd.db_size[type]); + for (unsigned i = 0; i < key->num_bindings; i++) { + info.type = key->bindings[i].descriptorType; + uint64_t desc_offset = offset + pg->dd.db_offset[type][i]; + if (screen->info.db_props.combinedImageSamplerDescriptorSingleArray || + key->bindings[i].descriptorType != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || + key->bindings[i].descriptorCount == 1) { + for (unsigned j = 0; j < key->bindings[i].descriptorCount; j++) { + /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */ + info.data.pSampler = (void*)(((uint8_t*)ctx) + pg->dd.db_template[type][i].offset + j * pg->dd.db_template[type][i].stride); + VKSCR(GetDescriptorEXT)(screen->dev, &info, pg->dd.db_template[type][i].db_size, bs->dd.db_map + desc_offset + j * pg->dd.db_template[type][i].db_size); + } + } else { + assert(key->bindings[i].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + char buf[1024]; + uint8_t *db = bs->dd.db_map + desc_offset; + uint8_t *samplers = db + key->bindings[i].descriptorCount * screen->info.db_props.sampledImageDescriptorSize; + for (unsigned j = 0; j < key->bindings[i].descriptorCount; j++) { + /* VkDescriptorDataEXT is a union of pointers; 
the member doesn't matter */ + info.data.pSampler = (void*)(((uint8_t*)ctx) + pg->dd.db_template[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i].offset + + j * pg->dd.db_template[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i].stride); + VKSCR(GetDescriptorEXT)(screen->dev, &info, pg->dd.db_template[type][ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW].db_size, buf); + /* drivers that don't support combinedImageSamplerDescriptorSingleArray must have sampler arrays written in memory as + * + * | array_of_samplers[] | array_of_sampled_images[] | + * + * which means each descriptor's data must be split + */ + memcpy(db, buf, screen->info.db_props.samplerDescriptorSize); + memcpy(samplers, &buf[screen->info.db_props.samplerDescriptorSize], screen->info.db_props.sampledImageDescriptorSize); + db += screen->info.db_props.sampledImageDescriptorSize; + samplers += screen->info.db_props.samplerDescriptorSize; + } + } + } + bs->dd.cur_db_offset[type] = bs->dd.db_offset; + bs->dd.db_offset += pg->dd.db_size[type]; } + zink_flush_dgc_if_enabled(ctx); + /* templates are indexed by the set id, so increment type by 1 + * (this is effectively an optimization of indirecting through screen->desc_set_id) + */ + VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, + is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + pg->layout, + type + 1, 1, + &use_buffer, + &offset); } } -static void -update_descriptors_internal(struct zink_context *ctx, enum zink_descriptor_type type, struct zink_descriptor_set *zds, struct zink_program *pg, bool cache_hit) +/* updates the mask of changed_sets and binds the mask of bind_sets */ +void +zink_descriptors_update_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, uint8_t bind_sets) { struct zink_screen *screen = zink_screen(ctx->base.screen); - struct zink_shader **stages; - - unsigned num_stages = pg->is_compute ? 
1 : ZINK_SHADER_COUNT; - if (pg->is_compute) - stages = &ctx->curr_compute->shader; - else - stages = &ctx->gfx_stages[0]; + struct zink_batch_state *bs = ctx->batch.state; + struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; + VkDescriptorSet desc_sets[ZINK_DESCRIPTOR_BASE_TYPES]; - if (cache_hit || !zds) + /* skip if no descriptors are updated */ + if (!pg->dd.binding_usage || (!changed_sets && !bind_sets)) return; - if (screen->info.have_KHR_descriptor_update_template && - screen->descriptor_mode != ZINK_DESCRIPTOR_MODE_NOTEMPLATES) { - set_descriptor_set_refs(ctx, zds, pg, cache_hit); - zink_descriptor_set_update_lazy(ctx, pg, type, zds->desc_set); + /* populate usable sets for the changed_sets mask */ + if (!populate_sets(ctx, bs, pg, changed_sets, desc_sets)) { + debug_printf("ZINK: couldn't get descriptor sets!\n"); return; } - - unsigned num_resources = 0; - ASSERTED unsigned num_bindings = zds->pool->num_resources; - VkWriteDescriptorSet wds[ZINK_MAX_DESCRIPTORS_PER_TYPE]; - unsigned num_wds = 0; - - for (int i = 0; i < num_stages; i++) { - struct zink_shader *shader = stages[i]; - if (!shader) - continue; - enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage); - for (int j = 0; j < shader->num_bindings[type]; j++) { - int index = shader->bindings[type][j].index; - switch (type) { - case ZINK_DESCRIPTOR_TYPE_UBO: - if (!index) - continue; - FALLTHROUGH; - case ZINK_DESCRIPTOR_TYPE_SSBO: { - VkDescriptorBufferInfo *info; - struct zink_resource *res = ctx->di.descriptor_res[type][stage][index]; - if (type == ZINK_DESCRIPTOR_TYPE_UBO) - info = &ctx->di.ubos[stage][index]; - else - info = &ctx->di.ssbos[stage][index]; - assert(num_resources < num_bindings); - desc_set_res_add(zds, res, num_resources++, cache_hit); - wds[num_wds].pBufferInfo = info; - } - break; - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - case ZINK_DESCRIPTOR_TYPE_IMAGE: { - VkDescriptorImageInfo *image_info; - VkBufferView 
*buffer_info; - if (type == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW) { - image_info = &ctx->di.textures[stage][index]; - buffer_info = &ctx->di.tbos[stage][index]; - } else { - image_info = &ctx->di.images[stage][index]; - buffer_info = &ctx->di.texel_images[stage][index]; - } - bool is_buffer = zink_shader_descriptor_is_buffer(shader, type, j); - for (unsigned k = 0; k < shader->bindings[type][j].size; k++) { - assert(num_resources < num_bindings); - if (type == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW) { - struct zink_sampler_state *sampler = NULL; - if (!is_buffer && image_info->imageView) - sampler = ctx->sampler_states[stage][index + k];; - - desc_set_sampler_add(ctx, zds, &ctx->di.sampler_surfaces[stage][index + k], sampler, num_resources++, cache_hit); - } else { - struct zink_image_view *image_view = &ctx->image_views[stage][index + k]; - desc_set_image_add(ctx, zds, image_view, num_resources++, is_buffer, cache_hit); - } - } - if (is_buffer) - wds[num_wds].pTexelBufferView = buffer_info; - else - wds[num_wds].pImageInfo = image_info; - } - break; - default: - unreachable("unknown descriptor type"); - } - num_wds = init_write_descriptor(shader, zds, type, j, &wds[num_wds], num_wds); + /* no flushing allowed: sets are allocated to the batch, so this breaks everything */ + assert(ctx->batch.state == bs); + + u_foreach_bit(type, changed_sets) { + assert(type + 1 < pg->num_dsl); + if (pg->dd.pool_key[type]) { + zink_flush_dgc_if_enabled(ctx); + /* templates are indexed by the set id, so increment type by 1 + * (this is effectively an optimization of indirecting through screen->desc_set_id) + */ + VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, desc_sets[type], pg->dd.templates[type + 1], ctx); + VKSCR(CmdBindDescriptorSets)(bs->cmdbuf, + is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + /* same set indexing as above */ + pg->layout, type + 1, 1, &desc_sets[type], + 0, NULL); + bs->dd.sets[is_compute][type + 1] = desc_sets[type]; } } - if (num_wds) - VKSCR(UpdateDescriptorSets)(screen->dev, num_wds, wds, 0, NULL); + /* these are the unchanged sets being rebound across pipeline changes when compat_id changes but the set is the same + * also handles binding null sets + */ + u_foreach_bit(type, bind_sets & ~changed_sets) { + if (!pg->dd.pool_key[type]) + continue; + /* same set indexing as above */ + assert(bs->dd.sets[is_compute][type + 1]); + zink_flush_dgc_if_enabled(ctx); + VKSCR(CmdBindDescriptorSets)(bs->cmdbuf, + is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + /* same set indexing as above */ + pg->layout, type + 1, 1, &bs->dd.sets[is_compute][type + 1], + 0, NULL); + } } static void -zink_context_update_descriptor_states(struct zink_context *ctx, struct zink_program *pg); - -#define MAX_CACHE_MISSES 50 - +bind_bindless_db(struct zink_context *ctx, struct zink_program *pg) +{ + struct zink_batch_state *bs = ctx->batch.state; + struct zink_screen *screen = zink_screen(ctx->base.screen); + unsigned index = 1; + VkDeviceSize offset = 0; + VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, + pg->is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + pg->layout, + screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS], 1, + &index, + &offset); + ctx->dd.bindless_bound = true; +} + +/* entrypoint for all descriptor updating: + * - update push set + * - generate masks for updating other sets + * - always called from driver thread + */ void zink_descriptors_update(struct zink_context *ctx, bool is_compute) { - struct zink_program *pg = is_compute ? (struct zink_program *)ctx->curr_compute : (struct zink_program *)ctx->curr_program; + struct zink_batch_state *bs = ctx->batch.state; + struct zink_program *pg = is_compute ? 
&ctx->curr_compute->base : &ctx->curr_program->base; + struct zink_screen *screen = zink_screen(ctx->base.screen); + bool have_KHR_push_descriptor = screen->info.have_KHR_push_descriptor; - zink_context_update_descriptor_states(ctx, pg); - bool cache_hit; - VkDescriptorSet desc_set; - struct zink_descriptor_set *zds; + bool batch_changed = !bs->dd.pg[is_compute]; + if (batch_changed) { + /* update all sets and bind null sets */ + ctx->dd.state_changed[is_compute] = pg->dd.binding_usage & BITFIELD_MASK(ZINK_DESCRIPTOR_TYPE_UNIFORMS); + ctx->dd.push_state_changed[is_compute] = !!pg->dd.push_usage || ctx->dd.has_fbfetch != bs->dd.has_fbfetch; + } - struct zink_batch *batch = &ctx->batch; - VkPipelineBindPoint bp = is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; + if (!is_compute) { + struct zink_gfx_program *prog = (struct zink_gfx_program*)pg; + if (prog->is_separable) { + /* force all descriptors update on next pass: separables use different layouts */ + ctx->dd.state_changed[is_compute] = BITFIELD_MASK(ZINK_DESCRIPTOR_TYPE_UNIFORMS); + ctx->dd.push_state_changed[is_compute] = true; + update_separable(ctx, pg); + if (pg->dd.bindless) + bind_bindless_db(ctx, pg); + return; + } + } - { - uint32_t dynamic_offsets[PIPE_MAX_CONSTANT_BUFFERS]; - unsigned dynamic_offset_idx = 0; + if (pg != bs->dd.pg[is_compute]) { + /* if we don't already know that we have to update all sets, + * check to see if any dsls changed + * + * also always update the dsl pointers on program change + */ + for (unsigned i = 0; i < ARRAY_SIZE(bs->dd.dsl[is_compute]); i++) { + /* push set is already detected, start at 1 */ + if (bs->dd.dsl[is_compute][i] != pg->dsl[i + 1]) + ctx->dd.state_changed[is_compute] |= BITFIELD_BIT(i); + bs->dd.dsl[is_compute][i] = pg->dsl[i + 1]; + } + ctx->dd.push_state_changed[is_compute] |= bs->dd.push_usage[is_compute] != pg->dd.push_usage; + bs->dd.push_usage[is_compute] = pg->dd.push_usage; + } - /* push set is indexed in vulkan as 0 but 
isn't in the general pool array */ - ctx->dd->changed[is_compute][ZINK_DESCRIPTOR_TYPES] |= ctx->dd->pg[is_compute] != pg; - if (pg->dd->push_usage) { - zds = zink_descriptor_set_get(ctx, ZINK_DESCRIPTOR_TYPES, is_compute, &cache_hit); - } else { - zds = NULL; - cache_hit = false; + uint8_t changed_sets = pg->dd.binding_usage & ctx->dd.state_changed[is_compute]; + /* + * when binding a pipeline, the pipeline can correctly access any previously bound + * descriptor sets which were bound with compatible pipeline layouts + * VK 14.2.2 + */ + uint8_t bind_sets = bs->dd.pg[is_compute] && bs->dd.compat_id[is_compute] == pg->compat_id ? 0 : pg->dd.binding_usage; + + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + size_t check_size = 0; + if (pg->dd.push_usage && ctx->dd.push_state_changed[is_compute]) + check_size += ctx->dd.db_size[is_compute]; + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + if (changed_sets & BITFIELD_BIT(i)) + check_size += pg->dd.db_size[i]; } - ctx->dd->changed[is_compute][ZINK_DESCRIPTOR_TYPES] = false; - desc_set = zds ? 
zds->desc_set : ctx->dd->dummy_set; - - if (pg->dd->push_usage) // push set - dynamic_offset_idx = update_push_ubo_descriptors(ctx, zds, - is_compute, cache_hit, dynamic_offsets); - VKCTX(CmdBindDescriptorSets)(batch->state->cmdbuf, bp, - pg->layout, 0, 1, &desc_set, - dynamic_offset_idx, dynamic_offsets); - } - - { - for (int h = 0; h < ZINK_DESCRIPTOR_TYPES; h++) { - if (pdd_cached(pg)->cache_misses[h] < MAX_CACHE_MISSES) { - ctx->dd->changed[is_compute][h] |= ctx->dd->pg[is_compute] != pg; - if (pg->dsl[h + 1]) { - /* null set has null pool */ - if (pdd_cached(pg)->pool[h]) { - zds = zink_descriptor_set_get(ctx, h, is_compute, &cache_hit); - if (cache_hit) { - pdd_cached(pg)->cache_misses[h] = 0; - } else if (likely(zink_screen(ctx->base.screen)->descriptor_mode != ZINK_DESCRIPTOR_MODE_NOFALLBACK)) { - if (++pdd_cached(pg)->cache_misses[h] == MAX_CACHE_MISSES) { - const char *set_names[] = { - "UBO", - "TEXTURES", - "SSBO", - "IMAGES", - }; - debug_printf("zink: descriptor cache exploded for prog %p set %s: getting lazy (not a bug, just lettin you know)\n", pg, set_names[h]); - } - } - } else - zds = NULL; - /* reuse dummy set for bind */ - desc_set = zds ? zds->desc_set : ctx->dd->dummy_set; - update_descriptors_internal(ctx, h, zds, pg, cache_hit); - - VKCTX(CmdBindDescriptorSets)(batch->state->cmdbuf, bp, - pg->layout, h + 1, 1, &desc_set, - 0, NULL); + + if (bs->dd.db_offset + check_size >= bs->dd.db->base.b.width0) { + enlarge_db(ctx); + changed_sets = pg->dd.binding_usage; + ctx->dd.push_state_changed[is_compute] = true; + zink_flush_dgc_if_enabled(ctx); + } + + if (!bs->dd.db_bound) + zink_batch_bind_db(ctx); + } + + if (pg->dd.push_usage && (ctx->dd.push_state_changed[is_compute] || bind_sets)) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + uint32_t index = 0; + uint64_t offset = ctx->dd.push_state_changed[is_compute] ? 
+ bs->dd.db_offset : + bs->dd.cur_db_offset[ZINK_DESCRIPTOR_TYPE_UNIFORMS]; + if (ctx->dd.push_state_changed[is_compute]) { + assert(bs->dd.db->base.b.width0 > bs->dd.db_offset + ctx->dd.db_size[is_compute]); + for (unsigned i = 0; i < (is_compute ? 1 : ZINK_GFX_SHADER_COUNT); i++) { + VkDescriptorGetInfoEXT info; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; + info.pNext = NULL; + info.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + info.data.pUniformBuffer = &ctx->di.db.ubos[is_compute ? MESA_SHADER_COMPUTE : i][0]; + uint64_t stage_offset = offset + (is_compute ? 0 : ctx->dd.db_offset[i]); + VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.robustUniformBufferDescriptorSize, + bs->dd.db_map + stage_offset); + } + if (!is_compute && ctx->dd.has_fbfetch) { + uint64_t stage_offset = offset + ctx->dd.db_offset[MESA_SHADER_FRAGMENT + 1]; + if (pg->dd.fbfetch && screen->info.db_props.inputAttachmentDescriptorSize) { + /* real fbfetch descriptor */ + VkDescriptorGetInfoEXT info; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; + info.pNext = NULL; + info.type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + info.data.pInputAttachmentImage = &ctx->di.fbfetch; + VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.inputAttachmentDescriptorSize, + bs->dd.db_map + stage_offset); + } else { + /* reuse cached dummy descriptor */ + memcpy(bs->dd.db_map + stage_offset, ctx->di.fbfetch_db, screen->info.db_props.inputAttachmentDescriptorSize); + } } + bs->dd.cur_db_offset[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = bs->dd.db_offset; + bs->dd.db_offset += ctx->dd.db_size[is_compute]; + } + zink_flush_dgc_if_enabled(ctx); + VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, + is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + pg->layout, + 0, 1, + &index, + &offset); + } else { + if (ctx->dd.push_state_changed[0]) { + zink_flush_dgc_if_enabled(ctx); + } + if (have_KHR_push_descriptor) { + if (ctx->dd.push_state_changed[is_compute]) + VKCTX(CmdPushDescriptorSetWithTemplateKHR)(bs->cmdbuf, pg->dd.templates[0], + pg->layout, 0, ctx); } else { - zink_descriptors_update_lazy_masked(ctx, is_compute, BITFIELD_BIT(h), false, false); + if (ctx->dd.push_state_changed[is_compute]) { + struct zink_descriptor_pool *pool = check_push_pool_alloc(ctx, &bs->dd.push_pool[pg->is_compute], bs, pg->is_compute); + VkDescriptorSet push_set = get_descriptor_set(pool); + if (!push_set) + mesa_loge("ZINK: failed to get push descriptor set! prepare to crash!"); + VKCTX(UpdateDescriptorSetWithTemplate)(screen->dev, push_set, pg->dd.templates[0], ctx); + bs->dd.sets[is_compute][0] = push_set; + } + assert(bs->dd.sets[is_compute][0]); + VKCTX(CmdBindDescriptorSets)(bs->cmdbuf, + is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + pg->layout, 0, 1, &bs->dd.sets[is_compute][0], + 0, NULL); } - ctx->dd->changed[is_compute][h] = false; } } - ctx->dd->pg[is_compute] = pg; + ctx->dd.push_state_changed[is_compute] = false; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + zink_descriptors_update_masked_buffer(ctx, is_compute, changed_sets, bind_sets); + else + zink_descriptors_update_masked(ctx, is_compute, changed_sets, bind_sets); + /* bindless descriptors are context-based and get updated elsewhere */ + if (pg->dd.bindless && unlikely(!ctx->dd.bindless_bound)) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + bind_bindless_db(ctx, pg); + } else { + VKCTX(CmdBindDescriptorSets)(ctx->batch.state->cmdbuf, is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, + pg->layout, screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS], 1, &ctx->dd.t.bindless_set, + 0, NULL); + } + ctx->dd.bindless_bound = true; + } + bs->dd.pg[is_compute] = pg; + ctx->dd.pg[is_compute] = pg; + bs->dd.compat_id[is_compute] = pg->compat_id; + ctx->dd.state_changed[is_compute] = 0; } +/* called from gallium descriptor change hooks, e.g., set_sampler_views */ void -zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs) +zink_context_invalidate_descriptor_state(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned start, unsigned count) { - if (!bs->dd) - return; - _mesa_set_destroy(bs->dd->desc_sets, NULL); - zink_batch_descriptor_deinit_lazy(screen, bs); + if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start) + ctx->dd.push_state_changed[shader == MESA_SHADER_COMPUTE] = true; + else + ctx->dd.state_changed[shader == MESA_SHADER_COMPUTE] |= BITFIELD_BIT(type); } - void -zink_batch_descriptor_reset(struct zink_screen *screen, struct zink_batch_state *bs) +zink_context_invalidate_descriptor_state_compact(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned start, unsigned count) { - set_foreach(bs->dd->desc_sets, entry) { - struct zink_descriptor_set *zds = (void*)entry->key; - zink_batch_usage_unset(&zds->batch_uses, bs); - /* reset descriptor pools when no bs is using this program to avoid - * having some inactive program hogging a billion descriptors - */ - pipe_reference(&zds->reference, NULL); - zink_descriptor_set_recycle(zds); - _mesa_set_remove(bs->dd->desc_sets, entry); + if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start) + ctx->dd.push_state_changed[shader == MESA_SHADER_COMPUTE] = true; + else { + if (type > ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW) + type -= ZINK_DESCRIPTOR_COMPACT; + ctx->dd.state_changed[shader == MESA_SHADER_COMPUTE] |= BITFIELD_BIT(type); } - 
zink_batch_descriptor_reset_lazy(screen, bs); -} - -bool -zink_batch_descriptor_init(struct zink_screen *screen, struct zink_batch_state *bs) -{ - if (!zink_batch_descriptor_init_lazy(screen, bs)) - return false; - bs->dd->desc_sets = _mesa_pointer_set_create(bs); - return !!bs->dd->desc_sets; -} - -static uint32_t -calc_descriptor_state_hash_ubo(struct zink_context *ctx, enum pipe_shader_type shader, int idx, uint32_t hash, bool need_offset) -{ - struct zink_resource *res = ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_UBO][shader][idx]; - struct zink_resource_object *obj = res ? res->obj : NULL; - hash = XXH32(&obj, sizeof(void*), hash); - void *hash_data = &ctx->di.ubos[shader][idx].range; - size_t data_size = sizeof(unsigned); - hash = XXH32(hash_data, data_size, hash); - if (need_offset) - hash = XXH32(&ctx->di.ubos[shader][idx].offset, sizeof(unsigned), hash); - return hash; } -static uint32_t -calc_descriptor_state_hash_ssbo(struct zink_context *ctx, struct zink_shader *zs, enum pipe_shader_type shader, int i, int idx, uint32_t hash) +static void +deinit_multi_pool_overflow(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool) { - struct zink_resource *res = ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_SSBO][shader][idx]; - struct zink_resource_object *obj = res ? 
res->obj : NULL; - hash = XXH32(&obj, sizeof(void*), hash); - if (obj) { - struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][idx]; - hash = XXH32(&ssbo->buffer_offset, sizeof(ssbo->buffer_offset), hash); - hash = XXH32(&ssbo->buffer_size, sizeof(ssbo->buffer_size), hash); + for (unsigned i = 0; i < 2; i++) { + clear_multi_pool_overflow(screen, &mpool->overflowed_pools[i]); + util_dynarray_fini(&mpool->overflowed_pools[i]); } - return hash; } -static uint32_t -calc_descriptor_state_hash_sampler(struct zink_context *ctx, struct zink_shader *zs, enum pipe_shader_type shader, int i, int idx, uint32_t hash) +/* called during batch state destroy */ +void +zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs) { - for (unsigned k = 0; k < zs->bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i].size; k++) { - struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[shader][idx + k]); - bool is_buffer = zink_shader_descriptor_is_buffer(zs, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, i); - ctx->di.sampler_surfaces[shader][idx + k].is_buffer = is_buffer; - uint32_t val = zink_get_sampler_view_hash(ctx, sampler_view, is_buffer); - hash = XXH32(&val, sizeof(uint32_t), hash); - if (is_buffer) - continue; - - struct zink_sampler_state *sampler_state = ctx->sampler_states[shader][idx + k]; - - if (sampler_state) - hash = XXH32(&sampler_state->hash, sizeof(uint32_t), hash); + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + for (unsigned j = 0; j < bs->dd.pools[i].capacity / sizeof(struct zink_descriptor_pool_multi *); j++) { + struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[i], struct zink_descriptor_pool_multi *, j); + if (mppool && *mppool) { + deinit_multi_pool_overflow(screen, *mppool); + multi_pool_destroy(screen, *mppool); + } + } + util_dynarray_fini(&bs->dd.pools[i]); } - return hash; -} - -static uint32_t -calc_descriptor_state_hash_image(struct zink_context *ctx, struct zink_shader 
*zs, enum pipe_shader_type shader, int i, int idx, uint32_t hash) -{ - for (unsigned k = 0; k < zs->bindings[ZINK_DESCRIPTOR_TYPE_IMAGE][i].size; k++) { - bool is_buffer = zink_shader_descriptor_is_buffer(zs, ZINK_DESCRIPTOR_TYPE_IMAGE, i); - uint32_t val = zink_get_image_view_hash(ctx, &ctx->image_views[shader][idx + k], is_buffer); - ctx->di.image_surfaces[shader][idx + k].is_buffer = is_buffer; - hash = XXH32(&val, sizeof(uint32_t), hash); + for (unsigned i = 0; i < 2; i++) { + if (bs->dd.push_pool[i].pool) + pool_destroy(screen, bs->dd.push_pool[i].pool); + deinit_multi_pool_overflow(screen, &bs->dd.push_pool[i]); } - return hash; + + if (bs->dd.db_xfer) + zink_screen_buffer_unmap(&screen->base, bs->dd.db_xfer); + bs->dd.db_xfer = NULL; + if (bs->dd.db) + screen->base.resource_destroy(&screen->base, &bs->dd.db->base.b); + bs->dd.db = NULL; + bs->dd.db_bound = false; + bs->dd.db_offset = 0; + memset(bs->dd.cur_db_offset, 0, sizeof(bs->dd.cur_db_offset)); } -static uint32_t -update_descriptor_stage_state(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type) +/* ensure the idle/usable overflow set array always has as many members as possible by merging both arrays on batch state reset */ +static void +consolidate_pool_alloc(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool) { - struct zink_shader *zs = shader == PIPE_SHADER_COMPUTE ? 
ctx->compute_stage : ctx->gfx_stages[shader]; - - uint32_t hash = 0; - for (int i = 0; i < zs->num_bindings[type]; i++) { - /* skip push set members */ - if (zs->bindings[type][i].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) - continue; + unsigned sizes[] = { + util_dynarray_num_elements(&mpool->overflowed_pools[0], struct zink_descriptor_pool*), + util_dynarray_num_elements(&mpool->overflowed_pools[1], struct zink_descriptor_pool*), + }; + if (!sizes[0] && !sizes[1]) + return; + /* set idx to whichever overflow is smaller */ + mpool->overflow_idx = sizes[0] > sizes[1]; + if (!mpool->overflowed_pools[mpool->overflow_idx].size) + return; - int idx = zs->bindings[type][i].index; - switch (type) { - case ZINK_DESCRIPTOR_TYPE_UBO: - hash = calc_descriptor_state_hash_ubo(ctx, shader, idx, hash, true); - break; - case ZINK_DESCRIPTOR_TYPE_SSBO: - hash = calc_descriptor_state_hash_ssbo(ctx, zs, shader, i, idx, hash); - break; - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - hash = calc_descriptor_state_hash_sampler(ctx, zs, shader, i, idx, hash); - break; - case ZINK_DESCRIPTOR_TYPE_IMAGE: - hash = calc_descriptor_state_hash_image(ctx, zs, shader, i, idx, hash); - break; - default: - unreachable("unknown descriptor type"); - } - } - return hash; + /* attempt to consolidate all the overflow into one array to maximize reuse */ + util_dynarray_append_dynarray(&mpool->overflowed_pools[!mpool->overflow_idx], &mpool->overflowed_pools[mpool->overflow_idx]); + util_dynarray_clear(&mpool->overflowed_pools[mpool->overflow_idx]); } -static void -update_descriptor_state(struct zink_context *ctx, enum zink_descriptor_type type, bool is_compute) +/* called when a batch state is reset, i.e., just before a batch state becomes the current state */ +void +zink_batch_descriptor_reset(struct zink_screen *screen, struct zink_batch_state *bs) { - /* we shouldn't be calling this if we don't have to */ - assert(!ctx->dd->descriptor_states[is_compute].valid[type]); - bool has_any_usage = false; 
- - if (is_compute) { - /* just update compute state */ - bool has_usage = zink_program_get_descriptor_usage(ctx, PIPE_SHADER_COMPUTE, type); - if (has_usage) - ctx->dd->descriptor_states[is_compute].state[type] = update_descriptor_stage_state(ctx, PIPE_SHADER_COMPUTE, type); - else - ctx->dd->descriptor_states[is_compute].state[type] = 0; - has_any_usage = has_usage; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + bs->dd.db_offset = 0; + if (bs->dd.db && bs->dd.db->base.b.width0 < bs->ctx->dd.db.max_db_size * screen->base_descriptor_size) + reinit_db(screen, bs); + bs->dd.db_bound = false; } else { - /* update all gfx states */ - bool first = true; - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - bool has_usage = false; - /* this is the incremental update for the shader stage */ - if (!ctx->dd->gfx_descriptor_states[i].valid[type]) { - ctx->dd->gfx_descriptor_states[i].state[type] = 0; - if (ctx->gfx_stages[i]) { - has_usage = zink_program_get_descriptor_usage(ctx, i, type); - if (has_usage) - ctx->dd->gfx_descriptor_states[i].state[type] = update_descriptor_stage_state(ctx, i, type); - ctx->dd->gfx_descriptor_states[i].valid[type] = has_usage; + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + struct zink_descriptor_pool_multi **mpools = bs->dd.pools[i].data; + for (unsigned j = 0; j < bs->dd.pool_size[i]; j++) { + struct zink_descriptor_pool_multi *mpool = mpools[j]; + if (!mpool) + continue; + consolidate_pool_alloc(screen, mpool); + + /* if the pool is still in use, reset the current set index */ + if (mpool->pool_key->use_count) + mpool->pool->set_idx = 0; + else { + /* otherwise destroy it to reclaim memory */ + multi_pool_destroy(screen, mpool); + mpools[j] = NULL; } } - if (ctx->dd->gfx_descriptor_states[i].valid[type]) { - /* this is the overall state update for the descriptor set hash */ - if (first) { - /* no need to double hash the first state */ - ctx->dd->descriptor_states[is_compute].state[type] = 
ctx->dd->gfx_descriptor_states[i].state[type]; - first = false; - } else { - ctx->dd->descriptor_states[is_compute].state[type] = XXH32(&ctx->dd->gfx_descriptor_states[i].state[type], - sizeof(uint32_t), - ctx->dd->descriptor_states[is_compute].state[type]); - } + } + for (unsigned i = 0; i < 2; i++) { + if (bs->dd.push_pool[i].reinit_overflow) { + /* these don't match current fbfetch usage and can never be used again */ + clear_multi_pool_overflow(screen, &bs->dd.push_pool[i].overflowed_pools[bs->dd.push_pool[i].overflow_idx]); + } else if (bs->dd.push_pool[i].pool) { + consolidate_pool_alloc(screen, &bs->dd.push_pool[i]); } - has_any_usage |= has_usage; + if (bs->dd.push_pool[i].pool) + bs->dd.push_pool[i].pool->set_idx = 0; } } - ctx->dd->descriptor_states[is_compute].valid[type] = has_any_usage; + memset(bs->dd.pg, 0, sizeof(bs->dd.pg)); } -static void -zink_context_update_descriptor_states(struct zink_context *ctx, struct zink_program *pg) +/* called on batch state creation */ +bool +zink_batch_descriptor_init(struct zink_screen *screen, struct zink_batch_state *bs) { - if (pg->dd->push_usage && (!ctx->dd->push_valid[pg->is_compute] || - pg->dd->push_usage != ctx->dd->last_push_usage[pg->is_compute])) { - uint32_t hash = 0; - if (pg->is_compute) { - hash = calc_descriptor_state_hash_ubo(ctx, PIPE_SHADER_COMPUTE, 0, 0, false); - } else { - bool first = true; - u_foreach_bit(stage, pg->dd->push_usage) { - if (!ctx->dd->gfx_push_valid[stage]) { - ctx->dd->gfx_push_state[stage] = calc_descriptor_state_hash_ubo(ctx, stage, 0, 0, false); - ctx->dd->gfx_push_valid[stage] = true; - } - if (first) - hash = ctx->dd->gfx_push_state[stage]; - else - hash = XXH32(&ctx->dd->gfx_push_state[stage], sizeof(uint32_t), hash); - first = false; - } + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) + util_dynarray_init(&bs->dd.pools[i], bs); + if (!screen->info.have_KHR_push_descriptor) { + for (unsigned i = 0; i < 2; i++) { + bs->dd.push_pool[i].pool = 
create_push_pool(screen, bs, i, false); + util_dynarray_init(&bs->dd.push_pool[i].overflowed_pools[0], bs); + util_dynarray_init(&bs->dd.push_pool[i].overflowed_pools[1], bs); } - ctx->dd->push_state[pg->is_compute] = hash; - ctx->dd->push_valid[pg->is_compute] = true; - ctx->dd->last_push_usage[pg->is_compute] = pg->dd->push_usage; } - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - if (pdd_cached(pg)->pool[i] && pdd_cached(pg)->cache_misses[i] < MAX_CACHE_MISSES && - !ctx->dd->descriptor_states[pg->is_compute].valid[i]) - update_descriptor_state(ctx, i, pg->is_compute); + + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && !(bs->ctx->flags & ZINK_CONTEXT_COPY_ONLY)) { + unsigned bind = ZINK_BIND_DESCRIPTOR; + struct pipe_resource *pres = pipe_buffer_create(&screen->base, bind, 0, bs->ctx->dd.db.max_db_size * screen->base_descriptor_size); + if (!pres) + return false; + bs->dd.db = zink_resource(pres); + bs->dd.db_map = pipe_buffer_map(&bs->ctx->base, pres, PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT | PIPE_MAP_THREAD_SAFE, &bs->dd.db_xfer); } + return true; } -void -zink_context_invalidate_descriptor_state(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned start, unsigned count) +static void +init_push_template_entry(VkDescriptorUpdateTemplateEntry *entry, unsigned i) { - zink_context_invalidate_descriptor_state_lazy(ctx, shader, type, start, count); - if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start) { - /* ubo 0 is the push set */ - ctx->dd->push_state[shader == PIPE_SHADER_COMPUTE] = 0; - ctx->dd->push_valid[shader == PIPE_SHADER_COMPUTE] = false; - if (shader != PIPE_SHADER_COMPUTE) { - ctx->dd->gfx_push_state[shader] = 0; - ctx->dd->gfx_push_valid[shader] = false; - } - ctx->dd->changed[shader == PIPE_SHADER_COMPUTE][ZINK_DESCRIPTOR_TYPES] = true; - return; - } - if (shader != PIPE_SHADER_COMPUTE) { - ctx->dd->gfx_descriptor_states[shader].valid[type] = false; - 
ctx->dd->gfx_descriptor_states[shader].state[type] = 0; - } - ctx->dd->descriptor_states[shader == PIPE_SHADER_COMPUTE].valid[type] = false; - ctx->dd->descriptor_states[shader == PIPE_SHADER_COMPUTE].state[type] = 0; - ctx->dd->changed[shader == PIPE_SHADER_COMPUTE][type] = true; + entry->dstBinding = i; + entry->descriptorCount = 1; + entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + entry->offset = offsetof(struct zink_context, di.t.ubos[i][0]); + entry->stride = sizeof(VkDescriptorBufferInfo); } +/* called on context creation */ bool zink_descriptors_init(struct zink_context *ctx) { - zink_descriptors_init_lazy(ctx); - if (!ctx->dd) + struct zink_screen *screen = zink_screen(ctx->base.screen); + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.push_entries[i]; + init_push_template_entry(entry, i); + } + init_push_template_entry(&ctx->dd.compute_push_entry, MESA_SHADER_COMPUTE); + VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.push_entries[ZINK_GFX_SHADER_COUNT]; //fbfetch + entry->dstBinding = ZINK_FBFETCH_BINDING; + entry->descriptorCount = 1; + entry->descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + entry->offset = offsetof(struct zink_context, di.fbfetch); + entry->stride = sizeof(VkDescriptorImageInfo); + struct zink_descriptor_layout_key *layout_key; + if (!zink_descriptor_util_push_layouts_get(ctx, ctx->dd.push_dsl, ctx->dd.push_layout_keys)) + return false; + + ctx->dd.dummy_dsl = descriptor_util_layout_get(screen, 0, NULL, 0, &layout_key); + if (!ctx->dd.dummy_dsl) return false; - return zink_descriptor_pool_init(ctx); + + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + VkDeviceSize val; + for (unsigned i = 0; i < 2; i++) { + VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, ctx->dd.push_dsl[i]->layout, &val); + ctx->dd.db_size[i] = val; + } + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, 
ctx->dd.push_dsl[0]->layout, i, &val); + ctx->dd.db_offset[i] = val; + } + /* start small */ + ctx->dd.db.max_db_size = 250; + ctx->dd.db.size_enlarge_scale = 16; + } + + return true; } +/* called on context destroy */ void zink_descriptors_deinit(struct zink_context *ctx) { - zink_descriptor_pool_deinit(ctx); - zink_descriptors_deinit_lazy(ctx); + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (ctx->dd.push_dsl[0]) + VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[0]->layout, NULL); + if (ctx->dd.push_dsl[1]) + VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[1]->layout, NULL); } +/* called on screen creation */ bool -zink_descriptor_layouts_init(struct zink_context *ctx) +zink_descriptor_layouts_init(struct zink_screen *screen) { - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) - if (!_mesa_hash_table_init(&ctx->desc_set_layouts[i], ctx, hash_descriptor_layout, equals_descriptor_layout)) + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + if (!_mesa_hash_table_init(&screen->desc_set_layouts[i], screen, hash_descriptor_layout, equals_descriptor_layout)) + return false; + if (!_mesa_set_init(&screen->desc_pool_keys[i], screen, hash_descriptor_pool_key, equals_descriptor_pool_key)) return false; + } + simple_mtx_init(&screen->desc_set_layouts_lock, mtx_plain); + simple_mtx_init(&screen->desc_pool_keys_lock, mtx_plain); return true; } +/* called on screen destroy */ void -zink_descriptor_layouts_deinit(struct zink_context *ctx) +zink_descriptor_layouts_deinit(struct zink_screen *screen) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - hash_table_foreach(&ctx->desc_set_layouts[i], he) { + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { + hash_table_foreach(&screen->desc_set_layouts[i], he) { struct zink_descriptor_layout *layout = he->data; VKSCR(DestroyDescriptorSetLayout)(screen->dev, layout->layout, NULL); - if 
(layout->desc_template) - VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, layout->desc_template, NULL); ralloc_free(layout); - _mesa_hash_table_remove(&ctx->desc_set_layouts[i], he); + _mesa_hash_table_remove(&screen->desc_set_layouts[i], he); } } + simple_mtx_destroy(&screen->desc_set_layouts_lock); + simple_mtx_destroy(&screen->desc_pool_keys_lock); } - +/* fbfetch descriptor is not initialized by default since it is seldom used + * once it is needed, new push layouts/sets are allocated and all previous layouts/sets are destroyed + */ void zink_descriptor_util_init_fbfetch(struct zink_context *ctx) { - if (ctx->dd->has_fbfetch) + if (ctx->dd.has_fbfetch) + return; + + struct zink_screen *screen = zink_screen(ctx->base.screen); + VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[0]->layout, NULL); + //don't free these now, let ralloc free on teardown to avoid invalid access + //ralloc_free(ctx->dd.push_dsl[0]); + //ralloc_free(ctx->dd.push_layout_keys[0]); + ctx->dd.push_dsl[0] = create_gfx_layout(ctx, &ctx->dd.push_layout_keys[0], true); + ctx->dd.has_fbfetch = true; + + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + VkDeviceSize val; + VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, &val); + ctx->dd.db_size[0] = val; + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dd.db_offset); i++) { + VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, i, &val); + ctx->dd.db_offset[i] = val; + } + } +} + +/* called when a shader that uses bindless is created */ +void +zink_descriptors_init_bindless(struct zink_context *ctx) +{ + if (ctx->dd.bindless_init) return; + struct zink_screen *screen = zink_screen(ctx->base.screen); + assert(screen->bindless_layout); + ctx->dd.bindless_init = true; + + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + unsigned bind = ZINK_BIND_DESCRIPTOR; + VkDeviceSize size; + VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, screen->bindless_layout, 
&size); + struct pipe_resource *pres = pipe_buffer_create(&screen->base, bind, 0, size); + ctx->dd.db.bindless_db = zink_resource(pres); + ctx->dd.db.bindless_db_map = pipe_buffer_map(&ctx->base, pres, PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT, &ctx->dd.db.bindless_db_xfer); + zink_batch_bind_db(ctx); + for (unsigned i = 0; i < 4; i++) { + VkDeviceSize offset; + VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, screen->bindless_layout, i, &offset); + ctx->dd.db.bindless_db_offsets[i] = offset; + } + } else { + VkDescriptorPoolCreateInfo dpci = {0}; + VkDescriptorPoolSize sizes[4]; + for (unsigned i = 0; i < 4; i++) { + sizes[i].type = zink_descriptor_type_from_bindless_index(i); + sizes[i].descriptorCount = ZINK_MAX_BINDLESS_HANDLES; + } + dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dpci.pPoolSizes = sizes; + dpci.poolSizeCount = 4; + dpci.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT; + dpci.maxSets = 1; + VkResult result = VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &ctx->dd.t.bindless_pool); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateDescriptorPool failed (%s)", vk_Result_to_str(result)); + return; + } + + zink_descriptor_util_alloc_sets(screen, screen->bindless_layout, ctx->dd.t.bindless_pool, &ctx->dd.t.bindless_set, 1); + } +} + +/* called on context destroy */ +void +zink_descriptors_deinit_bindless(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (ctx->dd.db.bindless_db_xfer) + pipe_buffer_unmap(&ctx->base, ctx->dd.db.bindless_db_xfer); + if (ctx->dd.db.bindless_db) { + struct pipe_resource *pres = &ctx->dd.db.bindless_db->base.b; + pipe_resource_reference(&pres, NULL); + } + } else { + if (ctx->dd.t.bindless_pool) + VKSCR(DestroyDescriptorPool)(screen->dev, ctx->dd.t.bindless_pool, NULL); + } +} +/* entrypoint for updating bindless descriptors: called from draw/dispatch */ +void 
+zink_descriptors_update_bindless(struct zink_context *ctx) +{ struct zink_screen *screen = zink_screen(ctx->base.screen); - VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd->push_dsl[0]->layout, NULL); - ralloc_free(ctx->dd->push_dsl[0]); - ralloc_free(ctx->dd->push_layout_keys[0]); - ctx->dd->push_dsl[0] = create_gfx_layout(ctx, &ctx->dd->push_layout_keys[0], true); - ctx->dd->has_fbfetch = true; - if (screen->descriptor_mode != ZINK_DESCRIPTOR_MODE_LAZY) - zink_descriptor_pool_init(ctx); + VkDescriptorGetInfoEXT info; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; + info.pNext = NULL; + /* bindless descriptors are split between images and buffers */ + for (unsigned i = 0; i < 2; i++) { + if (!ctx->di.bindless_dirty[i]) + continue; + while (util_dynarray_contains(&ctx->di.bindless[i].updates, uint32_t)) { + /* updates are tracked by handle */ + uint32_t handle = util_dynarray_pop(&ctx->di.bindless[i].updates, uint32_t); + bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle); + unsigned binding = i * 2 + !!is_buffer; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (is_buffer) { + size_t size = i ? screen->info.db_props.robustStorageTexelBufferDescriptorSize : screen->info.db_props.robustUniformTexelBufferDescriptorSize; + info.type = i ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + info.data.pSampler = (void*)&ctx->di.bindless[i].db.buffer_infos[handle - ZINK_MAX_BINDLESS_HANDLES]; + VKSCR(GetDescriptorEXT)(screen->dev, &info, size, ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + handle * size); + } else { + info.type = i ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + if (screen->info.db_props.combinedImageSamplerDescriptorSingleArray || i) { + size_t size = i ? 
screen->info.db_props.storageImageDescriptorSize : screen->info.db_props.combinedImageSamplerDescriptorSize; + info.data.pSampler = (void*)&ctx->di.bindless[i].img_infos[handle]; + VKSCR(GetDescriptorEXT)(screen->dev, &info, size, ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + handle * size); + } else { + /* drivers that don't support combinedImageSamplerDescriptorSingleArray must have sampler arrays written in memory as + * + * | array_of_samplers[] | array_of_sampled_images[] | + * + * which means each descriptor's data must be split + */ + uint8_t buf[1024]; + size_t size = screen->info.db_props.combinedImageSamplerDescriptorSize; + info.data.pSampler = (void*)&ctx->di.bindless[i].img_infos[handle]; + VKSCR(GetDescriptorEXT)(screen->dev, &info, size, buf); + memcpy(ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + handle * screen->info.db_props.samplerDescriptorSize, buf, screen->info.db_props.samplerDescriptorSize); + size_t offset = screen->info.db_props.samplerDescriptorSize * ZINK_MAX_BINDLESS_HANDLES; + offset += handle * screen->info.db_props.sampledImageDescriptorSize; + memcpy(ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + offset, &buf[screen->info.db_props.samplerDescriptorSize], screen->info.db_props.sampledImageDescriptorSize); + } + } + } else { + VkWriteDescriptorSet wd; + wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wd.pNext = NULL; + wd.dstSet = ctx->dd.t.bindless_set; + wd.dstBinding = binding; + /* buffer handle ids are offset by ZINK_MAX_BINDLESS_HANDLES for internal tracking */ + wd.dstArrayElement = is_buffer ? 
handle - ZINK_MAX_BINDLESS_HANDLES : handle; + wd.descriptorCount = 1; + wd.descriptorType = zink_descriptor_type_from_bindless_index(wd.dstBinding); + if (is_buffer) + wd.pTexelBufferView = &ctx->di.bindless[i].t.buffer_infos[wd.dstArrayElement]; + else + wd.pImageInfo = &ctx->di.bindless[i].img_infos[handle]; + /* this sucks, but sets must be singly updated to be handled correctly */ + VKSCR(UpdateDescriptorSets)(screen->dev, 1, &wd, 0, NULL); + } + } + } + ctx->di.any_bindless_dirty = 0; } diff --git a/src/gallium/drivers/zink/zink_descriptors.h b/src/gallium/drivers/zink/zink_descriptors.h index a2b56da3dbf..8280a05f194 100644 --- a/src/gallium/drivers/zink/zink_descriptors.h +++ b/src/gallium/drivers/zink/zink_descriptors.h @@ -26,126 +26,16 @@ #ifndef ZINK_DESCRIPTOR_H # define ZINK_DESCRIPTOR_H -#include <vulkan/vulkan.h> -#include "util/u_dynarray.h" -#include "util/u_inlines.h" -#include "util/simple_mtx.h" -#include "zink_batch.h" +#include "zink_types.h" #ifdef __cplusplus extern "C" { #endif -#ifndef ZINK_SHADER_COUNT -#define ZINK_SHADER_COUNT (PIPE_SHADER_TYPES - 1) -#endif - -enum zink_descriptor_type { - ZINK_DESCRIPTOR_TYPE_UBO, - ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, - ZINK_DESCRIPTOR_TYPE_SSBO, - ZINK_DESCRIPTOR_TYPE_IMAGE, - ZINK_DESCRIPTOR_TYPES, -}; - -#define ZINK_MAX_DESCRIPTORS_PER_TYPE (32 * ZINK_SHADER_COUNT) - -struct zink_descriptor_refs { - struct util_dynarray refs; -}; - - -/* hashes of all the named types in a given state */ -struct zink_descriptor_state { - bool valid[ZINK_DESCRIPTOR_TYPES]; - uint32_t state[ZINK_DESCRIPTOR_TYPES]; -}; - -enum zink_descriptor_size_index { - ZDS_INDEX_UBO, - ZDS_INDEX_COMBINED_SAMPLER, - ZDS_INDEX_UNIFORM_TEXELS, - ZDS_INDEX_STORAGE_BUFFER, - ZDS_INDEX_STORAGE_IMAGE, - ZDS_INDEX_STORAGE_TEXELS, -}; - -struct hash_table; - -struct zink_context; -struct zink_image_view; -struct zink_program; -struct zink_resource; -struct zink_sampler; -struct zink_sampler_view; -struct zink_shader; -struct zink_screen; 
- - -struct zink_descriptor_state_key { - bool exists[ZINK_SHADER_COUNT]; - uint32_t state[ZINK_SHADER_COUNT]; -}; - -struct zink_descriptor_layout_key { - unsigned num_descriptors; - VkDescriptorSetLayoutBinding *bindings; - unsigned use_count; -}; - -struct zink_descriptor_layout { - VkDescriptorSetLayout layout; - VkDescriptorUpdateTemplateKHR desc_template; -}; - -struct zink_descriptor_pool_key { - struct zink_descriptor_layout_key *layout; - unsigned num_type_sizes; - VkDescriptorPoolSize *sizes; -}; - -struct zink_descriptor_reference { - void **ref; - bool *invalid; -}; - - -struct zink_descriptor_data { - struct zink_descriptor_state gfx_descriptor_states[ZINK_SHADER_COUNT]; // keep incremental hashes here - struct zink_descriptor_state descriptor_states[2]; // gfx, compute - struct hash_table *descriptor_pools[ZINK_DESCRIPTOR_TYPES]; - - struct zink_descriptor_layout_key *push_layout_keys[2]; //gfx, compute - struct zink_descriptor_pool *push_pool[2]; //gfx, compute - struct zink_descriptor_layout *push_dsl[2]; //gfx, compute - uint8_t last_push_usage[2]; - bool push_valid[2]; - uint32_t push_state[2]; - bool gfx_push_valid[ZINK_SHADER_COUNT]; - uint32_t gfx_push_state[ZINK_SHADER_COUNT]; - struct zink_descriptor_set *last_set[2]; - - VkDescriptorPool dummy_pool; - struct zink_descriptor_layout *dummy_dsl; - VkDescriptorSet dummy_set; - - bool changed[2][ZINK_DESCRIPTOR_TYPES + 1]; - bool has_fbfetch; - struct zink_program *pg[2]; //gfx, compute -}; +#define ZINK_DESCRIPTOR_COMPACT 2 -struct zink_program_descriptor_data { - uint8_t push_usage; - VkDescriptorPoolSize sizes[6]; //zink_descriptor_size_index - struct zink_descriptor_layout_key *layout_key[ZINK_DESCRIPTOR_TYPES]; //push set doesn't need one - uint8_t binding_usage; - struct zink_descriptor_layout *layouts[ZINK_DESCRIPTOR_TYPES + 1]; - VkDescriptorUpdateTemplateKHR push_template; -}; -struct zink_batch_descriptor_data { - struct set *desc_sets; -}; +#define ZINK_BINDLESS_IS_BUFFER(HANDLE) 
(HANDLE >= ZINK_MAX_BINDLESS_HANDLES) static inline enum zink_descriptor_size_index zink_vktype_to_size_idx(VkDescriptorType type) @@ -154,10 +44,13 @@ zink_vktype_to_size_idx(VkDescriptorType type) case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: return ZDS_INDEX_UBO; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: return ZDS_INDEX_COMBINED_SAMPLER; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: return ZDS_INDEX_UNIFORM_TEXELS; + case VK_DESCRIPTOR_TYPE_SAMPLER: + return ZDS_INDEX_SAMPLER; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: return ZDS_INDEX_STORAGE_BUFFER; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -169,6 +62,31 @@ zink_vktype_to_size_idx(VkDescriptorType type) unreachable("unknown type"); } +static inline enum zink_descriptor_size_index_compact +zink_vktype_to_size_idx_comp(VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return ZDS_INDEX_COMP_UBO; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return ZDS_INDEX_COMP_COMBINED_SAMPLER; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + return ZDS_INDEX_COMP_UNIFORM_TEXELS; + case VK_DESCRIPTOR_TYPE_SAMPLER: + return ZDS_INDEX_COMP_SAMPLER; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + return ZDS_INDEX_COMP_STORAGE_BUFFER; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + return ZDS_INDEX_COMP_STORAGE_IMAGE; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return ZDS_INDEX_COMP_STORAGE_TEXELS; + default: break; + } + unreachable("unknown type"); +} + static inline enum zink_descriptor_size_index zink_descriptor_type_to_size_idx(enum zink_descriptor_type type) { @@ -185,42 +103,63 @@ zink_descriptor_type_to_size_idx(enum zink_descriptor_type type) } unreachable("unknown type"); } -unsigned -zink_descriptor_program_num_sizes(struct zink_program *pg, enum zink_descriptor_type type); + +static inline enum 
zink_descriptor_size_index_compact +zink_descriptor_type_to_size_idx_comp(enum zink_descriptor_type type) +{ + switch (type) { + case ZINK_DESCRIPTOR_TYPE_UBO: + return ZDS_INDEX_COMP_UBO; + case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: + return ZDS_INDEX_COMP_COMBINED_SAMPLER; + case ZINK_DESCRIPTOR_TYPE_SSBO: + case ZINK_DESCRIPTOR_TYPE_IMAGE: + default: break; + } + unreachable("unknown type"); +} + +/* bindless descriptor bindings have their own struct indexing */ +ALWAYS_INLINE static VkDescriptorType +zink_descriptor_type_from_bindless_index(unsigned idx) +{ + switch (idx) { + case 0: return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + case 1: return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + case 2: return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case 3: return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + default: + unreachable("unknown index"); + } +} + bool -zink_descriptor_layouts_init(struct zink_context *ctx); +zink_descriptor_layouts_init(struct zink_screen *screen); void -zink_descriptor_layouts_deinit(struct zink_context *ctx); +zink_descriptor_layouts_deinit(struct zink_screen *screen); -uint32_t -zink_get_sampler_view_hash(struct zink_context *ctx, struct zink_sampler_view *sampler_view, bool is_buffer); -uint32_t -zink_get_image_view_hash(struct zink_context *ctx, struct zink_image_view *image_view, bool is_buffer); bool zink_descriptor_util_alloc_sets(struct zink_screen *screen, VkDescriptorSetLayout dsl, VkDescriptorPool pool, VkDescriptorSet *sets, unsigned num_sets); -struct zink_descriptor_layout * -zink_descriptor_util_layout_get(struct zink_context *ctx, enum zink_descriptor_type type, - VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings, - struct zink_descriptor_layout_key **layout_key); void zink_descriptor_util_init_fbfetch(struct zink_context *ctx); bool zink_descriptor_util_push_layouts_get(struct zink_context *ctx, struct zink_descriptor_layout **dsls, struct zink_descriptor_layout_key **layout_keys); -void 
-zink_descriptor_util_init_null_set(struct zink_context *ctx, VkDescriptorSet desc_set); VkImageLayout -zink_descriptor_util_image_layout_eval(const struct zink_resource *res, bool is_compute); - -/* these two can't be called in lazy mode */ +zink_descriptor_util_image_layout_eval(const struct zink_context *ctx, const struct zink_resource *res, bool is_compute); void -zink_descriptor_set_refs_clear(struct zink_descriptor_refs *refs, void *ptr); +zink_descriptors_init_bindless(struct zink_context *ctx); void -zink_descriptor_set_recycle(struct zink_descriptor_set *zds); - - - +zink_descriptors_deinit_bindless(struct zink_context *ctx); +void +zink_descriptors_update_bindless(struct zink_context *ctx); +void +zink_descriptor_shader_get_binding_offsets(const struct zink_shader *shader, unsigned *offsets); +void +zink_descriptor_shader_init(struct zink_screen *screen, struct zink_shader *shader); +void +zink_descriptor_shader_deinit(struct zink_screen *screen, struct zink_shader *shader); bool zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg); @@ -233,14 +172,9 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute); void -zink_context_invalidate_descriptor_state(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned, unsigned); - -uint32_t -zink_get_sampler_view_hash(struct zink_context *ctx, struct zink_sampler_view *sampler_view, bool is_buffer); -uint32_t -zink_get_image_view_hash(struct zink_context *ctx, struct zink_image_view *image_view, bool is_buffer); -struct zink_resource * -zink_get_resource_for_descriptor(struct zink_context *ctx, enum zink_descriptor_type type, enum pipe_shader_type shader, int idx); +zink_context_invalidate_descriptor_state(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned, unsigned); +void +zink_context_invalidate_descriptor_state_compact(struct zink_context *ctx, gl_shader_stage shader, enum 
zink_descriptor_type type, unsigned, unsigned); void zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs); @@ -255,37 +189,8 @@ zink_descriptors_init(struct zink_context *ctx); void zink_descriptors_deinit(struct zink_context *ctx); -//LAZY -bool -zink_descriptor_program_init_lazy(struct zink_context *ctx, struct zink_program *pg); - -void -zink_descriptor_program_deinit_lazy(struct zink_screen *screen, struct zink_program *pg); - -void -zink_descriptors_update_lazy(struct zink_context *ctx, bool is_compute); - - -void -zink_context_invalidate_descriptor_state_lazy(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned, unsigned); - -void -zink_batch_descriptor_deinit_lazy(struct zink_screen *screen, struct zink_batch_state *bs); -void -zink_batch_descriptor_reset_lazy(struct zink_screen *screen, struct zink_batch_state *bs); -bool -zink_batch_descriptor_init_lazy(struct zink_screen *screen, struct zink_batch_state *bs); - -bool -zink_descriptors_init_lazy(struct zink_context *ctx); - -void -zink_descriptors_deinit_lazy(struct zink_context *ctx); - -void -zink_descriptor_set_update_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, VkDescriptorSet set); void -zink_descriptors_update_lazy_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, bool need_push, bool update_push); +zink_descriptors_update_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, uint8_t bind_sets); #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_descriptors_lazy.c b/src/gallium/drivers/zink/zink_descriptors_lazy.c deleted file mode 100644 index a727d8ae3c5..00000000000 --- a/src/gallium/drivers/zink/zink_descriptors_lazy.c +++ /dev/null @@ -1,689 +0,0 @@ -/* - * Copyright © 2021 Valve Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated 
documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> - */ -#include "tgsi/tgsi_from_mesa.h" - - - -#include "zink_context.h" -#include "zink_compiler.h" -#include "zink_descriptors.h" -#include "zink_program.h" -#include "zink_resource.h" -#include "zink_screen.h" - -#define MAX_LAZY_DESCRIPTORS (ZINK_DEFAULT_MAX_DESCS / 10) - -struct zink_descriptor_data_lazy { - struct zink_descriptor_data base; - VkDescriptorUpdateTemplateEntry push_entries[PIPE_SHADER_TYPES]; //gfx+fbfetch - VkDescriptorUpdateTemplateEntry compute_push_entry; - bool push_state_changed[2]; //gfx, compute - uint8_t state_changed[2]; //gfx, compute -}; - -struct zink_descriptor_pool { - VkDescriptorPool pool; - VkDescriptorSet sets[MAX_LAZY_DESCRIPTORS]; - unsigned set_idx; - unsigned sets_alloc; -}; - -struct zink_batch_descriptor_data_lazy { - struct zink_batch_descriptor_data base; - struct util_dynarray overflowed_pools; - struct hash_table pools[ZINK_DESCRIPTOR_TYPES]; - struct zink_descriptor_pool *push_pool[2]; - struct zink_program *pg[2]; //gfx, compute - VkDescriptorSetLayout dsl[2][ZINK_DESCRIPTOR_TYPES]; - unsigned push_usage[2]; - bool has_fbfetch; -}; - -ALWAYS_INLINE static struct zink_descriptor_data_lazy * -dd_lazy(struct zink_context *ctx) -{ - return (struct zink_descriptor_data_lazy*)ctx->dd; -} - -ALWAYS_INLINE static struct zink_batch_descriptor_data_lazy * -bdd_lazy(struct zink_batch_state *bs) -{ - return (struct zink_batch_descriptor_data_lazy*)bs->dd; -} - -static void -init_template_entry(struct zink_shader *shader, enum zink_descriptor_type type, - unsigned idx, unsigned offset, VkDescriptorUpdateTemplateEntry *entry, unsigned *entry_idx, bool flatten_dynamic) -{ - int index = shader->bindings[type][idx].index; - enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage); - entry->dstArrayElement = 0; - entry->dstBinding = shader->bindings[type][idx].binding; - if (shader->bindings[type][idx].type == 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && flatten_dynamic) - /* filter out DYNAMIC type here */ - entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - else - entry->descriptorType = shader->bindings[type][idx].type; - switch (shader->bindings[type][idx].type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - entry->descriptorCount = 1; - entry->offset = offsetof(struct zink_context, di.ubos[stage][index + offset]); - entry->stride = sizeof(VkDescriptorBufferInfo); - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - entry->descriptorCount = shader->bindings[type][idx].size; - entry->offset = offsetof(struct zink_context, di.textures[stage][index + offset]); - entry->stride = sizeof(VkDescriptorImageInfo); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - entry->descriptorCount = shader->bindings[type][idx].size; - entry->offset = offsetof(struct zink_context, di.tbos[stage][index + offset]); - entry->stride = sizeof(VkBufferView); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - entry->descriptorCount = 1; - entry->offset = offsetof(struct zink_context, di.ssbos[stage][index + offset]); - entry->stride = sizeof(VkDescriptorBufferInfo); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - entry->descriptorCount = shader->bindings[type][idx].size; - entry->offset = offsetof(struct zink_context, di.images[stage][index + offset]); - entry->stride = sizeof(VkDescriptorImageInfo); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - entry->descriptorCount = shader->bindings[type][idx].size; - entry->offset = offsetof(struct zink_context, di.texel_images[stage][index + offset]); - entry->stride = sizeof(VkBufferView); - break; - default: - unreachable("unknown type"); - } - (*entry_idx)++; -} - -bool -zink_descriptor_program_init_lazy(struct zink_context *ctx, struct zink_program *pg) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - VkDescriptorSetLayoutBinding 
bindings[ZINK_DESCRIPTOR_TYPES][PIPE_SHADER_TYPES * 32]; - VkDescriptorUpdateTemplateEntry entries[ZINK_DESCRIPTOR_TYPES][PIPE_SHADER_TYPES * 32]; - unsigned num_bindings[ZINK_DESCRIPTOR_TYPES] = {0}; - uint8_t has_bindings = 0; - unsigned push_count = 0; - - struct zink_shader **stages; - if (pg->is_compute) - stages = &((struct zink_compute_program*)pg)->shader; - else { - stages = ((struct zink_gfx_program*)pg)->shaders; - if (stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) { - zink_descriptor_util_init_fbfetch(ctx); - push_count = 1; - } - } - - if (!pg->dd) - pg->dd = (void*)rzalloc(pg, struct zink_program_descriptor_data); - if (!pg->dd) - return false; - - unsigned entry_idx[ZINK_DESCRIPTOR_TYPES] = {0}; - - unsigned num_shaders = pg->is_compute ? 1 : ZINK_SHADER_COUNT; - bool have_push = screen->info.have_KHR_push_descriptor; - for (int i = 0; i < num_shaders; i++) { - struct zink_shader *shader = stages[i]; - if (!shader) - continue; - - enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage); - VkShaderStageFlagBits stage_flags = zink_shader_stage(stage); - for (int j = 0; j < ZINK_DESCRIPTOR_TYPES; j++) { - for (int k = 0; k < shader->num_bindings[j]; k++) { - /* dynamic ubos handled in push */ - if (shader->bindings[j][k].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { - pg->dd->push_usage |= BITFIELD64_BIT(stage); - - push_count++; - continue; - } - - assert(num_bindings[j] < ARRAY_SIZE(bindings[j])); - VkDescriptorSetLayoutBinding *binding = &bindings[j][num_bindings[j]]; - binding->binding = shader->bindings[j][k].binding; - binding->descriptorType = shader->bindings[j][k].type; - binding->descriptorCount = shader->bindings[j][k].size; - binding->stageFlags = stage_flags; - binding->pImmutableSamplers = NULL; - - enum zink_descriptor_size_index idx = zink_vktype_to_size_idx(shader->bindings[j][k].type); - pg->dd->sizes[idx].descriptorCount += shader->bindings[j][k].size; - pg->dd->sizes[idx].type = 
shader->bindings[j][k].type; - switch (shader->bindings[j][k].type) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - init_template_entry(shader, j, k, 0, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - for (unsigned l = 0; l < shader->bindings[j][k].size; l++) - init_template_entry(shader, j, k, l, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY); - break; - default: - break; - } - num_bindings[j]++; - has_bindings |= BITFIELD_BIT(j); - } - } - } - pg->dd->binding_usage = has_bindings; - if (!has_bindings && !push_count) { - ralloc_free(pg->dd); - pg->dd = NULL; - - pg->layout = zink_pipeline_layout_create(screen, pg); - return !!pg->layout; - } - - pg->dsl[pg->num_dsl++] = push_count ? ctx->dd->push_dsl[pg->is_compute]->layout : ctx->dd->dummy_dsl->layout; - if (has_bindings) { - u_foreach_bit(type, has_bindings) { - for (unsigned i = 0; i < type; i++) { - /* push set is always 0 */ - if (!pg->dsl[i + 1]) { - /* inject a null dsl */ - pg->dsl[pg->num_dsl++] = ctx->dd->dummy_dsl->layout; - pg->dd->binding_usage |= BITFIELD_BIT(i); - } - } - pg->dd->layouts[pg->num_dsl] = zink_descriptor_util_layout_get(ctx, type, bindings[type], num_bindings[type], &pg->dd->layout_key[type]); - pg->dd->layout_key[type]->use_count++; - pg->dsl[pg->num_dsl] = pg->dd->layouts[pg->num_dsl]->layout; - pg->num_dsl++; - } - for (unsigned i = 0; i < ARRAY_SIZE(pg->dd->sizes); i++) - pg->dd->sizes[i].descriptorCount *= screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY ? 
MAX_LAZY_DESCRIPTORS : ZINK_DEFAULT_MAX_DESCS; - } - - pg->layout = zink_pipeline_layout_create(screen, pg); - if (!pg->layout) - return false; - if (!screen->info.have_KHR_descriptor_update_template || screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_NOTEMPLATES) - return true; - - VkDescriptorUpdateTemplateCreateInfo template[ZINK_DESCRIPTOR_TYPES + 1] = {0}; - /* type of template */ - VkDescriptorUpdateTemplateType types[ZINK_DESCRIPTOR_TYPES + 1] = {VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET}; - if (have_push && screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) - types[0] = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR; - - /* number of descriptors in template */ - unsigned wd_count[ZINK_DESCRIPTOR_TYPES + 1]; - if (push_count) - wd_count[0] = pg->is_compute ? 1 : (ZINK_SHADER_COUNT + !!ctx->dd->has_fbfetch); - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) - wd_count[i + 1] = pg->dd->layout_key[i] ? pg->dd->layout_key[i]->num_descriptors : 0; - - VkDescriptorUpdateTemplateEntry *push_entries[2] = { - dd_lazy(ctx)->push_entries, - &dd_lazy(ctx)->compute_push_entry, - }; - for (unsigned i = 0; i < pg->num_dsl; i++) { - bool is_push = i == 0; - /* no need for empty templates */ - if (pg->dsl[i] == ctx->dd->dummy_dsl->layout || - (!is_push && pg->dd->layouts[i]->desc_template)) - continue; - template[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - assert(wd_count[i]); - template[i].descriptorUpdateEntryCount = wd_count[i]; - if (is_push) - template[i].pDescriptorUpdateEntries = push_entries[pg->is_compute]; - else - template[i].pDescriptorUpdateEntries = entries[i - 1]; - template[i].templateType = types[i]; - template[i].descriptorSetLayout = pg->dsl[i]; - template[i].pipelineBindPoint = pg->is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; - template[i].pipelineLayout = pg->layout; - template[i].set = i; - VkDescriptorUpdateTemplateKHR t; - if (VKSCR(CreateDescriptorUpdateTemplate)(screen->dev, &template[i], NULL, &t) != VK_SUCCESS) - return false; - if (is_push) - pg->dd->push_template = t; - else - pg->dd->layouts[i]->desc_template = t; - } - return true; -} - -void -zink_descriptor_program_deinit_lazy(struct zink_screen *screen, struct zink_program *pg) -{ - for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_TYPES; i++) { - if (pg->dd->layout_key[i]) - pg->dd->layout_key[i]->use_count--; - } - if (pg->dd && pg->dd->push_template) - VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, pg->dd->push_template, NULL); - ralloc_free(pg->dd); -} - -static VkDescriptorPool -create_pool(struct zink_screen *screen, unsigned num_type_sizes, VkDescriptorPoolSize *sizes, unsigned flags) -{ - VkDescriptorPool pool; - VkDescriptorPoolCreateInfo dpci = {0}; - dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - dpci.pPoolSizes = sizes; - dpci.poolSizeCount = num_type_sizes; - dpci.flags = flags; - dpci.maxSets = MAX_LAZY_DESCRIPTORS; - if (VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &pool) != VK_SUCCESS) { - debug_printf("vkCreateDescriptorPool failed\n"); - return VK_NULL_HANDLE; - } - return pool; -} - -static struct zink_descriptor_pool * -get_descriptor_pool_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute); - -static struct zink_descriptor_pool * -check_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool *pool, struct hash_entry *he, struct zink_program *pg, - enum zink_descriptor_type type, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - /* allocate up to $current * 10, e.g., 10 -> 100 or 100 -> 1000 */ - if (pool->set_idx == 
pool->sets_alloc) { - unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100); - if (!sets_to_alloc) { - /* overflowed pool: queue for deletion on next reset */ - util_dynarray_append(&bdd->overflowed_pools, struct zink_descriptor_pool*, pool); - _mesa_hash_table_remove(&bdd->pools[type], he); - return get_descriptor_pool_lazy(ctx, pg, type, bdd, is_compute); - } - if (!zink_descriptor_util_alloc_sets(screen, pg->dsl[type + 1], - pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc)) - return NULL; - pool->sets_alloc += sets_to_alloc; - } - return pool; -} - -static struct zink_descriptor_pool * -create_push_pool(struct zink_screen *screen, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute, bool has_fbfetch) -{ - struct zink_descriptor_pool *pool = rzalloc(bdd, struct zink_descriptor_pool); - VkDescriptorPoolSize sizes[2]; - sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - if (is_compute) - sizes[0].descriptorCount = MAX_LAZY_DESCRIPTORS; - else { - sizes[0].descriptorCount = ZINK_SHADER_COUNT * MAX_LAZY_DESCRIPTORS; - sizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - sizes[1].descriptorCount = MAX_LAZY_DESCRIPTORS; - } - pool->pool = create_pool(screen, !is_compute && has_fbfetch ? 
2 : 1, sizes, 0); - return pool; -} - -static struct zink_descriptor_pool * -check_push_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool *pool, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - /* allocate up to $current * 10, e.g., 10 -> 100 or 100 -> 1000 */ - if (pool->set_idx == pool->sets_alloc || unlikely(ctx->dd->has_fbfetch != bdd->has_fbfetch)) { - unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100); - if (!sets_to_alloc || unlikely(ctx->dd->has_fbfetch != bdd->has_fbfetch)) { - /* overflowed pool: queue for deletion on next reset */ - util_dynarray_append(&bdd->overflowed_pools, struct zink_descriptor_pool*, pool); - bdd->push_pool[is_compute] = create_push_pool(screen, bdd, is_compute, ctx->dd->has_fbfetch); - return check_push_pool_alloc(ctx, bdd->push_pool[is_compute], bdd, is_compute); - } - if (!zink_descriptor_util_alloc_sets(screen, ctx->dd->push_dsl[is_compute]->layout, - pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc)) - return NULL; - pool->sets_alloc += sets_to_alloc; - } - return pool; -} - -static struct zink_descriptor_pool * -get_descriptor_pool_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - struct hash_entry *he = _mesa_hash_table_search(&bdd->pools[type], pg->dd->layout_key[type]); - struct zink_descriptor_pool *pool; - if (he) { - pool = he->data; - return check_pool_alloc(ctx, pool, he, pg, type, bdd, is_compute); - } - pool = rzalloc(bdd, struct zink_descriptor_pool); - if (!pool) - return NULL; - unsigned idx = zink_descriptor_type_to_size_idx(type); - VkDescriptorPoolSize *size = &pg->dd->sizes[idx]; - /* this is a sampler/image set with no images only texels */ - if (!size->descriptorCount) - size++; 
- pool->pool = create_pool(screen, zink_descriptor_program_num_sizes(pg, type), size, 0); - if (!pool->pool) { - ralloc_free(pool); - return NULL; - } - _mesa_hash_table_insert(&bdd->pools[type], pg->dd->layout_key[type], pool); - return check_pool_alloc(ctx, pool, he, pg, type, bdd, is_compute); -} - -ALWAYS_INLINE static VkDescriptorSet -get_descriptor_set_lazy(struct zink_descriptor_pool *pool) -{ - if (!pool) - return VK_NULL_HANDLE; - - assert(pool->set_idx < pool->sets_alloc); - return pool->sets[pool->set_idx++]; -} - -static bool -populate_sets(struct zink_context *ctx, struct zink_batch_descriptor_data_lazy *bdd, - struct zink_program *pg, uint8_t *changed_sets, bool need_push, VkDescriptorSet *sets) -{ - if (need_push && !zink_screen(ctx->base.screen)->info.have_KHR_push_descriptor) { - struct zink_descriptor_pool *pool = check_push_pool_alloc(ctx, bdd->push_pool[pg->is_compute], bdd, pg->is_compute); - sets[0] = get_descriptor_set_lazy(pool); - if (!sets[0]) - return false; - } else - sets[0] = VK_NULL_HANDLE; - u_foreach_bit(type, *changed_sets) { - if (pg->dd->layout_key[type]) { - struct zink_descriptor_pool *pool = get_descriptor_pool_lazy(ctx, pg, type, bdd, pg->is_compute); - sets[type + 1] = get_descriptor_set_lazy(pool); - } else - sets[type + 1] = ctx->dd->dummy_set; - if (!sets[type + 1]) - return false; - } - return true; -} - -void -zink_descriptor_set_update_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, VkDescriptorSet set) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - VKCTX(UpdateDescriptorSetWithTemplate)(screen->dev, set, pg->dd->layouts[type + 1]->desc_template, ctx); -} - -void -zink_descriptors_update_lazy_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, bool need_push, bool update_push) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - struct zink_batch *batch = &ctx->batch; - struct zink_batch_state *bs = ctx->batch.state; - struct 
zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs); - struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; - VkDescriptorSet desc_sets[5]; - if (!populate_sets(ctx, bdd, pg, &changed_sets, need_push, desc_sets)) { - debug_printf("ZINK: couldn't get descriptor sets!\n"); - return; - } - /* no flushing allowed */ - assert(ctx->batch.state == bs); - - if (pg->dd->binding_usage && changed_sets) { - u_foreach_bit(type, changed_sets) { - if (pg->dd->layout_key[type]) - VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, desc_sets[type + 1], pg->dd->layouts[type + 1]->desc_template, ctx); - assert(type + 1 < pg->num_dsl); - VKSCR(CmdBindDescriptorSets)(bs->cmdbuf, - is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, - /* set index incremented by 1 to account for push set */ - pg->layout, type + 1, 1, &desc_sets[type + 1], - 0, NULL); - } - dd_lazy(ctx)->state_changed[is_compute] = false; - } - - if (update_push) { - if (pg->dd->push_usage && dd_lazy(ctx)->push_state_changed[is_compute]) { - if (screen->info.have_KHR_push_descriptor) - VKSCR(CmdPushDescriptorSetWithTemplateKHR)(batch->state->cmdbuf, pg->dd->push_template, - pg->layout, 0, ctx); - else { - assert(desc_sets[0]); - VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, desc_sets[0], pg->dd->push_template, ctx); - VKSCR(CmdBindDescriptorSets)(batch->state->cmdbuf, - is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, - pg->layout, 0, 1, &desc_sets[0], - 0, NULL); - } - dd_lazy(ctx)->push_state_changed[is_compute] = false; - } else if (dd_lazy(ctx)->push_state_changed[is_compute]) { - VKSCR(CmdBindDescriptorSets)(bs->cmdbuf, - is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, - pg->layout, 0, 1, &ctx->dd->dummy_set, - 0, NULL); - dd_lazy(ctx)->push_state_changed[is_compute] = false; - } - } - bdd->pg[is_compute] = pg; - ctx->dd->pg[is_compute] = pg; -} - -void -zink_descriptors_update_lazy(struct zink_context *ctx, bool is_compute) -{ - struct zink_batch_state *bs = ctx->batch.state; - struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs); - struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; - - bool batch_changed = !bdd->pg[is_compute]; - if (batch_changed) { - /* update all sets and bind null sets */ - dd_lazy(ctx)->state_changed[is_compute] = pg->dd->binding_usage; - dd_lazy(ctx)->push_state_changed[is_compute] = !!pg->dd->push_usage; - } - - if (pg != bdd->pg[is_compute]) { - /* if we don't already know that we have to update all sets, - * check to see if any dsls changed - * - * also always update the dsl pointers on program change - */ - for (unsigned i = 0; i < ARRAY_SIZE(bdd->dsl[is_compute]); i++) { - /* push set is already detected, start at 1 */ - if (bdd->dsl[is_compute][i] != pg->dsl[i + 1]) - dd_lazy(ctx)->state_changed[is_compute] |= BITFIELD_BIT(i); - bdd->dsl[is_compute][i] = pg->dsl[i + 1]; - } - dd_lazy(ctx)->push_state_changed[is_compute] |= bdd->push_usage[is_compute] != pg->dd->push_usage; - bdd->push_usage[is_compute] = pg->dd->push_usage; - } - bdd->pg[is_compute] = pg; - - uint8_t changed_sets = pg->dd->binding_usage & dd_lazy(ctx)->state_changed[is_compute]; - bool need_push = pg->dd->push_usage && - (dd_lazy(ctx)->push_state_changed[is_compute] || batch_changed); - zink_descriptors_update_lazy_masked(ctx, is_compute, changed_sets, need_push, true); -} - -void -zink_context_invalidate_descriptor_state_lazy(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned start, unsigned count) -{ - if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start) - 
dd_lazy(ctx)->push_state_changed[shader == PIPE_SHADER_COMPUTE] = true; - else - dd_lazy(ctx)->state_changed[shader == PIPE_SHADER_COMPUTE] |= BITFIELD_BIT(type); -} - -void -zink_batch_descriptor_deinit_lazy(struct zink_screen *screen, struct zink_batch_state *bs) -{ - if (!bs->dd) - return; - struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs); - if (screen->info.have_KHR_descriptor_update_template) { - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - hash_table_foreach(&bdd->pools[i], entry) { - struct zink_descriptor_pool *pool = (void*)entry->data; - VKSCR(DestroyDescriptorPool)(screen->dev, pool->pool, NULL); - } - } - if (bdd->push_pool[0]) - VKSCR(DestroyDescriptorPool)(screen->dev, bdd->push_pool[0]->pool, NULL); - if (bdd->push_pool[1]) - VKSCR(DestroyDescriptorPool)(screen->dev, bdd->push_pool[1]->pool, NULL); - } - ralloc_free(bs->dd); -} - -static void -pool_destroy(struct zink_screen *screen, struct zink_descriptor_pool *pool) -{ - VKSCR(DestroyDescriptorPool)(screen->dev, pool->pool, NULL); - ralloc_free(pool); -} - -void -zink_batch_descriptor_reset_lazy(struct zink_screen *screen, struct zink_batch_state *bs) -{ - if (!screen->info.have_KHR_descriptor_update_template) - return; - struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs); - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - hash_table_foreach(&bdd->pools[i], entry) { - const struct zink_descriptor_layout_key *key = entry->key; - struct zink_descriptor_pool *pool = (void*)entry->data; - if (key->use_count) - pool->set_idx = 0; - else { - pool_destroy(screen, pool); - _mesa_hash_table_remove(&bdd->pools[i], entry); - } - } - } - for (unsigned i = 0; i < 2; i++) { - bdd->pg[i] = NULL; - if (bdd->push_pool[i]) - bdd->push_pool[i]->set_idx = 0; - } - while (util_dynarray_num_elements(&bdd->overflowed_pools, struct zink_descriptor_pool*)) { - struct zink_descriptor_pool *pool = util_dynarray_pop(&bdd->overflowed_pools, struct zink_descriptor_pool*); - 
pool_destroy(screen, pool); - } -} - -bool -zink_batch_descriptor_init_lazy(struct zink_screen *screen, struct zink_batch_state *bs) -{ - bs->dd = (void*)rzalloc(bs, struct zink_batch_descriptor_data_lazy); - if (!bs->dd) - return false; - if (!screen->info.have_KHR_descriptor_update_template) - return true; - struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs); - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) { - if (!_mesa_hash_table_init(&bdd->pools[i], bs->dd, _mesa_hash_pointer, _mesa_key_pointer_equal)) - return false; - } - util_dynarray_init(&bdd->overflowed_pools, bs->dd); - if (!screen->info.have_KHR_push_descriptor) { - bdd->push_pool[0] = create_push_pool(screen, bdd, false, false); - bdd->push_pool[1] = create_push_pool(screen, bdd, true, false); - } - return true; -} - -static void -init_push_template_entry(VkDescriptorUpdateTemplateEntry *entry, unsigned i) -{ - entry->dstBinding = tgsi_processor_to_shader_stage(i); - entry->descriptorCount = 1; - entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - entry->offset = offsetof(struct zink_context, di.ubos[i][0]); - entry->stride = sizeof(VkDescriptorBufferInfo); -} - -bool -zink_descriptors_init_lazy(struct zink_context *ctx) -{ - struct zink_screen *screen = zink_screen(ctx->base.screen); - ctx->dd = (void*)rzalloc(ctx, struct zink_descriptor_data_lazy); - if (!ctx->dd) - return false; - - if (screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_NOTEMPLATES) - printf("ZINK: CACHED/NOTEMPLATES DESCRIPTORS\n"); - else if (screen->info.have_KHR_descriptor_update_template) { - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - VkDescriptorUpdateTemplateEntry *entry = &dd_lazy(ctx)->push_entries[i]; - init_push_template_entry(entry, i); - } - init_push_template_entry(&dd_lazy(ctx)->compute_push_entry, PIPE_SHADER_COMPUTE); - VkDescriptorUpdateTemplateEntry *entry = &dd_lazy(ctx)->push_entries[ZINK_SHADER_COUNT]; //fbfetch - entry->dstBinding = ZINK_FBFETCH_BINDING; - entry->descriptorCount 
= 1; - entry->descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - entry->offset = offsetof(struct zink_context, di.fbfetch); - entry->stride = sizeof(VkDescriptorImageInfo); - if (screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) - printf("ZINK: USING LAZY DESCRIPTORS\n"); - } - struct zink_descriptor_layout_key *layout_key; - if (!zink_descriptor_util_push_layouts_get(ctx, ctx->dd->push_dsl, ctx->dd->push_layout_keys)) - return false; - - ctx->dd->dummy_dsl = zink_descriptor_util_layout_get(ctx, 0, NULL, 0, &layout_key); - if (!ctx->dd->dummy_dsl) - return false; - VkDescriptorPoolSize null_size = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1}; - ctx->dd->dummy_pool = create_pool(screen, 1, &null_size, 0); - zink_descriptor_util_alloc_sets(screen, ctx->dd->dummy_dsl->layout, - ctx->dd->dummy_pool, &ctx->dd->dummy_set, 1); - zink_descriptor_util_init_null_set(ctx, ctx->dd->dummy_set); - return true; -} - -void -zink_descriptors_deinit_lazy(struct zink_context *ctx) -{ - if (ctx->dd) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - if (ctx->dd->dummy_pool) - VKSCR(DestroyDescriptorPool)(screen->dev, ctx->dd->dummy_pool, NULL); - if (ctx->dd->push_dsl[0]) - VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd->push_dsl[0]->layout, NULL); - if (ctx->dd->push_dsl[1]) - VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd->push_dsl[1]->layout, NULL); - } - ralloc_free(ctx->dd); -} diff --git a/src/gallium/drivers/zink/zink_device_info.py b/src/gallium/drivers/zink/zink_device_info.py index 3ac8011336a..ec265deccb9 100644 --- a/src/gallium/drivers/zink/zink_device_info.py +++ b/src/gallium/drivers/zink/zink_device_info.py @@ -60,21 +60,63 @@ import sys # - guard: adds a #if defined(`extension_name`)/#endif guard around the code generated for this Extension. 
EXTENSIONS = [ Extension("VK_KHR_maintenance1", - required=True), + required=True), Extension("VK_KHR_maintenance2"), Extension("VK_KHR_maintenance3"), + Extension("VK_KHR_maintenance4", + alias="maint4", + features=True), + Extension("VK_KHR_maintenance5", + alias="maint5", + features=True, properties=True), + Extension("VK_KHR_maintenance6", + alias="maint6", + features=True, properties=True), Extension("VK_KHR_external_memory"), Extension("VK_KHR_external_memory_fd"), + Extension("VK_KHR_vulkan_memory_model"), + Extension("VK_KHR_workgroup_memory_explicit_layout", alias="explicit_layout", features=True), + Extension("VK_KHR_pipeline_executable_properties", + alias="pipestats", + features=True), + Extension("VK_KHR_external_semaphore_fd"), + Extension("VK_KHR_create_renderpass2", + required=True), + Extension("VK_KHR_synchronization2", + alias="sync2", + features=True), + Extension("VK_KHR_external_memory_win32"), + Extension("VK_KHR_external_semaphore_win32"), Extension("VK_EXT_external_memory_dma_buf"), + Extension("VK_KHR_buffer_device_address", + alias="bda", + features=True), + Extension("VK_EXT_external_memory_host", alias="ext_host_mem", properties=True), Extension("VK_EXT_queue_family_foreign"), + Extension("VK_KHR_swapchain_mutable_format"), + Extension("VK_KHR_incremental_present"), Extension("VK_EXT_provoking_vertex", - alias="pv", - features=True, - properties=True, - conditions=["$feats.provokingVertexLast"]), + alias="pv", + features=True, + properties=True, + conditions=["$feats.provokingVertexLast"]), Extension("VK_EXT_shader_viewport_index_layer"), Extension("VK_KHR_get_memory_requirements2"), Extension("VK_EXT_post_depth_coverage"), + Extension("VK_EXT_depth_clip_control", + alias="clip_control", + features=True), + Extension("VK_EXT_depth_clamp_zero_one", + alias="clamp_01", + features=True), + Extension("VK_EXT_shader_subgroup_ballot"), + Extension("VK_EXT_shader_subgroup_vote"), + Extension("VK_EXT_shader_atomic_float", + 
alias="atomic_float", + features=True), + Extension("VK_KHR_shader_atomic_int64", + alias="atomic_int", + features=True), Extension("VK_KHR_8bit_storage", alias="storage_8bit", features=True, @@ -83,126 +125,205 @@ EXTENSIONS = [ alias="storage_16bit", features=True, conditions=["$feats.storageBuffer16BitAccess"]), + Extension("VK_EXT_image_2d_view_of_3d", + alias="view2d", + features=True), Extension("VK_KHR_driver_properties", - alias="driver", - properties=True), + alias="driver", + properties=True), Extension("VK_EXT_memory_budget"), + Extension("VK_EXT_memory_priority", alias="memprio", features=True), + Extension("VK_EXT_pageable_device_local_memory", alias="mempage", features=True), Extension("VK_KHR_draw_indirect_count"), + Extension("VK_EXT_dynamic_rendering_unused_attachments", alias="unused", features=True), + Extension("VK_EXT_shader_object", alias="shobj", features=True, properties=True), + Extension("VK_EXT_attachment_feedback_loop_layout", + alias="feedback_loop", + features=True), + Extension("VK_EXT_attachment_feedback_loop_dynamic_state", alias="feedback_dyn", features=True), + Extension("VK_NV_device_generated_commands", alias="nv_dgc", features=True, properties=True), Extension("VK_EXT_fragment_shader_interlock", - alias="interlock", - features=True, - conditions=["$feats.fragmentShaderSampleInterlock", "$feats.fragmentShaderPixelInterlock"]), + alias="interlock", + features=True, + conditions=["$feats.fragmentShaderSampleInterlock", "$feats.fragmentShaderPixelInterlock"]), Extension("VK_EXT_sample_locations", - alias="sample_locations", - properties=True), - Extension("VK_EXT_conservative_rasterization", - alias="cons_raster", - properties=True, - conditions=["$props.fullyCoveredFragmentShaderInputVariable"]), + alias="sample_locations", + properties=True), Extension("VK_KHR_shader_draw_parameters"), Extension("VK_KHR_sampler_mirror_clamp_to_edge"), + Extension("VK_EXT_descriptor_buffer", alias="db", features=True, properties=True), 
Extension("VK_EXT_conditional_rendering", - alias="cond_render", - features=True, - conditions=["$feats.conditionalRendering"]), + alias="cond_render", + features=True, + conditions=["$feats.conditionalRendering"]), Extension("VK_EXT_transform_feedback", - alias="tf", - properties=True, - features=True, - conditions=["$feats.transformFeedback"]), + alias="tf", + properties=True, + features=True, + conditions=["$feats.transformFeedback"]), Extension("VK_EXT_index_type_uint8", - alias="index_uint8", - features=True, - conditions=["$feats.indexTypeUint8"]), + alias="index_uint8", + features=True, + conditions=["$feats.indexTypeUint8"]), + Extension("VK_KHR_image_format_list"), + Extension("VK_KHR_sampler_ycbcr_conversion"), Extension("VK_KHR_imageless_framebuffer", - alias="imgless", - features=True, - conditions=["$feats.imagelessFramebuffer"]), + alias="imgless", + features=True, + required=True), Extension("VK_EXT_robustness2", - alias="rb2", - properties=True, - features=True, - conditions=["$feats.nullDescriptor"]), + alias="rb2", + properties=True, + features=True, + conditions=["$feats.nullDescriptor"]), + Extension("VK_EXT_image_robustness", + alias="rb_image", + features=True), Extension("VK_EXT_image_drm_format_modifier"), Extension("VK_EXT_vertex_attribute_divisor", - alias="vdiv", - properties=True, - features=True, - conditions=["$feats.vertexAttributeInstanceRateDivisor"]), + alias="vdiv", + properties=True, + features=True, + conditions=["$feats.vertexAttributeInstanceRateDivisor"]), Extension("VK_EXT_calibrated_timestamps"), + Extension("VK_NV_linear_color_attachment", + alias="linear_color", + features=True), + Extension("VK_KHR_dynamic_rendering", + alias="dynamic_render", + features=True), + Extension("VK_KHR_dynamic_rendering_local_read", + alias="drlr", + features=True), + Extension("VK_EXT_multisampled_render_to_single_sampled", + alias="msrtss", + features=True), Extension("VK_KHR_shader_clock", - alias="shader_clock", - features=True, - 
conditions=["$feats.shaderSubgroupClock"]), + alias="shader_clock", + features=True, + conditions=["$feats.shaderSubgroupClock"]), + Extension("VK_INTEL_shader_integer_functions2", + alias="shader_int_fns2", + features=True, + conditions=["$feats.shaderIntegerFunctions2"]), Extension("VK_EXT_sampler_filter_minmax", - alias="reduction", - properties=True), + alias="reduction", + properties=True, + conditions=["$props.filterMinmaxSingleComponentFormats"]), Extension("VK_EXT_custom_border_color", - alias="border_color", - properties=True, - features=True, - conditions=["$feats.customBorderColors"]), + alias="border_color", + properties=True, + features=True, + conditions=["$feats.customBorderColors"]), + Extension("VK_EXT_non_seamless_cube_map", + alias="nonseamless", + features=True), + Extension("VK_EXT_border_color_swizzle", + alias="border_swizzle", + features=True), Extension("VK_EXT_blend_operation_advanced", - alias="blend", - properties=True, - # TODO: we can probably support non-premul here with some work? - conditions=["$props.advancedBlendNonPremultipliedSrcColor", "$props.advancedBlendNonPremultipliedDstColor"]), + alias="blend", + properties=True, + # TODO: we can probably support non-premul here with some work? 
+ conditions=["$props.advancedBlendNonPremultipliedSrcColor", "$props.advancedBlendNonPremultipliedDstColor"]), Extension("VK_EXT_extended_dynamic_state", - alias="dynamic_state", - features=True, - conditions=["$feats.extendedDynamicState"]), + alias="dynamic_state", + features=True, + conditions=["$feats.extendedDynamicState"]), Extension("VK_EXT_extended_dynamic_state2", - alias="dynamic_state2", - features=True, - conditions=["$feats.extendedDynamicState2"]), + alias="dynamic_state2", + features=True, + conditions=["$feats.extendedDynamicState2"]), + Extension("VK_EXT_extended_dynamic_state3", + alias="dynamic_state3", + properties=True, + features=True), Extension("VK_EXT_pipeline_creation_cache_control", - alias="pipeline_cache_control", - features=True, - conditions=["$feats.pipelineCreationCacheControl"]), + alias="pipeline_cache_control", + features=True, + conditions=["$feats.pipelineCreationCacheControl"]), Extension("VK_EXT_shader_stencil_export", - alias="stencil_export"), - Extension("VK_EXTX_portability_subset", - alias="portability_subset_extx", - nonstandard=True, - properties=True, - features=True, - guard=True), - Extension("VK_KHR_timeline_semaphore"), + alias="stencil_export"), + Extension("VK_KHR_portability_subset", + alias="portability_subset", + features=True, + guard=True), + Extension("VK_NV_compute_shader_derivatives", + alias="shader_derivs", + features=True, + conditions=["$feats.computeDerivativeGroupQuads", "$feats.computeDerivativeGroupLinear"]), + Extension("VK_KHR_timeline_semaphore", + alias="timeline", + features=True), + Extension("VK_EXT_color_write_enable", + alias="cwrite", + features=True), Extension("VK_EXT_4444_formats", - alias="format_4444", - features=True), + alias="format_4444", + features=True), + Extension("VK_EXT_host_image_copy", + alias="hic", + features=True, + properties=True), Extension("VK_EXT_scalar_block_layout", - alias="scalar_block_layout", - features=True, - conditions=["$feats.scalarBlockLayout"]), + 
alias="scalar_block_layout", + features=True, + conditions=["$feats.scalarBlockLayout"]), Extension("VK_KHR_swapchain"), + Extension("VK_EXT_rasterization_order_attachment_access", + alias="rast_order_access", + features=True, + conditions=["$feats.rasterizationOrderColorAttachmentAccess"]), Extension("VK_KHR_shader_float16_int8", alias="shader_float16_int8", features=True), Extension("VK_EXT_multi_draw", alias="multidraw", - features=True, - properties=True, - conditions=["$feats.multiDraw"]), + features=True, + properties=True, + conditions=["$feats.multiDraw"]), + Extension("VK_EXT_primitives_generated_query", + alias="primgen", + features=True), + Extension("VK_KHR_pipeline_library"), + Extension("VK_EXT_graphics_pipeline_library", + alias="gpl", + features=True, + properties=True), Extension("VK_KHR_push_descriptor", - alias="push", - properties=True), + alias="push", + properties=True), Extension("VK_KHR_descriptor_update_template", - alias="template"), + alias="template", required=True), Extension("VK_EXT_line_rasterization", - alias="line_rast", - properties=True, - features=True), + alias="line_rast", + properties=True, + features=True), Extension("VK_EXT_vertex_input_dynamic_state", - alias="vertex_input", - features=True, - conditions=["$feats.vertexInputDynamicState"]), + alias="vertex_input", + features=True, + conditions=["$feats.vertexInputDynamicState"]), Extension("VK_EXT_primitive_topology_list_restart", - alias="list_restart", - features=True, - conditions=["$feats.primitiveTopologyListRestart"]), + alias="list_restart", + features=True, + conditions=["$feats.primitiveTopologyListRestart"]), Extension("VK_KHR_dedicated_allocation", - alias="dedicated"), + alias="dedicated"), + Extension("VK_EXT_descriptor_indexing", + alias="desc_indexing", + features=True, + properties=True, + conditions=["$feats.descriptorBindingPartiallyBound"]), + Extension("VK_EXT_depth_clip_enable", + alias="depth_clip_enable", + features=True), + 
Extension("VK_EXT_shader_demote_to_helper_invocation", + alias="demote", + features=True, + conditions=["$feats.shaderDemoteToHelperInvocation"]), + Extension("VK_KHR_shader_float_controls", + alias="float_controls"), + Extension("VK_KHR_format_feature_flags2"), ] # constructor: Versions(device_version(major, minor, patch), struct_version(major, minor)) @@ -214,14 +335,13 @@ EXTENSIONS = [ VERSIONS = [ Version((1,1,0), (1,1)), Version((1,2,0), (1,2)), + Version((1,3,0), (1,3)), ] # There exists some inconsistencies regarding the enum constants, fix them. # This is basically generated_code.replace(key, value). REPLACEMENTS = { - "ROBUSTNESS2": "ROBUSTNESS_2", "PROPERTIES_PROPERTIES": "PROPERTIES", - "EXTENDED_DYNAMIC_STATE2": "EXTENDED_DYNAMIC_STATE_2", } @@ -252,7 +372,16 @@ header_code = """ #include "util/u_memory.h" -#include <vulkan/vulkan.h> +#include <vulkan/vulkan_core.h> + +#ifdef VK_ENABLE_BETA_EXTENSIONS +#include <vulkan/vulkan_beta.h> +#endif + +#ifdef _WIN32 +#include <windows.h> +#include <vulkan/vulkan_win32.h> +#endif struct zink_screen; @@ -269,6 +398,7 @@ struct zink_device_info { %endfor VkPhysicalDeviceFeatures2 feats; + VkPhysicalDeviceSubgroupProperties subgroup; %for version in versions: VkPhysicalDeviceVulkan${version.struct()}Features feats${version.struct()}; %endfor @@ -279,6 +409,7 @@ struct zink_device_info { %endfor VkPhysicalDeviceMemoryProperties mem_props; + VkPhysicalDeviceIDProperties deviceid_props; %for ext in extensions: <%helpers:guard ext="${ext}"> @@ -319,6 +450,7 @@ void zink_stub_${cmd.lstrip("vk")}(void); impl_code = """ <%namespace name="helpers" file="helpers"/> +#include "vk_enum_to_str.h" #include "zink_device_info.h" #include "zink_screen.h" @@ -334,14 +466,22 @@ zink_get_physical_device_info(struct zink_screen *screen) uint32_t num_extensions = 0; // get device memory properties - vkGetPhysicalDeviceMemoryProperties(screen->pdev, &info->mem_props); + screen->vk.GetPhysicalDeviceMemoryProperties(screen->pdev, 
&info->mem_props); // enumerate device supported extensions - if (vkEnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, NULL) == VK_SUCCESS) { + VkResult result = screen->vk.EnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, NULL); + if (result != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumerateDeviceExtensionProperties failed (%s)", vk_Result_to_str(result)); + } else { if (num_extensions > 0) { VkExtensionProperties *extensions = MALLOC(sizeof(VkExtensionProperties) * num_extensions); if (!extensions) goto fail; - vkEnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, extensions); + result = screen->vk.EnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, extensions); + if (result != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumerateDeviceExtensionProperties failed (%s)", vk_Result_to_str(result)); + } for (uint32_t i = 0; i < num_extensions; ++i) { %for ext in extensions: @@ -361,22 +501,6 @@ zink_get_physical_device_info(struct zink_screen *screen) } } - %for version in versions: - if (${version.version()} <= screen->vk_version) { - %for ext in extensions: - %if ext.core_since and ext.core_since.struct_version == version.struct_version: - <%helpers:guard ext="${ext}"> - %if not (ext.has_features or ext.has_properties): - info->have_${ext.name_with_vendor()} = true; - %else: - support_${ext.name_with_vendor()} = true; - %endif - </%helpers:guard> - %endif - %endfor - } - %endfor - // get device features if (screen->vk.GetPhysicalDeviceFeatures2) { // check for device extension features @@ -399,7 +523,11 @@ zink_get_physical_device_info(struct zink_screen *screen) %for ext in extensions: %if ext.has_features: <%helpers:guard ext="${ext}"> +%if ext.features_promoted: + if (support_${ext.name_with_vendor()} && !info->have_vulkan${ext.core_since.struct()}) { +%else: if (support_${ext.name_with_vendor()}) { +%endif 
info->${ext.field("feats")}.sType = ${ext.stype("FEATURES")}; info->${ext.field("feats")}.pNext = info->feats.pNext; info->feats.pNext = &info->${ext.field("feats")}; @@ -410,7 +538,7 @@ zink_get_physical_device_info(struct zink_screen *screen) screen->vk.GetPhysicalDeviceFeatures2(screen->pdev, &info->feats); } else { - vkGetPhysicalDeviceFeatures(screen->pdev, &info->feats.features); + screen->vk.GetPhysicalDeviceFeatures(screen->pdev, &info->feats.features); } // check for device properties @@ -434,7 +562,11 @@ zink_get_physical_device_info(struct zink_screen *screen) %for ext in extensions: %if ext.has_properties: <%helpers:guard ext="${ext}"> +%if ext.properties_promoted: + if (support_${ext.name_with_vendor()} && !info->have_vulkan${ext.core_since.struct()}) { +%else: if (support_${ext.name_with_vendor()}) { +%endif info->${ext.field("props")}.sType = ${ext.stype("PROPERTIES")}; info->${ext.field("props")}.pNext = props.pNext; props.pNext = &info->${ext.field("props")}; @@ -443,10 +575,58 @@ zink_get_physical_device_info(struct zink_screen *screen) %endif %endfor + if (screen->vk_version < VK_MAKE_VERSION(1,2,0) && screen->instance_info.have_KHR_external_memory_capabilities) { + info->deviceid_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; + info->deviceid_props.pNext = props.pNext; + props.pNext = &info->deviceid_props; + } + + if (screen->vk_version >= VK_MAKE_VERSION(1,1,0)) { + info->subgroup.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + info->subgroup.pNext = props.pNext; + props.pNext = &info->subgroup; + } + // note: setting up local VkPhysicalDeviceProperties2. screen->vk.GetPhysicalDeviceProperties2(screen->pdev, &props); } + /* We re-apply the fields from VkPhysicalDeviceVulkanXYFeatures struct + * onto their respective fields in the VkPhysicalDeviceExtensionNameFeatures + * struct if the former is provided by the VK implementation. 
+ * + * As for why this is done: the spec mentions that once an extension is + * promoted to core and its feature fields are added in VulkanXYFeatures, + * including both ExtensionNameFeatures and VulkanXYFeatures at the same + * time is prohibited when using vkGetPhysicalDeviceFeatures2. + */ +%for ext in extensions: +%if ext.features_promoted: + if (info->have_vulkan${ext.core_since.struct()}) { + %for field in registry.get_registry_entry(ext.name).features_fields: + info->${ext.field("feats")}.${field} = info->feats${ext.core_since.struct()}.${field}; + %endfor + } +%endif +%endfor + + /* See above, but for VulkanXYProperties. + * Unlike VulkanXYFeatures with all the booleans, VulkanXYProperties can + * contain different types of data, including arrays. The C language hates us + * when we assign an array to another array, therefore we use an memcpy here. + */ +%for ext in extensions: +%if ext.properties_promoted: + if (info->have_vulkan${ext.core_since.struct()}) { + %for field in registry.get_registry_entry(ext.name).properties_fields: + memcpy(&info->${ext.field("props")}.${field}, + &info->props${ext.core_since.struct()}.${field}, + sizeof(info->${ext.field("props")}.${field})); + %endfor + } +%endif +%endfor + // enable the extensions if they match the conditions given by ext.enable_conds if (screen->vk.GetPhysicalDeviceProperties2) { %for ext in extensions: @@ -484,6 +664,36 @@ zink_get_physical_device_info(struct zink_screen *screen) info->num_extensions = num_extensions; + info->feats.pNext = NULL; + +%for version in versions: +%if version.device_version < (1,2,0): + if (VK_MAKE_VERSION(1,2,0) <= screen->vk_version) { + /* VkPhysicalDeviceVulkan11Features was added in 1.2, not 1.1 as one would think */ +%else: + if (${version.version()} <= screen->vk_version) { +%endif + info->feats${version.struct()}.pNext = info->feats.pNext; + info->feats.pNext = &info->feats${version.struct()}; + } +%endfor + +%for ext in extensions: +%if ext.has_features: 
+<%helpers:guard ext="${ext}"> +%if ext.features_promoted: + if (info->have_${ext.name_with_vendor()} && !info->have_vulkan${ext.core_since.struct()}) { +%else: + if (info->have_${ext.name_with_vendor()}) { +%endif + info->${ext.field("feats")}.sType = ${ext.stype("FEATURES")}; + info->${ext.field("feats")}.pNext = info->feats.pNext; + info->feats.pNext = &info->${ext.field("feats")}; + } +</%helpers:guard> +%endif +%endfor + return true; fail: @@ -495,8 +705,12 @@ zink_verify_device_extensions(struct zink_screen *screen) { %for ext in extensions: %if registry.in_registry(ext.name): +<%helpers:guard ext="${ext}"> if (screen->info.have_${ext.name_with_vendor()}) { %for cmd in registry.get_registry_entry(ext.name).device_commands: +%if cmd.find("win32"): +#ifdef _WIN32 +%endif if (!screen->vk.${cmd.lstrip("vk")}) { #ifndef NDEBUG screen->vk.${cmd.lstrip("vk")} = (PFN_${cmd})zink_stub_${cmd.lstrip("vk")}; @@ -504,8 +718,12 @@ zink_verify_device_extensions(struct zink_screen *screen) screen->vk.${cmd.lstrip("vk")} = (PFN_${cmd})zink_stub_function_not_loaded; #endif } +%if cmd.find("win32"): +#endif +%endif %endfor } +</%helpers:guard> %endif %endfor } @@ -589,15 +807,21 @@ if __name__ == "__main__": if not (entry.features_struct and ext.physical_device_struct("Features") == entry.features_struct): error_count += 1 print("The extension {} does not provide a features struct.".format(ext.name)) + ext.features_promoted = entry.features_promoted if ext.has_properties: if not (entry.properties_struct and ext.physical_device_struct("Properties") == entry.properties_struct): error_count += 1 print("The extension {} does not provide a properties struct.".format(ext.name)) - print(entry.properties_struct, ext.physical_device_struct("Properties")) + ext.properties_promoted = entry.properties_promoted - if entry.promoted_in: + if entry.promoted_in and entry.promoted_in <= versions[-1].struct_version: ext.core_since = Version((*entry.promoted_in, 0)) + else: + # even if the ext is 
promoted in a newer VK version, consider it + # unpromoted until there's an entry for that VK version in VERSIONS + ext.features_promoted = False + ext.properties_promoted = False if error_count > 0: print("zink_device_info.py: Found {} error(s) in total. Quitting.".format(error_count)) @@ -606,12 +830,12 @@ if __name__ == "__main__": lookup = TemplateLookup() lookup.put_string("helpers", include_template) - with open(header_path, "w") as header_file: + with open(header_path, "w", encoding='utf-8') as header_file: header = Template(header_code, lookup=lookup).render(extensions=extensions, versions=versions, registry=registry).strip() header = replace_code(header, replacement) print(header, file=header_file) - with open(impl_path, "w") as impl_file: + with open(impl_path, "w", encoding='utf-8') as impl_file: impl = Template(impl_code, lookup=lookup).render(extensions=extensions, versions=versions, registry=registry).strip() impl = replace_code(impl, replacement) print(impl, file=impl_file) diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index fde03630cb0..0da405ea7b7 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -1,6 +1,9 @@ +#include "zink_batch.h" #include "zink_compiler.h" #include "zink_context.h" +#include "zink_descriptors.h" #include "zink_program.h" +#include "zink_program_state.hpp" #include "zink_query.h" #include "zink_resource.h" #include "zink_screen.h" @@ -8,57 +11,40 @@ #include "zink_surface.h" #include "zink_inlines.h" -#include "tgsi/tgsi_from_mesa.h" #include "util/hash_table.h" +#include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_prim_restart.h" - static void zink_emit_xfb_counter_barrier(struct zink_context *ctx) { - /* Between the pause and resume there needs to be a memory barrier for the counter buffers - * with a source access of 
VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT - * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT - * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT - * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT. - * - * - from VK_EXT_transform_feedback spec - */ for (unsigned i = 0; i < ctx->num_so_targets; i++) { struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); if (!t) continue; struct zink_resource *res = zink_resource(t->counter_buffer); - if (t->counter_buffer_valid) - zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); - else - zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); + VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; + if (t->counter_buffer_valid) { + /* Between the pause and resume there needs to be a memory barrier for the counter buffers + * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT + * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT + * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT + * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT. 
+ * + * - from VK_EXT_transform_feedback spec + */ + access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; + stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + } + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, stage); + if (!ctx->unordered_blitting) + res->obj->unordered_read = false; } - ctx->xfb_barrier = false; -} - -static void -zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res) -{ - /* A pipeline barrier is required between using the buffers as - * transform feedback buffers and vertex buffers to - * ensure all writes to the transform feedback buffers are visible - * when the data is read as vertex attributes. - * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT - * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT - * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT - * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively. - * - * - 20.3.1. Drawing Transform Feedback - */ - zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); } static void @@ -66,9 +52,9 @@ zink_emit_stream_output_targets(struct pipe_context *pctx) { struct zink_context *ctx = zink_context(pctx); struct zink_batch *batch = &ctx->batch; - VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {0}; - VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {0}; - VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {0}; + VkBuffer buffers[PIPE_MAX_SO_BUFFERS] = {0}; + VkDeviceSize buffer_offsets[PIPE_MAX_SO_BUFFERS] = {0}; + VkDeviceSize buffer_sizes[PIPE_MAX_SO_BUFFERS] = {0}; for (unsigned i = 0; i < ctx->num_so_targets; i++) { struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i]; @@ -84,12 +70,15 @@ zink_emit_stream_output_targets(struct pipe_context *pctx) /* resource has been rebound */ t->counter_buffer_valid = false; buffers[i] = res->obj->buffer; - zink_resource_buffer_barrier(ctx, zink_resource(t->base.buffer), - 
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); zink_batch_reference_resource_rw(batch, res, true); buffer_offsets[i] = t->base.buffer_offset; buffer_sizes[i] = t->base.buffer_size; res->so_valid = true; + if (!ctx->unordered_blitting) { + res->obj->unordered_read = res->obj->unordered_write = false; + res->obj->access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; + } util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset, t->base.buffer_offset + t->base.buffer_size); } @@ -104,7 +93,9 @@ ALWAYS_INLINE static void check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline) { struct zink_resource *res = zink_resource(pres); - zink_resource_buffer_barrier(ctx, res, flags, pipeline); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, flags, pipeline); + if (!ctx->unordered_blitting) + res->obj->unordered_read = false; } ALWAYS_INLINE static void @@ -122,50 +113,70 @@ barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinf } } -template <zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_vertex_input HAS_VERTEX_INPUT> +static void +bind_vertex_buffers_dgc(struct zink_context *ctx) +{ + struct zink_vertex_elements_state *elems = ctx->element_state; + + ctx->vertex_buffers_dirty = false; + if (!elems->hw_state.num_bindings) + return; + for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { + struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->hw_state.binding_map[i]; + assert(vb); + VkBindVertexBufferIndirectCommandNV *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr); + token->vertexBindingUnit = ctx->element_state->hw_state.binding_map[i]; + if (vb->buffer.resource) { + struct zink_resource *res = 
zink_resource(vb->buffer.resource); + assert(res->obj->bda); + ptr->bufferAddress = res->obj->bda + vb->buffer_offset; + ptr->size = res->base.b.width0; + ptr->stride = ctx->element_state->hw_state.b.strides[i]; + } else { + ptr->bufferAddress = 0; + ptr->size = 0; + ptr->stride = 0; + } + } +} + +template <zink_dynamic_state DYNAMIC_STATE> static void zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) { VkBuffer buffers[PIPE_MAX_ATTRIBS]; VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS]; - VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS]; struct zink_vertex_elements_state *elems = ctx->element_state; struct zink_screen *screen = zink_screen(ctx->base.screen); - if (!elems->hw_state.num_bindings) - return; - for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { - const unsigned buffer_id = ctx->element_state->binding_map[i]; - struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id; + struct pipe_vertex_buffer *vb = ctx->vertex_buffers + elems->hw_state.binding_map[i]; assert(vb); if (vb->buffer.resource) { - buffers[i] = ctx->vbufs[buffer_id]; - assert(buffers[i]); - if (HAS_VERTEX_INPUT) - elems->hw_state.dynbindings[i].stride = vb->stride; - buffer_offsets[i] = ctx->vbuf_offsets[buffer_id]; - buffer_strides[i] = vb->stride; - zink_batch_resource_usage_set(&ctx->batch, zink_resource(vb->buffer.resource), false); + struct zink_resource *res = zink_resource(vb->buffer.resource); + assert(res->obj->buffer); + buffers[i] = res->obj->buffer; + buffer_offsets[i] = vb->buffer_offset; } else { buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; buffer_offsets[i] = 0; - buffer_strides[i] = 0; - if (HAS_VERTEX_INPUT) - elems->hw_state.dynbindings[i].stride = 0; } } - if (HAS_DYNAMIC_STATE && !HAS_VERTEX_INPUT) - VKCTX(CmdBindVertexBuffers2EXT)(batch->state->cmdbuf, 0, - elems->hw_state.num_bindings, - buffers, buffer_offsets, NULL, buffer_strides); - else + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && + DYNAMIC_STATE != 
ZINK_DYNAMIC_VERTEX_INPUT2 && + DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) { + if (elems->hw_state.num_bindings) + VKCTX(CmdBindVertexBuffers2)(batch->state->cmdbuf, 0, + elems->hw_state.num_bindings, + buffers, buffer_offsets, NULL, elems->hw_state.b.strides); + } else if (elems->hw_state.num_bindings) VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0, elems->hw_state.num_bindings, buffers, buffer_offsets); - if (HAS_VERTEX_INPUT) + if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT) VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf, elems->hw_state.num_bindings, elems->hw_state.dynbindings, elems->hw_state.num_attribs, elems->hw_state.dynattribs); @@ -173,72 +184,53 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) ctx->vertex_buffers_dirty = false; } -static void -update_gfx_program(struct zink_context *ctx) +ALWAYS_INLINE static void +update_drawid(struct zink_context *ctx, unsigned draw_id) { - if (ctx->last_vertex_stage_dirty) { - enum pipe_shader_type pstage = pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage); - ctx->dirty_shader_stages |= BITFIELD_BIT(pstage); - memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base, - &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base, - sizeof(struct zink_vs_key_base)); - ctx->last_vertex_stage_dirty = false; - } - unsigned bits = BITFIELD_MASK(PIPE_SHADER_COMPUTE); - if (ctx->gfx_dirty) { - struct zink_gfx_program *prog = NULL; - - struct hash_table *ht = &ctx->program_cache[ctx->shader_stages >> 2]; - const uint32_t hash = ctx->gfx_hash; - struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); - if (entry) { - prog = (struct zink_gfx_program*)entry->data; - u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages) - ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader; - } else { - ctx->dirty_shader_stages |= bits; - prog = 
zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.vertices_per_patch + 1); - _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); - } - zink_update_gfx_program(ctx, prog); - if (prog && prog != ctx->curr_program) - zink_batch_reference_program(&ctx->batch, &prog->base); - if (ctx->curr_program) - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; - ctx->curr_program = prog; - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; - ctx->gfx_dirty = false; - } else if (ctx->dirty_shader_stages & bits) { - zink_update_gfx_program(ctx, ctx->curr_program); - } - ctx->dirty_shader_stages &= ~bits; + VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned), + &draw_id); } -static bool -line_width_needed(enum pipe_prim_type reduced_prim, - unsigned polygon_mode) +static void +update_drawid_dgc(struct zink_context *ctx, unsigned draw_id) { - switch (reduced_prim) { - case PIPE_PRIM_POINTS: - return false; - - case PIPE_PRIM_LINES: - return true; - - case PIPE_PRIM_TRIANGLES: - return polygon_mode == VK_POLYGON_MODE_LINE; - - default: - unreachable("unexpected reduced prim"); - } + uint32_t *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr); + token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_id); + token->pushconstantSize = sizeof(unsigned); + *ptr = draw_id; } ALWAYS_INLINE static void -update_drawid(struct zink_context *ctx, unsigned draw_id) +draw_indexed_dgc_need_index_buffer_unref(struct zink_context *ctx, + const struct pipe_draw_info *dinfo, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + unsigned draw_id, + bool needs_drawid) { - VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, 
VK_SHADER_STAGE_VERTEX_BIT, - offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned), - &draw_id); + if (dinfo->increment_draw_id && needs_drawid) { + for (unsigned i = 0; i < num_draws; i++) { + update_drawid_dgc(ctx, draw_id); + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + draw_id++; + } + } else { + if (needs_drawid) + update_drawid_dgc(ctx, draw_id); + for (unsigned i = 0; i < num_draws; i++) { + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + } + } } ALWAYS_INLINE static void @@ -269,6 +261,37 @@ draw_indexed_need_index_buffer_unref(struct zink_context *ctx, } } +ALWAYS_INLINE static void +draw_indexed_dgc(struct zink_context *ctx, + const struct pipe_draw_info *dinfo, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + unsigned draw_id, + bool needs_drawid) +{ + if (dinfo->increment_draw_id && needs_drawid) { + for (unsigned i = 0; i < num_draws; i++) { + update_drawid_dgc(ctx, draw_id); + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + draw_id++; + } + } else { + if (needs_drawid) + update_drawid_dgc(ctx, draw_id); + for (unsigned i = 0; i < num_draws; i++) { + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + } + } +} + template 
<zink_multidraw HAS_MULTIDRAW> ALWAYS_INLINE static void draw_indexed(struct zink_context *ctx, @@ -304,6 +327,37 @@ draw_indexed(struct zink_context *ctx, } } +ALWAYS_INLINE static void +draw_dgc(struct zink_context *ctx, + const struct pipe_draw_info *dinfo, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + unsigned draw_id, + bool needs_drawid) +{ + if (dinfo->increment_draw_id && needs_drawid) { + for (unsigned i = 0; i < num_draws; i++) { + update_drawid_dgc(ctx, draw_id); + VkDrawIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr); + *ptr = cmd; + draw_id++; + } + } else { + if (needs_drawid) + update_drawid_dgc(ctx, draw_id); + for (unsigned i = 0; i < num_draws; i++) { + VkDrawIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr); + *ptr = cmd; + } + } +} + template <zink_multidraw HAS_MULTIDRAW> ALWAYS_INLINE static void draw(struct zink_context *ctx, @@ -335,131 +389,107 @@ draw(struct zink_context *ctx, } } -ALWAYS_INLINE static VkPipelineStageFlags -find_pipeline_bits(uint32_t *mask) +template <zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED> +static bool +update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode, bool can_dgc) { - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { - if (mask[i]) { - return zink_pipeline_flags_from_pipe_stage((enum pipe_shader_type)i); - } + VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline; + const struct zink_screen *screen = zink_screen(ctx->base.screen); + bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages; + if (screen->optimal_keys && !ctx->is_generated_gs_bound) + zink_gfx_program_update_optimal(ctx); + else + zink_gfx_program_update(ctx); + bool 
pipeline_changed = false; + VkPipeline pipeline = VK_NULL_HANDLE; + if (!ctx->curr_program->base.uses_shobj) { + if (screen->info.have_EXT_graphics_pipeline_library) + pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); + else + pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); } - return 0; -} - -static void -update_barriers(struct zink_context *ctx, bool is_compute) -{ - if (!ctx->need_barriers[is_compute]->entries) - return; - struct set *need_barriers = ctx->need_barriers[is_compute]; - ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute]; - ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]]; - set_foreach(need_barriers, he) { - struct zink_resource *res = (struct zink_resource *)he->key; - VkPipelineStageFlags pipeline = 0; - VkAccessFlags access = 0; - if (res->bind_count[is_compute]) { - if (res->write_bind_count[is_compute]) - access |= VK_ACCESS_SHADER_WRITE_BIT; - if (res->write_bind_count[is_compute] != res->bind_count[is_compute]) { - unsigned bind_count = res->bind_count[is_compute] - res->write_bind_count[is_compute]; - if (res->obj->is_buffer) { - if (res->ubo_bind_count[is_compute]) { - access |= VK_ACCESS_UNIFORM_READ_BIT; - bind_count -= res->ubo_bind_count[is_compute]; - } - if (!is_compute && res->vbo_bind_mask) { - access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - pipeline |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - bind_count -= util_bitcount(res->vbo_bind_mask); - } - bind_count -= res->so_bind_count; - } - if (bind_count) - access |= VK_ACCESS_SHADER_READ_BIT; - } - if (is_compute) - pipeline = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - else if (!pipeline) { - if (res->ubo_bind_count[0]) - pipeline |= find_pipeline_bits(res->ubo_bind_mask); - if (!pipeline) - pipeline |= find_pipeline_bits(res->ssbo_bind_mask); - if (!pipeline) - pipeline |= 
find_pipeline_bits(res->sampler_binds); - if (!pipeline) //must be a shader image - pipeline = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - } - if (res->base.b.target == PIPE_BUFFER) - zink_resource_buffer_barrier(ctx, res, access, pipeline); - else { - VkImageLayout layout = zink_descriptor_util_image_layout_eval(res, is_compute); - if (layout != res->layout) - zink_resource_image_barrier(ctx, res, layout, access, pipeline); + if (pipeline) { + pipeline_changed = prev_pipeline != pipeline; + if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) { + ctx->dgc.last_prog = ctx->curr_program; + if (unlikely(can_dgc && screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1)) { + VkBindShaderGroupIndirectCommandNV *ptr; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV, (void**)&ptr); + util_dynarray_append(&ctx->dgc.pipelines, VkPipeline, pipeline); + /* zero-indexed -> base + group + num_pipelines-1 = base + num_pipelines */ + ptr->groupIndex = util_dynarray_num_elements(&ctx->dgc.pipelines, VkPipeline) + 1; + } else { + VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - /* always barrier on draw if this resource has either multiple image write binds or - * image write binds and image read binds - */ - if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1) - _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res); } - _mesa_set_remove(need_barriers, he); - if (!need_barriers->entries) - break; + ctx->shobj_draw = false; + } else { + if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) { + VkShaderStageFlagBits stages[] = { + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + /* always rebind all stages */ + VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects); + 
VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE); + VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT); + VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled); + VKCTX(CmdSetRasterizationStreamEXT)(bs->cmdbuf, 0); + } + ctx->shobj_draw = true; } + return pipeline_changed; } -template <bool BATCH_CHANGED> -static bool -update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum pipe_prim_type mode) +static enum mesa_prim +zink_prim_type(const struct zink_context *ctx, + const struct pipe_draw_info *dinfo) { - VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline; - update_gfx_program(ctx); - VkPipeline pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); - bool pipeline_changed = prev_pipeline != pipeline; - if (BATCH_CHANGED || pipeline_changed) - VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - return pipeline_changed; + if (ctx->gfx_pipeline_state.shader_rast_prim != MESA_PRIM_COUNT) + return ctx->gfx_pipeline_state.shader_rast_prim; + + return u_reduced_prim((enum mesa_prim)dinfo->mode); } -static bool -hack_conditional_render(struct pipe_context *pctx, - const struct pipe_draw_info *dinfo, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *dindirect, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws) +static enum mesa_prim +zink_rast_prim(const struct zink_context *ctx, + const struct pipe_draw_info *dinfo) { - struct zink_context *ctx = zink_context(pctx); - struct zink_batch_state *bs = ctx->batch.state; - static bool warned; - if (!warned) { - fprintf(stderr, "ZINK: warning, this is cpu-based conditional rendering, say bye-bye to fps\n"); - warned = true; - } - if (!zink_check_conditional_render(ctx)) - return false; - if (bs != ctx->batch.state) { - bool prev = ctx->render_condition_active; - ctx->render_condition_active = false; - 
zink_select_draw_vbo(ctx); - pctx->draw_vbo(pctx, dinfo, drawid_offset, dindirect, draws, num_draws); - ctx->render_condition_active = prev; - return false; + enum mesa_prim prim_type = zink_prim_type(ctx, dinfo); + assert(prim_type != MESA_PRIM_COUNT); + + if (prim_type == MESA_PRIM_TRIANGLES && + ctx->rast_state->base.fill_front != PIPE_POLYGON_MODE_FILL) { + switch(ctx->rast_state->base.fill_front) { + case PIPE_POLYGON_MODE_POINT: + return MESA_PRIM_POINTS; + case PIPE_POLYGON_MODE_LINE: + return MESA_PRIM_LINES; + default: + unreachable("unexpected polygon mode"); + } } - return true; + + return prim_type; } -template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2, - zink_dynamic_vertex_input HAS_VERTEX_INPUT, bool BATCH_CHANGED> +template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE> void -zink_draw_vbo(struct pipe_context *pctx, - const struct pipe_draw_info *dinfo, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *dindirect, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws) +zink_draw(struct pipe_context *pctx, + const struct pipe_draw_info *dinfo, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *dindirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + struct pipe_vertex_state *vstate, + uint32_t partial_velem_mask) { + if (!dindirect && (!draws[0].count || !dinfo->instance_count)) + return; + struct zink_context *ctx = zink_context(pctx); struct zink_screen *screen = zink_screen(pctx->screen); struct zink_rasterizer_state *rast_state = ctx->rast_state; @@ -468,58 +498,31 @@ zink_draw_vbo(struct pipe_context *pctx, struct zink_so_target *so_target = dindirect && dindirect->count_from_stream_output ? 
zink_so_target(dindirect->count_from_stream_output) : NULL; - VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS]; - VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS]; + VkBuffer counter_buffers[PIPE_MAX_SO_BUFFERS]; + VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_BUFFERS]; bool need_index_buffer_unref = false; bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode; bool reads_drawid = ctx->shader_reads_drawid; bool reads_basevertex = ctx->shader_reads_basevertex; unsigned work_count = ctx->batch.work_count; - enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode; - - if (unlikely(!screen->info.have_EXT_conditional_rendering)) { - if (!hack_conditional_render(pctx, dinfo, drawid_offset, dindirect, draws, num_draws)) - return; - } + enum mesa_prim mode = (enum mesa_prim)dinfo->mode; - zink_flush_memory_barrier(ctx, false); - update_barriers(ctx, false); + if (ctx->memory_barrier && !ctx->blitting) + zink_flush_memory_barrier(ctx, false); - if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) { + if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter && !ctx->blitting)) { ctx->buffer_rebind_counter = screen->buffer_rebind_counter; zink_rebind_all_buffers(ctx); } - uint8_t vertices_per_patch = ctx->gfx_pipeline_state.patch_vertices ? 
ctx->gfx_pipeline_state.patch_vertices - 1 : 0; - if (ctx->gfx_pipeline_state.vertices_per_patch != vertices_per_patch) - ctx->gfx_pipeline_state.dirty = true; - bool drawid_broken = false; - if (reads_drawid && (!dindirect || !dindirect->buffer)) - drawid_broken = (drawid_offset != 0 || - (!HAS_MULTIDRAW && num_draws > 1) || - (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id)); - if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid) - zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken; - ctx->gfx_pipeline_state.vertices_per_patch = vertices_per_patch; - if (mode_changed) { - bool points_changed = false; - if (mode == PIPE_PRIM_POINTS) { - ctx->gfx_pipeline_state.has_points++; - points_changed = true; - } else if (ctx->gfx_pipeline_state.gfx_prim_mode == PIPE_PRIM_POINTS) { - ctx->gfx_pipeline_state.has_points--; - points_changed = true; - } - if (points_changed && ctx->rast_state->base.point_quad_rasterization) - zink_set_fs_point_coord_key(ctx); - } - ctx->gfx_pipeline_state.gfx_prim_mode = mode; - if (!HAS_DYNAMIC_STATE2) { - if (ctx->gfx_pipeline_state.primitive_restart != dinfo->primitive_restart) - ctx->gfx_pipeline_state.dirty = true; - ctx->gfx_pipeline_state.primitive_restart = dinfo->primitive_restart; + if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter && !ctx->blitting)) { + ctx->image_rebind_counter = screen->image_rebind_counter; + zink_rebind_all_images(ctx); } + if (mode_changed) + zink_flush_dgc_if_enabled(ctx); + unsigned index_offset = 0; unsigned index_size = dinfo->index_size; struct pipe_resource *index_buffer = NULL; @@ -529,64 +532,190 @@ zink_draw_vbo(struct pipe_context *pctx, debug_printf("util_upload_index_buffer() failed\n"); return; } - zink_batch_reference_resource_move(batch, zink_resource(index_buffer)); + /* this will have extra refs from tc */ + if (screen->threaded) + zink_batch_reference_resource_move(batch, zink_resource(index_buffer)); + else + 
zink_batch_reference_resource(batch, zink_resource(index_buffer)); } else { index_buffer = dinfo->index.resource; zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false); } assert(index_size <= 4 && index_size != 3); assert(index_size != 1 || screen->info.have_EXT_index_type_uint8); - const VkIndexType index_type[3] = { - VK_INDEX_TYPE_UINT8_EXT, - VK_INDEX_TYPE_UINT16, - VK_INDEX_TYPE_UINT32, - }; - struct zink_resource *res = zink_resource(index_buffer); - VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]); } + ctx->was_line_loop = dinfo->was_line_loop; + bool have_streamout = !!ctx->num_so_targets; if (have_streamout) { - if (ctx->xfb_barrier) - zink_emit_xfb_counter_barrier(ctx); - if (ctx->dirty_so_targets) - zink_emit_stream_output_targets(pctx); + zink_emit_xfb_counter_barrier(ctx); + if (ctx->dirty_so_targets) { + /* have to loop here and below because barriers must be emitted out of renderpass, + * but xfb buffers can't be bound before the renderpass is active to avoid + * breaking from recursion + */ + for (unsigned i = 0; i < ctx->num_so_targets; i++) { + struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i]; + if (t) { + struct zink_resource *res = zink_resource(t->base.buffer); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT); + if (!ctx->unordered_blitting) + res->obj->unordered_read = res->obj->unordered_write = false; + } + } + } } - if (so_target) - zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer)); - barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer); + /* this may re-emit draw buffer barriers, but such synchronization is harmless */ + if (!ctx->blitting) + zink_update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? 
dindirect->indirect_draw_count : NULL); + + bool can_dgc = false; + if (unlikely(zink_debug & ZINK_DEBUG_DGC)) + can_dgc = !so_target && !ctx->num_so_targets && (!dindirect || !dindirect->buffer); + + /* ensure synchronization between doing streamout with counter buffer + * and using counter buffer for indirect draw + */ + if (so_target && so_target->counter_buffer_valid) { + struct zink_resource *res = zink_resource(so_target->counter_buffer); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); + if (!ctx->unordered_blitting) + res->obj->unordered_read = false; + } + + zink_query_update_gs_states(ctx); + + if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { + zink_batch_no_rp(ctx); + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + + zink_batch_rp(ctx); + /* check dead swapchain */ + if (unlikely(!ctx->batch.in_rp)) + return; if (BATCH_CHANGED) zink_update_descriptor_refs(ctx, false); - zink_batch_rp(ctx); - bool pipeline_changed = false; - if (!HAS_DYNAMIC_STATE) - pipeline_changed = update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode); + /* these must be after renderpass start to avoid issues with recursion */ + bool drawid_broken = false; + if (reads_drawid && (!dindirect || !dindirect->buffer)) + drawid_broken = (drawid_offset != 0 || + (!HAS_MULTIDRAW && num_draws > 1) || + (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id)); + if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid) + zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken; + + bool rast_prim_changed = false; + bool prim_changed = false; + bool rast_state_changed = 
ctx->rast_state_changed; + if (mode_changed || ctx->gfx_pipeline_state.modules_changed || + rast_state_changed) { + enum mesa_prim rast_prim = zink_rast_prim(ctx, dinfo); + if (rast_prim != ctx->gfx_pipeline_state.rast_prim) { + bool points_changed = + (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) != + (rast_prim == MESA_PRIM_POINTS); + + prim_changed = ctx->gfx_pipeline_state.rast_prim != rast_prim; + + static bool rect_warned = false; + if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && rast_prim == MESA_PRIM_LINES && !rect_warned && + (VkLineRasterizationModeEXT)rast_state->hw_state.line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT) { + if (screen->info.line_rast_feats.rectangularLines) + rect_warned = true; + else + warn_missing_feature(rect_warned, "rectangularLines"); + } + + ctx->gfx_pipeline_state.rast_prim = rast_prim; + rast_prim_changed = true; + + if (points_changed && ctx->rast_state->base.point_quad_rasterization) + zink_set_fs_point_coord_key(ctx); + } + } + ctx->gfx_pipeline_state.gfx_prim_mode = mode; + + if ((mode_changed || prim_changed || rast_state_changed || ctx->gfx_pipeline_state.modules_changed)) { + zink_set_primitive_emulation_keys(ctx); + } + + if (index_size) { + const VkIndexType index_type[3] = { + VK_INDEX_TYPE_UINT8_EXT, + VK_INDEX_TYPE_UINT16, + VK_INDEX_TYPE_UINT32, + }; + struct zink_resource *res = zink_resource(index_buffer); + if (unlikely(can_dgc)) { + VkBindIndexBufferIndirectCommandNV *ptr; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV, (void**)&ptr); + ptr->bufferAddress = res->obj->bda + index_offset; + ptr->size = res->base.b.width0; + ptr->indexType = index_type[index_size >> 1]; + } else { + VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]); + } + } + if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) { + if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart) + ctx->gfx_pipeline_state.dirty = true; + 
ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart; + } + + if (have_streamout && ctx->dirty_so_targets) + zink_emit_stream_output_targets(pctx); + + bool pipeline_changed = update_gfx_pipeline<DYNAMIC_STATE, BATCH_CHANGED>(ctx, batch->state, mode, can_dgc); - if (BATCH_CHANGED || ctx->vp_state_changed || (!HAS_DYNAMIC_STATE && pipeline_changed)) { + if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) { VkViewport viewports[PIPE_MAX_VIEWPORTS]; for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { VkViewport viewport = { ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0], ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1], - ctx->vp_state.viewport_states[i].scale[0] * 2, + MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1), ctx->vp_state.viewport_states[i].scale[1] * 2, - ctx->rast_state->base.clip_halfz ? - ctx->vp_state.viewport_states[i].translate[2] : - ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2], - ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2] + CLAMP(ctx->rast_state->base.clip_halfz ? 
+ ctx->vp_state.viewport_states[i].translate[2] : + ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2], + 0, 1), + CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2], + 0, 1) }; + if (!ctx->rast_state->base.half_pixel_center) { + /* magic constant value from dxvk */ + float cf = 0.5f - (1.0f / 128.0f); + viewport.x += cf; + if (viewport.height < 0) + viewport.y += cf; + else + viewport.y -= cf; + } viewports[i] = viewport; } - if (HAS_DYNAMIC_STATE) - VKCTX(CmdSetViewportWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports); + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) + VKCTX(CmdSetViewportWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports); else VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports); } - if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (!HAS_DYNAMIC_STATE && pipeline_changed)) { + if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) { VkRect2D scissors[PIPE_MAX_VIEWPORTS]; if (ctx->rast_state->base.scissor) { for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { @@ -603,8 +732,8 @@ zink_draw_vbo(struct pipe_context *pctx, scissors[i].extent.height = ctx->fb_state.height; } } - if (HAS_DYNAMIC_STATE) - VKCTX(CmdSetScissorWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors); + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) + VKCTX(CmdSetScissorWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors); else VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors); } @@ -619,30 +748,27 @@ zink_draw_vbo(struct pipe_context *pctx, ctx->stencil_ref_changed = false; } - if (HAS_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) { - VKCTX(CmdSetDepthBoundsTestEnableEXT)(batch->state->cmdbuf, 
dsa_state->hw_state.depth_bounds_test); + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) { + VKCTX(CmdSetDepthBoundsTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test); if (dsa_state->hw_state.depth_bounds_test) VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf, dsa_state->hw_state.min_depth_bounds, dsa_state->hw_state.max_depth_bounds); - VKCTX(CmdSetDepthTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_test); - if (dsa_state->hw_state.depth_test) - VKCTX(CmdSetDepthCompareOpEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op); - VKCTX(CmdSetDepthWriteEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_write); - VKCTX(CmdSetStencilTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test); + VKCTX(CmdSetDepthTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_test); + VKCTX(CmdSetDepthCompareOp)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op); + VKCTX(CmdSetDepthWriteEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_write); + VKCTX(CmdSetStencilTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test); if (dsa_state->hw_state.stencil_test) { - VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, + VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.failOp, dsa_state->hw_state.stencil_front.passOp, dsa_state->hw_state.stencil_front.depthFailOp, dsa_state->hw_state.stencil_front.compareOp); - VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, + VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.failOp, dsa_state->hw_state.stencil_back.passOp, dsa_state->hw_state.stencil_back.depthFailOp, dsa_state->hw_state.stencil_back.compareOp); - } - if (dsa_state->base.stencil[0].enabled) { if (dsa_state->base.stencil[1].enabled) { VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, 
VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask); VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask); @@ -652,52 +778,97 @@ zink_draw_vbo(struct pipe_context *pctx, VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask); VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask); } + } else { + VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask); + VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask); + VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS); } } ctx->dsa_state_changed = false; - bool rast_state_changed = ctx->rast_state_changed; - if (HAS_DYNAMIC_STATE && (BATCH_CHANGED || rast_state_changed)) - VKCTX(CmdSetFrontFaceEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.front_face); - if ((BATCH_CHANGED || rast_state_changed) && - screen->info.have_EXT_line_rasterization && rast_state->base.line_stipple_enable) - VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern); - - if (BATCH_CHANGED || ctx->rast_state_changed || mode_changed) { - enum pipe_prim_type reduced_prim = u_reduced_prim(mode); - - bool depth_bias = false; - switch (reduced_prim) { - case PIPE_PRIM_POINTS: - depth_bias = rast_state->offset_point; - break; + if (BATCH_CHANGED || rast_state_changed) { + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) { + VKCTX(CmdSetFrontFace)(batch->state->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face); + VKCTX(CmdSetCullMode)(batch->state->cmdbuf, 
ctx->gfx_pipeline_state.dyn_state1.cull_mode); + } - case PIPE_PRIM_LINES: - depth_bias = rast_state->offset_line; - break; + if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3) { + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE)) + VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP)) + VKCTX(CmdSetDepthClipEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clip); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLAMP)) + VKCTX(CmdSetDepthClampEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clamp); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_POLYGON)) + VKCTX(CmdSetPolygonModeEXT)(batch->state->cmdbuf, (VkPolygonMode)rast_state->hw_state.polygon_mode); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_HALFZ)) + VKCTX(CmdSetDepthClipNegativeOneToOneEXT)(batch->state->cmdbuf, !rast_state->hw_state.clip_halfz); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_PV)) + VKCTX(CmdSetProvokingVertexModeEXT)(batch->state->cmdbuf, + rast_state->hw_state.pv_last ? 
+ VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : + VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP)) + VKCTX(CmdSetLineRasterizationModeEXT)(batch->state->cmdbuf, rast_state->dynamic_line_mode); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON)) + VKCTX(CmdSetLineStippleEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.line_stipple_enable); + } + } + if ((BATCH_CHANGED || ctx->sample_mask_changed) && screen->have_full_ds3) { + VKCTX(CmdSetRasterizationSamplesEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1)); + VKCTX(CmdSetSampleMaskEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1), &ctx->gfx_pipeline_state.sample_mask); + ctx->sample_mask_changed = false; + } + if ((BATCH_CHANGED || ctx->blend_state_changed)) { + if (ctx->gfx_pipeline_state.blend_state) { + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A2C)) + VKCTX(CmdSetAlphaToCoverageEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_coverage && + ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A21)) + VKCTX(CmdSetAlphaToOneEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_one); + if (ctx->fb_state.nr_cbufs) { + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_ON)) + VKCTX(CmdSetColorBlendEnableEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.enables); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_WRITE)) + VKCTX(CmdSetColorWriteMaskEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.wrmask); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_EQ)) + VKCTX(CmdSetColorBlendEquationEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.eq); + } + if (ctx->ds3_states & 
BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC_ON)) + VKCTX(CmdSetLogicOpEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_enable); + if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC)) + VKCTX(CmdSetLogicOpEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_func); + } + } + ctx->ds3_states = 0; - case PIPE_PRIM_TRIANGLES: - depth_bias = rast_state->offset_tri; - break; + if (BATCH_CHANGED || + /* only re-emit on non-batch change when actually drawing lines */ + ((ctx->line_width_changed || rast_prim_changed) && ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES)) { + VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width); + ctx->line_width_changed = false; + } - default: - unreachable("unexpected reduced prim"); - } + if (BATCH_CHANGED || mode_changed || + ctx->gfx_pipeline_state.modules_changed || + rast_state_changed) { + bool depth_bias = + zink_prim_type(ctx, dinfo) == MESA_PRIM_TRIANGLES && + rast_state->offset_fill; - if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) { - if (screen->info.feats.features.wideLines || rast_state->line_width == 1.0f) - VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width); - else - debug_printf("BUG: wide lines not supported, needs fallback!"); - } - if (depth_bias) - VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale); - else + if (depth_bias) { + if (rast_state->base.offset_units_unscaled) { + VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale); + } else { + VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale); + } + } else { VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f); + } } ctx->rast_state_changed = false; - if (HAS_DYNAMIC_STATE) { + if (DYNAMIC_STATE != 
ZINK_NO_DYNAMIC_STATE) { if (ctx->sample_locations_changed) { VkSampleLocationsInfoEXT loc; zink_init_vk_sample_locations(ctx, &loc); @@ -706,46 +877,118 @@ zink_draw_vbo(struct pipe_context *pctx, ctx->sample_locations_changed = false; } - if ((BATCH_CHANGED || ctx->blend_state_changed) && - ctx->gfx_pipeline_state.blend_state->need_blend_constants) { + if (BATCH_CHANGED || ctx->blend_color_changed) { VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants); } ctx->blend_state_changed = false; - - if (BATCH_CHANGED || ctx->vertex_buffers_dirty) - zink_bind_vertex_buffers<HAS_DYNAMIC_STATE, HAS_VERTEX_INPUT>(batch, ctx); - - zink_query_update_gs_states(ctx); + ctx->blend_color_changed = false; + + if (!DRAW_STATE) { + if (BATCH_CHANGED || ctx->vertex_buffers_dirty) { + if (unlikely(can_dgc)) + bind_vertex_buffers_dgc(ctx); + else if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride) + zink_bind_vertex_buffers<DYNAMIC_STATE>(batch, ctx); + else + zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(batch, ctx); + } + } if (BATCH_CHANGED) { ctx->pipeline_changed[0] = false; zink_select_draw_vbo(ctx); } - if (HAS_DYNAMIC_STATE) { - update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode); - if (BATCH_CHANGED || mode_changed) - VKCTX(CmdSetPrimitiveTopologyEXT)(batch->state->cmdbuf, zink_primitive_topology(mode)); - } + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed)) + VKCTX(CmdSetPrimitiveTopology)(batch->state->cmdbuf, zink_primitive_topology(mode)); - if (HAS_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) { - VKCTX(CmdSetPrimitiveRestartEnableEXT)(batch->state->cmdbuf, dinfo->primitive_restart); + if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) { + VKCTX(CmdSetPrimitiveRestartEnable)(batch->state->cmdbuf, dinfo->primitive_restart); ctx->primitive_restart = 
dinfo->primitive_restart; } + if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) { + VKCTX(CmdSetRasterizerDiscardEnable)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard); + ctx->rasterizer_discard_changed = false; + } + if (zink_program_has_descriptors(&ctx->curr_program->base)) - screen->descriptors_update(ctx, false); + zink_descriptors_update(ctx, false); + + if (ctx->di.any_bindless_dirty && + /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */ + zink_program_has_descriptors(&ctx->curr_program->base) && + ctx->curr_program->base.dd.bindless) + zink_descriptors_update_bindless(ctx); if (reads_basevertex) { unsigned draw_mode_is_indexed = index_size > 0; - VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT, - offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned), - &draw_mode_is_indexed); + if (unlikely(can_dgc)) { + uint32_t *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr); + token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed); + token->pushconstantSize = sizeof(unsigned); + *ptr = draw_mode_is_indexed; + } else { + VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned), + &draw_mode_is_indexed); + } + } + if (ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL] && + ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) { + if (unlikely(can_dgc)) { + float *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr); + token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, default_inner_level); + token->pushconstantSize = 
sizeof(float) * 6; + memcpy(ptr, &ctx->tess_levels[0], sizeof(float) * 6); + } else { + VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6, + &ctx->tess_levels[0]); + } + } + + if (!screen->optimal_keys) { + if (zink_get_fs_key(ctx)->lower_line_stipple || + zink_get_gs_key(ctx)->lower_gl_point || + zink_get_fs_key(ctx)->lower_line_smooth) { + + assert(zink_get_gs_key(ctx)->lower_line_stipple == + zink_get_fs_key(ctx)->lower_line_stipple); + + assert(zink_get_gs_key(ctx)->lower_line_smooth == + zink_get_fs_key(ctx)->lower_line_smooth); + + float viewport_scale[2] = { + ctx->vp_state.viewport_states[0].scale[0], + ctx->vp_state.viewport_states[0].scale[1] + }; + VKCTX(CmdPushConstants)(batch->state->cmdbuf, + ctx->curr_program->base.layout, + VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, viewport_scale), + sizeof(float) * 2, &viewport_scale); + + uint32_t stipple = ctx->rast_state->base.line_stipple_pattern; + stipple |= ctx->rast_state->base.line_stipple_factor << 16; + VKCTX(CmdPushConstants)(batch->state->cmdbuf, + ctx->curr_program->base.layout, + VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, line_stipple_pattern), + sizeof(uint32_t), &stipple); + + if (ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) { + float line_width = ctx->rast_state->base.line_width; + VKCTX(CmdPushConstants)(batch->state->cmdbuf, + ctx->curr_program->base.layout, + VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, line_width), + sizeof(uint32_t), &line_width); + } + } } - if (ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL] && ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated) - VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, - offsetof(struct 
zink_gfx_push_constant, default_inner_level), sizeof(float) * 6, - &ctx->tess_levels[0]); if (have_streamout) { for (unsigned i = 0; i < ctx->num_so_targets; i++) { @@ -753,8 +996,10 @@ zink_draw_vbo(struct pipe_context *pctx, counter_buffers[i] = VK_NULL_HANDLE; if (t) { struct zink_resource *res = zink_resource(t->counter_buffer); - t->stride = ctx->last_vertex_stage->streamout.so_info.stride[i] * sizeof(uint32_t); + t->stride = ctx->last_vertex_stage->sinfo.stride[i]; zink_batch_reference_resource_rw(batch, res, true); + if (!ctx->unordered_blitting) + res->obj->unordered_read = res->obj->unordered_write = false; if (t->counter_buffer_valid) { counter_buffers[i] = res->obj->buffer; counter_buffer_offsets[i] = t->counter_buffer_offset; @@ -764,6 +1009,34 @@ zink_draw_vbo(struct pipe_context *pctx, VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets); } + bool marker = false; + if (unlikely(zink_tracing)) { + VkViewport viewport = { + ctx->vp_state.viewport_states[0].translate[0] - ctx->vp_state.viewport_states[0].scale[0], + ctx->vp_state.viewport_states[0].translate[1] - ctx->vp_state.viewport_states[0].scale[1], + MAX2(ctx->vp_state.viewport_states[0].scale[0] * 2, 1), + ctx->vp_state.viewport_states[0].scale[1] * 2, + CLAMP(ctx->rast_state->base.clip_halfz ? + ctx->vp_state.viewport_states[0].translate[2] : + ctx->vp_state.viewport_states[0].translate[2] - ctx->vp_state.viewport_states[0].scale[2], + 0, 1), + CLAMP(ctx->vp_state.viewport_states[0].translate[2] + ctx->vp_state.viewport_states[0].scale[2], + 0, 1) + }; + if (ctx->blitting) { + bool is_zs = util_format_is_depth_or_stencil(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format); + marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "u_blitter(%s->%s, %dx%d)", + util_format_short_name(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format), + util_format_short_name((is_zs ? 
ctx->fb_state.zsbuf : ctx->fb_state.cbufs[0])->format), + lround(viewport.width), lround(viewport.height)); + } else { + marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "draw(%u cbufs|%s, %dx%d)", + ctx->fb_state.nr_cbufs, + ctx->fb_state.zsbuf ? "zsbuf" : "", + lround(viewport.width), lround(viewport.height)); + } + } + bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid; work_count += num_draws; if (index_size > 0) { @@ -782,20 +1055,32 @@ zink_draw_vbo(struct pipe_context *pctx, } else VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride); } else { - if (need_index_buffer_unref) + if (unlikely(can_dgc)) { + if (need_index_buffer_unref) + draw_indexed_dgc_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + else + draw_indexed_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + } else if (need_index_buffer_unref) { draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); - else + } else { draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + } } } else { if (so_target && screen->info.tf_props.transformFeedbackDraw) { - if (needs_drawid) - update_drawid(ctx, drawid_offset); - zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false); - zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true); - VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance, - zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0, - MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride)); + /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb + * draw using a streamout target that has no data + * to avoid hanging the gpu, reject any such draws + 
*/ + if (so_target->counter_buffer_valid) { + if (needs_drawid) + update_drawid(ctx, drawid_offset); + zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false); + zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true); + VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance, + zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0, + MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride)); + } } else if (dindirect && dindirect->buffer) { assert(num_draws == 1); if (needs_drawid) @@ -811,10 +1096,17 @@ zink_draw_vbo(struct pipe_context *pctx, } else VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride); } else { - draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + if (unlikely(can_dgc)) + draw_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + else + draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); } } + if (unlikely(zink_tracing)) + zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker); + + ctx->dgc.valid = can_dgc; if (have_streamout) { for (unsigned i = 0; i < ctx->num_so_targets; i++) { struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); @@ -826,14 +1118,115 @@ zink_draw_vbo(struct pipe_context *pctx, } VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets); } + batch->has_work = true; batch->last_was_compute = false; ctx->batch.work_count = work_count; /* flush if there's >100k draws */ - if (unlikely(work_count >= 30000) || ctx->oom_flush) + if (!ctx->unordered_blitting && (unlikely(work_count >= 30000) || ctx->oom_flush)) pctx->flush(pctx, NULL, 0); } +template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED> +static 
void +zink_draw_vbo(struct pipe_context *pctx, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0); +} + +template <util_popcnt HAS_POPCNT> +static void +zink_vertex_state_mask(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask) +{ + struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate; + VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf; + + if (partial_velem_mask == vstate->input.full_velem_mask) { + VKCTX(CmdSetVertexInputEXT)(cmdbuf, + zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings, + zstate->velems.hw_state.num_attribs, zstate->velems.hw_state.dynattribs); + return; + } + + VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS]; + unsigned num_attribs = 0; + u_foreach_bit(elem, vstate->input.full_velem_mask & partial_velem_mask) { + unsigned idx = util_bitcount_fast<HAS_POPCNT>(vstate->input.full_velem_mask & BITFIELD_MASK(elem)); + dynattribs[num_attribs] = zstate->velems.hw_state.dynattribs[idx]; + dynattribs[num_attribs].location = num_attribs; + num_attribs++; + } + + VKCTX(CmdSetVertexInputEXT)(cmdbuf, + zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings, + num_attribs, dynattribs); +} + +template <util_popcnt HAS_POPCNT> +static void +zink_bind_vertex_state(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask) +{ + struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate; + VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf; + if (!vstate->input.vbuffer.buffer.resource) + return; + + zink_vertex_state_mask<HAS_POPCNT>(ctx, vstate, partial_velem_mask); + + struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource); 
+ zink_batch_resource_usage_set(&ctx->batch, res, false, true); + VkDeviceSize offset = vstate->input.vbuffer.buffer_offset; + if (unlikely(zink_debug & ZINK_DEBUG_DGC)) { + VkBindVertexBufferIndirectCommandNV *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr); + token->vertexBindingUnit = 0; + token->vertexDynamicStride = VK_FALSE; + ptr->bufferAddress = res->obj->bda + offset; + ptr->size = res->base.b.width0; + ptr->stride = 0; + } else { + VKCTX(CmdBindVertexBuffers)(cmdbuf, 0, + zstate->velems.hw_state.num_bindings, + &res->obj->buffer, &offset); + } +} + +template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, util_popcnt HAS_POPCNT, bool BATCH_CHANGED> +static void +zink_draw_vertex_state(struct pipe_context *pctx, + struct pipe_vertex_state *vstate, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + struct pipe_draw_info dinfo = {}; + + dinfo.mode = info.mode; + dinfo.index_size = 4; + dinfo.instance_count = 1; + dinfo.index.resource = vstate->input.indexbuf; + struct zink_context *ctx = zink_context(pctx); + struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource); + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); + if (!ctx->unordered_blitting) + res->obj->unordered_read = false; + zink_bind_vertex_state<HAS_POPCNT>(ctx, vstate, partial_velem_mask); + + zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask); + /* ensure ctx->vertex_buffers gets rebound on next non-vstate draw */ + ctx->vertex_buffers_dirty = true; + + if (info.take_vertex_state_ownership) + pipe_vertex_state_reference(&vstate, NULL); +} + template <bool BATCH_CHANGED> static void zink_launch_grid(struct 
pipe_context *pctx, const struct pipe_grid_info *info) @@ -842,21 +1235,51 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) struct zink_screen *screen = zink_screen(pctx->screen); struct zink_batch *batch = &ctx->batch; - update_barriers(ctx, true); - zink_flush_memory_barrier(ctx, true); + if (ctx->render_condition_active) + zink_start_conditional_render(ctx); - if (zink_program_has_descriptors(&ctx->curr_compute->base)) - screen->descriptors_update(ctx, true); + if (info->indirect) { + /* + VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as + part of an indirect build, trace, drawing or dispatching command. Such access occurs in the + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage. + + - Chapter 7. Synchronization and Cache Control + */ + check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); + } - zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info->block); + zink_update_barriers(ctx, true, NULL, info->indirect, NULL); + if (ctx->memory_barrier) + zink_flush_memory_barrier(ctx, true); + + if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { + zink_batch_no_rp(ctx); + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + + zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info); VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline; - VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute, - &ctx->compute_pipeline_state); if (BATCH_CHANGED) { zink_update_descriptor_refs(ctx, true); - zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base); } + if 
(ctx->compute_dirty) { + /* update inlinable constants */ + zink_update_compute_program(ctx); + ctx->compute_dirty = false; + } + + VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute, + &ctx->compute_pipeline_state); if (prev_pipeline != pipeline || BATCH_CHANGED) VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); @@ -865,13 +1288,15 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) zink_select_launch_grid(ctx); } - if (BITSET_TEST(ctx->compute_stage->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM)) - VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_compute->base.layout, VK_SHADER_STAGE_COMPUTE_BIT, - offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t), - &info->work_dim); + if (zink_program_has_descriptors(&ctx->curr_compute->base)) + zink_descriptors_update(ctx, true); + if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd.bindless) + zink_descriptors_update_bindless(ctx); batch->work_count++; zink_batch_no_rp(ctx); + if (!ctx->queries_disabled) + zink_resume_cs_query(ctx); if (info->indirect) { VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset); zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false); @@ -880,57 +1305,44 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) batch->has_work = true; batch->last_was_compute = true; /* flush if there's >100k computes */ - if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush) + if (!ctx->unordered_blitting && (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)) pctx->flush(pctx, NULL, 0); } -template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2, - zink_dynamic_vertex_input HAS_VERTEX_INPUT, bool BATCH_CHANGED> -static void -init_batch_changed_functions(struct zink_context *ctx, pipe_draw_vbo_func 
draw_vbo_array[2][2][2][2][2]) -{ - draw_vbo_array[HAS_MULTIDRAW][HAS_DYNAMIC_STATE][HAS_DYNAMIC_STATE2][HAS_VERTEX_INPUT][BATCH_CHANGED] = - zink_draw_vbo<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, BATCH_CHANGED>; -} - -template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2, - zink_dynamic_vertex_input HAS_VERTEX_INPUT> +template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED> static void -init_vertex_input_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2]) +init_batch_changed_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2]) { - init_batch_changed_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, false>(ctx, draw_vbo_array); - init_batch_changed_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, true>(ctx, draw_vbo_array); + draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>; + draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][0][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_NO, BATCH_CHANGED>; + draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][1][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_YES, BATCH_CHANGED>; } -template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2> +template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE> static void -init_dynamic_state2_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2]) +init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2]) { - init_vertex_input_functions<HAS_MULTIDRAW, 
HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, ZINK_NO_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array); - init_vertex_input_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array); -} - -template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE> -static void -init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2]) -{ - init_dynamic_state2_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, ZINK_NO_DYNAMIC_STATE2>(ctx, draw_vbo_array); - init_dynamic_state2_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array); + init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array); + init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array); } template <zink_multidraw HAS_MULTIDRAW> static void -init_multidraw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2]) +init_multidraw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2]) { - init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array); - init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array); + init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array); + init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array); + init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array); + init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT2>(ctx, draw_vbo_array, draw_state_array); + init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE3>(ctx, draw_vbo_array, draw_state_array); + init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, 
draw_state_array); } static void -init_all_draw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2]) +init_all_draw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2]) { - init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array); - init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array); + init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array); + init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array); } template <bool BATCH_CHANGED> @@ -959,32 +1371,50 @@ zink_invalid_draw_vbo(struct pipe_context *pipe, } static void +zink_invalid_draw_vertex_state(struct pipe_context *pipe, + struct pipe_vertex_state *vstate, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + unreachable("vertex shader not bound"); +} + +static void zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) { unreachable("compute shader not bound"); } +#define STAGE_BASE 0 +#define STAGE_BASE_GS (BITFIELD_BIT(MESA_SHADER_GEOMETRY) >> 1) +#define STAGE_BASE_TES (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) >> 1) +#define STAGE_BASE_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1) +#define STAGE_BASE_TCS_TES ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) >> 1) +#define STAGE_BASE_TCS_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1) + template <unsigned STAGE_MASK> static uint32_t hash_gfx_program(const void *key) { const struct zink_shader **shaders = (const struct zink_shader**)key; - uint32_t base_hash = shaders[PIPE_SHADER_VERTEX]->hash ^ shaders[PIPE_SHADER_FRAGMENT]->hash; - if (STAGE_MASK == 0) //VS+FS + uint32_t base_hash = 
shaders[MESA_SHADER_VERTEX]->hash ^ shaders[MESA_SHADER_FRAGMENT]->hash; + if (STAGE_MASK == STAGE_BASE) //VS+FS return base_hash; - if (STAGE_MASK == 1) //VS+GS+FS - return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash; + if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS + return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash; /*VS+TCS+FS isn't a thing */ /*VS+TCS+GS+FS isn't a thing */ - if (STAGE_MASK == 4) //VS+TES+FS - return base_hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash; - if (STAGE_MASK == 5) //VS+TES+GS+FS - return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash; - if (STAGE_MASK == 6) //VS+TCS+TES+FS - return base_hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash; + if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS + return base_hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash; + if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS + return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash; + if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS + return base_hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash; /* all stages */ - return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash; + return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash; } template <unsigned STAGE_MASK> @@ -993,41 +1423,75 @@ equals_gfx_program(const void *a, const void *b) { const void **sa = (const void**)a; const void **sb = (const void**)b; - if (STAGE_MASK == 0) //VS+FS - return !memcmp(a, b, sizeof(void*) * 2); - if (STAGE_MASK == 1) //VS+GS+FS - return !memcmp(a, b, sizeof(void*) * 3); + STATIC_ASSERT(MESA_SHADER_VERTEX == 0); + STATIC_ASSERT(MESA_SHADER_TESS_CTRL == 1); + STATIC_ASSERT(MESA_SHADER_TESS_EVAL == 2); + STATIC_ASSERT(MESA_SHADER_GEOMETRY == 3); + STATIC_ASSERT(MESA_SHADER_FRAGMENT == 
4); + if (STAGE_MASK == STAGE_BASE) //VS+FS + return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] && + sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT]; + if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS + return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] && + !memcmp(&sa[MESA_SHADER_GEOMETRY], &sb[MESA_SHADER_GEOMETRY], sizeof(void*) * 2); /*VS+TCS+FS isn't a thing */ /*VS+TCS+GS+FS isn't a thing */ - if (STAGE_MASK == 4) //VS+TES+FS - return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 2); - if (STAGE_MASK == 5) //VS+TES+GS+FS - return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 3); - if (STAGE_MASK == 6) //VS+TCS+TES+FS - return !memcmp(&sa[PIPE_SHADER_TESS_CTRL], &sb[PIPE_SHADER_TESS_CTRL], sizeof(void*) * 2) && - !memcmp(a, b, sizeof(void*) * 2); + if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS + return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] && + sa[MESA_SHADER_TESS_EVAL] == sb[MESA_SHADER_TESS_EVAL] && + sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT]; + if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS + return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] && + !memcmp(&sa[MESA_SHADER_TESS_EVAL], &sb[MESA_SHADER_TESS_EVAL], sizeof(void*) * 3); + if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS + return !memcmp(sa, sb, sizeof(void*) * 3) && + sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT]; /* all stages */ - return !memcmp(a, b, sizeof(void*) * ZINK_SHADER_COUNT); + return !memcmp(a, b, sizeof(void*) * ZINK_GFX_SHADER_COUNT); } extern "C" void zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen) { - pipe_draw_vbo_func draw_vbo_array[2][2][2][2] //multidraw, dynamic state, dynamic state2, dynamic vertex input, - [2]; //batch changed - init_all_draw_functions(ctx, draw_vbo_array); + pipe_draw_func draw_vbo_array[2][6] //multidraw, zink_dynamic_state + [2]; //batch changed + pipe_draw_vertex_state_func 
draw_state_array[2][6] //multidraw, zink_dynamic_state + [2][2]; //has_popcnt, batch changed + zink_dynamic_state dynamic; + if (screen->info.have_EXT_extended_dynamic_state) { + if (screen->info.have_EXT_extended_dynamic_state2) { + if (screen->info.have_EXT_extended_dynamic_state3) { + if (screen->info.have_EXT_vertex_input_dynamic_state) + dynamic = ZINK_DYNAMIC_VERTEX_INPUT; + else + dynamic = ZINK_DYNAMIC_STATE3; + } else { + if (screen->info.have_EXT_vertex_input_dynamic_state) + dynamic = ZINK_DYNAMIC_VERTEX_INPUT2; + else + dynamic = ZINK_DYNAMIC_STATE2; + } + } else { + dynamic = ZINK_DYNAMIC_STATE; + } + } else { + dynamic = ZINK_NO_DYNAMIC_STATE; + } + init_all_draw_functions(ctx, draw_vbo_array, draw_state_array); memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw] - [screen->info.have_EXT_extended_dynamic_state] - [screen->info.have_EXT_extended_dynamic_state2] - [screen->info.have_EXT_vertex_input_dynamic_state], + [dynamic], sizeof(ctx->draw_vbo)); + memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw] + [dynamic][util_get_cpu_caps()->has_popcnt], + sizeof(ctx->draw_state)); /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip * initialization of callbacks in upper layers (such as u_threaded_context). 
*/ ctx->base.draw_vbo = zink_invalid_draw_vbo; + ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state; _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>); _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>); @@ -1037,6 +1501,8 @@ zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen) _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>); _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>); _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++) + simple_mtx_init(&ctx->program_lock[i], mtx_plain); } void @@ -1048,3 +1514,18 @@ zink_init_grid_functions(struct zink_context *ctx) */ ctx->base.launch_grid = zink_invalid_launch_grid; } + +void +zink_init_screen_pipeline_libs(struct zink_screen *screen) +{ + _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>); + _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>); + _mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>); + _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>); + _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>); + _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>); + _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>); + _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>); + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++) + simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain); +} diff --git a/src/gallium/drivers/zink/zink_extensions.py 
b/src/gallium/drivers/zink/zink_extensions.py index 52c7def4479..31634400ebd 100644 --- a/src/gallium/drivers/zink/zink_extensions.py +++ b/src/gallium/drivers/zink/zink_extensions.py @@ -67,13 +67,18 @@ class Extension: core_since = None # these are specific to zink_device_info.py: - has_properties = False - has_features = False - guard = False + has_properties = False + has_features = False + guard = False + features_promoted = False + properties_promoted = False + + + # these are specific to zink_instance.py: + platform_guard = None def __init__(self, name, alias="", required=False, nonstandard=False, - properties=False, features=False, conditions=None, guard=False, - core_since=None): + properties=False, features=False, conditions=None, guard=False): self.name = name self.alias = alias self.is_required = required @@ -82,7 +87,6 @@ class Extension: self.has_features = features self.enable_conds = conditions self.guard = guard - self.core_since = core_since if alias == "" and (properties == True or features == True): raise RuntimeError("alias must be available when properties and/or features are used") @@ -98,13 +102,39 @@ class Extension: # e.g.: "VK_EXT_robustness2" -> "Robustness2" def name_in_camel_case(self): return "".join([x.title() for x in self.name.split('_')[2:]]) - - # e.g.: "VK_EXT_robustness2" -> "VK_EXT_ROBUSTNESS2_EXTENSION_NAME" - # do note that inconsistencies exist, i.e. we have - # VK_EXT_ROBUSTNESS_2_EXTENSION_NAME defined in the headers, but then - # we also have VK_KHR_MAINTENANCE1_EXTENSION_NAME + + # e.g.: "VK_EXT_robustness2" -> "VK_EXT_ROBUSTNESS_2" + def name_in_snake_uppercase(self): + def replace(original): + # we do not split the types into two, e.g. 
INT_32 + match_types = re.match(".*(int|float)(8|16|32|64)$", original) + + # do not match win32 + match_os = re.match(".*win32$", original) + + # try to match extensions with alphanumeric names, like robustness2 + match_alphanumeric = re.match(r"([a-z]+)(\d+)", original) + + if match_types is not None or match_os is not None: + return original.upper() + + if match_alphanumeric is not None: + return (match_alphanumeric[1].upper() + + '_' + + match_alphanumeric[2]) + + return original.upper() + + replaced = list(map(replace, self.name.split('_'))) + return '_'.join(replaced) + + # e.g.: "VK_EXT_robustness2" -> "ROBUSTNESS_2" + def pure_name_in_snake_uppercase(self): + return '_'.join(self.name_in_snake_uppercase().split('_')[2:]) + + # e.g.: "VK_EXT_robustness2" -> "VK_EXT_ROBUSTNESS_2_EXTENSION_NAME" def extension_name(self): - return self.name.upper() + "_EXTENSION_NAME" + return self.name_in_snake_uppercase() + "_EXTENSION_NAME" # generate a C string literal for the extension def extension_name_literal(self): @@ -130,7 +160,7 @@ class Extension: # for VK_EXT_transform_feedback and struct="FEATURES" def stype(self, struct: str): return ("VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_" - + self.pure_name().upper() + + self.pure_name_in_snake_uppercase() + '_' + struct + '_' + self.vendor()) @@ -152,7 +182,13 @@ class ExtensionRegistryEntry: instance_commands = None constants = None features_struct = None + features_fields = None + features_promoted = False properties_struct = None + properties_fields = None + properties_promoted = False + # some instance extensions are locked behind certain platforms + platform_guard = "" class ExtensionRegistry: # key = extension name, value = registry entry @@ -162,7 +198,9 @@ class ExtensionRegistry: vkxml = ElementTree.parse(vkxml_path) commands_type = dict() - aliases = dict() + command_aliases = dict() + platform_guards = dict() + struct_aliases = dict() for cmd in vkxml.findall("commands/command"): name = cmd.find("./proto/name") @@ 
-170,11 +208,26 @@ class ExtensionRegistry: if name is not None and name.text: commands_type[name.text] = cmd.find("./param/type").text elif cmd.get("name") is not None: - aliases[cmd.get("name")] = cmd.get("alias") + command_aliases[cmd.get("name")] = cmd.get("alias") + + for typ in vkxml.findall("types/type"): + if typ.get("category") != "struct": + continue - for (cmd, alias) in aliases.items(): + name = typ.get("name") + alias = typ.get("alias") + + if name and alias: + struct_aliases[name] = alias + + for (cmd, alias) in command_aliases.items(): commands_type[cmd] = commands_type[alias] + for platform in vkxml.findall("platforms/platform"): + name = platform.get("name") + guard = platform.get("protect") + platform_guards[name] = guard + for ext in vkxml.findall("extensions/extension"): # Reserved extensions are marked with `supported="disabled"` if ext.get("supported") == "disabled": @@ -189,6 +242,8 @@ class ExtensionRegistry: entry.device_commands = [] entry.pdevice_commands = [] entry.instance_commands = [] + entry.features_fields = [] + entry.properties_fields = [] for cmd in ext.findall("require/command"): cmd_name = cmd.get("name") @@ -214,10 +269,51 @@ class ExtensionRegistry: if (self.is_features_struct(ty_name) and entry.features_struct is None): entry.features_struct = ty_name + elif (self.is_properties_struct(ty_name) and entry.properties_struct is None): entry.properties_struct = ty_name + if entry.features_struct: + struct_name = entry.features_struct + if entry.features_struct in struct_aliases: + struct_name = struct_aliases[entry.features_struct] + entry.features_promoted = True + + elif entry.promoted_in is not None: + # if the extension is promoted but a core-Vulkan alias is not + # available for the features, then consider the features struct + # non-core-promoted + entry.features_promoted = False + + for field in vkxml.findall("./types/type[@name='{}']/member".format(struct_name)): + field_name = field.find("name").text + + # we ignore 
sType and pNext since they are irrelevant + if field_name not in ["sType", "pNext"]: + entry.features_fields.append(field_name) + + if entry.properties_struct: + struct_name = entry.properties_struct + if entry.properties_struct in struct_aliases: + struct_name = struct_aliases[entry.properties_struct] + entry.properties_promoted = True + + elif entry.promoted_in is not None: + # if the extension is promoted but a core-Vulkan alias is not + # available for the properties, then it is not promoted to core + entry.properties_promoted = False + + for field in vkxml.findall("./types/type[@name='{}']/member".format(struct_name)): + field_name = field.find("name").text + + # we ignore sType and pNext since they are irrelevant + if field_name not in ["sType", "pNext"]: + entry.properties_fields.append(field_name) + + if ext.get("platform") is not None: + entry.platform_guard = platform_guards[ext.get("platform")] + self.registry[name] = entry def in_registry(self, ext_name: str): diff --git a/src/gallium/drivers/zink/zink_fence.c b/src/gallium/drivers/zink/zink_fence.c index b2118618bc0..86bc56cf119 100644 --- a/src/gallium/drivers/zink/zink_fence.c +++ b/src/gallium/drivers/zink/zink_fence.c @@ -28,14 +28,24 @@ #include "zink_resource.h" #include "zink_screen.h" +#include "util/os_file.h" #include "util/set.h" #include "util/u_memory.h" +#ifdef _WIN32 +#include <windows.h> +#include <vulkan/vulkan_win32.h> +#endif + static void destroy_fence(struct zink_screen *screen, struct zink_tc_fence *mfence) { + if (mfence->fence) + util_dynarray_delete_unordered(&mfence->fence->mfences, struct zink_tc_fence *, mfence); mfence->fence = NULL; tc_unflushed_batch_token_reference(&mfence->tc_token, NULL); + if (mfence->sem) + VKSCR(DestroySemaphore)(screen->dev, mfence->sem, NULL); FREE(mfence); } @@ -101,13 +111,13 @@ tc_fence_finish(struct zink_context *ctx, struct zink_tc_fence *mfence, uint64_t /* this is a tc mfence, so we're just waiting on the queue mfence to complete * after 
being signaled by the real mfence */ - if (*timeout_ns == PIPE_TIMEOUT_INFINITE) { + if (*timeout_ns == OS_TIMEOUT_INFINITE) { util_queue_fence_wait(&mfence->ready); } else { if (!util_queue_fence_wait_timeout(&mfence->ready, abs_timeout)) return false; } - if (*timeout_ns && *timeout_ns != PIPE_TIMEOUT_INFINITE) { + if (*timeout_ns && *timeout_ns != OS_TIMEOUT_INFINITE) { int64_t time_ns = os_time_get_nano(); *timeout_ns = abs_timeout > time_ns ? abs_timeout - time_ns : 0; } @@ -116,8 +126,8 @@ tc_fence_finish(struct zink_context *ctx, struct zink_tc_fence *mfence, uint64_t return true; } -bool -zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns) +static bool +fence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns) { if (screen->device_lost) return true; @@ -127,14 +137,7 @@ zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t assert(fence->batch_id); assert(fence->submitted); - bool success = false; - - VkResult ret; - if (timeout_ns) - ret = VKSCR(WaitForFences)(screen->dev, 1, &fence->fence, VK_TRUE, timeout_ns); - else - ret = VKSCR(GetFenceStatus)(screen->dev, fence->fence); - success = zink_screen_handle_vkresult(screen, ret); + bool success = zink_screen_timeline_wait(screen, fence->batch_id, timeout_ns); if (success) { p_atomic_set(&fence->completed, true); @@ -175,17 +178,22 @@ zink_fence_finish(struct zink_screen *screen, struct pipe_context *pctx, struct struct zink_fence *fence = mfence->fence; - unsigned submit_diff = zink_batch_state(mfence->fence)->submit_count - mfence->submit_count; + unsigned submit_diff = zink_batch_state(mfence->fence)->usage.submit_count - mfence->submit_count; /* this batch is known to have finished because it has been submitted more than 1 time * since the tc fence last saw it */ if (submit_diff > 1) return true; - if (fence->submitted && zink_screen_check_last_finished(screen, fence->batch_id)) + /* - if fence is submitted, 
batch_id is nonzero and can be checked + * - if fence is not submitted here, it must be reset; batch_id will be 0 and submitted is false + * in either case, the fence has finished + */ + if ((fence->submitted && zink_screen_check_last_finished(screen, fence->batch_id)) || + (!fence->submitted && submit_diff)) return true; - return zink_vkfence_wait(screen, fence, timeout_ns); + return fence_wait(screen, fence, timeout_ns); } static bool @@ -196,25 +204,174 @@ fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx, timeout_ns); } +static int +fence_get_fd(struct pipe_screen *pscreen, struct pipe_fence_handle *pfence) +{ + struct zink_screen *screen = zink_screen(pscreen); + if (screen->device_lost) + return -1; + + struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence; + if (!mfence->sem) + return -1; + + const VkSemaphoreGetFdInfoKHR sgfi = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = mfence->sem, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + int fd = -1; + VkResult result = VKSCR(GetSemaphoreFdKHR)(screen->dev, &sgfi, &fd); + if (!zink_screen_handle_vkresult(screen, result)) { + mesa_loge("ZINK: vkGetSemaphoreFdKHR failed (%s)", vk_Result_to_str(result)); + return -1; + } + + return fd; +} + +void +zink_fence_server_signal(struct pipe_context *pctx, struct pipe_fence_handle *pfence) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence; + + assert(!ctx->batch.state->signal_semaphore); + ctx->batch.state->signal_semaphore = mfence->sem; + ctx->batch.has_work = true; + struct zink_batch_state *bs = ctx->batch.state; + /* this must produce a synchronous flush that completes before the function returns */ + pctx->flush(pctx, NULL, 0); + if (zink_screen(ctx->base.screen)->threaded_submit) + util_queue_fence_wait(&bs->flush_completed); +} + void zink_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *pfence) { - 
struct zink_tc_fence *mfence = zink_tc_fence(pfence); + struct zink_context *ctx = zink_context(pctx); + struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence; - if (mfence->deferred_ctx == pctx) + if (mfence->deferred_ctx == pctx || !mfence->sem) return; - if (mfence->deferred_ctx) { - zink_context(pctx)->batch.has_work = true; - /* this must be the current batch */ - pctx->flush(pctx, NULL, 0); + mfence->deferred_ctx = pctx; + /* this will be applied on the next submit */ + VkPipelineStageFlags flag = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + util_dynarray_append(&ctx->batch.state->wait_semaphores, VkSemaphore, mfence->sem); + util_dynarray_append(&ctx->batch.state->wait_semaphore_stages, VkPipelineStageFlags, flag); + pipe_reference(NULL, &mfence->reference); + util_dynarray_append(&ctx->batch.state->fences, struct zink_tc_fence*, mfence); + + /* transfer the external wait sempahore ownership to the next submit */ + mfence->sem = VK_NULL_HANDLE; +} + +void +zink_create_fence_fd(struct pipe_context *pctx, struct pipe_fence_handle **pfence, int fd, enum pipe_fd_type type) +{ + struct zink_screen *screen = zink_screen(pctx->screen); + VkResult result; + + assert(fd >= 0); + + struct zink_tc_fence *mfence = zink_create_tc_fence(); + if (!mfence) + goto fail_tc_fence_create; + + const VkSemaphoreCreateInfo sci = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + result = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &mfence->sem); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateSemaphore failed (%s)", vk_Result_to_str(result)); + goto fail_sem_create; + } + + int dup_fd = os_dupfd_cloexec(fd); + if (dup_fd < 0) + goto fail_fd_dup; + + static const VkExternalSemaphoreHandleTypeFlagBits flags[] = { + [PIPE_FD_TYPE_NATIVE_SYNC] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + [PIPE_FD_TYPE_SYNCOBJ] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + assert(type < ARRAY_SIZE(flags)); + + const VkImportSemaphoreFdInfoKHR sdi = { + .sType = 
VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .semaphore = mfence->sem, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .handleType = flags[type], + .fd = dup_fd, + }; + result = VKSCR(ImportSemaphoreFdKHR)(screen->dev, &sdi); + if (!zink_screen_handle_vkresult(screen, result)) { + mesa_loge("ZINK: vkImportSemaphoreFdKHR failed (%s)", vk_Result_to_str(result)); + goto fail_sem_import; + } + + *pfence = (struct pipe_fence_handle *)mfence; + return; + +fail_sem_import: + close(dup_fd); +fail_fd_dup: + VKSCR(DestroySemaphore)(screen->dev, mfence->sem, NULL); +fail_sem_create: + FREE(mfence); +fail_tc_fence_create: + *pfence = NULL; +} + +#ifdef _WIN32 +void +zink_create_fence_win32(struct pipe_screen *pscreen, struct pipe_fence_handle **pfence, void *handle, const void *name, enum pipe_fd_type type) +{ + struct zink_screen *screen = zink_screen(pscreen); + VkResult ret = VK_ERROR_UNKNOWN; + VkSemaphoreCreateInfo sci = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + NULL, + 0 + }; + struct zink_tc_fence *mfence = zink_create_tc_fence(); + VkExternalSemaphoreHandleTypeFlagBits flags[] = { + [PIPE_FD_TYPE_NATIVE_SYNC] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + [PIPE_FD_TYPE_SYNCOBJ] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + }; + VkImportSemaphoreWin32HandleInfoKHR sdi = {0}; + assert(type < ARRAY_SIZE(flags)); + + *pfence = NULL; + + if (VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &mfence->sem) != VK_SUCCESS) { + FREE(mfence); + return; } - zink_fence_finish(zink_screen(pctx->screen), pctx, mfence, PIPE_TIMEOUT_INFINITE); + + sdi.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR; + sdi.semaphore = mfence->sem; + sdi.handleType = flags[type]; + sdi.handle = handle; + sdi.name = (LPCWSTR)name; + ret = VKSCR(ImportSemaphoreWin32HandleKHR)(screen->dev, &sdi); + + if (!zink_screen_handle_vkresult(screen, ret)) + goto fail; + *pfence = (struct pipe_fence_handle *)mfence; + return; + +fail: + VKSCR(DestroySemaphore)(screen->dev, 
mfence->sem, NULL); + FREE(mfence); } +#endif void zink_screen_fence_init(struct pipe_screen *pscreen) { pscreen->fence_reference = fence_reference; pscreen->fence_finish = fence_finish; + pscreen->fence_get_fd = fence_get_fd; } diff --git a/src/gallium/drivers/zink/zink_fence.h b/src/gallium/drivers/zink/zink_fence.h index e900a4c69c0..22faa2e6de7 100644 --- a/src/gallium/drivers/zink/zink_fence.h +++ b/src/gallium/drivers/zink/zink_fence.h @@ -24,36 +24,7 @@ #ifndef ZINK_FENCE_H #define ZINK_FENCE_H -#include "util/simple_mtx.h" -#include "util/u_inlines.h" -#include "util/u_queue.h" - -#include <vulkan/vulkan.h> - -struct pipe_context; -struct pipe_screen; -struct zink_batch; -struct zink_batch_state; -struct zink_context; -struct zink_screen; - -struct tc_unflushed_batch_token; - -struct zink_tc_fence { - struct pipe_reference reference; - uint32_t submit_count; - struct util_queue_fence ready; - struct tc_unflushed_batch_token *tc_token; - struct pipe_context *deferred_ctx; - struct zink_fence *fence; -}; - -struct zink_fence { - VkFence fence; - uint32_t batch_id; - bool submitted; - bool completed; -}; +#include "zink_types.h" static inline struct zink_fence * zink_fence(void *pfence) @@ -79,14 +50,19 @@ zink_fence_reference(struct zink_screen *screen, struct zink_tc_fence *fence); void +zink_create_fence_fd(struct pipe_context *pctx, struct pipe_fence_handle **pfence, int fd, enum pipe_fd_type type); +#if defined(_WIN32) +void +zink_create_fence_win32(struct pipe_screen *screen, struct pipe_fence_handle **pfence, void *handle, const void *name, enum pipe_fd_type type); +#endif +void +zink_fence_server_signal(struct pipe_context *pctx, struct pipe_fence_handle *pfence); +void zink_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *pfence); void zink_screen_fence_init(struct pipe_screen *pscreen); -bool -zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns); - void zink_fence_clear_resources(struct 
zink_screen *screen, struct zink_fence *fence); #endif diff --git a/src/gallium/drivers/zink/zink_format.c b/src/gallium/drivers/zink/zink_format.c index 358f409922e..cf36909d6ca 100644 --- a/src/gallium/drivers/zink/zink_format.c +++ b/src/gallium/drivers/zink/zink_format.c @@ -1,152 +1,6 @@ #include "util/format/u_format.h" #include "zink_format.h" - -static const VkFormat formats[PIPE_FORMAT_COUNT] = { -#define MAP_FORMAT_NORM(FMT) \ - [PIPE_FORMAT_ ## FMT ## _UNORM] = VK_FORMAT_ ## FMT ## _UNORM, \ - [PIPE_FORMAT_ ## FMT ## _SNORM] = VK_FORMAT_ ## FMT ## _SNORM, - -#define MAP_FORMAT_SCALED(FMT) \ - [PIPE_FORMAT_ ## FMT ## _USCALED] = VK_FORMAT_ ## FMT ## _USCALED, \ - [PIPE_FORMAT_ ## FMT ## _SSCALED] = VK_FORMAT_ ## FMT ## _SSCALED, - -#define MAP_FORMAT_INT(FMT) \ - [PIPE_FORMAT_ ## FMT ## _UINT] = VK_FORMAT_ ## FMT ## _UINT, \ - [PIPE_FORMAT_ ## FMT ## _SINT] = VK_FORMAT_ ## FMT ## _SINT, - -#define MAP_FORMAT_SRGB(FMT) \ - [PIPE_FORMAT_ ## FMT ## _SRGB] = VK_FORMAT_ ## FMT ## _SRGB, - -#define MAP_FORMAT_FLOAT(FMT) \ - [PIPE_FORMAT_ ## FMT ## _FLOAT] = VK_FORMAT_ ## FMT ## _SFLOAT, - - // one component - - // 8-bits - MAP_FORMAT_NORM(R8) - MAP_FORMAT_SCALED(R8) - MAP_FORMAT_INT(R8) - MAP_FORMAT_SRGB(R8) - // 16-bits - MAP_FORMAT_NORM(R16) - MAP_FORMAT_SCALED(R16) - MAP_FORMAT_INT(R16) - MAP_FORMAT_FLOAT(R16) - // 32-bits - MAP_FORMAT_INT(R32) - MAP_FORMAT_FLOAT(R32) - - // two components - - // 8-bits - MAP_FORMAT_NORM(R8G8) - MAP_FORMAT_SCALED(R8G8) - MAP_FORMAT_INT(R8G8) - MAP_FORMAT_SRGB(R8G8) - // 16-bits - MAP_FORMAT_NORM(R16G16) - MAP_FORMAT_SCALED(R16G16) - MAP_FORMAT_INT(R16G16) - MAP_FORMAT_FLOAT(R16G16) - // 32-bits - MAP_FORMAT_INT(R32G32) - MAP_FORMAT_FLOAT(R32G32) - - // three components - - // 8-bits - MAP_FORMAT_NORM(R8G8B8) - MAP_FORMAT_SCALED(R8G8B8) - MAP_FORMAT_INT(R8G8B8) - MAP_FORMAT_SRGB(R8G8B8) - MAP_FORMAT_NORM(B8G8R8) - MAP_FORMAT_SCALED(B8G8R8) - MAP_FORMAT_INT(B8G8R8) - MAP_FORMAT_SRGB(B8G8R8) - // 16-bits - 
MAP_FORMAT_NORM(R16G16B16) - MAP_FORMAT_SCALED(R16G16B16) - MAP_FORMAT_INT(R16G16B16) - MAP_FORMAT_FLOAT(R16G16B16) - // 32-bits - MAP_FORMAT_INT(R32G32B32) - MAP_FORMAT_FLOAT(R32G32B32) - - // four components - - // 8-bits - MAP_FORMAT_NORM(R8G8B8A8) - MAP_FORMAT_SCALED(R8G8B8A8) - MAP_FORMAT_INT(R8G8B8A8) - MAP_FORMAT_NORM(B8G8R8A8) - MAP_FORMAT_SCALED(B8G8R8A8) - MAP_FORMAT_INT(B8G8R8A8) - MAP_FORMAT_SRGB(B8G8R8A8) - [PIPE_FORMAT_RGBA8888_SRGB] = VK_FORMAT_A8B8G8R8_SRGB_PACK32, - // 16-bits - MAP_FORMAT_NORM(R16G16B16A16) - MAP_FORMAT_SCALED(R16G16B16A16) - MAP_FORMAT_INT(R16G16B16A16) - MAP_FORMAT_FLOAT(R16G16B16A16) - // 32-bits - MAP_FORMAT_INT(R32G32B32A32) - MAP_FORMAT_FLOAT(R32G32B32A32) - - // other color formats - [PIPE_FORMAT_A4B4G4R4_UNORM] = VK_FORMAT_R4G4B4A4_UNORM_PACK16, - [PIPE_FORMAT_A4R4G4B4_UNORM] = VK_FORMAT_B4G4R4A4_UNORM_PACK16, - [PIPE_FORMAT_B4G4R4A4_UNORM] = VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, - [PIPE_FORMAT_R4G4B4A4_UNORM] = VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, - [PIPE_FORMAT_B5G6R5_UNORM] = VK_FORMAT_R5G6B5_UNORM_PACK16, - [PIPE_FORMAT_R5G6B5_UNORM] = VK_FORMAT_B5G6R5_UNORM_PACK16, - - [PIPE_FORMAT_A1B5G5R5_UNORM] = VK_FORMAT_R5G5B5A1_UNORM_PACK16, - [PIPE_FORMAT_A1R5G5B5_UNORM] = VK_FORMAT_B5G5R5A1_UNORM_PACK16, - [PIPE_FORMAT_B5G5R5A1_UNORM] = VK_FORMAT_A1R5G5B5_UNORM_PACK16, - - [PIPE_FORMAT_R11G11B10_FLOAT] = VK_FORMAT_B10G11R11_UFLOAT_PACK32, - [PIPE_FORMAT_R9G9B9E5_FLOAT] = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, - /* ARB_vertex_type_2_10_10_10 */ - [PIPE_FORMAT_R10G10B10A2_UNORM] = VK_FORMAT_A2B10G10R10_UNORM_PACK32, - [PIPE_FORMAT_R10G10B10A2_SNORM] = VK_FORMAT_A2B10G10R10_SNORM_PACK32, - [PIPE_FORMAT_B10G10R10A2_UNORM] = VK_FORMAT_A2R10G10B10_UNORM_PACK32, - [PIPE_FORMAT_B10G10R10A2_SNORM] = VK_FORMAT_A2R10G10B10_SNORM_PACK32, - [PIPE_FORMAT_R10G10B10A2_USCALED] = VK_FORMAT_A2B10G10R10_USCALED_PACK32, - [PIPE_FORMAT_R10G10B10A2_SSCALED] = VK_FORMAT_A2B10G10R10_SSCALED_PACK32, - [PIPE_FORMAT_B10G10R10A2_USCALED] = 
VK_FORMAT_A2R10G10B10_USCALED_PACK32, - [PIPE_FORMAT_B10G10R10A2_SSCALED] = VK_FORMAT_A2R10G10B10_SSCALED_PACK32, - [PIPE_FORMAT_R10G10B10A2_UINT] = VK_FORMAT_A2B10G10R10_UINT_PACK32, - [PIPE_FORMAT_B10G10R10A2_UINT] = VK_FORMAT_A2R10G10B10_UINT_PACK32, - [PIPE_FORMAT_B10G10R10A2_SINT] = VK_FORMAT_A2R10G10B10_SINT_PACK32, - - // depth/stencil formats - [PIPE_FORMAT_Z32_FLOAT] = VK_FORMAT_D32_SFLOAT, - [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = VK_FORMAT_D32_SFLOAT_S8_UINT, - [PIPE_FORMAT_Z16_UNORM] = VK_FORMAT_D16_UNORM, - [PIPE_FORMAT_Z16_UNORM_S8_UINT] = VK_FORMAT_D16_UNORM_S8_UINT, - [PIPE_FORMAT_Z24X8_UNORM] = VK_FORMAT_X8_D24_UNORM_PACK32, - [PIPE_FORMAT_Z24_UNORM_S8_UINT] = VK_FORMAT_D24_UNORM_S8_UINT, - [PIPE_FORMAT_S8_UINT] = VK_FORMAT_S8_UINT, - - // compressed formats - [PIPE_FORMAT_DXT1_RGB] = VK_FORMAT_BC1_RGB_UNORM_BLOCK, - [PIPE_FORMAT_DXT1_RGBA] = VK_FORMAT_BC1_RGBA_UNORM_BLOCK, - [PIPE_FORMAT_DXT3_RGBA] = VK_FORMAT_BC2_UNORM_BLOCK, - [PIPE_FORMAT_DXT5_RGBA] = VK_FORMAT_BC3_UNORM_BLOCK, - [PIPE_FORMAT_DXT1_SRGB] = VK_FORMAT_BC1_RGB_SRGB_BLOCK, - [PIPE_FORMAT_DXT1_SRGBA] = VK_FORMAT_BC1_RGBA_SRGB_BLOCK, - [PIPE_FORMAT_DXT3_SRGBA] = VK_FORMAT_BC2_SRGB_BLOCK, - [PIPE_FORMAT_DXT5_SRGBA] = VK_FORMAT_BC3_SRGB_BLOCK, - - [PIPE_FORMAT_RGTC1_UNORM] = VK_FORMAT_BC4_UNORM_BLOCK, - [PIPE_FORMAT_RGTC1_SNORM] = VK_FORMAT_BC4_SNORM_BLOCK, - [PIPE_FORMAT_RGTC2_UNORM] = VK_FORMAT_BC5_UNORM_BLOCK, - [PIPE_FORMAT_RGTC2_SNORM] = VK_FORMAT_BC5_SNORM_BLOCK, - [PIPE_FORMAT_BPTC_RGBA_UNORM] = VK_FORMAT_BC7_UNORM_BLOCK, - [PIPE_FORMAT_BPTC_SRGBA] = VK_FORMAT_BC7_SRGB_BLOCK, - [PIPE_FORMAT_BPTC_RGB_FLOAT] = VK_FORMAT_BC6H_SFLOAT_BLOCK, - [PIPE_FORMAT_BPTC_RGB_UFLOAT] = VK_FORMAT_BC6H_UFLOAT_BLOCK, -}; +#include "util/u_math.h" enum pipe_format zink_decompose_vertex_format(enum pipe_format format) @@ -195,12 +49,121 @@ zink_decompose_vertex_format(enum pipe_format format) return new_format; } -VkFormat -zink_pipe_format_to_vk_format(enum pipe_format format) +bool 
+zink_format_is_red_alpha(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R4A4_UNORM: + case PIPE_FORMAT_R8A8_SINT: + case PIPE_FORMAT_R8A8_SNORM: + case PIPE_FORMAT_R8A8_UINT: + case PIPE_FORMAT_R8A8_UNORM: + case PIPE_FORMAT_R16A16_SINT: + case PIPE_FORMAT_R16A16_SNORM: + case PIPE_FORMAT_R16A16_UINT: + case PIPE_FORMAT_R16A16_UNORM: + case PIPE_FORMAT_R16A16_FLOAT: + case PIPE_FORMAT_R32A32_SINT: + case PIPE_FORMAT_R32A32_UINT: + case PIPE_FORMAT_R32A32_FLOAT: + return true; + default: break; + } + return false; +} + +bool +zink_format_is_emulated_alpha(enum pipe_format format) +{ + return util_format_is_alpha(format) || + util_format_is_luminance(format) || + util_format_is_luminance_alpha(format) || + zink_format_is_red_alpha(format); +} + +static enum pipe_format +emulate_alpha(enum pipe_format format) { - return formats[format]; + if (format == PIPE_FORMAT_A8_UNORM) + return PIPE_FORMAT_R8_UNORM; + if (format == PIPE_FORMAT_A8_UINT) + return PIPE_FORMAT_R8_UINT; + if (format == PIPE_FORMAT_A8_SNORM) + return PIPE_FORMAT_R8_SNORM; + if (format == PIPE_FORMAT_A8_SINT) + return PIPE_FORMAT_R8_SINT; + if (format == PIPE_FORMAT_A16_UNORM) + return PIPE_FORMAT_R16_UNORM; + if (format == PIPE_FORMAT_A16_UINT) + return PIPE_FORMAT_R16_UINT; + if (format == PIPE_FORMAT_A16_SNORM) + return PIPE_FORMAT_R16_SNORM; + if (format == PIPE_FORMAT_A16_SINT) + return PIPE_FORMAT_R16_SINT; + if (format == PIPE_FORMAT_A16_FLOAT) + return PIPE_FORMAT_R16_FLOAT; + if (format == PIPE_FORMAT_A32_UINT) + return PIPE_FORMAT_R32_UINT; + if (format == PIPE_FORMAT_A32_SINT) + return PIPE_FORMAT_R32_SINT; + if (format == PIPE_FORMAT_A32_FLOAT) + return PIPE_FORMAT_R32_FLOAT; + return format; } +static enum pipe_format +emulate_red_alpha(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8A8_SINT: + return PIPE_FORMAT_R8G8_SINT; + case PIPE_FORMAT_R8A8_SNORM: + return PIPE_FORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8A8_UINT: + return PIPE_FORMAT_R8G8_UINT; + 
case PIPE_FORMAT_R8A8_UNORM: + return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_R16A16_SINT: + return PIPE_FORMAT_R16G16_SINT; + case PIPE_FORMAT_R16A16_SNORM: + return PIPE_FORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16A16_UINT: + return PIPE_FORMAT_R16G16_UINT; + case PIPE_FORMAT_R16A16_UNORM: + return PIPE_FORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16A16_FLOAT: + return PIPE_FORMAT_R16G16_FLOAT; + case PIPE_FORMAT_R32A32_SINT: + return PIPE_FORMAT_R32G32_SINT; + case PIPE_FORMAT_R32A32_UINT: + return PIPE_FORMAT_R32G32_UINT; + case PIPE_FORMAT_R32A32_FLOAT: + return PIPE_FORMAT_R32G32_FLOAT; + default: break; + } + return format; +} + +enum pipe_format +zink_format_get_emulated_alpha(enum pipe_format format) +{ + if (util_format_is_alpha(format)) + return emulate_alpha(format); + if (util_format_is_luminance(format)) + return util_format_luminance_to_red(format); + if (util_format_is_luminance_alpha(format)) { + if (util_format_is_srgb(format)) + return format; + if (format == PIPE_FORMAT_LATC2_UNORM) + return PIPE_FORMAT_RGTC2_UNORM; + if (format == PIPE_FORMAT_LATC2_SNORM) + return PIPE_FORMAT_RGTC2_SNORM; + + format = util_format_luminance_to_red(format); + } + + return emulate_red_alpha(format); +} bool zink_format_is_voidable_rgba_variant(enum pipe_format format) @@ -210,7 +173,8 @@ zink_format_is_voidable_rgba_variant(enum pipe_format format) if(desc->block.width != 1 || desc->block.height != 1 || - (desc->block.bits != 32 && desc->block.bits != 64)) + (desc->block.bits != 32 && desc->block.bits != 64 && + desc->block.bits != 128)) return false; if (desc->nr_channels != 4) @@ -224,3 +188,71 @@ zink_format_is_voidable_rgba_variant(enum pipe_format format) return true; } + +void +zink_format_clamp_channel_color(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i) +{ + int non_void = util_format_get_first_non_void_channel(desc->format); + unsigned channel = desc->swizzle[i]; + + if (channel > 
PIPE_SWIZZLE_W || desc->channel[channel].type == UTIL_FORMAT_TYPE_VOID) { + if (non_void != -1) { + if (desc->channel[non_void].type == UTIL_FORMAT_TYPE_FLOAT) { + dst->f[i] = uif(UINT32_MAX); + } else { + if (desc->channel[non_void].normalized) + dst->f[i] = 1.0; + else if (desc->channel[non_void].type == UTIL_FORMAT_TYPE_SIGNED) + dst->i[i] = INT32_MAX; + else + dst->ui[i] = UINT32_MAX; + } + } else { + dst->ui[i] = src->ui[i]; + } + return; + } + + switch (desc->channel[channel].type) { + case UTIL_FORMAT_TYPE_VOID: + unreachable("handled above"); + break; + case UTIL_FORMAT_TYPE_SIGNED: + if (desc->channel[channel].normalized) + dst->i[i] = src->i[i]; + else { + dst->i[i] = MAX2(src->i[i], -(1<<(desc->channel[channel].size - 1))); + dst->i[i] = MIN2(dst->i[i], (1 << (desc->channel[channel].size - 1)) - 1); + } + break; + case UTIL_FORMAT_TYPE_UNSIGNED: + if (desc->channel[channel].normalized) + dst->ui[i] = src->ui[i]; + else + dst->ui[i] = MIN2(src->ui[i], BITFIELD_MASK(desc->channel[channel].size)); + break; + case UTIL_FORMAT_TYPE_FIXED: + case UTIL_FORMAT_TYPE_FLOAT: + dst->ui[i] = src->ui[i]; + break; + } +} + +void +zink_format_clamp_channel_srgb(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i) +{ + unsigned channel = desc->swizzle[i]; + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && + channel <= PIPE_SWIZZLE_W) { + switch (desc->channel[channel].type) { + case UTIL_FORMAT_TYPE_SIGNED: + case UTIL_FORMAT_TYPE_UNSIGNED: + dst->f[i] = CLAMP(src->f[i], 0.0, 1.0); + return; + default: + break; + } + } + + dst->ui[i] = src->ui[i]; +} diff --git a/src/gallium/drivers/zink/zink_format.h b/src/gallium/drivers/zink/zink_format.h index 3324265177d..171c79360ad 100644 --- a/src/gallium/drivers/zink/zink_format.h +++ b/src/gallium/drivers/zink/zink_format.h @@ -24,17 +24,39 @@ #ifndef ZINK_FORMAT_H #define ZINK_FORMAT_H -#include "pipe/p_format.h" +#include "util/format/u_formats.h" 
+#include "util/format/u_format.h" #include <stdbool.h> -#include <vulkan/vulkan.h> +#include <vulkan/vulkan_core.h> + +union pipe_color_union; enum pipe_format zink_decompose_vertex_format(enum pipe_format format); -VkFormat -zink_pipe_format_to_vk_format(enum pipe_format format); - bool zink_format_is_voidable_rgba_variant(enum pipe_format format); +bool +zink_format_is_red_alpha(enum pipe_format format); +bool +zink_format_is_emulated_alpha(enum pipe_format format); +enum pipe_format +zink_format_get_emulated_alpha(enum pipe_format format); +void +zink_format_clamp_channel_color(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i); +void +zink_format_clamp_channel_srgb(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i); + +static inline bool +zink_format_needs_mutable(enum pipe_format a, enum pipe_format b) +{ + if (a == b) + return false; + if (util_format_is_srgb(a)) + return util_format_linear(a) != b; + if (util_format_is_srgb(b)) + return util_format_linear(b) != a; + return true; +} #endif diff --git a/src/gallium/drivers/zink/zink_format_test.c b/src/gallium/drivers/zink/zink_format_test.c index 502a2cbb661..3ff587ba346 100644 --- a/src/gallium/drivers/zink/zink_format_test.c +++ b/src/gallium/drivers/zink/zink_format_test.c @@ -7,13 +7,18 @@ main(int argc, char *argv[]) int ret = 0; for (int i = 0; i < PIPE_FORMAT_COUNT; ++i) { enum pipe_format pipe_fmt = i; - VkFormat vk_fmt = zink_pipe_format_to_vk_format(i); + VkFormat vk_fmt = vk_format_from_pipe_format(i); /* skip unsupported formats */ if (vk_fmt == VK_FORMAT_UNDEFINED) continue; enum pipe_format roundtrip = vk_format_to_pipe_format(vk_fmt); + + /* This one gets aliased to ETC2 rather than round tripping. 
*/ + if (pipe_fmt == PIPE_FORMAT_ETC1_RGB8 && roundtrip == PIPE_FORMAT_ETC2_RGB8) + continue; + if (roundtrip != pipe_fmt) { fprintf(stderr, "Format does not roundtrip\n" "\tgot: %s\n" diff --git a/src/gallium/drivers/zink/zink_framebuffer.c b/src/gallium/drivers/zink/zink_framebuffer.c index ef785c55319..991bd427bcc 100644 --- a/src/gallium/drivers/zink/zink_framebuffer.c +++ b/src/gallium/drivers/zink/zink_framebuffer.c @@ -37,7 +37,7 @@ zink_destroy_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb) { hash_table_foreach(&fb->objects, he) { -#if defined(_WIN64) || defined(__x86_64__) +#if VK_USE_64_BIT_PTR_DEFINES VKSCR(DestroyFramebuffer)(screen->dev, he->data, NULL); #else VkFramebuffer *ptr = he->data; @@ -49,7 +49,7 @@ zink_destroy_framebuffer(struct zink_screen *screen, } void -zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp) +zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp) { VkFramebuffer ret; @@ -60,7 +60,7 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&fb->objects, hash, rp); if (he) { -#if defined(_WIN64) || defined(__x86_64__) +#if VK_USE_64_BIT_PTR_DEFINES ret = (VkFramebuffer)he->data; #else VkFramebuffer *ptr = he->data; @@ -69,6 +69,8 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf goto out; } + assert(rp->state.num_cbufs + rp->state.have_zsbuf + rp->state.num_cresolves + rp->state.num_zsresolves == fb->state.num_attachments); + VkFramebufferCreateInfo fci; fci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fci.flags = VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT; @@ -88,7 +90,7 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf if (VKSCR(CreateFramebuffer)(screen->dev, &fci, NULL, &ret) != VK_SUCCESS) return; -#if defined(_WIN64) || 
defined(__x86_64__) +#if VK_USE_64_BIT_PTR_DEFINES _mesa_hash_table_insert_pre_hashed(&fb->objects, hash, rp, ret); #else VkFramebuffer *ptr = ralloc(fb, VkFramebuffer); @@ -110,8 +112,8 @@ populate_attachment_info(VkFramebufferAttachmentImageInfo *att, struct zink_surf att->sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO; att->pNext = NULL; memcpy(&att->flags, &info->flags, offsetof(struct zink_surface_info, format)); - att->viewFormatCount = 1; - att->pViewFormats = &info->format; + att->viewFormatCount = 1 + !!info->format[1]; + att->pViewFormats = info->format; } static struct zink_framebuffer * @@ -136,30 +138,53 @@ fail: } struct zink_framebuffer * -zink_get_framebuffer_imageless(struct zink_context *ctx) +zink_get_framebuffer(struct zink_context *ctx) { assert(zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer); + bool have_zsbuf = ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx); struct zink_framebuffer_state state; + state.num_attachments = ctx->fb_state.nr_cbufs; + + const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!have_zsbuf; + unsigned num_resolves = 0; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; - if (!psurf) - psurf = ctx->dummy_surface[util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples+1)]; + if (!psurf) { + psurf = zink_get_dummy_pipe_surface(ctx, util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples+1)); + } struct zink_surface *surface = zink_csurface(psurf); - memcpy(&state.infos[i], &surface->info, sizeof(surface->info)); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) { + memcpy(&state.infos[i], &transient->info, sizeof(transient->info)); + memcpy(&state.infos[cresolve_offset + i], &surface->info, sizeof(surface->info)); + num_resolves++; + } else { + memcpy(&state.infos[i], &surface->info, sizeof(surface->info)); + } } - state.num_attachments = ctx->fb_state.nr_cbufs; - if (ctx->fb_state.zsbuf) { + const 
unsigned zsresolve_offset = cresolve_offset + num_resolves; + if (have_zsbuf) { struct pipe_surface *psurf = ctx->fb_state.zsbuf; struct zink_surface *surface = zink_csurface(psurf); - memcpy(&state.infos[state.num_attachments], &surface->info, sizeof(surface->info)); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) { + memcpy(&state.infos[state.num_attachments], &transient->info, sizeof(transient->info)); + memcpy(&state.infos[zsresolve_offset], &surface->info, sizeof(surface->info)); + num_resolves++; + } else { + memcpy(&state.infos[state.num_attachments], &surface->info, sizeof(surface->info)); + } state.num_attachments++; } + /* avoid bitfield explosion */ + assert(state.num_attachments + num_resolves < 16); + state.num_attachments += num_resolves; state.width = MAX2(ctx->fb_state.width, 1); state.height = MAX2(ctx->fb_state.height, 1); - state.layers = MAX2(util_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1; + state.layers = MAX2(zink_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1; state.samples = ctx->fb_state.samples - 1; struct zink_framebuffer *fb; @@ -174,140 +199,42 @@ zink_get_framebuffer_imageless(struct zink_context *ctx) } void -zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp) -{ - VkFramebuffer ret; - - if (fb->rp == rp) - return; - - uint32_t hash = _mesa_hash_pointer(rp); - - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&fb->objects, hash, rp); - if (he) { -#if defined(_WIN64) || defined(__x86_64__) - ret = (VkFramebuffer)he->data; -#else - VkFramebuffer *ptr = he->data; - ret = *ptr; -#endif - goto out; - } - - VkFramebufferCreateInfo fci = {0}; - fci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - fci.renderPass = rp->render_pass; - fci.attachmentCount = fb->state.num_attachments; - fci.pAttachments = fb->state.attachments; - fci.width = fb->state.width; - fci.height = fb->state.height; - fci.layers = fb->state.layers + 1; - 
- if (VKSCR(CreateFramebuffer)(screen->dev, &fci, NULL, &ret) != VK_SUCCESS) - return; -#if defined(_WIN64) || defined(__x86_64__) - _mesa_hash_table_insert_pre_hashed(&fb->objects, hash, rp, ret); -#else - VkFramebuffer *ptr = ralloc(fb, VkFramebuffer); - if (!ptr) { - VKSCR(DestroyFramebuffer)(screen->dev, ret, NULL); - return; - } - *ptr = ret; - _mesa_hash_table_insert_pre_hashed(&fb->objects, hash, rp, ptr); -#endif -out: - fb->rp = rp; - fb->fb = ret; -} - -static struct zink_framebuffer * -create_framebuffer(struct zink_context *ctx, - struct zink_framebuffer_state *state, - struct pipe_surface **attachments) +debug_describe_zink_framebuffer(char* buf, const struct zink_framebuffer *ptr) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - struct zink_framebuffer *fb = rzalloc(NULL, struct zink_framebuffer); - if (!fb) - return NULL; - - unsigned num_attachments = 0; - for (int i = 0; i < state->num_attachments; i++) { - struct zink_surface *surf; - if (state->attachments[i]) { - surf = zink_csurface(attachments[i]); - /* no ref! 
*/ - fb->surfaces[i] = attachments[i]; - num_attachments++; - util_dynarray_append(&surf->framebuffer_refs, struct zink_framebuffer*, fb); - } else { - surf = zink_csurface(ctx->dummy_surface[util_logbase2_ceil(state->samples+1)]); - state->attachments[i] = surf->image_view; - } - } - pipe_reference_init(&fb->reference, 1 + num_attachments); - - if (!_mesa_hash_table_init(&fb->objects, fb, _mesa_hash_pointer, _mesa_key_pointer_equal)) - goto fail; - memcpy(&fb->state, state, sizeof(struct zink_framebuffer_state)); - - return fb; -fail: - zink_destroy_framebuffer(screen, fb); - return NULL; + sprintf(buf, "zink_framebuffer"); } void -debug_describe_zink_framebuffer(char* buf, const struct zink_framebuffer *ptr) +zink_update_framebuffer_state(struct zink_context *ctx) { - sprintf(buf, "zink_framebuffer"); + /* get_framebuffer adds a ref if the fb is reused or created; + * always do get_framebuffer first to avoid deleting the same fb + * we're about to use + */ + struct zink_framebuffer *fb = zink_get_framebuffer(ctx); + ctx->fb_changed |= ctx->framebuffer != fb; + ctx->framebuffer = fb; } -struct zink_framebuffer * -zink_get_framebuffer(struct zink_context *ctx) +/* same as u_framebuffer_get_num_layers, but clamp to lowest layer count */ +unsigned +zink_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - - assert(!screen->info.have_KHR_imageless_framebuffer); - - struct pipe_surface *attachments[PIPE_MAX_COLOR_BUFS + 1] = {0}; - - struct zink_framebuffer_state state = {0}; - for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { - struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; - state.attachments[i] = psurf ? zink_csurface(psurf)->image_view : VK_NULL_HANDLE; - attachments[i] = psurf; - } - - state.num_attachments = ctx->fb_state.nr_cbufs; - if (ctx->fb_state.zsbuf) { - struct pipe_surface *psurf = ctx->fb_state.zsbuf; - state.attachments[state.num_attachments] = psurf ? 
zink_csurface(psurf)->image_view : VK_NULL_HANDLE; - attachments[state.num_attachments++] = psurf; + unsigned i, num_layers = UINT32_MAX; + if (!(fb->nr_cbufs || fb->zsbuf)) + return MAX2(fb->layers, 1); + + for (i = 0; i < fb->nr_cbufs; i++) { + if (fb->cbufs[i]) { + unsigned num = fb->cbufs[i]->u.tex.last_layer - + fb->cbufs[i]->u.tex.first_layer + 1; + num_layers = MIN2(num_layers, num); + } } - - state.width = MAX2(ctx->fb_state.width, 1); - state.height = MAX2(ctx->fb_state.height, 1); - state.layers = MAX2(util_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1; - state.samples = ctx->fb_state.samples - 1; - - struct zink_framebuffer *fb; - simple_mtx_lock(&screen->framebuffer_mtx); - struct hash_entry *entry = _mesa_hash_table_search(&screen->framebuffer_cache, &state); - if (entry) { - fb = (void*)entry->data; - struct zink_framebuffer *fb_ref = NULL; - /* this gains 1 ref every time we reuse it */ - zink_framebuffer_reference(screen, &fb_ref, fb); - } else { - /* this adds 1 extra ref on creation because all newly-created framebuffers are - * going to be bound; necessary to handle framebuffers which have no "real" attachments - * and are only using null surfaces since the only ref they get is the extra one here - */ - fb = create_framebuffer(ctx, &state, attachments); - _mesa_hash_table_insert(&screen->framebuffer_cache, &fb->state, fb); + if (fb->zsbuf) { + unsigned num = fb->zsbuf->u.tex.last_layer - + fb->zsbuf->u.tex.first_layer + 1; + num_layers = MIN2(num_layers, num); } - simple_mtx_unlock(&screen->framebuffer_mtx); - - return fb; + return MAX2(num_layers, 1); } diff --git a/src/gallium/drivers/zink/zink_framebuffer.h b/src/gallium/drivers/zink/zink_framebuffer.h index 4fb8bf67b90..246fb486df7 100644 --- a/src/gallium/drivers/zink/zink_framebuffer.h +++ b/src/gallium/drivers/zink/zink_framebuffer.h @@ -24,47 +24,10 @@ #ifndef ZINK_FRAMEBUFFER_H #define ZINK_FRAMEBUFFER_H -#include "pipe/p_state.h" -#include <vulkan/vulkan.h> - -#include 
"util/hash_table.h" -#include "util/u_inlines.h" - -struct zink_context; -struct zink_screen; -struct zink_render_pass; - -struct zink_framebuffer_state { - uint32_t width; - uint16_t height; - uint32_t layers:6; - uint32_t samples:6; - uint32_t num_attachments:4; - union { - VkImageView attachments[PIPE_MAX_COLOR_BUFS + 1]; - struct zink_surface_info infos[PIPE_MAX_COLOR_BUFS + 1]; - }; -}; - -struct zink_framebuffer { - struct pipe_reference reference; - - /* current objects */ - VkFramebuffer fb; - struct zink_render_pass *rp; - - struct zink_framebuffer_state state; - union { - struct pipe_surface *surfaces[PIPE_MAX_COLOR_BUFS + 1]; - VkFramebufferAttachmentImageInfo infos[PIPE_MAX_COLOR_BUFS + 1]; - }; - struct hash_table objects; -}; +#include "zink_types.h" void zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp); -void -zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp); void zink_destroy_framebuffer(struct zink_screen *screen, @@ -91,8 +54,10 @@ zink_framebuffer_reference(struct zink_screen *screen, } struct zink_framebuffer * -zink_get_framebuffer_imageless(struct zink_context *ctx); - -struct zink_framebuffer * zink_get_framebuffer(struct zink_context *ctx); + +void +zink_update_framebuffer_state(struct zink_context *ctx); +unsigned +zink_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb); #endif diff --git a/src/gallium/drivers/zink/zink_inlines.h b/src/gallium/drivers/zink/zink_inlines.h index fe873828423..44d4474d99a 100644 --- a/src/gallium/drivers/zink/zink_inlines.h +++ b/src/gallium/drivers/zink/zink_inlines.h @@ -6,7 +6,9 @@ static inline void zink_select_draw_vbo(struct zink_context *ctx) { ctx->base.draw_vbo = ctx->draw_vbo[ctx->pipeline_changed[0]]; + ctx->base.draw_vertex_state = ctx->draw_state[ctx->pipeline_changed[0]]; assert(ctx->base.draw_vbo); + assert(ctx->base.draw_vertex_state); } static inline 
void diff --git a/src/gallium/drivers/zink/zink_instance.py b/src/gallium/drivers/zink/zink_instance.py index 831be322d7b..b9c3c5a6ae0 100644 --- a/src/gallium/drivers/zink/zink_instance.py +++ b/src/gallium/drivers/zink/zink_instance.py @@ -28,6 +28,7 @@ from os import path from xml.etree import ElementTree from zink_extensions import Extension,Layer,ExtensionRegistry,Version import sys +import platform # constructor: Extension(name, conditions=[], nonstandard=False) # The attributes: @@ -37,11 +38,24 @@ import sys EXTENSIONS = [ Extension("VK_EXT_debug_utils"), Extension("VK_KHR_get_physical_device_properties2"), + Extension("VK_KHR_external_memory_capabilities"), + Extension("VK_KHR_external_semaphore_capabilities"), Extension("VK_MVK_moltenvk", nonstandard=True), Extension("VK_KHR_surface"), + Extension("VK_EXT_headless_surface"), + Extension("VK_KHR_wayland_surface", + conditions=["!display_dev"]), + Extension("VK_KHR_xcb_surface", + conditions=["!display_dev"]), + Extension("VK_KHR_win32_surface"), ] +if platform.system() == "Darwin": + EXTENSIONS += [ + Extension("VK_KHR_portability_enumeration"), + ] + # constructor: Layer(name, conditions=[]) # - conditions: See documentation of EXTENSIONS. 
LAYERS = [ @@ -60,15 +74,17 @@ header_code = """ #ifndef ZINK_INSTANCE_H #define ZINK_INSTANCE_H -#include "os/os_process.h" +#include "util/u_process.h" -#include <vulkan/vulkan.h> +#include <vulkan/vulkan_core.h> -#if defined(__APPLE__) +#ifdef __APPLE__ +#include "MoltenVK/mvk_vulkan.h" // Source of MVK_VERSION -#include "MoltenVK/vk_mvk_moltenvk.h" -#endif +#include "MoltenVK/mvk_config.h" +#endif /* __APPLE__ */ +struct pipe_screen; struct zink_screen; struct zink_instance_info { @@ -83,8 +99,8 @@ struct zink_instance_info { %endfor }; -VkInstance -zink_create_instance(struct zink_instance_info *instance_info); +bool +zink_create_instance(struct zink_screen *screen, bool display_dev); void zink_verify_instance_extensions(struct zink_screen *screen); @@ -103,16 +119,22 @@ void zink_stub_${cmd.lstrip("vk")}(void); %endif %endfor +struct pipe_screen; +struct pipe_resource; + #endif """ impl_code = """ +#include "vk_enum_to_str.h" #include "zink_instance.h" #include "zink_screen.h" -VkInstance -zink_create_instance(struct zink_instance_info *instance_info) +bool +zink_create_instance(struct zink_screen *screen, bool display_dev) { + struct zink_instance_info *instance_info = &screen->instance_info; + /* reserve one slot for MoltenVK */ const char *layers[${len(layers) + 1}] = {0}; uint32_t num_layers = 0; @@ -132,12 +154,24 @@ zink_create_instance(struct zink_instance_info *instance_info) bool have_moltenvk_layer = false; #endif + GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, EnumerateInstanceExtensionProperties); + GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, EnumerateInstanceLayerProperties); + if (!vk_EnumerateInstanceExtensionProperties || + !vk_EnumerateInstanceLayerProperties) + return false; + // Build up the extensions from the reported ones but only for the unnamed layer uint32_t extension_count = 0; - if (vkEnumerateInstanceExtensionProperties(NULL, &extension_count, NULL) == VK_SUCCESS) { + if (vk_EnumerateInstanceExtensionProperties(NULL, &extension_count, 
NULL) != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumerateInstanceExtensionProperties failed"); + } else { VkExtensionProperties *extension_props = malloc(extension_count * sizeof(VkExtensionProperties)); if (extension_props) { - if (vkEnumerateInstanceExtensionProperties(NULL, &extension_count, extension_props) == VK_SUCCESS) { + if (vk_EnumerateInstanceExtensionProperties(NULL, &extension_count, extension_props) != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumerateInstanceExtensionProperties failed"); + } else { for (uint32_t i = 0; i < extension_count; i++) { %for ext in extensions: if (!strcmp(extension_props[i].extensionName, ${ext.extension_name_literal()})) { @@ -153,10 +187,16 @@ zink_create_instance(struct zink_instance_info *instance_info) // Build up the layers from the reported ones uint32_t layer_count = 0; - if (vkEnumerateInstanceLayerProperties(&layer_count, NULL) == VK_SUCCESS) { + if (vk_EnumerateInstanceLayerProperties(&layer_count, NULL) != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumerateInstanceLayerProperties failed"); + } else { VkLayerProperties *layer_props = malloc(layer_count * sizeof(VkLayerProperties)); if (layer_props) { - if (vkEnumerateInstanceLayerProperties(&layer_count, layer_props) == VK_SUCCESS) { + if (vk_EnumerateInstanceLayerProperties(&layer_count, layer_props) != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumerateInstanceLayerProperties failed"); + } else { for (uint32_t i = 0; i < layer_count; i++) { %for layer in layers: if (!strcmp(layer_props[i].layerName, ${layer.extension_name_literal()})) { @@ -206,29 +246,36 @@ zink_create_instance(struct zink_instance_info *instance_info) VkApplicationInfo ai = {0}; ai.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - char proc_name[128]; - if (os_get_process_name(proc_name, ARRAY_SIZE(proc_name))) - ai.pApplicationName = proc_name; - else - ai.pApplicationName = "unknown"; 
+ const char *proc_name = util_get_process_name(); + if (!proc_name) + proc_name = "unknown"; + ai.pApplicationName = proc_name; ai.pEngineName = "mesa zink"; ai.apiVersion = instance_info->loader_version; VkInstanceCreateInfo ici = {0}; ici.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; +#ifdef __APPLE__ + ici.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; +#endif ici.pApplicationInfo = &ai; ici.ppEnabledExtensionNames = extensions; ici.enabledExtensionCount = num_extensions; ici.ppEnabledLayerNames = layers; ici.enabledLayerCount = num_layers; - VkInstance instance = VK_NULL_HANDLE; - VkResult err = vkCreateInstance(&ici, NULL, &instance); - if (err != VK_SUCCESS) - return VK_NULL_HANDLE; + GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, CreateInstance); + assert(vk_CreateInstance); + + VkResult err = vk_CreateInstance(&ici, NULL, &screen->instance); + if (err != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkCreateInstance failed (%s)", vk_Result_to_str(err)); + return false; + } - return instance; + return true; } void @@ -236,6 +283,9 @@ zink_verify_instance_extensions(struct zink_screen *screen) { %for ext in extensions: %if registry.in_registry(ext.name): +%if ext.platform_guard: +#ifdef ${ext.platform_guard} +%endif if (screen->instance_info.have_${ext.name_with_vendor()}) { %for cmd in registry.get_registry_entry(ext.name).instance_commands: if (!screen->vk.${cmd.lstrip("vk")}) { @@ -257,6 +307,9 @@ zink_verify_instance_extensions(struct zink_screen *screen) %endfor } %endif +%if ext.platform_guard: +#endif +%endif %endfor } @@ -273,12 +326,18 @@ zink_verify_instance_extensions(struct zink_screen *screen) %else: <% generated_funcs.add(cmd) %> %endif +%if ext.platform_guard: +#ifdef ${ext.platform_guard} +%endif void zink_stub_${cmd.lstrip("vk")}() { mesa_loge("ZINK: ${cmd} is not loaded properly!"); abort(); } +%if ext.platform_guard: +#endif +%endif %endfor %endif %endfor @@ -335,16 +394,19 @@ if __name__ == "__main__": if 
entry.promoted_in: ext.core_since = Version((*entry.promoted_in, 0)) + if entry.platform_guard: + ext.platform_guard = entry.platform_guard + if error_count > 0: print("zink_instance.py: Found {} error(s) in total. Quitting.".format(error_count)) exit(1) - with open(header_path, "w") as header_file: + with open(header_path, "w", encoding='utf-8') as header_file: header = Template(header_code).render(extensions=extensions, layers=layers, registry=registry).strip() header = replace_code(header, replacement) print(header, file=header_file) - with open(impl_path, "w") as impl_file: + with open(impl_path, "w", encoding='utf-8') as impl_file: impl = Template(impl_code).render(extensions=extensions, layers=layers, registry=registry).strip() impl = replace_code(impl, replacement) print(impl, file=impl_file) diff --git a/src/gallium/drivers/zink/zink_kopper.c b/src/gallium/drivers/zink/zink_kopper.c new file mode 100644 index 00000000000..d508e2e06d7 --- /dev/null +++ b/src/gallium/drivers/zink/zink_kopper.c @@ -0,0 +1,1162 @@ +/* + * Copyright 2020 Red Hat, Inc. + * Copyright © 2021 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "util/detect_os.h" +#include "driver_trace/tr_screen.h" + +#include "zink_context.h" +#include "zink_screen.h" +#include "zink_surface.h" +#include "zink_resource.h" +#include "zink_kopper.h" + +static void +zink_kopper_set_present_mode_for_interval(struct kopper_displaytarget *cdt, int interval) +{ +#if DETECT_OS_WINDOWS + // not hooked up yet so let's not sabotage benchmarks + cdt->present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; +#else + assert(interval >= 0); /* TODO: VK_PRESENT_MODE_FIFO_RELAXED_KHR */ + if (interval == 0) { + if (cdt->present_modes & BITFIELD_BIT(VK_PRESENT_MODE_IMMEDIATE_KHR)) + cdt->present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + else + cdt->present_mode = VK_PRESENT_MODE_MAILBOX_KHR; + } else if (interval > 0) { + cdt->present_mode = VK_PRESENT_MODE_FIFO_KHR; + } + assert(cdt->present_modes & BITFIELD_BIT(cdt->present_mode)); +#endif +} + +static void +init_dt_type(struct kopper_displaytarget *cdt) +{ + VkStructureType type = cdt->info.bos.sType; + switch (type) { +#ifdef VK_USE_PLATFORM_XCB_KHR + case VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR: + cdt->type = KOPPER_X11; + break; +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + case VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR: + cdt->type = KOPPER_WAYLAND; + break; +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + case VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR: + cdt->type = KOPPER_WIN32; + break; +#endif + default: + unreachable("unsupported!"); + } +} + +static VkSurfaceKHR +kopper_CreateSurface(struct zink_screen *screen, struct kopper_displaytarget *cdt) +{ + VkSurfaceKHR surface = VK_NULL_HANDLE; + VkResult error = VK_SUCCESS; + + init_dt_type(cdt); + VkStructureType type = cdt->info.bos.sType; 
+ switch (type) { +#ifdef VK_USE_PLATFORM_XCB_KHR + case VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR: { +#ifdef GLX_USE_APPLE + error = VK_INCOMPLETE; +#else + VkXcbSurfaceCreateInfoKHR *xcb = (VkXcbSurfaceCreateInfoKHR *)&cdt->info.bos; + error = VKSCR(CreateXcbSurfaceKHR)(screen->instance, xcb, NULL, &surface); +#endif + break; + } +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + case VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR: { + VkWaylandSurfaceCreateInfoKHR *wlsci = (VkWaylandSurfaceCreateInfoKHR *)&cdt->info.bos; + error = VKSCR(CreateWaylandSurfaceKHR)(screen->instance, wlsci, NULL, &surface); + break; + } +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + case VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR: { + VkWin32SurfaceCreateInfoKHR *win32 = (VkWin32SurfaceCreateInfoKHR *)&cdt->info.bos; + error = VKSCR(CreateWin32SurfaceKHR)(screen->instance, win32, NULL, &surface); + break; + } +#endif + default: + unreachable("unsupported!"); + } + if (error != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + + VkBool32 supported; + error = VKSCR(GetPhysicalDeviceSurfaceSupportKHR)(screen->pdev, screen->gfx_queue, surface, &supported); + if (!zink_screen_handle_vkresult(screen, error) || !supported) + goto fail; + + unsigned count = 10; + VkPresentModeKHR modes[10]; + error = VKSCR(GetPhysicalDeviceSurfacePresentModesKHR)(screen->pdev, surface, &count, modes); + if (!zink_screen_handle_vkresult(screen, error)) + goto fail; + + for (unsigned i = 0; i < count; i++) { + /* VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR and VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR + * are not handled + */ + assert(modes[i] <= VK_PRESENT_MODE_FIFO_RELAXED_KHR); + if (modes[i] <= VK_PRESENT_MODE_FIFO_RELAXED_KHR) + cdt->present_modes |= BITFIELD_BIT(modes[i]); + } + + zink_kopper_set_present_mode_for_interval(cdt, cdt->info.initial_swap_interval); + + return surface; +fail: + VKSCR(DestroySurfaceKHR)(screen->instance, surface, NULL); + return VK_NULL_HANDLE; +} + +static void 
+destroy_swapchain(struct zink_screen *screen, struct kopper_swapchain *cswap) +{ + if (!cswap) + return; + util_queue_fence_destroy(&cswap->present_fence); + for (unsigned i = 0; i < cswap->num_images; i++) { + simple_mtx_lock(&screen->semaphores_lock); + util_dynarray_append(&screen->semaphores, VkSemaphore, cswap->images[i].acquire); + simple_mtx_unlock(&screen->semaphores_lock); + pipe_resource_reference(&cswap->images[i].readback, NULL); + } + free(cswap->images); + hash_table_foreach(cswap->presents, he) { + struct util_dynarray *arr = he->data; + simple_mtx_lock(&screen->semaphores_lock); + util_dynarray_append_dynarray(&screen->semaphores, arr); + simple_mtx_unlock(&screen->semaphores_lock); + util_dynarray_fini(arr); + free(arr); + } + _mesa_hash_table_destroy(cswap->presents, NULL); + VKSCR(DestroySwapchainKHR)(screen->dev, cswap->swapchain, NULL); + free(cswap); +} + +static void +prune_old_swapchains(struct zink_screen *screen, struct kopper_displaytarget *cdt, bool wait) +{ + while (cdt->old_swapchain) { + struct kopper_swapchain *cswap = cdt->old_swapchain; + if (cswap->async_presents) { + if (wait) + continue; + return; + } + struct zink_batch_usage *u = cswap->batch_uses; + if (!zink_screen_usage_check_completion(screen, u)) { + /* these can't ever be pruned */ + if (!wait || zink_batch_usage_is_unflushed(u)) + return; + + zink_screen_timeline_wait(screen, u->usage, UINT64_MAX); + cswap->batch_uses = NULL; + } + cdt->old_swapchain = cswap->next; + destroy_swapchain(screen, cswap); + } +} + +static struct hash_entry * +find_dt_entry(struct zink_screen *screen, const struct kopper_displaytarget *cdt) +{ + struct hash_entry *he = NULL; + switch (cdt->type) { +#ifdef VK_USE_PLATFORM_XCB_KHR + case KOPPER_X11: { + VkXcbSurfaceCreateInfoKHR *xcb = (VkXcbSurfaceCreateInfoKHR *)&cdt->info.bos; + he = _mesa_hash_table_search_pre_hashed(&screen->dts, xcb->window, (void*)(uintptr_t)xcb->window); + break; + } +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + case 
KOPPER_WAYLAND: { + VkWaylandSurfaceCreateInfoKHR *wlsci = (VkWaylandSurfaceCreateInfoKHR *)&cdt->info.bos; + he = _mesa_hash_table_search(&screen->dts, wlsci->surface); + break; + } +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + case KOPPER_WIN32: { + VkWin32SurfaceCreateInfoKHR *win32 = (VkWin32SurfaceCreateInfoKHR *)&cdt->info.bos; + he = _mesa_hash_table_search(&screen->dts, win32->hwnd); + break; + } +#endif + default: + unreachable("unsupported!"); + } + return he; +} + +void +zink_kopper_deinit_displaytarget(struct zink_screen *screen, struct kopper_displaytarget *cdt) +{ + if (!cdt->surface) + return; + simple_mtx_lock(&screen->dt_lock); + struct hash_entry *he = find_dt_entry(screen, cdt); + assert(he); + /* this deinits the registered entry, which should always be the "right" entry */ + cdt = he->data; + _mesa_hash_table_remove(&screen->dts, he); + simple_mtx_unlock(&screen->dt_lock); + destroy_swapchain(screen, cdt->swapchain); + prune_old_swapchains(screen, cdt, true); + VKSCR(DestroySurfaceKHR)(screen->instance, cdt->surface, NULL); + cdt->swapchain = cdt->old_swapchain = NULL; + cdt->surface = VK_NULL_HANDLE; +} + +static struct kopper_swapchain * +kopper_CreateSwapchain(struct zink_screen *screen, struct kopper_displaytarget *cdt, unsigned w, unsigned h, VkResult *result) +{ + VkResult error = VK_SUCCESS; + struct kopper_swapchain *cswap = CALLOC_STRUCT(kopper_swapchain); + if (!cswap) { + *result = VK_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + cswap->last_present_prune = 1; + util_queue_fence_init(&cswap->present_fence); + + bool has_alpha = cdt->info.has_alpha && (cdt->caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR); + if (cdt->swapchain) { + cswap->scci = cdt->swapchain->scci; + /* avoid UAF if async present needs to-be-retired swapchain */ + if (cdt->type == KOPPER_WAYLAND && cdt->swapchain->swapchain) + util_queue_fence_wait(&cdt->swapchain->present_fence); + cswap->scci.oldSwapchain = cdt->swapchain->swapchain; + } 
else { + cswap->scci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + cswap->scci.pNext = NULL; + cswap->scci.surface = cdt->surface; + cswap->scci.flags = zink_kopper_has_srgb(cdt) ? VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR : 0; + cswap->scci.imageFormat = cdt->formats[0]; + cswap->scci.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; + // TODO: This is where you'd hook up stereo + cswap->scci.imageArrayLayers = 1; + cswap->scci.imageUsage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + if (cdt->caps.supportedUsageFlags & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) + cswap->scci.imageUsage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + cswap->scci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + cswap->scci.queueFamilyIndexCount = 0; + cswap->scci.pQueueFamilyIndices = NULL; + cswap->scci.compositeAlpha = has_alpha ? VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR : VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + cswap->scci.clipped = VK_TRUE; + } + cswap->scci.presentMode = cdt->present_mode; + cswap->scci.minImageCount = cdt->caps.minImageCount; + cswap->scci.preTransform = cdt->caps.currentTransform; + if (cdt->formats[1]) + cswap->scci.pNext = &cdt->format_list; + + /* different display platforms have, by vulkan spec, different sizing methodologies */ + switch (cdt->type) { + case KOPPER_X11: + case KOPPER_WIN32: + /* With Xcb, minImageExtent, maxImageExtent, and currentExtent must always equal the window size. + * ... + * Due to above restrictions, it is only possible to create a new swapchain on this + * platform with imageExtent being equal to the current size of the window. 
+ */ + cswap->scci.imageExtent.width = cdt->caps.currentExtent.width; + cswap->scci.imageExtent.height = cdt->caps.currentExtent.height; + break; + case KOPPER_WAYLAND: + /* On Wayland, currentExtent is the special value (0xFFFFFFFF, 0xFFFFFFFF), indicating that the + * surface size will be determined by the extent of a swapchain targeting the surface. Whatever the + * application sets a swapchain’s imageExtent to will be the size of the window, after the first image is + * presented. + */ + cswap->scci.imageExtent.width = w; + cswap->scci.imageExtent.height = h; + break; + default: + unreachable("unknown display platform"); + } + + error = VKSCR(CreateSwapchainKHR)(screen->dev, &cswap->scci, NULL, + &cswap->swapchain); + if (error == VK_ERROR_NATIVE_WINDOW_IN_USE_KHR) { + if (util_queue_is_initialized(&screen->flush_queue)) + util_queue_finish(&screen->flush_queue); + simple_mtx_lock(&screen->queue_lock); + VkResult wait_result = VKSCR(QueueWaitIdle)(screen->queue); + simple_mtx_unlock(&screen->queue_lock); + if (wait_result != VK_SUCCESS) + mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(wait_result)); + error = VKSCR(CreateSwapchainKHR)(screen->dev, &cswap->scci, NULL, + &cswap->swapchain); + } + if (error != VK_SUCCESS) { + mesa_loge("CreateSwapchainKHR failed with %s\n", vk_Result_to_str(error)); + free(cswap); + *result = error; + return NULL; + } + cswap->last_present = UINT32_MAX; + + *result = VK_SUCCESS; + return cswap; +} + +static VkResult +kopper_GetSwapchainImages(struct zink_screen *screen, struct kopper_swapchain *cswap) +{ + VkResult error = VKSCR(GetSwapchainImagesKHR)(screen->dev, cswap->swapchain, &cswap->num_images, NULL); + zink_screen_handle_vkresult(screen, error); + if (error != VK_SUCCESS) + return error; + cswap->images = calloc(cswap->num_images, sizeof(struct kopper_swapchain_image)); + if (!cswap->images) { + mesa_loge("ZINK: failed to allocate cswap->images!"); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + 
cswap->presents = _mesa_hash_table_create_u32_keys(NULL); + VkImage images[32]; + error = VKSCR(GetSwapchainImagesKHR)(screen->dev, cswap->swapchain, &cswap->num_images, images); + assert(cswap->num_images <= ARRAY_SIZE(images)); + if (zink_screen_handle_vkresult(screen, error)) { + for (unsigned i = 0; i < cswap->num_images; i++) + cswap->images[i].image = images[i]; + } + cswap->max_acquires = cswap->num_images - cswap->scci.minImageCount + 1; + return error; +} + +static VkResult +update_caps(struct zink_screen *screen, struct kopper_displaytarget *cdt) +{ + VkResult error = VKSCR(GetPhysicalDeviceSurfaceCapabilitiesKHR)(screen->pdev, cdt->surface, &cdt->caps); + zink_screen_handle_vkresult(screen, error); + return error; +} + +static VkResult +update_swapchain(struct zink_screen *screen, struct kopper_displaytarget *cdt, unsigned w, unsigned h) +{ + VkResult error = update_caps(screen, cdt); + if (error != VK_SUCCESS) + return error; + struct kopper_swapchain *cswap = kopper_CreateSwapchain(screen, cdt, w, h, &error); + if (!cswap) + return error; + prune_old_swapchains(screen, cdt, false); + struct kopper_swapchain **pswap = &cdt->old_swapchain; + while (*pswap) + *pswap = (*pswap)->next; + *pswap = cdt->swapchain; + cdt->swapchain = cswap; + + return kopper_GetSwapchainImages(screen, cdt->swapchain); +} + +struct kopper_displaytarget * +zink_kopper_displaytarget_create(struct zink_screen *screen, unsigned tex_usage, + enum pipe_format format, unsigned width, + unsigned height, unsigned alignment, + const void *loader_private, unsigned *stride) +{ + struct kopper_displaytarget *cdt; + const struct kopper_loader_info *info = loader_private; + + { + struct kopper_displaytarget k; + struct hash_entry *he = NULL; + k.info = *info; + init_dt_type(&k); + simple_mtx_lock(&screen->dt_lock); + if (unlikely(!screen->dts.table)) { + switch (k.type) { + case KOPPER_X11: + _mesa_hash_table_init(&screen->dts, screen, NULL, _mesa_key_pointer_equal); + break; + case 
KOPPER_WAYLAND: + case KOPPER_WIN32: + _mesa_hash_table_init(&screen->dts, screen, _mesa_hash_pointer, _mesa_key_pointer_equal); + break; + default: + unreachable("unknown kopper type"); + } + } else { + he = find_dt_entry(screen, &k); + } + simple_mtx_unlock(&screen->dt_lock); + if (he) { + cdt = he->data; + p_atomic_inc(&cdt->refcount); + *stride = cdt->stride; + return cdt; + } + } + + cdt = CALLOC_STRUCT(kopper_displaytarget); + if (!cdt) + return NULL; + + cdt->refcount = 1; + cdt->loader_private = (void*)loader_private; + cdt->info = *info; + + enum pipe_format srgb = PIPE_FORMAT_NONE; + if (screen->info.have_KHR_swapchain_mutable_format) { + srgb = util_format_is_srgb(format) ? util_format_linear(format) : util_format_srgb(format); + /* why do these helpers have different default return values? */ + if (srgb == format) + srgb = PIPE_FORMAT_NONE; + } + cdt->formats[0] = zink_get_format(screen, format); + if (srgb) { + cdt->format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO; + cdt->format_list.pNext = NULL; + cdt->format_list.viewFormatCount = 2; + cdt->format_list.pViewFormats = cdt->formats; + + cdt->formats[1] = zink_get_format(screen, srgb); + } + + cdt->surface = kopper_CreateSurface(screen, cdt); + if (!cdt->surface) + goto out; + + if (update_swapchain(screen, cdt, width, height) != VK_SUCCESS) + goto out; + + simple_mtx_lock(&screen->dt_lock); + switch (cdt->type) { +#ifdef VK_USE_PLATFORM_XCB_KHR + case KOPPER_X11: { + VkXcbSurfaceCreateInfoKHR *xcb = (VkXcbSurfaceCreateInfoKHR *)&cdt->info.bos; + _mesa_hash_table_insert_pre_hashed(&screen->dts, xcb->window, (void*)(uintptr_t)xcb->window, cdt); + break; + } +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + case KOPPER_WAYLAND: { + VkWaylandSurfaceCreateInfoKHR *wlsci = (VkWaylandSurfaceCreateInfoKHR *)&cdt->info.bos; + _mesa_hash_table_insert(&screen->dts, wlsci->surface, cdt); + break; + } +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + case KOPPER_WIN32: { + VkWin32SurfaceCreateInfoKHR 
*win32 = (VkWin32SurfaceCreateInfoKHR *)&cdt->info.bos; + _mesa_hash_table_insert(&screen->dts, win32->hwnd, cdt); + break; + } +#endif + default: + unreachable("unsupported!"); + } + simple_mtx_unlock(&screen->dt_lock); + + *stride = cdt->stride; + return cdt; + +//moar cleanup +out: + FREE(cdt); + return NULL; +} + +void +zink_kopper_displaytarget_destroy(struct zink_screen *screen, struct kopper_displaytarget *cdt) +{ + if (!p_atomic_dec_zero(&cdt->refcount)) + return; + zink_kopper_deinit_displaytarget(screen, cdt); + FREE(cdt); +} + +static VkResult +kopper_acquire(struct zink_screen *screen, struct zink_resource *res, uint64_t timeout) +{ + struct kopper_displaytarget *cdt = res->obj->dt; + + /* if: + * - we don't need a new image + * - we have a swapchain image + * - that image is either acquired or acquiring + * + * then this is a no-op + */ + if (!res->obj->new_dt && res->obj->dt_idx != UINT32_MAX && + (cdt->swapchain->images[res->obj->dt_idx].acquire || cdt->swapchain->images[res->obj->dt_idx].acquired)) + return VK_SUCCESS; + VkSemaphore acquire = VK_NULL_HANDLE; + + while (true) { + if (res->obj->new_dt) { + VkResult error = update_swapchain(screen, cdt, res->base.b.width0, res->base.b.height0); + zink_screen_handle_vkresult(screen, error); + if (error != VK_SUCCESS) + return error; + res->obj->new_dt = false; + res->layout = VK_IMAGE_LAYOUT_UNDEFINED; + res->obj->access = 0; + res->obj->access_stage = 0; + } + if (timeout == UINT64_MAX && util_queue_is_initialized(&screen->flush_queue) && + p_atomic_read_relaxed(&cdt->swapchain->num_acquires) >= cdt->swapchain->max_acquires) { + util_queue_fence_wait(&cdt->swapchain->present_fence); + /* With a sequence of + glDrawBuffer(GL_FRONT_AND_BACK); + glClearBufferfv(GL_COLOR, 0, purple); + glReadBuffer(GL_FRONT); + glReadPIxels(...); + kopper_present is never called, but with glReadPIxels the pipeline + is flushed, and since we draw to the front- and the backbuffer, two + swapchain images are acquired one 
after the other. Because with + that we possibly acquire too many images at once and when using + "timeout == UINT64_MAX" forwad progress of vkAcquireNextImageKHR + can no longer be guaranteed, i.e. the call may block indefinitely; + VUID-vkAcquireNextImageKHR-surface-07783 is raised to warn + about exceeding the limit for acquires. + + So let's check whether the number of acquired images is still too + large after the fence was signalled, and if so then clear the timeout. + */ + if (p_atomic_read_relaxed(&cdt->swapchain->num_acquires) >= cdt->swapchain->max_acquires) + timeout = 0; + } + VkResult ret; + if (!acquire) { + acquire = zink_create_semaphore(screen); + assert(acquire); + if (!acquire) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + ret = VKSCR(AcquireNextImageKHR)(screen->dev, cdt->swapchain->swapchain, timeout, acquire, VK_NULL_HANDLE, &res->obj->dt_idx); + if (ret != VK_SUCCESS && ret != VK_SUBOPTIMAL_KHR) { + if (ret == VK_ERROR_OUT_OF_DATE_KHR) { + res->obj->new_dt = true; + continue; + } + if (ret == VK_NOT_READY || ret == VK_TIMEOUT) { + if (timeout > 1000000) + unreachable("kopper_acquire: updated timeout after failure has become unreasonable large"); + timeout += 4000; + continue; + } + VKSCR(DestroySemaphore)(screen->dev, acquire, NULL); + return ret; + } + break; + } + + cdt->swapchain->images[res->obj->dt_idx].acquire = acquire; + if (cdt->swapchain->images[res->obj->dt_idx].readback) + zink_resource(cdt->swapchain->images[res->obj->dt_idx].readback)->valid = false; + res->obj->image = cdt->swapchain->images[res->obj->dt_idx].image; + if (!cdt->age_locked) + zink_kopper_update_last_written(res); + cdt->swapchain->images[res->obj->dt_idx].acquired = NULL; + if (!cdt->swapchain->images[res->obj->dt_idx].init) { + /* swapchain images are initially in the UNDEFINED layout */ + res->layout = VK_IMAGE_LAYOUT_UNDEFINED; + cdt->swapchain->images[res->obj->dt_idx].init = true; + } + if (timeout == UINT64_MAX) { + res->obj->indefinite_acquire = true; + 
p_atomic_inc(&cdt->swapchain->num_acquires); + } + cdt->swapchain->images[res->obj->dt_idx].dt_has_data = false; + return VK_SUCCESS; +} + +static void +kill_swapchain(struct zink_context *ctx, struct zink_resource *res) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + /* dead swapchain */ + mesa_loge("zink: swapchain killed %p\n", res); + zink_batch_reference_resource(&ctx->batch, res); + struct pipe_resource *pres = screen->base.resource_create(&screen->base, &res->base.b); + zink_resource_object_reference(screen, &res->obj, zink_resource(pres)->obj); + res->layout = VK_IMAGE_LAYOUT_UNDEFINED; + res->swapchain = false; + pipe_resource_reference(&pres, NULL); +} + +static bool +is_swapchain_kill(VkResult ret) +{ + return ret != VK_SUCCESS && + ret != VK_TIMEOUT && + ret != VK_NOT_READY && + ret != VK_SUBOPTIMAL_KHR; +} + +bool +zink_kopper_acquire(struct zink_context *ctx, struct zink_resource *res, uint64_t timeout) +{ + assert(zink_is_swapchain(res)); + struct kopper_displaytarget *cdt = res->obj->dt; + if (!cdt) + /* dead swapchain */ + return false; + if (cdt->is_kill) { + kill_swapchain(ctx, res); + return false; + } + const struct kopper_swapchain *cswap = cdt->swapchain; + res->obj->new_dt |= res->base.b.width0 != cswap->scci.imageExtent.width || + res->base.b.height0 != cswap->scci.imageExtent.height; + VkResult ret = kopper_acquire(zink_screen(trace_screen_unwrap(ctx->base.screen)), res, timeout); + if (ret == VK_SUCCESS || ret == VK_SUBOPTIMAL_KHR) { + if (cswap != cdt->swapchain) { + ctx->swapchain_size = cdt->swapchain->scci.imageExtent; + res->base.b.width0 = ctx->swapchain_size.width; + res->base.b.height0 = ctx->swapchain_size.height; + } + } else if (is_swapchain_kill(ret)) { + kill_swapchain(ctx, res); + } + bool is_kill = is_swapchain_kill(ret); + zink_batch_usage_set(&cdt->swapchain->batch_uses, ctx->batch.state); + return !is_kill; +} + +VkSemaphore +zink_kopper_acquire_submit(struct zink_screen *screen, struct zink_resource 
*res) +{ + assert(res->obj->dt); + struct kopper_displaytarget *cdt = res->obj->dt; + assert(res->obj->dt_idx != UINT32_MAX); + if (cdt->swapchain->images[res->obj->dt_idx].dt_has_data) + return VK_NULL_HANDLE; + assert(res->obj->dt_idx != UINT32_MAX); + if (cdt->swapchain->images[res->obj->dt_idx].acquired) { + assert(!cdt->swapchain->images[res->obj->dt_idx].acquire); + return VK_NULL_HANDLE; + } + assert(cdt->swapchain->images[res->obj->dt_idx].acquire); + cdt->swapchain->images[res->obj->dt_idx].acquired = res; + /* this is now owned by the batch */ + VkSemaphore acquire = cdt->swapchain->images[res->obj->dt_idx].acquire; + cdt->swapchain->images[res->obj->dt_idx].acquire = VK_NULL_HANDLE; + cdt->swapchain->images[res->obj->dt_idx].dt_has_data = true; + return acquire; +} + +VkSemaphore +zink_kopper_present(struct zink_screen *screen, struct zink_resource *res) +{ + assert(res->obj->dt); + assert(!res->obj->present); + assert(zink_kopper_acquired(res->obj->dt, res->obj->dt_idx)); + res->obj->present = zink_create_semaphore(screen); + return res->obj->present; +} + +static void +kopper_present(void *data, void *gdata, int thread_idx) +{ + struct zink_kopper_present_info *cpi = data; + struct kopper_displaytarget *cdt = cpi->res->obj->dt; + struct kopper_swapchain *swapchain = cpi->swapchain; + struct zink_screen *screen = gdata; + VkResult error = VK_SUCCESS; + cpi->info.pResults = &error; + + simple_mtx_lock(&screen->queue_lock); + if (screen->driver_workarounds.implicit_sync && cdt->type != KOPPER_WIN32) { + if (!screen->fence) { + VkFenceCreateInfo fci = {0}; + fci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VKSCR(CreateFence)(screen->dev, &fci, NULL, &screen->fence); + } + VKSCR(ResetFences)(screen->dev, 1, &screen->fence); + VkSubmitInfo si = {0}; + si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + si.waitSemaphoreCount = 1; + si.pWaitSemaphores = cpi->info.pWaitSemaphores; + VkPipelineStageFlags stages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + 
si.pWaitDstStageMask = &stages; + + error = VKSCR(QueueSubmit)(screen->queue, 1, &si, screen->fence); + if (!zink_screen_handle_vkresult(screen, error)) { + simple_mtx_unlock(&screen->queue_lock); + VKSCR(DestroySemaphore)(screen->dev, cpi->sem, NULL); + goto out; + } + error = VKSCR(WaitForFences)(screen->dev, 1, &screen->fence, VK_TRUE, UINT64_MAX); + if (!zink_screen_handle_vkresult(screen, error)) { + simple_mtx_unlock(&screen->queue_lock); + VKSCR(DestroySemaphore)(screen->dev, cpi->sem, NULL); + goto out; + } + cpi->info.pWaitSemaphores = NULL; + cpi->info.waitSemaphoreCount = 0; + } + VkResult error2 = VKSCR(QueuePresentKHR)(screen->queue, &cpi->info); + zink_screen_debug_marker_end(screen, screen->frame_marker_emitted); + zink_screen_debug_marker_begin(screen, "frame"); + simple_mtx_unlock(&screen->queue_lock); + swapchain->last_present = cpi->image; + if (cpi->indefinite_acquire) + p_atomic_dec(&swapchain->num_acquires); + if (error2 == VK_SUBOPTIMAL_KHR && cdt->swapchain == swapchain) + cpi->res->obj->new_dt = true; + + /* it's illegal to destroy semaphores if they're in use by a cmdbuf. + * but what does "in use" actually mean? + * in truth, when using timelines, nobody knows. especially not VVL. 
+ * + * thus, to avoid infinite error spam and thread-related races, + * present semaphores need their own free queue based on the + * last-known completed timeline id so that the semaphore persists through + * normal cmdbuf submit/signal and then also exists here when it's needed for the present operation + */ + struct util_dynarray *arr; + for (; screen->last_finished && swapchain->last_present_prune != screen->last_finished; swapchain->last_present_prune++) { + struct hash_entry *he = _mesa_hash_table_search(swapchain->presents, + (void*)(uintptr_t)swapchain->last_present_prune); + if (he) { + arr = he->data; + simple_mtx_lock(&screen->semaphores_lock); + util_dynarray_append_dynarray(&screen->semaphores, arr); + simple_mtx_unlock(&screen->semaphores_lock); + util_dynarray_fini(arr); + free(arr); + _mesa_hash_table_remove(swapchain->presents, he); + } + } + /* queue this wait semaphore for deletion on completion of the next batch */ + assert(screen->curr_batch > 0); + uint32_t next = (uint32_t)screen->curr_batch + 1; + /* handle overflow */ + next = MAX2(next + 1, 1); + struct hash_entry *he = _mesa_hash_table_search(swapchain->presents, (void*)(uintptr_t)next); + if (he) + arr = he->data; + else { + arr = malloc(sizeof(struct util_dynarray)); + if (!arr) { + mesa_loge("ZINK: failed to allocate arr!"); + return; + } + + util_dynarray_init(arr, NULL); + _mesa_hash_table_insert(swapchain->presents, (void*)(uintptr_t)next, arr); + } + util_dynarray_append(arr, VkSemaphore, cpi->sem); +out: + if (thread_idx != -1) { + p_atomic_dec(&swapchain->async_presents); + struct pipe_resource *pres = &cpi->res->base.b; + pipe_resource_reference(&pres, NULL); + } + slab_free_st(&screen->present_mempool, cpi); +} + +void +zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res, unsigned nrects, struct pipe_box *boxes) +{ + assert(res->obj->dt); + struct kopper_displaytarget *cdt = res->obj->dt; + assert(zink_kopper_acquired(res->obj->dt, 
res->obj->dt_idx)); + assert(res->obj->present); + + /* always try to prune if the current swapchain has seen presents */ + if (cdt->swapchain->last_present != UINT32_MAX) + prune_old_swapchains(screen, cdt, false); + + struct zink_kopper_present_info *cpi = slab_alloc_st(&screen->present_mempool); + if (!cpi) { + mesa_loge("ZINK: failed to allocate cpi!"); + return; + } + + cpi->sem = res->obj->present; + cpi->res = res; + cpi->swapchain = cdt->swapchain; + cpi->indefinite_acquire = res->obj->indefinite_acquire; + cpi->image = res->obj->dt_idx; + cpi->info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + cpi->info.pNext = NULL; + cpi->info.waitSemaphoreCount = 1; + cpi->info.pWaitSemaphores = &cpi->sem; + cpi->info.swapchainCount = 1; + cpi->info.pSwapchains = &cdt->swapchain->swapchain; + cpi->info.pImageIndices = &cpi->image; + cpi->info.pResults = NULL; + res->obj->present = VK_NULL_HANDLE; + if (nrects) { + cpi->rinfo.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR; + cpi->rinfo.pNext = NULL; + cpi->rinfo.swapchainCount = 1; + cpi->rinfo.pRegions = &cpi->region; + cpi->region.rectangleCount = nrects; + cpi->region.pRectangles = cpi->regions; + for (unsigned i = 0; i < nrects; i++) { + cpi->regions[i].offset.x = boxes[i].x; + /* + 2) Where is the origin of the VkRectLayerKHR? + + RESOLVED: The upper left corner of the presentable image(s) of the swapchain, per the definition of framebuffer coordinates. 
+ */ + cpi->regions[i].offset.y = cdt->swapchain->scci.imageExtent.height - boxes[i].y - boxes[i].height; + cpi->regions[i].extent.width = boxes[i].width; + cpi->regions[i].extent.height = boxes[i].height; + cpi->regions[i].extent.width = MIN2(cpi->regions[i].extent.width, cpi->swapchain->scci.imageExtent.width - cpi->regions[i].offset.x); + cpi->regions[i].extent.height = MIN2(cpi->regions[i].extent.height, cpi->swapchain->scci.imageExtent.height - cpi->regions[i].offset.y); + cpi->regions[i].layer = boxes[i].z; + } + cpi->info.pNext = &cpi->rinfo; + } + /* Ex GLX_EXT_buffer_age: + * + * Buffers' ages are initialized to 0 at buffer creation time. + * When a frame boundary is reached, the following occurs before + * any exchanging or copying of color buffers: + * + * * The current back buffer's age is set to 1. + * * Any other color buffers' ages are incremented by 1 if + * their age was previously greater than 0. + */ + if (!cdt->age_locked) { + for (int i = 0; i < cdt->swapchain->num_images; i++) { + if (i == res->obj->dt_idx) + cdt->swapchain->images[i].age = 1; + else if (cdt->swapchain->images[i].age > 0) + cdt->swapchain->images[i].age += 1; + } + } + if (util_queue_is_initialized(&screen->flush_queue)) { + p_atomic_inc(&cpi->swapchain->async_presents); + struct pipe_resource *pres = NULL; + pipe_resource_reference(&pres, &res->base.b); + util_queue_add_job(&screen->flush_queue, cpi, &cdt->swapchain->present_fence, + kopper_present, NULL, 0); + } else { + kopper_present(cpi, screen, -1); + } + res->obj->indefinite_acquire = false; + cdt->swapchain->images[res->obj->dt_idx].acquired = NULL; + res->obj->dt_idx = UINT32_MAX; +} + +void +zink_kopper_update_last_written(struct zink_resource *res) +{ + res->obj->last_dt_idx = res->obj->dt_idx; +} + +void +zink_kopper_set_readback_needs_update(struct zink_resource *res) +{ + struct kopper_displaytarget *cdt = res->obj->dt; + struct kopper_swapchain *cswap = cdt->swapchain; + 
cswap->images[res->obj->dt_idx].readback_needs_update = true; +} + +static bool +kopper_ensure_readback(struct zink_screen *screen, struct zink_resource *res) +{ + struct kopper_displaytarget *cdt = res->obj->dt; + struct kopper_swapchain *cswap = cdt->swapchain; + + for (unsigned i = 0; i < cswap->num_images; i++) { + if (cswap->images[i].readback) + return false; + struct pipe_resource templ = res->base.b; + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + cswap->images[i].readback = screen->base.resource_create(&screen->base, &templ); + } + return true; +} + +bool +zink_kopper_acquire_readback(struct zink_context *ctx, struct zink_resource *res, struct zink_resource **readback) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + assert(res->obj->dt); + struct kopper_displaytarget *cdt = res->obj->dt; + const struct kopper_swapchain *cswap = cdt->swapchain; + uint32_t last_dt_idx = res->obj->last_dt_idx; + VkResult ret = VK_SUCCESS; + + if (++cdt->readback_counter >= ZINK_READBACK_THRESHOLD) { + if (kopper_ensure_readback(screen, res) && + res->obj->dt_idx != UINT32_MAX && cswap->images[res->obj->dt_idx].readback_needs_update) + zink_kopper_readback_update(ctx, res); + } + /* if this hasn't been presented or if it has data, use this as the readback target */ + if (res->obj->last_dt_idx == UINT32_MAX || + (res->obj->dt_idx != UINT32_MAX && cdt->swapchain->images[res->obj->dt_idx].age)) { + *readback = res; + return false; + } + if (cswap->images[last_dt_idx].acquired) { + struct zink_resource *rb = cswap->images[last_dt_idx].acquired; + *readback = rb; + return false; + } + if (cswap->images[last_dt_idx].readback) { + struct zink_resource *rb = zink_resource(cswap->images[res->obj->last_dt_idx].readback); + if (!cswap->images[last_dt_idx].readback_needs_update) { + *readback = rb; + return false; + } + } + while (res->obj->dt_idx != last_dt_idx) { + cdt->age_locked = true; + if (res->obj->dt_idx != UINT32_MAX && 
!zink_kopper_present_readback(ctx, res)) + break; + cdt->age_locked = true; + do { + ret = kopper_acquire(screen, res, 0); + } while (!is_swapchain_kill(ret) && (ret == VK_NOT_READY || ret == VK_TIMEOUT)); + if (is_swapchain_kill(ret)) { + kill_swapchain(ctx, res); + *readback = NULL; + cdt->age_locked = false; + return false; + } + } + if (cswap != cdt->swapchain) { + ctx->swapchain_size = cdt->swapchain->scci.imageExtent; + res->base.b.width0 = ctx->swapchain_size.width; + res->base.b.height0 = ctx->swapchain_size.height; + } + zink_batch_usage_set(&cdt->swapchain->batch_uses, ctx->batch.state); + *readback = res; + return true; +} + +bool +zink_kopper_present_readback(struct zink_context *ctx, struct zink_resource *res) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + VkSubmitInfo si = {0}; + assert(zink_is_swapchain(res)); + if (res->obj->last_dt_idx == UINT32_MAX) + return true; + if (res->layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR) { + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + ctx->base.flush(&ctx->base, NULL, 0); + } + si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + si.signalSemaphoreCount = 1; + VkPipelineStageFlags mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + si.pWaitDstStageMask = &mask; + VkSemaphore acquire = zink_kopper_acquire_submit(screen, res); + VkSemaphore present = res->obj->present ? 
res->obj->present : zink_kopper_present(screen, res); + if (screen->threaded_submit) + util_queue_finish(&screen->flush_queue); + si.waitSemaphoreCount = !!acquire; + si.pWaitSemaphores = &acquire; + si.pSignalSemaphores = &present; + simple_mtx_lock(&screen->queue_lock); + VkResult error = VKSCR(QueueSubmit)(screen->queue, 1, &si, VK_NULL_HANDLE); + simple_mtx_unlock(&screen->queue_lock); + if (!zink_screen_handle_vkresult(screen, error)) + return false; + + zink_kopper_present_queue(screen, res, 0, NULL); + if (util_queue_is_initialized(&screen->flush_queue)) { + struct kopper_displaytarget *cdt = res->obj->dt; + util_queue_fence_wait(&cdt->swapchain->present_fence); + } + + simple_mtx_lock(&screen->queue_lock); + error = VKSCR(QueueWaitIdle)(screen->queue); + simple_mtx_unlock(&screen->queue_lock); + + simple_mtx_lock(&screen->semaphores_lock); + util_dynarray_append(&screen->semaphores, VkSemaphore, acquire); + simple_mtx_unlock(&screen->semaphores_lock); + + struct kopper_displaytarget *cdt = res->obj->dt; + cdt->age_locked = false; + + return zink_screen_handle_vkresult(screen, error); +} + +void +zink_kopper_readback_update(struct zink_context *ctx, struct zink_resource *res) +{ + assert(res->obj->dt); + struct kopper_displaytarget *cdt = res->obj->dt; + struct kopper_swapchain *cswap = cdt->swapchain; + assert(res->obj->dt_idx != UINT32_MAX); + struct pipe_resource *readback = cswap->images[res->obj->dt_idx].readback; + struct pipe_box box; + u_box_3d(0, 0, 0, res->base.b.width0, res->base.b.height0, res->base.b.depth0, &box); + + if (cswap->images[res->obj->dt_idx].readback_needs_update && readback) + ctx->base.resource_copy_region(&ctx->base, readback, 0, 0, 0, 0, &res->base.b, 0, &box); + cswap->images[res->obj->dt_idx].readback_needs_update = false; +} + +bool +zink_kopper_update(struct pipe_screen *pscreen, struct pipe_resource *pres, int *w, int *h) +{ + struct zink_resource *res = zink_resource(pres); + struct zink_screen *screen = 
zink_screen(pscreen); + if (!res->obj->dt) + return false; + struct kopper_displaytarget *cdt = res->obj->dt; + if (cdt->type != KOPPER_X11) { + *w = res->base.b.width0; + *h = res->base.b.height0; + return true; + } + VkResult ret = update_caps(screen, cdt); + if (ret != VK_SUCCESS) { + mesa_loge("zink: failed to update swapchain capabilities: %s", vk_Result_to_str(ret)); + cdt->is_kill = true; + return false; + } + *w = cdt->caps.currentExtent.width; + *h = cdt->caps.currentExtent.height; + return true; +} + +bool +zink_kopper_is_cpu(const struct pipe_screen *pscreen) +{ + const struct zink_screen *screen = (const struct zink_screen*)pscreen; + return screen->is_cpu; +} + +void +zink_kopper_fixup_depth_buffer(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (!ctx->fb_state.zsbuf) + return; + + assert(ctx->fb_state.zsbuf->texture->bind & PIPE_BIND_DISPLAY_TARGET); + + struct zink_resource *res = zink_resource(ctx->fb_state.zsbuf->texture); + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.zsbuf; + if (surf->info.width == ctx->fb_state.width && + surf->info.height == ctx->fb_state.height) + return; + + struct pipe_resource templ = *ctx->fb_state.zsbuf->texture; + templ.width0 = ctx->fb_state.width; + templ.height0 = ctx->fb_state.height; + struct pipe_resource *pz = screen->base.resource_create(&screen->base, &templ); + struct zink_resource *z = zink_resource(pz); + zink_resource_object_reference(screen, &res->obj, z->obj); + res->base.b.width0 = ctx->fb_state.width; + res->base.b.height0 = ctx->fb_state.height; + pipe_resource_reference(&pz, NULL); + + ctx->fb_state.zsbuf->width = ctx->fb_state.width; + ctx->fb_state.zsbuf->height = ctx->fb_state.height; + struct pipe_surface *psurf = ctx->base.create_surface(&ctx->base, &res->base.b, ctx->fb_state.zsbuf); + struct zink_ctx_surface *cz = (struct zink_ctx_surface*)psurf; + + /* 
oh god why */ + zink_surface_reference(screen, &csurf->surf, cz->surf); + pipe_surface_release(&ctx->base, &psurf); +} + +bool +zink_kopper_check(struct pipe_resource *pres) +{ + struct zink_resource *res = zink_resource(pres); + assert(pres->bind & PIPE_BIND_DISPLAY_TARGET); + if (!res->obj->dt) + return false; + struct kopper_displaytarget *cdt = res->obj->dt; + return !cdt->is_kill; +} + +void +zink_kopper_set_swap_interval(struct pipe_screen *pscreen, struct pipe_resource *pres, int interval) +{ + struct zink_resource *res = zink_resource(pres); + struct zink_screen *screen = zink_screen(pscreen); + assert(res->obj->dt); + struct kopper_displaytarget *cdt = res->obj->dt; + VkPresentModeKHR old_present_mode = cdt->present_mode; + + zink_kopper_set_present_mode_for_interval(cdt, interval); + + if (old_present_mode != cdt->present_mode) + update_swapchain(screen, cdt, cdt->caps.currentExtent.width, cdt->caps.currentExtent.height); +} + +int +zink_kopper_query_buffer_age(struct pipe_context *pctx, struct pipe_resource *pres) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_resource *res = zink_resource(pres); + assert(res->obj->dt); + struct kopper_displaytarget *cdt = res->obj->dt; + + ctx = zink_tc_context_unwrap(pctx, zink_screen(pctx->screen)->threaded); + + /* Returning 0 here isn't ideal (yes, the buffer is undefined, because you + * lost it) but threading the error up is more hassle than it's worth. 
+ */ + if (!zink_kopper_acquired(res->obj->dt, res->obj->dt_idx)) + if (!zink_kopper_acquire(ctx, res, UINT64_MAX)) + return 0; + + return cdt->swapchain->images[res->obj->dt_idx].age; +} + +static void +swapchain_prune_batch_usage(struct kopper_swapchain *cswap, const struct zink_batch_usage *u) +{ + if (cswap->batch_uses == u) + cswap->batch_uses = NULL; +} + +void +zink_kopper_prune_batch_usage(struct kopper_displaytarget *cdt, const struct zink_batch_usage *u) +{ + struct kopper_swapchain *cswap = cdt->swapchain; + swapchain_prune_batch_usage(cswap, u); + for (cswap = cdt->old_swapchain; cswap; cswap = cswap->next) + swapchain_prune_batch_usage(cswap, u); +} diff --git a/src/gallium/drivers/zink/zink_kopper.h b/src/gallium/drivers/zink/zink_kopper.h new file mode 100644 index 00000000000..89106975266 --- /dev/null +++ b/src/gallium/drivers/zink/zink_kopper.h @@ -0,0 +1,186 @@ +/* + * Copyright © 2021 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> + */ + +#ifndef ZINK_KOPPER_H +#define ZINK_KOPPER_H + +#include "kopper_interface.h" +#include "util/u_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct zink_batch_usage; + +/* number of times a swapchain can be read without forcing readback mode */ +#define ZINK_READBACK_THRESHOLD 3 + +struct kopper_swapchain_image { + bool init; + bool readback_needs_update; + bool dt_has_data; + int age; + VkImage image; + struct zink_resource *acquired; + struct pipe_resource *readback; + VkSemaphore acquire; + VkImageLayout layout; +}; + +struct kopper_swapchain { + struct kopper_swapchain *next; + VkSwapchainKHR swapchain; + + unsigned last_present; + unsigned num_images; + uint32_t last_present_prune; + struct hash_table *presents; + VkSwapchainCreateInfoKHR scci; + unsigned num_acquires; + unsigned max_acquires; + unsigned async_presents; + struct util_queue_fence present_fence; + struct zink_batch_usage *batch_uses; + struct kopper_swapchain_image *images; +}; + +enum kopper_type { + KOPPER_X11, + KOPPER_WAYLAND, + KOPPER_WIN32 +}; + +struct kopper_displaytarget +{ + unsigned refcount; + VkFormat formats[2]; + unsigned width; + unsigned height; + unsigned stride; + void *loader_private; + + VkSurfaceKHR surface; + uint32_t present_modes; //VkPresentModeKHR bitmask + struct kopper_swapchain *swapchain; + struct kopper_swapchain *old_swapchain; + + struct kopper_loader_info info; + + VkSurfaceCapabilitiesKHR caps; + VkImageFormatListCreateInfo format_list; + enum kopper_type type; + bool is_kill; + VkPresentModeKHR present_mode; + unsigned readback_counter; + + bool age_locked; //disables buffer age during readback +}; + 
+struct zink_kopper_present_info { + VkPresentInfoKHR info; + VkPresentRegionsKHR rinfo; + VkPresentRegionKHR region; + VkRectLayerKHR regions[64]; + uint32_t image; + struct kopper_swapchain *swapchain; + struct zink_resource *res; + VkSemaphore sem; + bool indefinite_acquire; +}; + +struct zink_context; +struct zink_screen; +struct zink_resource; + +static inline bool +zink_kopper_has_srgb(const struct kopper_displaytarget *cdt) +{ + return cdt->formats[1] != VK_FORMAT_UNDEFINED; +} + +static inline bool +zink_kopper_last_present_eq(const struct kopper_displaytarget *cdt, uint32_t idx) +{ + return cdt->swapchain->last_present == idx; +} + +static inline bool +zink_kopper_acquired(const struct kopper_displaytarget *cdt, uint32_t idx) +{ + return idx != UINT32_MAX && cdt->swapchain->images[idx].acquired; +} + +void +zink_kopper_update_last_written(struct zink_resource *res); + +struct kopper_displaytarget * +zink_kopper_displaytarget_create(struct zink_screen *screen, unsigned tex_usage, + enum pipe_format format, unsigned width, + unsigned height, unsigned alignment, + const void *loader_private, unsigned *stride); +void +zink_kopper_displaytarget_destroy(struct zink_screen *screen, struct kopper_displaytarget *cdt); + + +bool +zink_kopper_acquire(struct zink_context *ctx, struct zink_resource *res, uint64_t timeout); +VkSemaphore +zink_kopper_acquire_submit(struct zink_screen *screen, struct zink_resource *res); +VkSemaphore +zink_kopper_present(struct zink_screen *screen, struct zink_resource *res); +void +zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res, unsigned nrects, struct pipe_box *boxes); +bool +zink_kopper_acquire_readback(struct zink_context *ctx, struct zink_resource *res, struct zink_resource **readback); +bool +zink_kopper_present_readback(struct zink_context *ctx, struct zink_resource *res); +void +zink_kopper_readback_update(struct zink_context *ctx, struct zink_resource *res); +void 
+zink_kopper_deinit_displaytarget(struct zink_screen *screen, struct kopper_displaytarget *cdt); +bool +zink_kopper_update(struct pipe_screen *pscreen, struct pipe_resource *pres, int *w, int *h); +bool +zink_kopper_is_cpu(const struct pipe_screen *pscreen); +void +zink_kopper_fixup_depth_buffer(struct zink_context *ctx); +bool +zink_kopper_check(struct pipe_resource *pres); +void +zink_kopper_set_swap_interval(struct pipe_screen *pscreen, struct pipe_resource *pres, int interval); +int +zink_kopper_query_buffer_age(struct pipe_context *pctx, struct pipe_resource *pres); +void +zink_kopper_prune_batch_usage(struct kopper_displaytarget *cdt, const struct zink_batch_usage *u); +void +zink_kopper_set_readback_needs_update(struct zink_resource *res); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/zink/zink_lower_cubemap_to_array.c b/src/gallium/drivers/zink/zink_lower_cubemap_to_array.c new file mode 100644 index 00000000000..55a8425f130 --- /dev/null +++ b/src/gallium/drivers/zink/zink_lower_cubemap_to_array.c @@ -0,0 +1,533 @@ +/* + * Copyright © Microsoft Corporation + * Copyright © 2022 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nir_builder.h" +#include "nir_builtin_builder.h" + + +static const struct glsl_type * +make_2darray_sampler_from_cubemap(const struct glsl_type *type) +{ + return glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ? + glsl_sampler_type( + GLSL_SAMPLER_DIM_2D, + false, true, + glsl_get_sampler_result_type(type)) : type; +} + +static const struct glsl_type * +make_2darray_from_cubemap_with_array(const struct glsl_type *type) +{ + if (glsl_type_is_array(type)) { + const struct glsl_type *new_type = glsl_without_array(type); + return new_type != type ? 
glsl_array_type(make_2darray_from_cubemap_with_array(glsl_without_array(type)), + glsl_get_length(type), 0) : type; + } + return make_2darray_sampler_from_cubemap(type); +} + +static bool +lower_cubemap_to_array_filter(const nir_instr *instr, const void *mask) +{ + const uint32_t *nonseamless_cube_mask = mask; + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + return false; + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txd: + case nir_texop_txl: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_tg4: + break; + default: + return false; + } + return (BITFIELD_BIT(tex->sampler_index) & (*nonseamless_cube_mask)) != 0; + } + + return false; +} + +typedef struct { + nir_def *rx; + nir_def *ry; + nir_def *rz; + nir_def *arx; + nir_def *ary; + nir_def *arz; + nir_def *array; +} coord_t; + + +/* This is taken from from sp_tex_sample:convert_cube */ +static nir_def * +evaluate_face_x(nir_builder *b, coord_t *coord) +{ + nir_def *sign = nir_fsign(b, coord->rx); + nir_def *positive = nir_fge_imm(b, coord->rx, 0.0); + nir_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arx); + + nir_def *x = nir_fadd_imm(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), 0.5); + nir_def *y = nir_fadd_imm(b, nir_fmul(b, ima, coord->ry), 0.5); + nir_def *face = nir_bcsel(b, positive, nir_imm_float(b, 0.0), nir_imm_float(b, 1.0)); + + if (coord->array) + face = nir_fadd(b, face, coord->array); + + return nir_vec3(b, x,y, face); +} + +static nir_def * +evaluate_face_y(nir_builder *b, coord_t *coord) +{ + nir_def *sign = nir_fsign(b, coord->ry); + nir_def *positive = nir_fge_imm(b, coord->ry, 0.0); + nir_def *ima = nir_fdiv(b, nir_imm_float(b, 0.5), coord->ary); + + nir_def *x = nir_fadd_imm(b, nir_fmul(b, ima, coord->rx), 0.5); + nir_def *y = nir_fadd_imm(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), 0.5); + nir_def *face = nir_bcsel(b, positive, 
nir_imm_float(b, 2.0), nir_imm_float(b, 3.0)); + + if (coord->array) + face = nir_fadd(b, face, coord->array); + + return nir_vec3(b, x,y, face); +} + +static nir_def * +evaluate_face_z(nir_builder *b, coord_t *coord) +{ + nir_def *sign = nir_fsign(b, coord->rz); + nir_def *positive = nir_fge_imm(b, coord->rz, 0.0); + nir_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arz); + + nir_def *x = nir_fadd_imm(b, nir_fmul(b, nir_fmul(b, sign, ima), nir_fneg(b, coord->rx)), 0.5); + nir_def *y = nir_fadd_imm(b, nir_fmul(b, ima, coord->ry), 0.5); + nir_def *face = nir_bcsel(b, positive, nir_imm_float(b, 4.0), nir_imm_float(b, 5.0)); + + if (coord->array) + face = nir_fadd(b, face, coord->array); + + return nir_vec3(b, x,y, face); +} + +static nir_def * +create_array_tex_from_cube_tex(nir_builder *b, nir_tex_instr *tex, nir_def *coord, nir_texop op) +{ + nir_tex_instr *array_tex; + + unsigned num_srcs = tex->num_srcs; + if (op == nir_texop_txf && nir_tex_instr_src_index(tex, nir_tex_src_comparator) != -1) + num_srcs--; + array_tex = nir_tex_instr_create(b->shader, num_srcs); + array_tex->op = op; + array_tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + array_tex->is_array = true; + array_tex->is_shadow = tex->is_shadow; + array_tex->is_sparse = tex->is_sparse; + array_tex->is_new_style_shadow = tex->is_new_style_shadow; + array_tex->texture_index = tex->texture_index; + array_tex->sampler_index = tex->sampler_index; + array_tex->dest_type = tex->dest_type; + array_tex->coord_components = 3; + + nir_src coord_src = nir_src_for_ssa(coord); + unsigned s = 0; + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (op == nir_texop_txf && tex->src[i].src_type == nir_tex_src_comparator) + continue; + nir_src *psrc = (tex->src[i].src_type == nir_tex_src_coord) ? 
+ &coord_src : &tex->src[i].src; + + array_tex->src[s].src_type = tex->src[i].src_type; + if (psrc->ssa->num_components != nir_tex_instr_src_size(array_tex, s)) { + nir_def *c = nir_trim_vector(b, psrc->ssa, + nir_tex_instr_src_size(array_tex, s)); + array_tex->src[s].src = nir_src_for_ssa(c); + } else + array_tex->src[s].src = nir_src_for_ssa(psrc->ssa); + s++; + } + + nir_def_init(&array_tex->instr, &array_tex->def, + nir_tex_instr_dest_size(array_tex), + tex->def.bit_size); + nir_builder_instr_insert(b, &array_tex->instr); + return &array_tex->def; +} + +static nir_def * +handle_cube_edge(nir_builder *b, nir_def *x, nir_def *y, nir_def *face, nir_def *array_slice_cube_base, nir_def *tex_size) +{ + enum cube_remap + { + cube_remap_zero = 0, + cube_remap_x, + cube_remap_y, + cube_remap_tex_size, + cube_remap_tex_size_minus_x, + cube_remap_tex_size_minus_y, + + cube_remap_size, + }; + + struct cube_remap_table + { + enum cube_remap remap_x; + enum cube_remap remap_y; + uint32_t remap_face; + }; + + static const struct cube_remap_table cube_remap_neg_x[6] = + { + {cube_remap_tex_size, cube_remap_y, 4}, + {cube_remap_tex_size, cube_remap_y, 5}, + {cube_remap_y, cube_remap_zero, 1}, + {cube_remap_tex_size_minus_y, cube_remap_tex_size, 1}, + {cube_remap_tex_size, cube_remap_y, 1}, + {cube_remap_tex_size, cube_remap_y, 0}, + }; + + static const struct cube_remap_table cube_remap_pos_x[6] = + { + {cube_remap_zero, cube_remap_y, 5}, + {cube_remap_zero, cube_remap_y, 4}, + {cube_remap_tex_size_minus_y, cube_remap_zero, 0}, + {cube_remap_y, cube_remap_tex_size, 0}, + {cube_remap_zero, cube_remap_y, 0}, + {cube_remap_zero, cube_remap_y, 1}, + }; + + static const struct cube_remap_table cube_remap_neg_y[6] = + { + {cube_remap_tex_size, cube_remap_tex_size_minus_x, 2}, + {cube_remap_zero, cube_remap_x, 2}, + {cube_remap_tex_size_minus_x, cube_remap_zero, 5}, + {cube_remap_x, cube_remap_tex_size, 4}, + {cube_remap_x, cube_remap_tex_size, 2}, + {cube_remap_tex_size_minus_x, 
cube_remap_zero, 2}, + }; + + static const struct cube_remap_table cube_remap_pos_y[6] = + { + {cube_remap_tex_size, cube_remap_x, 3}, + {cube_remap_zero, cube_remap_tex_size_minus_x, 3}, + {cube_remap_x, cube_remap_zero, 4}, + {cube_remap_tex_size_minus_x, cube_remap_tex_size, 5}, + {cube_remap_x, cube_remap_zero, 3}, + {cube_remap_tex_size_minus_x, cube_remap_tex_size, 3}, + }; + + static const struct cube_remap_table* remap_tables[4] = { + cube_remap_neg_x, + cube_remap_pos_x, + cube_remap_neg_y, + cube_remap_pos_y + }; + + nir_def *zero = nir_imm_int(b, 0); + + /* Doesn't matter since the texture is square */ + tex_size = nir_channel(b, tex_size, 0); + + nir_def *x_on = nir_iand(b, nir_ige(b, x, zero), nir_ige(b, tex_size, x)); + nir_def *y_on = nir_iand(b, nir_ige(b, y, zero), nir_ige(b, tex_size, y)); + nir_def *one_on = nir_ixor(b, x_on, y_on); + + /* If the sample did not fall off the face in either dimension, then set output = input */ + nir_def *x_result = x; + nir_def *y_result = y; + nir_def *face_result = face; + + /* otherwise, if the sample fell off the face in either the X or the Y direction, remap to the new face */ + nir_def *remap_predicates[4] = + { + nir_iand(b, one_on, nir_ilt(b, x, zero)), + nir_iand(b, one_on, nir_ilt(b, tex_size, x)), + nir_iand(b, one_on, nir_ilt(b, y, zero)), + nir_iand(b, one_on, nir_ilt(b, tex_size, y)), + }; + + nir_def *remap_array[cube_remap_size]; + + remap_array[cube_remap_zero] = zero; + remap_array[cube_remap_x] = x; + remap_array[cube_remap_y] = y; + remap_array[cube_remap_tex_size] = tex_size; + remap_array[cube_remap_tex_size_minus_x] = nir_isub(b, tex_size, x); + remap_array[cube_remap_tex_size_minus_y] = nir_isub(b, tex_size, y); + + /* For each possible way the sample could have fallen off */ + for (unsigned i = 0; i < 4; i++) { + const struct cube_remap_table* remap_table = remap_tables[i]; + + /* For each possible original face */ + for (unsigned j = 0; j < 6; j++) { + nir_def *predicate = nir_iand(b, 
remap_predicates[i], nir_ieq_imm(b, face, j)); + + x_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_x], x_result); + y_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_y], y_result); + face_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_face], face_result); + } + } + + return nir_vec3(b, x_result, y_result, nir_iadd(b, face_result, array_slice_cube_base)); +} + +static nir_def * +handle_cube_gather(nir_builder *b, nir_tex_instr *tex, nir_def *coord) +{ + tex->is_array = true; + nir_def *tex_size = nir_get_texture_size(b, tex); + + /* nir_get_texture_size puts the cursor before the tex op */ + b->cursor = nir_after_instr(coord->parent_instr); + + nir_def *const_05 = nir_imm_float(b, 0.5f); + nir_def *texel_coords = nir_fmul(b, nir_trim_vector(b, coord, 2), + nir_i2f32(b, nir_trim_vector(b, tex_size, 2))); + + nir_def *x_orig = nir_channel(b, texel_coords, 0); + nir_def *y_orig = nir_channel(b, texel_coords, 1); + + nir_def *x_pos = nir_f2i32(b, nir_fadd(b, x_orig, const_05)); + nir_def *x_neg = nir_f2i32(b, nir_fsub(b, x_orig, const_05)); + nir_def *y_pos = nir_f2i32(b, nir_fadd(b, y_orig, const_05)); + nir_def *y_neg = nir_f2i32(b, nir_fsub(b, y_orig, const_05)); + nir_def *coords[4][2] = { + { x_neg, y_pos }, + { x_pos, y_pos }, + { x_pos, y_neg }, + { x_neg, y_neg }, + }; + + nir_def *array_slice_2d = nir_f2i32(b, nir_channel(b, coord, 2)); + nir_def *face = nir_imod_imm(b, array_slice_2d, 6); + nir_def *array_slice_cube_base = nir_isub(b, array_slice_2d, face); + + nir_def *channels[4]; + for (unsigned i = 0; i < 4; ++i) { + nir_def *final_coord = handle_cube_edge(b, coords[i][0], coords[i][1], face, array_slice_cube_base, tex_size); + nir_def *sampled_val = create_array_tex_from_cube_tex(b, tex, final_coord, nir_texop_txf); + channels[i] = nir_channel(b, sampled_val, tex->component); + } + + return nir_vec(b, channels, 4); +} + +static nir_def * +lower_cube_coords(nir_builder *b, nir_def *coord, bool 
is_array) +{ + coord_t coords; + coords.rx = nir_channel(b, coord, 0); + coords.ry = nir_channel(b, coord, 1); + coords.rz = nir_channel(b, coord, 2); + coords.arx = nir_fabs(b, coords.rx); + coords.ary = nir_fabs(b, coords.ry); + coords.arz = nir_fabs(b, coords.rz); + coords.array = NULL; + if (is_array) + coords.array = nir_fmul_imm(b, nir_channel(b, coord, 3), 6.0f); + + nir_def *use_face_x = nir_iand(b, + nir_fge(b, coords.arx, coords.ary), + nir_fge(b, coords.arx, coords.arz)); + + nir_if *use_face_x_if = nir_push_if(b, use_face_x); + nir_def *face_x_coord = evaluate_face_x(b, &coords); + nir_if *use_face_x_else = nir_push_else(b, use_face_x_if); + + nir_def *use_face_y = nir_iand(b, + nir_fge(b, coords.ary, coords.arx), + nir_fge(b, coords.ary, coords.arz)); + + nir_if *use_face_y_if = nir_push_if(b, use_face_y); + nir_def *face_y_coord = evaluate_face_y(b, &coords); + nir_if *use_face_y_else = nir_push_else(b, use_face_y_if); + + nir_def *face_z_coord = evaluate_face_z(b, &coords); + + nir_pop_if(b, use_face_y_else); + nir_def *face_y_or_z_coord = nir_if_phi(b, face_y_coord, face_z_coord); + nir_pop_if(b, use_face_x_else); + + // This contains in xy the normalized sample coordinates, and in z the face index + nir_def *coord_and_face = nir_if_phi(b, face_x_coord, face_y_or_z_coord); + + return coord_and_face; +} + +static void +rewrite_cube_var_type(nir_builder *b, nir_tex_instr *tex) +{ + unsigned index = tex->texture_index; + nir_variable *sampler = NULL; + int highest = -1; + nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) { + if (!glsl_type_is_sampler(glsl_without_array(var->type))) + continue; + unsigned size = glsl_type_is_array(var->type) ? 
glsl_get_length(var->type) : 1; + if (var->data.driver_location == index || + (var->data.driver_location < index && var->data.driver_location + size > index)) { + sampler = var; + break; + } + /* handle array sampler access: use the next-closest sampler */ + if (var->data.driver_location > highest && var->data.driver_location < index) { + highest = var->data.driver_location; + sampler = var; + } + } + assert(sampler); + sampler->type = make_2darray_from_cubemap_with_array(sampler->type); +} + +/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */ +/* tex(s, coord) = txl(s, coord, lod(s, coord).x) */ +static nir_tex_instr * +lower_tex_to_txl(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_after_instr(&tex->instr); + int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); + unsigned num_srcs = bias_idx >= 0 ? tex->num_srcs : tex->num_srcs + 1; + nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs); + + txl->op = nir_texop_txl; + txl->sampler_dim = tex->sampler_dim; + txl->dest_type = tex->dest_type; + txl->coord_components = tex->coord_components; + txl->texture_index = tex->texture_index; + txl->sampler_index = tex->sampler_index; + txl->is_array = tex->is_array; + txl->is_shadow = tex->is_shadow; + txl->is_sparse = tex->is_sparse; + txl->is_new_style_shadow = tex->is_new_style_shadow; + + unsigned s = 0; + for (int i = 0; i < tex->num_srcs; i++) { + if (i == bias_idx) + continue; + txl->src[s].src = nir_src_for_ssa(tex->src[i].src.ssa); + txl->src[s].src_type = tex->src[i].src_type; + s++; + } + nir_def *lod = nir_get_texture_lod(b, tex); + + if (bias_idx >= 0) + lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa); + lod = nir_fadd_imm(b, lod, -1.0); + txl->src[s] = nir_tex_src_for_ssa(nir_tex_src_lod, lod); + + b->cursor = nir_before_instr(&tex->instr); + nir_def_init(&txl->instr, &txl->def, + tex->def.num_components, + tex->def.bit_size); + nir_builder_instr_insert(b, &txl->instr); + nir_def_rewrite_uses(&tex->def, &txl->def); 
+ return txl; +} + +static nir_def * +lower_cube_sample(nir_builder *b, nir_tex_instr *tex) +{ + if (!tex->is_shadow && (tex->op == nir_texop_txb || tex->op == nir_texop_tex)) { + tex = lower_tex_to_txl(b, tex); + } + + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + assert(coord_index >= 0); + + /* Evaluate the face and the xy coordinates for a 2D tex op */ + nir_def *coord = tex->src[coord_index].src.ssa; + nir_def *coord_and_face = lower_cube_coords(b, coord, tex->is_array); + + rewrite_cube_var_type(b, tex); + + if (tex->op == nir_texop_tg4 && !tex->is_shadow) + return handle_cube_gather(b, tex, coord_and_face); + else + return create_array_tex_from_cube_tex(b, tex, coord_and_face, tex->op); +} + +static nir_def * +lower_cube_txs(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_after_instr(&tex->instr); + + rewrite_cube_var_type(b, tex); + unsigned num_components = tex->def.num_components; + /* force max components to unbreak textureSize().xy */ + tex->def.num_components = 3; + tex->is_array = true; + nir_def *array_dim = nir_channel(b, &tex->def, 2); + nir_def *cube_array_dim = nir_idiv(b, array_dim, nir_imm_int(b, 6)); + nir_def *size = nir_vec3(b, nir_channel(b, &tex->def, 0), + nir_channel(b, &tex->def, 1), + cube_array_dim); + return nir_trim_vector(b, size, num_components); +} + +static nir_def * +lower_cubemap_to_array_tex(nir_builder *b, nir_tex_instr *tex) +{ + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txd: + case nir_texop_txl: + case nir_texop_lod: + case nir_texop_tg4: + return lower_cube_sample(b, tex); + case nir_texop_txs: + return lower_cube_txs(b, tex); + default: + unreachable("Unsupported cupe map texture operation"); + } +} + +static nir_def * +lower_cubemap_to_array_impl(nir_builder *b, nir_instr *instr, + UNUSED void *_options) +{ + if (instr->type == nir_instr_type_tex) + return lower_cubemap_to_array_tex(b, nir_instr_as_tex(instr)); + return NULL; +} + +bool 
+zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask); +bool +zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask) +{ + return nir_shader_lower_instructions(s, + lower_cubemap_to_array_filter, + lower_cubemap_to_array_impl, + &nonseamless_cube_mask); +} diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c index b16b64701dc..80d2f5479ce 100644 --- a/src/gallium/drivers/zink/zink_pipeline.c +++ b/src/gallium/drivers/zink/zink_pipeline.c @@ -26,6 +26,7 @@ #include "zink_pipeline.h" #include "zink_compiler.h" +#include "nir_to_spirv/nir_to_spirv.h" #include "zink_context.h" #include "zink_program.h" #include "zink_render_pass.h" @@ -35,35 +36,37 @@ #include "util/u_debug.h" #include "util/u_prim.h" -static VkBlendFactor -clamp_void_blend_factor(VkBlendFactor f) -{ - if (f == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA) - return VK_BLEND_FACTOR_ZERO; - if (f == VK_BLEND_FACTOR_DST_ALPHA) - return VK_BLEND_FACTOR_ONE; - return f; -} - VkPipeline zink_create_gfx_pipeline(struct zink_screen *screen, struct zink_gfx_program *prog, + struct zink_shader_object *objs, struct zink_gfx_pipeline_state *state, - VkPrimitiveTopology primitive_topology) + const uint8_t *binding_map, + VkPrimitiveTopology primitive_topology, + bool optimize, + struct util_dynarray *dgc) { - struct zink_rasterizer_hw_state *hw_rast_state = (void*)state; + struct zink_rasterizer_hw_state *hw_rast_state = (void*)&state->dyn_state3; VkPipelineVertexInputStateCreateInfo vertex_input_state; - if (!screen->info.have_EXT_vertex_input_dynamic_state) { + bool needs_vi = !screen->info.have_EXT_vertex_input_dynamic_state; + if (needs_vi) { memset(&vertex_input_state, 0, sizeof(vertex_input_state)); vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; vertex_input_state.pVertexBindingDescriptions = state->element_state->b.bindings; vertex_input_state.vertexBindingDescriptionCount = 
state->element_state->num_bindings; vertex_input_state.pVertexAttributeDescriptions = state->element_state->attribs; vertex_input_state.vertexAttributeDescriptionCount = state->element_state->num_attribs; + if (!screen->info.have_EXT_extended_dynamic_state || !state->uses_dynamic_stride) { + for (int i = 0; i < state->element_state->num_bindings; ++i) { + const unsigned buffer_id = binding_map[i]; + VkVertexInputBindingDescription *binding = &state->element_state->b.bindings[i]; + binding->stride = state->vertex_strides[buffer_id]; + } + } } VkPipelineVertexInputDivisorStateCreateInfoEXT vdiv_state; - if (!screen->info.have_EXT_vertex_input_dynamic_state && state->element_state->b.divisors_present) { + if (needs_vi && state->element_state->b.divisors_present) { memset(&vdiv_state, 0, sizeof(vdiv_state)); vertex_input_state.pNext = &vdiv_state; vdiv_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; @@ -78,72 +81,87 @@ zink_create_gfx_pipeline(struct zink_screen *screen, switch (primitive_topology) { case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + if (screen->info.have_EXT_primitive_topology_list_restart) { + primitive_state.primitiveRestartEnable = state->dyn_state2.primitive_restart ? VK_TRUE : VK_FALSE; + break; + } + FALLTHROUGH; case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: - if (state->primitive_restart) - debug_printf("restart_index set with unsupported primitive topology %u\n", primitive_topology); + if (state->dyn_state2.primitive_restart) + mesa_loge("zink: restart_index set with unsupported primitive topology %s\n", vk_PrimitiveTopology_to_str(primitive_topology)); primitive_state.primitiveRestartEnable = VK_FALSE; break; default: - primitive_state.primitiveRestartEnable = state->primitive_restart ? 
VK_TRUE : VK_FALSE; + primitive_state.primitiveRestartEnable = state->dyn_state2.primitive_restart ? VK_TRUE : VK_FALSE; } } - VkPipelineColorBlendAttachmentState blend_att[PIPE_MAX_COLOR_BUFS]; VkPipelineColorBlendStateCreateInfo blend_state = {0}; blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; if (state->blend_state) { - unsigned num_attachments = state->render_pass->state.num_rts; - if (state->render_pass->state.have_zsbuf) + unsigned num_attachments = state->render_pass ? + state->render_pass->state.num_rts : + state->rendering_info.colorAttachmentCount; + if (state->render_pass && state->render_pass->state.have_zsbuf) num_attachments--; - if (state->void_alpha_attachments) { - for (unsigned i = 0; i < num_attachments; i++) { - blend_att[i] = state->blend_state->attachments[i]; - if (state->void_alpha_attachments & BITFIELD_BIT(i)) { - blend_att[i].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - blend_att[i].srcColorBlendFactor = clamp_void_blend_factor(blend_att[i].srcColorBlendFactor); - blend_att[i].dstColorBlendFactor = clamp_void_blend_factor(blend_att[i].dstColorBlendFactor); - } - } - blend_state.pAttachments = blend_att; - } else - blend_state.pAttachments = state->blend_state->attachments; + blend_state.pAttachments = state->blend_state->attachments; blend_state.attachmentCount = num_attachments; blend_state.logicOpEnable = state->blend_state->logicop_enable; blend_state.logicOp = state->blend_state->logicop_func; } + if (state->rast_attachment_order) + blend_state.flags |= VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_EXT; VkPipelineMultisampleStateCreateInfo ms_state = {0}; ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; ms_state.rasterizationSamples = state->rast_samples + 1; if (state->blend_state) { ms_state.alphaToCoverageEnable = state->blend_state->alpha_to_coverage; - if (state->blend_state->alpha_to_one && !screen->info.feats.features.alphaToOne) - 
warn_missing_feature("alphaToOne"); + if (state->blend_state->alpha_to_one && !screen->info.feats.features.alphaToOne) { + static bool warned = false; + warn_missing_feature(warned, "alphaToOne"); + } ms_state.alphaToOneEnable = state->blend_state->alpha_to_one; } - ms_state.pSampleMask = state->sample_mask ? &state->sample_mask : NULL; - if (hw_rast_state->force_persample_interp) { + /* "If pSampleMask is NULL, it is treated as if the mask has all bits set to 1." + * - Chapter 27. Rasterization + * + * thus it never makes sense to leave this as NULL since gallium will provide correct + * data here as long as sample_mask is initialized on context creation + */ + ms_state.pSampleMask = &state->sample_mask; + if (state->force_persample_interp) { ms_state.sampleShadingEnable = VK_TRUE; ms_state.minSampleShading = 1.0; + } else if (state->min_samples > 0) { + ms_state.sampleShadingEnable = VK_TRUE; + ms_state.minSampleShading = (float)(state->rast_samples + 1) / (state->min_samples + 1); } VkPipelineViewportStateCreateInfo viewport_state = {0}; + VkPipelineViewportDepthClipControlCreateInfoEXT clip = { + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT, + NULL, + VK_TRUE + }; viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; viewport_state.viewportCount = screen->info.have_EXT_extended_dynamic_state ? 0 : state->dyn_state1.num_viewports; viewport_state.pViewports = NULL; viewport_state.scissorCount = screen->info.have_EXT_extended_dynamic_state ? 
0 : state->dyn_state1.num_viewports; viewport_state.pScissors = NULL; + if (screen->info.have_EXT_depth_clip_control && !hw_rast_state->clip_halfz) + viewport_state.pNext = &clip; VkPipelineRasterizationStateCreateInfo rast_state = {0}; rast_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rast_state.depthClampEnable = hw_rast_state->depth_clamp; - rast_state.rasterizerDiscardEnable = hw_rast_state->rasterizer_discard; + rast_state.rasterizerDiscardEnable = state->dyn_state2.rasterizer_discard; rast_state.polygonMode = hw_rast_state->polygon_mode; - rast_state.cullMode = hw_rast_state->cull_mode; + rast_state.cullMode = state->dyn_state1.cull_mode; rast_state.frontFace = state->dyn_state1.front_face; rast_state.depthBiasEnable = VK_TRUE; @@ -152,6 +170,17 @@ zink_create_gfx_pipeline(struct zink_screen *screen, rast_state.depthBiasSlopeFactor = 0.0; rast_state.lineWidth = 1.0f; + VkPipelineRasterizationDepthClipStateCreateInfoEXT depth_clip_state = {0}; + depth_clip_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT; + depth_clip_state.depthClipEnable = hw_rast_state->depth_clip; + if (screen->info.have_EXT_depth_clip_enable) { + depth_clip_state.pNext = rast_state.pNext; + rast_state.pNext = &depth_clip_state; + } else { + static bool warned = false; + warn_missing_feature(warned, "VK_EXT_depth_clip_enable"); + } + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT pv_state; pv_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT; pv_state.provokingVertexMode = hw_rast_state->pv_last ? 
@@ -174,7 +203,7 @@ zink_create_gfx_pipeline(struct zink_screen *screen, depth_stencil_state.back = state->dyn_state1.depth_stencil_alpha_state->stencil_back; depth_stencil_state.depthWriteEnable = state->dyn_state1.depth_stencil_alpha_state->depth_write; - VkDynamicState dynamicStateEnables[30] = { + VkDynamicState dynamicStateEnables[80] = { VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, @@ -182,19 +211,20 @@ zink_create_gfx_pipeline(struct zink_screen *screen, }; unsigned state_count = 4; if (screen->info.have_EXT_extended_dynamic_state) { - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_COMPARE_OP; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_OP_EXT; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT; - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE_EXT; - 
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_OP; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_CULL_MODE; if (state->sample_locations_enabled) dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT; } else { @@ -203,35 +233,134 @@ zink_create_gfx_pipeline(struct zink_screen *screen, } if (screen->info.have_EXT_vertex_input_dynamic_state) dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_EXT; - else if (screen->info.have_EXT_extended_dynamic_state) - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT; - if (screen->info.have_EXT_extended_dynamic_state2) - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT; + else if (screen->info.have_EXT_extended_dynamic_state && state->uses_dynamic_stride && state->element_state->num_attribs) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE; + if (screen->info.have_EXT_extended_dynamic_state2) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE; + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT; + } + if (screen->info.have_EXT_extended_dynamic_state3) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_POLYGON_MODE_EXT; + dynamicStateEnables[state_count++] = 
VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT; + if (!screen->driver_workarounds.no_linestipple) { + if (screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT; + } + if (screen->have_full_ds3) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_MASK_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT; + if (state->blend_state) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT; + if (screen->info.feats.features.alphaToOne) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT; + if (state->rendering_info.colorAttachmentCount) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT; + } + } + } + } + if (screen->info.have_EXT_color_write_enable) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT; + + assert(state->rast_prim != MESA_PRIM_COUNT || zink_debug & ZINK_DEBUG_SHADERDB); VkPipelineRasterizationLineStateCreateInfoEXT rast_line_state; - if (screen->info.have_EXT_line_rasterization) { + if (screen->info.have_EXT_line_rasterization && + !state->shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) { rast_line_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT; rast_line_state.pNext = rast_state.pNext; 
rast_line_state.stippledLineEnable = VK_FALSE; - rast_line_state.lineRasterizationMode = hw_rast_state->line_mode; + rast_line_state.lineRasterizationMode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; + + if (state->rast_prim == MESA_PRIM_LINES) { + const char *features[4][2] = { + [VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT] = {"",""}, + [VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT] = {"rectangularLines", "stippledRectangularLines"}, + [VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT] = {"bresenhamLines", "stippledBresenhamLines"}, + [VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT] = {"smoothLines", "stippledSmoothLines"}, + }; + static bool warned[6] = {0}; + const VkPhysicalDeviceLineRasterizationFeaturesEXT *line_feats = &screen->info.line_rast_feats; + /* line features can be represented as an array VkBool32[6], + * with the 3 base features preceding the 3 (matching) stippled features + */ + const VkBool32 *feat = &line_feats->rectangularLines; + unsigned mode_idx = hw_rast_state->line_mode - VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; + /* add base mode index, add 3 if stippling is enabled */ + mode_idx += hw_rast_state->line_stipple_enable * 3; + if (*(feat + mode_idx)) + rast_line_state.lineRasterizationMode = hw_rast_state->line_mode; + else if (hw_rast_state->line_stipple_enable && + screen->driver_workarounds.no_linestipple) { + /* drop line stipple, we can emulate it */ + mode_idx -= hw_rast_state->line_stipple_enable * 3; + if (*(feat + mode_idx)) + rast_line_state.lineRasterizationMode = hw_rast_state->line_mode; + /* non-strictLine default lines are either parallelogram or bresenham which while not in GL spec, + * in practice end up being within the two-pixel exception in the GL spec. 
+ */ + else if ((mode_idx != 1) || screen->info.props.limits.strictLines) + warn_missing_feature(warned[mode_idx], features[hw_rast_state->line_mode][0]); + } else if ((mode_idx != 1) || screen->info.props.limits.strictLines) + warn_missing_feature(warned[mode_idx], features[hw_rast_state->line_mode][hw_rast_state->line_stipple_enable]); + } if (hw_rast_state->line_stipple_enable) { - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT; + if (!screen->info.have_EXT_extended_dynamic_state3) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT; rast_line_state.stippledLineEnable = VK_TRUE; } + rast_state.pNext = &rast_line_state; } + assert(state_count < ARRAY_SIZE(dynamicStateEnables)); VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0}; pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables; - pipelineDynamicStateCreateInfo.dynamicStateCount = state_count; VkGraphicsPipelineCreateInfo pci = {0}; pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + if (zink_debug & ZINK_DEBUG_SHADERDB) + pci.flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; + if (!optimize) + pci.flags |= VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT; + } else { + static bool feedback_warn = false; + if (state->feedback_loop) { + if (screen->info.have_EXT_attachment_feedback_loop_layout) + pci.flags |= VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + else + warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout"); + } + if (state->feedback_loop_zs) { + if (screen->info.have_EXT_attachment_feedback_loop_layout) + pci.flags |= VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + else + warn_missing_feature(feedback_warn, 
"EXT_attachment_feedback_loop_layout"); + } + } + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; pci.layout = prog->base.layout; - pci.renderPass = state->render_pass->render_pass; - if (!screen->info.have_EXT_vertex_input_dynamic_state) + if (state->render_pass) + pci.renderPass = state->render_pass->render_pass; + else + pci.pNext = &state->rendering_info; + if (needs_vi) pci.pVertexInputState = &vertex_input_state; pci.pInputAssemblyState = &primitive_state; pci.pRasterizationState = &rast_state; @@ -240,29 +369,39 @@ zink_create_gfx_pipeline(struct zink_screen *screen, pci.pViewportState = &viewport_state; pci.pDepthStencilState = &depth_stencil_state; pci.pDynamicState = &pipelineDynamicStateCreateInfo; + pipelineDynamicStateCreateInfo.dynamicStateCount = state_count; VkPipelineTessellationStateCreateInfo tci = {0}; VkPipelineTessellationDomainOriginStateCreateInfo tdci = {0}; - if (prog->shaders[PIPE_SHADER_TESS_CTRL] && prog->shaders[PIPE_SHADER_TESS_EVAL]) { + unsigned tess_bits = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL); + if ((prog->stages_present & tess_bits) == tess_bits) { tci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; - tci.patchControlPoints = state->vertices_per_patch + 1; + tci.patchControlPoints = state->dyn_state2.vertices_per_patch; pci.pTessellationState = &tci; tci.pNext = &tdci; tdci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO; tdci.domainOrigin = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; } - VkPipelineShaderStageCreateInfo shader_stages[ZINK_SHADER_COUNT]; + VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT]; + VkShaderModuleCreateInfo smci[ZINK_GFX_SHADER_COUNT] = {0}; uint32_t num_stages = 0; - for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { - if (!prog->modules[i]) + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + if (!(prog->stages_present & BITFIELD_BIT(i))) 
continue; VkPipelineShaderStageCreateInfo stage = {0}; stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage.stage = zink_shader_stage(i); - stage.module = prog->modules[i]->shader; + stage.stage = mesa_to_vk_shader_stage(i); stage.pName = "main"; + if (objs[i].mod) { + stage.module = objs[i].mod; + } else { + smci[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + stage.pNext = &smci[i]; + smci[i].codeSize = objs[i].spirv->num_words * sizeof(uint32_t); + smci[i].pCode = objs[i].spirv->words; + } shader_stages[num_stages++] = stage; } assert(num_stages > 0); @@ -270,13 +409,39 @@ zink_create_gfx_pipeline(struct zink_screen *screen, pci.pStages = shader_stages; pci.stageCount = num_stages; - VkPipeline pipeline; - if (vkCreateGraphicsPipelines(screen->dev, prog->base.pipeline_cache, 1, &pci, - NULL, &pipeline) != VK_SUCCESS) { - debug_printf("vkCreateGraphicsPipelines failed\n"); - return VK_NULL_HANDLE; + VkGraphicsShaderGroupCreateInfoNV gci = { + VK_STRUCTURE_TYPE_GRAPHICS_SHADER_GROUP_CREATE_INFO_NV, + NULL, + pci.stageCount, + pci.pStages, + pci.pVertexInputState, + pci.pTessellationState + }; + VkGraphicsPipelineShaderGroupsCreateInfoNV dgci = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV, + pci.pNext, + 1, + &gci, + dgc ? util_dynarray_num_elements(dgc, VkPipeline) : 0, + dgc ? 
dgc->data : NULL + }; + if (zink_debug & ZINK_DEBUG_DGC) { + pci.flags |= VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV; + pci.pNext = &dgci; } + VkPipeline pipeline; + u_rwlock_wrlock(&prog->base.pipeline_cache_lock); + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline), + u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); + return pipeline; } @@ -286,38 +451,533 @@ zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_pro VkComputePipelineCreateInfo pci = {0}; pci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pci.layout = comp->base.layout; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; VkPipelineShaderStageCreateInfo stage = {0}; stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage.module = comp->module->shader; + stage.module = comp->curr->obj.mod; stage.pName = "main"; VkSpecializationInfo sinfo = {0}; - VkSpecializationMapEntry me[3]; - if (state->use_local_size) { - stage.pSpecializationInfo = &sinfo; - sinfo.mapEntryCount = 3; - sinfo.pMapEntries = &me[0]; - sinfo.dataSize = sizeof(state->local_size); - sinfo.pData = &state->local_size[0]; - uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z}; - for (int i = 0; i < 3; i++) { + VkSpecializationMapEntry me[4]; + uint32_t data[4]; + if (state) { + int i = 0; + + if (comp->use_local_size) { + sinfo.mapEntryCount += 3; + sinfo.dataSize += sizeof(state->local_size); + + uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z}; + for (int l = 0; l < 3; l++, i++) { + data[i] = state->local_size[l]; + me[i].size = sizeof(uint32_t); + me[i].constantID = 
ids[l]; + me[i].offset = i * sizeof(uint32_t); + } + } + + if (comp->has_variable_shared_mem) { + sinfo.mapEntryCount += 1; + sinfo.dataSize += sizeof(uint32_t); + data[i] = state->variable_shared_mem; me[i].size = sizeof(uint32_t); - me[i].constantID = ids[i]; + me[i].constantID = ZINK_VARIABLE_SHARED_MEM; me[i].offset = i * sizeof(uint32_t); + i++; } + + if (sinfo.dataSize) { + stage.pSpecializationInfo = &sinfo; + sinfo.pData = data; + sinfo.pMapEntries = me; + } + + assert(i <= ARRAY_SIZE(data)); + STATIC_ASSERT(ARRAY_SIZE(data) == ARRAY_SIZE(me)); } pci.stage = stage; VkPipeline pipeline; - if (vkCreateComputePipelines(screen->dev, comp->base.pipeline_cache, 1, &pci, - NULL, &pipeline) != VK_SUCCESS) { - debug_printf("vkCreateComputePipelines failed\n"); - return VK_NULL_HANDLE; + VkResult result; + u_rwlock_wrlock(&comp->base.pipeline_cache_lock); + VRAM_ALLOC_LOOP(result, + VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, 1, &pci, NULL, &pipeline), + u_rwlock_wrunlock(&comp->base.pipeline_cache_lock); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); + + return pipeline; +} + +VkPipeline +zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipeline_state *state) +{ + VkGraphicsPipelineLibraryCreateInfoEXT gplci = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, + &state->rendering_info, + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT, + }; + + VkPipelineColorBlendStateCreateInfo blend_state = {0}; + blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + if (state->rast_attachment_order) + blend_state.flags |= VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_EXT; + + VkPipelineMultisampleStateCreateInfo ms_state = {0}; + ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + if (state->force_persample_interp) 
{ + ms_state.sampleShadingEnable = VK_TRUE; + ms_state.minSampleShading = 1.0; + } else if (state->min_samples > 0) { + ms_state.sampleShadingEnable = VK_TRUE; + ms_state.minSampleShading = (float)(state->rast_samples + 1) / (state->min_samples + 1); + } + + VkDynamicState dynamicStateEnables[30] = { + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + }; + unsigned state_count = 1; + if (screen->info.have_EXT_extended_dynamic_state) { + if (state->sample_locations_enabled) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT; + } + if (screen->info.have_EXT_color_write_enable) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT; + + if (screen->have_full_ds3) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_MASK_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT; + if (state->blend_state) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT; + if (screen->info.feats.features.alphaToOne) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT; + if (state->rendering_info.colorAttachmentCount) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT; + } + } + } else { + if (state->blend_state) { + blend_state.pAttachments = state->blend_state->attachments; + blend_state.attachmentCount = state->rendering_info.colorAttachmentCount; + blend_state.logicOpEnable = state->blend_state->logicop_enable; + blend_state.logicOp = state->blend_state->logicop_func; + + ms_state.alphaToCoverageEnable = state->blend_state->alpha_to_coverage; + if (state->blend_state->alpha_to_one && 
!screen->info.feats.features.alphaToOne) { + static bool warned = false; + warn_missing_feature(warned, "alphaToOne"); + } + ms_state.alphaToOneEnable = state->blend_state->alpha_to_one; + } + ms_state.rasterizationSamples = state->rast_samples + 1; + /* "If pSampleMask is NULL, it is treated as if the mask has all bits set to 1." + * - Chapter 27. Rasterization + * + * thus it never makes sense to leave this as NULL since gallium will provide correct + * data here as long as sample_mask is initialized on context creation + */ + ms_state.pSampleMask = &state->sample_mask; + } + assert(state_count < ARRAY_SIZE(dynamicStateEnables)); + + VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0}; + pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables; + + VkGraphicsPipelineCreateInfo pci = {0}; + pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pci.pNext = &gplci; + pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; + if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT; + } else { + static bool feedback_warn = false; + if (state->feedback_loop) { + if (screen->info.have_EXT_attachment_feedback_loop_layout) + pci.flags |= VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + else + warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout"); + } + if (state->feedback_loop_zs) { + if (screen->info.have_EXT_attachment_feedback_loop_layout) + pci.flags |= VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + else + warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout"); + } + } + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + 
pipelineDynamicStateCreateInfo.dynamicStateCount = state_count; + if (!screen->have_full_ds3) + pci.pColorBlendState = &blend_state; + pci.pMultisampleState = &ms_state; + pci.pDynamicState = &pipelineDynamicStateCreateInfo; + + VkPipeline pipeline; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); + + return pipeline; +} + +VkPipeline +zink_create_gfx_pipeline_input(struct zink_screen *screen, + struct zink_gfx_pipeline_state *state, + const uint8_t *binding_map, + VkPrimitiveTopology primitive_topology) +{ + VkGraphicsPipelineLibraryCreateInfoEXT gplci = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, + NULL, + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT + }; + + VkPipelineVertexInputStateCreateInfo vertex_input_state; + memset(&vertex_input_state, 0, sizeof(vertex_input_state)); + vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + if (!screen->info.have_EXT_vertex_input_dynamic_state || !state->uses_dynamic_stride) { + vertex_input_state.pVertexBindingDescriptions = state->element_state->b.bindings; + vertex_input_state.vertexBindingDescriptionCount = state->element_state->num_bindings; + vertex_input_state.pVertexAttributeDescriptions = state->element_state->attribs; + vertex_input_state.vertexAttributeDescriptionCount = state->element_state->num_attribs; + if (!state->uses_dynamic_stride) { + for (int i = 0; i < state->element_state->num_bindings; ++i) { + const unsigned buffer_id = binding_map[i]; + VkVertexInputBindingDescription *binding = &state->element_state->b.bindings[i]; + binding->stride = state->vertex_strides[buffer_id]; + } + } + } + + VkPipelineVertexInputDivisorStateCreateInfoEXT vdiv_state; + if (!screen->info.have_EXT_vertex_input_dynamic_state 
&& state->element_state->b.divisors_present) { + memset(&vdiv_state, 0, sizeof(vdiv_state)); + vertex_input_state.pNext = &vdiv_state; + vdiv_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; + vdiv_state.vertexBindingDivisorCount = state->element_state->b.divisors_present; + vdiv_state.pVertexBindingDivisors = state->element_state->b.divisors; + } + + VkPipelineInputAssemblyStateCreateInfo primitive_state = {0}; + primitive_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + primitive_state.topology = primitive_topology; + assert(screen->info.have_EXT_extended_dynamic_state2); + + VkDynamicState dynamicStateEnables[30]; + unsigned state_count = 0; + if (screen->info.have_EXT_vertex_input_dynamic_state) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_EXT; + else if (state->uses_dynamic_stride && state->element_state->num_attribs) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE; + assert(state_count < ARRAY_SIZE(dynamicStateEnables)); + + VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0}; + pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables; + pipelineDynamicStateCreateInfo.dynamicStateCount = state_count; + + VkGraphicsPipelineCreateInfo pci = {0}; + pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pci.pNext = &gplci; + pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + pci.pVertexInputState = &vertex_input_state; + pci.pInputAssemblyState = &primitive_state; + 
pci.pDynamicState = &pipelineDynamicStateCreateInfo; + + VkPipeline pipeline; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); + + return pipeline; +} + +static VkPipeline +create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_object *objs, unsigned stage_mask, VkPipelineLayout layout, VkPipelineCache pipeline_cache) +{ + assert(screen->info.have_EXT_extended_dynamic_state && screen->info.have_EXT_extended_dynamic_state2); + VkPipelineRenderingCreateInfo rendering_info; + rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO; + rendering_info.pNext = NULL; + rendering_info.viewMask = 0; + VkGraphicsPipelineLibraryCreateInfoEXT gplci = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, + &rendering_info, + 0 + }; + if (stage_mask & BITFIELD_BIT(MESA_SHADER_VERTEX)) + gplci.flags |= VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT; + if (stage_mask & BITFIELD_BIT(MESA_SHADER_FRAGMENT)) + gplci.flags |= VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; + + VkPipelineViewportStateCreateInfo viewport_state = {0}; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.viewportCount = 0; + viewport_state.pViewports = NULL; + viewport_state.scissorCount = 0; + viewport_state.pScissors = NULL; + + VkPipelineRasterizationStateCreateInfo rast_state = {0}; + rast_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rast_state.depthBiasEnable = VK_TRUE; + + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {0}; + depth_stencil_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + + VkDynamicState dynamicStateEnables[64] = { + VK_DYNAMIC_STATE_LINE_WIDTH, + 
VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + unsigned state_count = 3; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_COMPARE_OP; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_OP; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_CULL_MODE; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE; + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT; + + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_POLYGON_MODE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT; + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT; + if (screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT; + if 
(!screen->driver_workarounds.no_linestipple) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT; + assert(state_count < ARRAY_SIZE(dynamicStateEnables)); + + VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0}; + pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables; + pipelineDynamicStateCreateInfo.dynamicStateCount = state_count; + + VkGraphicsPipelineCreateInfo pci = {0}; + pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pci.pNext = &gplci; + pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + pci.layout = layout; + pci.pRasterizationState = &rast_state; + pci.pViewportState = &viewport_state; + pci.pDepthStencilState = &depth_stencil_state; + pci.pDynamicState = &pipelineDynamicStateCreateInfo; + + VkPipelineTessellationStateCreateInfo tci = {0}; + VkPipelineTessellationDomainOriginStateCreateInfo tdci = {0}; + unsigned tess_bits = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL); + if ((stage_mask & tess_bits) == tess_bits) { + tci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; + //this is a wild guess; pray for extendedDynamicState2PatchControlPoints + if (!screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) { + static bool warned = false; + warn_missing_feature(warned, "extendedDynamicState2PatchControlPoints"); + } + tci.patchControlPoints = 32; + pci.pTessellationState = &tci; + tci.pNext = &tdci; + tdci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO; + tdci.domainOrigin = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; + } + + VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT]; + uint32_t num_stages = 0; + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + if 
(!(stage_mask & BITFIELD_BIT(i))) + continue; + + VkPipelineShaderStageCreateInfo stage = {0}; + stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage.stage = mesa_to_vk_shader_stage(i); + stage.module = objs[i].mod; + stage.pName = "main"; + shader_stages[num_stages++] = stage; } - zink_screen_update_pipeline_cache(screen, &comp->base); + assert(num_stages > 0); + + pci.pStages = shader_stages; + pci.stageCount = num_stages; + /* Only keep LTO information for full pipeline libs. For separable shaders, they will only + * ever be used with fast linking, and to optimize them a new pipeline lib will be created with full + * link time information for the full set of shader stages (rather than linking in these single-stage libs). + */ + if (num_stages > 1) + pci.flags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; + + VkPipeline pipeline; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); + return VK_NULL_HANDLE; + } + ); return pipeline; } + +VkPipeline +zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_gfx_program *prog) +{ + u_rwlock_wrlock(&prog->base.pipeline_cache_lock); + VkPipeline pipeline = create_gfx_pipeline_library(screen, prog->objs, prog->stages_present, prog->base.layout, prog->base.pipeline_cache); + u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); + return pipeline; +} + +VkPipeline +zink_create_gfx_pipeline_separate(struct zink_screen *screen, struct zink_shader_object *objs, VkPipelineLayout layout, gl_shader_stage stage) +{ + return create_gfx_pipeline_library(screen, objs, BITFIELD_BIT(stage), layout, VK_NULL_HANDLE); +} + +VkPipeline +zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_program *prog, VkPipeline input, VkPipeline *library, unsigned libcount, VkPipeline output, bool optimized, 
bool testonly) +{ + VkPipeline libraries[4]; + VkPipelineLibraryCreateInfoKHR libstate = {0}; + libstate.sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR; + if (input) + libraries[libstate.libraryCount++] = input; + for (unsigned i = 0; i < libcount; i++) + libraries[libstate.libraryCount++] = library[i]; + if (output) + libraries[libstate.libraryCount++] = output; + libstate.pLibraries = libraries; + + VkGraphicsPipelineCreateInfo pci = {0}; + pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pci.layout = prog->base.layout; + if (optimized) + pci.flags = VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT; + else + pci.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + if (testonly) + pci.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + pci.pNext = &libstate; + + if (!input && !output) + pci.flags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; + + VkPipeline pipeline; + u_rwlock_wrlock(&prog->base.pipeline_cache_lock); + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline), + u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); + if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); + return VK_NULL_HANDLE; + } + ); + + return pipeline; +} + + +/* vertex input pipeline library states with dynamic vertex input: only the topology matters */ +struct zink_gfx_input_key * +zink_find_or_create_input_dynamic(struct zink_context *ctx, VkPrimitiveTopology vkmode) +{ + uint32_t hash = hash_gfx_input_dynamic(&ctx->gfx_pipeline_state.input); + struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_inputs, hash, &ctx->gfx_pipeline_state.input); + if (!he) { + struct zink_gfx_input_key *ikey = rzalloc(ctx, struct zink_gfx_input_key); + ikey->idx = 
ctx->gfx_pipeline_state.idx; + ikey->pipeline = zink_create_gfx_pipeline_input(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state, NULL, vkmode); + he = _mesa_set_add_pre_hashed(&ctx->gfx_inputs, hash, ikey); + } + return (struct zink_gfx_input_key *)he->key; +} + +/* vertex input pipeline library states without dynamic vertex input: everything is hashed */ +struct zink_gfx_input_key * +zink_find_or_create_input(struct zink_context *ctx, VkPrimitiveTopology vkmode) +{ + uint32_t hash = hash_gfx_input(&ctx->gfx_pipeline_state.input); + struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_inputs, hash, &ctx->gfx_pipeline_state.input); + if (!he) { + struct zink_gfx_input_key *ikey = rzalloc(ctx, struct zink_gfx_input_key); + if (ctx->gfx_pipeline_state.uses_dynamic_stride) { + memcpy(ikey, &ctx->gfx_pipeline_state.input, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask)); + ikey->element_state = ctx->gfx_pipeline_state.element_state; + } else { + memcpy(ikey, &ctx->gfx_pipeline_state.input, offsetof(struct zink_gfx_input_key, pipeline)); + } + ikey->pipeline = zink_create_gfx_pipeline_input(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state, ikey->element_state->binding_map, vkmode); + he = _mesa_set_add_pre_hashed(&ctx->gfx_inputs, hash, ikey); + } + return (struct zink_gfx_input_key*)he->key; +} + +/* fragment output pipeline library states with dynamic state3 */ +struct zink_gfx_output_key * +zink_find_or_create_output_ds3(struct zink_context *ctx) +{ + uint32_t hash = hash_gfx_output_ds3(&ctx->gfx_pipeline_state); + struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_outputs, hash, &ctx->gfx_pipeline_state); + if (!he) { + struct zink_gfx_output_key *okey = rzalloc(ctx, struct zink_gfx_output_key); + memcpy(okey, &ctx->gfx_pipeline_state, sizeof(uint32_t)); + okey->pipeline = zink_create_gfx_pipeline_output(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state); + he = _mesa_set_add_pre_hashed(&ctx->gfx_outputs, hash, 
okey); + } + return (struct zink_gfx_output_key*)he->key; +} + +/* fragment output pipeline library states without dynamic state3 */ +struct zink_gfx_output_key * +zink_find_or_create_output(struct zink_context *ctx) +{ + uint32_t hash = hash_gfx_output(&ctx->gfx_pipeline_state); + struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_outputs, hash, &ctx->gfx_pipeline_state); + if (!he) { + struct zink_gfx_output_key *okey = rzalloc(ctx, struct zink_gfx_output_key); + memcpy(okey, &ctx->gfx_pipeline_state, offsetof(struct zink_gfx_output_key, pipeline)); + okey->pipeline = zink_create_gfx_pipeline_output(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state); + he = _mesa_set_add_pre_hashed(&ctx->gfx_outputs, hash, okey); + } + return (struct zink_gfx_output_key*)he->key; +} diff --git a/src/gallium/drivers/zink/zink_pipeline.h b/src/gallium/drivers/zink/zink_pipeline.h index 4acc6c44285..355670e9d46 100644 --- a/src/gallium/drivers/zink/zink_pipeline.h +++ b/src/gallium/drivers/zink/zink_pipeline.h @@ -24,87 +24,49 @@ #ifndef ZINK_PIPELINE_H #define ZINK_PIPELINE_H -#include <vulkan/vulkan.h> -#include "pipe/p_state.h" -#include "zink_shader_keys.h" -#include "zink_state.h" +#include "zink_types.h" -struct zink_blend_state; -struct zink_depth_stencil_alpha_state; -struct zink_gfx_program; -struct zink_compute_program; -struct zink_rasterizer_state; -struct zink_render_pass; -struct zink_screen; -struct zink_vertex_elements_state; - -struct zink_gfx_pipeline_state { - uint32_t rast_state : ZINK_RAST_HW_STATE_SIZE; //zink_rasterizer_hw_state - uint32_t vertices_per_patch:5; - uint32_t rast_samples:7; - uint32_t void_alpha_attachments:PIPE_MAX_COLOR_BUFS; - VkSampleMask sample_mask; - - unsigned rp_state; - uint32_t blend_id; - - /* Pre-hashed value for table lookup, invalid when zero. 
- * Members after this point are not included in pipeline state hash key */ - uint32_t hash; - bool dirty; - - struct { - struct zink_depth_stencil_alpha_hw_state *depth_stencil_alpha_state; //non-dynamic state - VkFrontFace front_face; - unsigned num_viewports; - } dyn_state1; - - bool primitive_restart; //dynamic state2 - - VkShaderModule modules[PIPE_SHADER_TYPES - 1]; - bool modules_changed; - - struct zink_vertex_elements_hw_state *element_state; - uint32_t vertex_hash; - - uint32_t final_hash; - - uint32_t vertex_buffers_enabled_mask; - uint32_t vertex_strides[PIPE_MAX_ATTRIBS]; - bool sample_locations_enabled; - bool have_EXT_extended_dynamic_state; - bool have_EXT_extended_dynamic_state2; - uint8_t has_points; //either gs outputs points or prim type is points - struct { - struct zink_shader_key key[5]; - struct zink_shader_key last_vertex; - } shader_keys; - struct zink_blend_state *blend_state; - struct zink_render_pass *render_pass; - VkPipeline pipeline; - uint8_t patch_vertices; - unsigned idx : 8; - enum pipe_prim_type gfx_prim_mode; //pending mode -}; - -struct zink_compute_pipeline_state { - /* Pre-hashed value for table lookup, invalid when zero. 
- * Members after this point are not included in pipeline state hash key */ - uint32_t hash; - bool dirty; - bool use_local_size; - uint32_t local_size[3]; +#ifdef __cplusplus +extern "C" { +#endif - VkPipeline pipeline; -}; +struct zink_gfx_output_key * +zink_find_or_create_output(struct zink_context *ctx); +struct zink_gfx_output_key * +zink_find_or_create_output_ds3(struct zink_context *ctx); +struct zink_gfx_input_key * +zink_find_or_create_input(struct zink_context *ctx, VkPrimitiveTopology vkmode); +struct zink_gfx_input_key * +zink_find_or_create_input_dynamic(struct zink_context *ctx, VkPrimitiveTopology vkmode); VkPipeline zink_create_gfx_pipeline(struct zink_screen *screen, struct zink_gfx_program *prog, + struct zink_shader_object *objs, struct zink_gfx_pipeline_state *state, - VkPrimitiveTopology primitive_topology); + const uint8_t *binding_map, + VkPrimitiveTopology primitive_topology, + bool optimize, + struct util_dynarray *dgc); VkPipeline zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_program *comp, struct zink_compute_pipeline_state *state); + +VkPipeline +zink_create_gfx_pipeline_input(struct zink_screen *screen, + struct zink_gfx_pipeline_state *state, + const uint8_t *binding_map, + VkPrimitiveTopology primitive_topology); +VkPipeline +zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_gfx_program *prog); +VkPipeline +zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipeline_state *state); +VkPipeline +zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_program *prog, VkPipeline input, VkPipeline *library, unsigned libcount, VkPipeline output, bool optimized, bool testonly); +VkPipeline +zink_create_gfx_pipeline_separate(struct zink_screen *screen, struct zink_shader_object *objs, VkPipelineLayout layout, gl_shader_stage stage); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/gallium/drivers/zink/zink_program.c 
b/src/gallium/drivers/zink/zink_program.c index 0aedff77d4e..4034713cbde 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -27,32 +27,28 @@ #include "zink_context.h" #include "zink_descriptors.h" #include "zink_helpers.h" +#include "zink_pipeline.h" #include "zink_render_pass.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_state.h" #include "zink_inlines.h" -#include "util/hash_table.h" -#include "util/set.h" +#include "util/memstream.h" #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_prim.h" -#include "tgsi/tgsi_from_mesa.h" +#include "nir_serialize.h" +#include "nir/nir_draw_helpers.h" /* for pipeline cache */ #define XXH_INLINE_ALL #include "util/xxhash.h" -struct gfx_pipeline_cache_entry { - struct zink_gfx_pipeline_state state; - VkPipeline pipeline; -}; - -struct compute_pipeline_cache_entry { - struct zink_compute_pipeline_state state; - VkPipeline pipeline; -}; +static void +precompile_job(void *data, void *gdata, int thread_index); +struct zink_gfx_program * +create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch); void debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr) @@ -66,298 +62,1099 @@ debug_describe_zink_compute_program(char *buf, const struct zink_compute_program sprintf(buf, "zink_compute_program"); } -static bool -shader_key_matches(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms) +ALWAYS_INLINE static bool +shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms) { - if (zm->key_size != key->size || zm->num_uniforms != num_uniforms) + if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask || + zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle) return false; - return !memcmp(zm->key, key, 
zm->key_size) && - (!num_uniforms || !memcmp(zm->key + zm->key_size, key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))); + const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0; + return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) && + (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size, + key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))); +} + +ALWAYS_INLINE static bool +shader_key_matches(const struct zink_shader_module *zm, + const struct zink_shader_key *key, unsigned num_uniforms, + bool has_inline, bool has_nonseamless) +{ + const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0; + if (has_inline) { + if (zm->num_uniforms != num_uniforms || + (num_uniforms && + memcmp(zm->key + zm->key_size + nonseamless_size, + key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)))) + return false; + } + if (!has_nonseamless) { + if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask || + (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size))) + return false; + } + if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle) + return false; + return !memcmp(zm->key, key, zm->key_size); } static uint32_t shader_module_hash(const struct zink_shader_module *zm) { - unsigned key_size = zm->key_size + zm->num_uniforms * sizeof(uint32_t); + const uint32_t nonseamless_size = zm->has_nonseamless ? 
sizeof(uint32_t) : 0; + unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t); return _mesa_hash_data(zm->key, key_size); } -static struct zink_shader_module * -get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen, - struct zink_shader *zs, struct zink_gfx_program *prog, - struct zink_gfx_pipeline_state *state) +ALWAYS_INLINE static void +gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen, + struct zink_shader *zs, struct zink_gfx_program *prog, + struct zink_gfx_pipeline_state *state, + bool has_inline, //is inlining enabled? + bool has_nonseamless, //is nonseamless ext present? + unsigned *inline_size, unsigned *nonseamless_size) { - gl_shader_stage stage = zs->nir->info.stage; - enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage); - VkShaderModule mod; - struct zink_shader_module *zm = NULL; - unsigned base_size = 0; - struct zink_shader_key *key = &state->shader_keys.key[pstage]; - - if (ctx && zs->nir->info.num_inlinable_uniforms && - ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(pstage)) { - if (prog->inlined_variant_count[pstage] < ZINK_MAX_INLINED_VARIANTS) - base_size = zs->nir->info.num_inlinable_uniforms; + gl_shader_stage stage = zs->info.stage; + struct zink_shader_key *key = &state->shader_keys.key[stage]; + if (has_inline && ctx && zs->info.num_inlinable_uniforms && + ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) { + if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS)) + *inline_size = zs->info.num_inlinable_uniforms; else key->inline_uniforms = false; } + if (!has_nonseamless && key->base.nonseamless_cube_mask) + *nonseamless_size = sizeof(uint32_t); +} - struct zink_shader_module *iter, *next; - LIST_FOR_EACH_ENTRY_SAFE(iter, next, &prog->shader_cache[pstage][!!base_size], list) { - if (!shader_key_matches(iter, key, base_size)) - continue; - list_delinit(&iter->list); - zm 
= iter; - break; - } - +ALWAYS_INLINE static struct zink_shader_module * +create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen, + struct zink_shader *zs, struct zink_gfx_program *prog, + gl_shader_stage stage, + struct zink_gfx_pipeline_state *state, + unsigned inline_size, unsigned nonseamless_size, + bool has_inline, //is inlining enabled? + bool has_nonseamless) //is nonseamless ext present? +{ + struct zink_shader_module *zm; + const struct zink_shader_key *key = &state->shader_keys.key[stage]; + /* non-generated tcs won't use the shader key */ + const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; + const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle || + (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle); + zm = malloc(sizeof(struct zink_shader_module) + key->size + + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) + + (shadow_needs_shader_swizzle ? 
sizeof(struct zink_zs_swizzle_key) : 0)); if (!zm) { - zm = malloc(sizeof(struct zink_shader_module) + key->size + base_size * sizeof(uint32_t)); - if (!zm) { - return NULL; - } - mod = zink_shader_compile(screen, zs, prog->nir[stage], key); - if (!mod) { - FREE(zm); - return NULL; - } - zm->shader = mod; - list_inithead(&zm->list); - zm->num_uniforms = base_size; + return NULL; + } + unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices; + if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { + assert(ctx); //TODO async + zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); + } else { + zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base); + } + if (!zm->obj.mod) { + FREE(zm); + return NULL; + } + zm->shobj = prog->base.uses_shobj; + zm->num_uniforms = inline_size; + if (!is_nongenerated_tcs) { zm->key_size = key->size; memcpy(zm->key, key, key->size); - if (base_size) - memcpy(zm->key + key->size, &key->base, base_size * sizeof(uint32_t)); + } else { + zm->key_size = 0; + memset(zm->key, 0, key->size); + } + if (!has_nonseamless && nonseamless_size) { + /* nonseamless mask gets added to base key if it exists */ + memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size); + } + zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle; + zm->has_nonseamless = has_nonseamless ? 
0 : !!nonseamless_size; + if (inline_size) + memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t)); + if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) + zm->hash = patch_vertices; + else zm->hash = shader_module_hash(zm); - zm->default_variant = !base_size && list_is_empty(&prog->shader_cache[pstage][0]); - if (base_size) - prog->inlined_variant_count[pstage]++; + if (unlikely(shadow_needs_shader_swizzle)) { + memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); + zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); + } + zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); + if (inline_size) + prog->inlined_variant_count[stage]++; + util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm); + return zm; +} + +ALWAYS_INLINE static struct zink_shader_module * +get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen, + struct zink_shader *zs, struct zink_gfx_program *prog, + gl_shader_stage stage, + struct zink_gfx_pipeline_state *state, + unsigned inline_size, unsigned nonseamless_size, + bool has_inline, //is inlining enabled? + bool has_nonseamless) //is nonseamless ext present? +{ + const struct zink_shader_key *key = &state->shader_keys.key[stage]; + /* non-generated tcs won't use the shader key */ + const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; + const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) || + (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle)); + + struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? 
!!inline_size : 0]; + unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); + struct zink_shader_module **pzm = shader_cache->data; + for (unsigned i = 0; i < count; i++) { + struct zink_shader_module *iter = pzm[i]; + if (is_nongenerated_tcs) { + if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0)) + continue; + } else { + if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size) + continue; + if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless)) + continue; + if (unlikely(shadow_needs_shader_swizzle)) { + /* shadow swizzle data needs a manual compare since it's so fat */ + if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t), + &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key))) + continue; + } + } + if (i > 0) { + struct zink_shader_module *zero = pzm[0]; + pzm[0] = iter; + pzm[i] = zero; + } + return iter; + } + + return NULL; +} + +ALWAYS_INLINE static struct zink_shader_module * +create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen, + struct zink_shader *zs, struct zink_gfx_program *prog, + gl_shader_stage stage, + struct zink_gfx_pipeline_state *state) +{ + struct zink_shader_module *zm; + uint16_t *key; + unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8); + bool shadow_needs_shader_swizzle = false; + if (zs == prog->last_vertex_stage) { + key = (uint16_t*)&state->shader_keys_optimal.key.vs_base; + } else if (stage == MESA_SHADER_FRAGMENT) { + key = (uint16_t*)&state->shader_keys_optimal.key.fs; + shadow_needs_shader_swizzle = ctx ? 
ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false; + } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) { + key = (uint16_t*)&state->shader_keys_optimal.key.tcs; + } else { + key = NULL; + } + size_t key_size = sizeof(uint16_t); + zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0)); + if (!zm) { + return NULL; + } + if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { + assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints); + unsigned patch_vertices = 3; + if (ctx) { + struct zink_tcs_key *tcs = (struct zink_tcs_key*)key; + patch_vertices = tcs->patch_vertices; + } + zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); + } else { + zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), + (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? 
&ctx->di.zs_swizzle[stage] : NULL, &prog->base); } - list_add(&zm->list, &prog->shader_cache[pstage][!!base_size]); + if (!zm->obj.mod) { + FREE(zm); + return NULL; + } + zm->shobj = prog->base.uses_shobj; + /* non-generated tcs won't use the shader key */ + const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; + if (key && !is_nongenerated_tcs) { + zm->key_size = key_size; + uint16_t *data = (uint16_t*)zm->key; + /* sanitize actual key bits */ + *data = (*key) & mask; + if (unlikely(shadow_needs_shader_swizzle)) + memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); + } + zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); + util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm); return zm; } +ALWAYS_INLINE static struct zink_shader_module * +get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen, + struct zink_shader *zs, struct zink_gfx_program *prog, + gl_shader_stage stage, + struct zink_gfx_pipeline_state *state) +{ + /* non-generated tcs won't use the shader key */ + const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; + bool shadow_needs_shader_swizzle = false; + uint16_t *key; + unsigned mask = stage == MESA_SHADER_FRAGMENT ? 
BITFIELD_MASK(16) : BITFIELD_MASK(8); + if (zs == prog->last_vertex_stage) { + key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base; + } else if (stage == MESA_SHADER_FRAGMENT) { + key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs; + shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle; + } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) { + key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs; + } else { + key = NULL; + } + struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0]; + unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); + struct zink_shader_module **pzm = shader_cache->data; + for (unsigned i = 0; i < count; i++) { + struct zink_shader_module *iter = pzm[i]; + if (is_nongenerated_tcs) { + /* always match */ + } else if (key) { + uint16_t val = (*key) & mask; + /* no key is bigger than uint16_t */ + if (memcmp(iter->key, &val, sizeof(uint16_t))) + continue; + if (unlikely(shadow_needs_shader_swizzle)) { + /* shadow swizzle data needs a manual compare since it's so fat */ + if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key))) + continue; + } + } + if (i > 0) { + struct zink_shader_module *zero = pzm[0]; + pzm[0] = iter; + pzm[i] = zero; + } + return iter; + } + + return NULL; +} + static void zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm) { - VKSCR(DestroyShaderModule)(screen->dev, zm->shader, NULL); + if (zm->shobj) + VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL); + else + VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL); + ralloc_free(zm->obj.spirv); free(zm); } static void -destroy_shader_cache(struct zink_screen *screen, struct list_head *sc) +destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc) { - struct zink_shader_module *zm, *next; - 
LIST_FOR_EACH_ENTRY_SAFE(zm, next, sc, list) { - list_delinit(&zm->list); + while (util_dynarray_contains(sc, void*)) { + struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*); zink_destroy_shader_module(screen, zm); } } -static void -update_shader_modules(struct zink_context *ctx, +ALWAYS_INLINE static void +update_gfx_shader_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, uint32_t mask, - struct zink_gfx_pipeline_state *state) + struct zink_gfx_pipeline_state *state, + bool has_inline, //is inlining enabled? + bool has_nonseamless) //is nonseamless ext present? { bool hash_changed = false; bool default_variants = true; - bool first = !prog->modules[PIPE_SHADER_VERTEX]; + assert(prog->objs[MESA_SHADER_VERTEX].mod); uint32_t variant_hash = prog->last_variant_hash; - u_foreach_bit(pstage, mask) { - assert(prog->shaders[pstage]); - struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[pstage], prog, state); - if (prog->modules[pstage] == zm) + prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags; + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!(mask & BITFIELD_BIT(i))) continue; - if (prog->modules[pstage]) - variant_hash ^= prog->modules[pstage]->hash; + + assert(prog->shaders[i]); + + unsigned inline_size = 0, nonseamless_size = 0; + gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size); + struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state, + inline_size, nonseamless_size, has_inline, has_nonseamless); + if (!zm) + zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state, + inline_size, nonseamless_size, has_inline, has_nonseamless); + state->modules[i] = zm->obj.mod; + if (prog->objs[i].mod == zm->obj.mod) + continue; + prog->optimal_keys &= 
!prog->shaders[i]->non_fs.is_generated; + variant_hash ^= prog->module_hash[i]; hash_changed = true; default_variants &= zm->default_variant; - prog->modules[pstage] = zm; - variant_hash ^= prog->modules[pstage]->hash; - state->modules[pstage] = zm->shader; + prog->objs[i] = zm->obj; + prog->objects[i] = zm->obj.obj; + prog->module_hash[i] = zm->hash; + if (has_inline) { + if (zm->num_uniforms) + prog->inline_variants |= BITFIELD_BIT(i); + else + prog->inline_variants &= ~BITFIELD_BIT(i); + } + variant_hash ^= prog->module_hash[i]; } if (hash_changed && state) { - if (!first && likely(state->pipeline)) //avoid on first hash - state->final_hash ^= prog->last_variant_hash; - - if (default_variants && !first) + if (default_variants) prog->last_variant_hash = prog->default_variant_hash; - else { + else prog->last_variant_hash = variant_hash; - if (first) { - p_atomic_dec(&prog->base.reference.count); - prog->default_variant_hash = prog->last_variant_hash; - } - } - state->final_hash ^= prog->last_variant_hash; state->modules_changed = true; } } +static void +generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) +{ + assert(!prog->objs[MESA_SHADER_VERTEX].mod); + uint32_t variant_hash = 0; + bool default_variants = true; + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!(prog->stages_present & BITFIELD_BIT(i))) + continue; + + assert(prog->shaders[i]); + + unsigned inline_size = 0, nonseamless_size = 0; + gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, + screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map, + &inline_size, &nonseamless_size); + struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state, + inline_size, nonseamless_size, + screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map); + state->modules[i] = zm->obj.mod; + prog->objs[i] = 
zm->obj; + prog->objects[i] = zm->obj.obj; + prog->module_hash[i] = zm->hash; + if (zm->num_uniforms) + prog->inline_variants |= BITFIELD_BIT(i); + default_variants &= zm->default_variant; + variant_hash ^= prog->module_hash[i]; + } + + state->modules_changed = true; + + prog->last_variant_hash = variant_hash; + if (default_variants) + prog->default_variant_hash = prog->last_variant_hash; +} + +static void +generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) +{ + assert(!prog->objs[MESA_SHADER_VERTEX].mod); + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!(prog->stages_present & BITFIELD_BIT(i))) + continue; + + assert(prog->shaders[i]); + + struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state); + prog->objs[i] = zm->obj; + prog->objects[i] = zm->obj.obj; + } + + state->modules_changed = true; + prog->last_variant_hash = state->optimal_key; +} + static uint32_t -hash_gfx_pipeline_state(const void *key) +hash_pipeline_lib_generated_tcs(const void *key) { - const struct zink_gfx_pipeline_state *state = key; - uint32_t hash = _mesa_hash_data(key, offsetof(struct zink_gfx_pipeline_state, hash)); - if (!state->have_EXT_extended_dynamic_state2) - hash = XXH32(&state->primitive_restart, 1, hash); - if (state->have_EXT_extended_dynamic_state) - return hash; - return XXH32(&state->dyn_state1, sizeof(state->dyn_state1), hash); + const struct zink_gfx_library_key *gkey = key; + return gkey->optimal_key; } + static bool -equals_gfx_pipeline_state(const void *a, const void *b) +equals_pipeline_lib_generated_tcs(const void *a, const void *b) { - const struct zink_gfx_pipeline_state *sa = a; - const struct zink_gfx_pipeline_state *sb = b; - if (!sa->have_EXT_extended_dynamic_state) { - if (sa->vertex_buffers_enabled_mask != sb->vertex_buffers_enabled_mask) - return false; - /* if we don't have 
dynamic states, we have to hash the enabled vertex buffer bindings */ - uint32_t mask_a = sa->vertex_buffers_enabled_mask; - uint32_t mask_b = sb->vertex_buffers_enabled_mask; - while (mask_a || mask_b) { - unsigned idx_a = u_bit_scan(&mask_a); - unsigned idx_b = u_bit_scan(&mask_b); - if (sa->vertex_strides[idx_a] != sb->vertex_strides[idx_b]) - return false; - } - if (sa->dyn_state1.front_face != sb->dyn_state1.front_face) - return false; - if (!!sa->dyn_state1.depth_stencil_alpha_state != !!sb->dyn_state1.depth_stencil_alpha_state || - (sa->dyn_state1.depth_stencil_alpha_state && - memcmp(sa->dyn_state1.depth_stencil_alpha_state, sb->dyn_state1.depth_stencil_alpha_state, - sizeof(struct zink_depth_stencil_alpha_hw_state)))) - return false; + return !memcmp(a, b, sizeof(uint32_t)); +} + +static uint32_t +hash_pipeline_lib(const void *key) +{ + const struct zink_gfx_library_key *gkey = key; + /* remove generated tcs bits */ + return zink_shader_key_optimal_no_tcs(gkey->optimal_key); +} + +static bool +equals_pipeline_lib(const void *a, const void *b) +{ + const struct zink_gfx_library_key *ak = a; + const struct zink_gfx_library_key *bk = b; + /* remove generated tcs bits */ + uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key); + uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key); + return val_a == val_b; +} + +uint32_t +hash_gfx_input_dynamic(const void *key) +{ + const struct zink_gfx_input_key *ikey = key; + return ikey->idx; +} + +static bool +equals_gfx_input_dynamic(const void *a, const void *b) +{ + const struct zink_gfx_input_key *ikey_a = a; + const struct zink_gfx_input_key *ikey_b = b; + return ikey_a->idx == ikey_b->idx; +} + +uint32_t +hash_gfx_input(const void *key) +{ + const struct zink_gfx_input_key *ikey = key; + if (ikey->uses_dynamic_stride) + return ikey->input; + return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline)); +} + +static bool +equals_gfx_input(const void *a, const void *b) +{ + const 
struct zink_gfx_input_key *ikey_a = a; + const struct zink_gfx_input_key *ikey_b = b; + if (ikey_a->uses_dynamic_stride) + return ikey_a->element_state == ikey_b->element_state && + !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask)); + return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline)); +} + +uint32_t +hash_gfx_output_ds3(const void *key) +{ + const uint8_t *data = key; + return _mesa_hash_data(data, sizeof(uint32_t)); +} + +static bool +equals_gfx_output_ds3(const void *a, const void *b) +{ + const uint8_t *da = a; + const uint8_t *db = b; + return !memcmp(da, db, sizeof(uint32_t)); +} + +uint32_t +hash_gfx_output(const void *key) +{ + const uint8_t *data = key; + return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline)); +} + +static bool +equals_gfx_output(const void *a, const void *b) +{ + const uint8_t *da = a; + const uint8_t *db = b; + return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline)); +} + +ALWAYS_INLINE static void +update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->driconf.inline_uniforms || prog->needs_inlining) + update_gfx_shader_modules(ctx, screen, prog, + ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state, + true, has_nonseamless); + else + update_gfx_shader_modules(ctx, screen, prog, + ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state, + false, has_nonseamless); +} + +static void +update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->info.have_EXT_non_seamless_cube_map) + update_gfx_program_nonseamless(ctx, prog, true); + else + update_gfx_program_nonseamless(ctx, prog, false); +} + +void +zink_gfx_program_update(struct zink_context *ctx) +{ + if (ctx->last_vertex_stage_dirty) { + 
gl_shader_stage pstage = ctx->last_vertex_stage->info.stage; + ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage); + memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base, + &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base, + sizeof(struct zink_vs_key_base)); + ctx->last_vertex_stage_dirty = false; } - if (!sa->have_EXT_extended_dynamic_state2) { - if (sa->primitive_restart != sb->primitive_restart) - return false; + if (ctx->gfx_dirty) { + struct zink_gfx_program *prog = NULL; + + simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; + const uint32_t hash = ctx->gfx_hash; + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); + /* this must be done before prog is updated */ + if (ctx->curr_program) + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + if (entry) { + prog = (struct zink_gfx_program*)entry->data; + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i)) + ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod; + } + /* ensure variants are always updated if keys have changed since last use */ + ctx->dirty_gfx_stages |= prog->stages_present; + update_gfx_program(ctx, prog); + } else { + ctx->dirty_gfx_stages |= ctx->shader_stages; + prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash); + zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false); + _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); + prog->base.removed = false; + generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state); + } + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + if (prog && prog != ctx->curr_program) + 
zink_batch_reference_program(&ctx->batch, &prog->base); + ctx->curr_program = prog; + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + ctx->gfx_dirty = false; + } else if (ctx->dirty_gfx_stages) { + /* remove old hash */ + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + update_gfx_program(ctx, ctx->curr_program); + /* apply new hash */ + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; } - return !memcmp(sa->modules, sb->modules, sizeof(sa->modules)) && - !memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash)); + ctx->dirty_gfx_stages = 0; +} + +ALWAYS_INLINE static bool +update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->info.have_EXT_graphics_pipeline_library) + util_queue_fence_wait(&prog->base.cache_fence); + struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); + if (!zm) { + zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); + perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage)); + } + + bool changed = prog->objs[pstage].mod != zm->obj.mod; + prog->objs[pstage] = zm->obj; + prog->objects[pstage] = zm->obj.obj; + return changed; +} + +static void +update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog) +{ + const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key; + const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash; + if (key->vs_bits != last_prog_key->vs_bits) { + assert(!prog->is_separable); + bool changed = update_gfx_shader_module_optimal(ctx, prog, 
ctx->last_vertex_stage->info.stage); + ctx->gfx_pipeline_state.modules_changed |= changed; + } + const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT)); + if (key->fs_bits != last_prog_key->fs_bits || + /* always recheck shadow swizzles since they aren't directly part of the key */ + unlikely(shadow_needs_shader_swizzle)) { + assert(!prog->is_separable); + bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT); + ctx->gfx_pipeline_state.modules_changed |= changed; + if (unlikely(shadow_needs_shader_swizzle)) { + struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data; + ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t); + } + } + if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated && + key->tcs_bits != last_prog_key->tcs_bits) { + assert(!prog->is_separable); + bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL); + ctx->gfx_pipeline_state.modules_changed |= changed; + } + prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; +} + +static struct zink_gfx_program * +replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_gfx_program *real = prog->full_prog ? 
+ prog->full_prog : + /* this will be NULL with ZINK_DEBUG_NOOPT */ + zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash); + entry->data = real; + entry->key = real->shaders; + real->base.removed = false; + zink_gfx_program_reference(screen, &prog->full_prog, NULL); + prog->base.removed = true; + return real; } void -zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog) +zink_gfx_program_update_optimal(struct zink_context *ctx) { - update_shader_modules(ctx, zink_screen(ctx->base.screen), prog, ctx->dirty_shader_stages & prog->stages_present, &ctx->gfx_pipeline_state); + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (ctx->gfx_dirty) { + struct zink_gfx_program *prog = NULL; + ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; + const uint32_t hash = ctx->gfx_hash; + simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); + + if (ctx->curr_program) + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + if (entry) { + prog = (struct zink_gfx_program*)entry->data; + bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx)); + if (prog->is_separable) { + /* shader variants can't be handled by separable programs: sync and compile */ + if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace) + util_queue_fence_wait(&prog->base.cache_fence); + /* If the optimized linked pipeline is done compiling, swap it into place. 
*/ + if (util_queue_fence_is_signalled(&prog->base.cache_fence) && + /* but only if needed for ZINK_DEBUG=noopt */ + (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) { + prog = replace_separable_prog(ctx, entry, prog); + } + } + update_gfx_program_optimal(ctx, prog); + } else { + ctx->dirty_gfx_stages |= ctx->shader_stages; + prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch); + prog->base.removed = false; + _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); + if (!prog->is_separable) { + zink_screen_get_pipeline_cache(screen, &prog->base, false); + perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n"); + generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); + } + } + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + if (prog && prog != ctx->curr_program) + zink_batch_reference_program(&ctx->batch, &prog->base); + ctx->curr_program = prog; + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + } else if (ctx->dirty_gfx_stages) { + /* remove old hash */ + ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + + bool must_replace = ctx->curr_program->base.uses_shobj ? 
!zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx)); + if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) { + struct zink_gfx_program *prog = ctx->curr_program; + + util_queue_fence_wait(&prog->base.cache_fence); + /* shader variants can't be handled by separable programs: sync and compile */ + perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n"); + struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; + const uint32_t hash = ctx->gfx_hash; + simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); + ctx->curr_program = replace_separable_prog(ctx, entry, prog); + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + } + update_gfx_program_optimal(ctx, ctx->curr_program); + /* apply new hash */ + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + } + ctx->dirty_gfx_stages = 0; + ctx->gfx_dirty = false; + ctx->last_vertex_stage_dirty = false; } -VkPipelineLayout -zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg) +static void +optimized_compile_job(void *data, void *gdata, int thread_index) { - VkPipelineLayoutCreateInfo plci = {0}; - plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + struct zink_gfx_pipeline_cache_entry *pc_entry = data; + struct zink_screen *screen = gdata; + VkPipeline pipeline; + if (pc_entry->gpl.gkey) + pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false); + else + pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, 
pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL); + if (pipeline) { + pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline; + pc_entry->pipeline = pipeline; + } +} - plci.pSetLayouts = pg->dsl; - plci.setLayoutCount = pg->num_dsl; +static void +optimized_shobj_compile_job(void *data, void *gdata, int thread_index) +{ + struct zink_gfx_pipeline_cache_entry *pc_entry = data; + struct zink_screen *screen = gdata; - VkPushConstantRange pcr[2] = {0}; - if (pg->is_compute) { - if (((struct zink_compute_program*)pg)->shader->nir->info.stage == MESA_SHADER_KERNEL) { - pcr[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - pcr[0].offset = 0; - pcr[0].size = sizeof(struct zink_cs_push_constant); - plci.pushConstantRangeCount = 1; + struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT]; + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + objs[i].mod = VK_NULL_HANDLE; + objs[i].spirv = pc_entry->shobjs[i].spirv; + } + pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL); + /* no unoptimized_pipeline dance */ +} + +void +zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->driver_workarounds.disable_optimized_compile) + return; + if (zink_debug & ZINK_DEBUG_NOBGC) { + if (pc_entry->prog->base.uses_shobj) + optimized_shobj_compile_job(pc_entry, screen, 0); + else + optimized_compile_job(pc_entry, screen, 0); + } else { + util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence, + pc_entry->prog->base.uses_shobj ? 
optimized_shobj_compile_job : optimized_compile_job, NULL, 0); + } +} + +void +zink_program_finish(struct zink_context *ctx, struct zink_program *pg) +{ + util_queue_fence_wait(&pg->cache_fence); + if (pg->is_compute) + return; + struct zink_gfx_program *prog = (struct zink_gfx_program*)pg; + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + hash_table_foreach(&prog->pipelines[r][i], entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; + util_queue_fence_wait(&pc_entry->fence); + } + } + } +} + +static void +update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_shader *zs = comp->shader; + struct zink_shader_module *zm = NULL; + unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0; + struct zink_shader_key *key = &ctx->compute_pipeline_state.key; + ASSERTED bool check_robustness = screen->driver_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS); + assert(zink_cs_key(key)->robust_access == check_robustness); + + if (ctx && zs->info.num_inlinable_uniforms && + ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) { + if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS) + inline_size = zs->info.num_inlinable_uniforms; + else + key->inline_uniforms = false; + } + if (key->base.nonseamless_cube_mask) + nonseamless_size = sizeof(uint32_t); + if (key->base.needs_zs_shader_swizzle) + zs_swizzle_size = sizeof(struct zink_zs_swizzle_key); + + if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) { + struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size]; + unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); + struct zink_shader_module **pzm = shader_cache->data; + for (unsigned i = 0; i < 
count; i++) { + struct zink_shader_module *iter = pzm[i]; + if (!shader_key_matches(iter, key, inline_size, + screen->driconf.inline_uniforms, + screen->info.have_EXT_non_seamless_cube_map)) + continue; + if (unlikely(zs_swizzle_size)) { + /* zs swizzle data needs a manual compare since it's so fat */ + if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t), + &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size)) + continue; + } + if (i > 0) { + struct zink_shader_module *zero = pzm[0]; + pzm[0] = iter; + pzm[i] = zero; + } + zm = iter; } } else { - pcr[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - pcr[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed); - pcr[0].size = 2 * sizeof(unsigned); - pcr[1].stageFlags = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - pcr[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level); - pcr[1].size = sizeof(float) * 6; - plci.pushConstantRangeCount = 2; + zm = comp->module; + } + + if (!zm) { + zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size); + if (!zm) { + return; + } + zm->shobj = false; + zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? 
&ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base); + if (!zm->obj.spirv) { + FREE(zm); + return; + } + zm->num_uniforms = inline_size; + zm->key_size = key->size; + memcpy(zm->key, key, key->size); + zm->has_nonseamless = !!nonseamless_size; + zm->needs_zs_shader_swizzle = !!zs_swizzle_size; + assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size); + if (nonseamless_size) + memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size); + if (inline_size) + memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t)); + if (zs_swizzle_size) + memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size); + + zm->hash = shader_module_hash(zm); + zm->default_variant = false; + if (inline_size) + comp->inlined_variant_count++; + + /* this is otherwise the default variant, which is stored as comp->module */ + if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) + util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm); + } + if (comp->curr == zm) + return; + ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; + comp->curr = zm; + ctx->compute_pipeline_state.module_hash = zm->hash; + ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; + ctx->compute_pipeline_state.module_changed = true; +} + +void +zink_update_compute_program(struct zink_context *ctx) +{ + util_queue_fence_wait(&ctx->curr_compute->base.cache_fence); + update_cs_shader_module(ctx, ctx->curr_compute); +} + +VkPipelineLayout +zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags) +{ + VkPipelineLayoutCreateInfo plci = {0}; + plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + 
plci.flags = flags; + + plci.pSetLayouts = dsl; + plci.setLayoutCount = num_dsl; + + VkPushConstantRange pcr; + if (!is_compute) { + pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + pcr.offset = 0; + pcr.size = sizeof(struct zink_gfx_push_constant); + plci.pushConstantRangeCount = 1; + plci.pPushConstantRanges = &pcr; } - plci.pPushConstantRanges = &pcr[0]; VkPipelineLayout layout; - if (VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout) != VK_SUCCESS) { - debug_printf("vkCreatePipelineLayout failed!\n"); + VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout); + if (result != VK_SUCCESS) { + mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result)); return VK_NULL_HANDLE; } return layout; } -static void -assign_io(struct zink_gfx_program *prog, struct zink_shader *stages[ZINK_SHADER_COUNT]) +static void * +create_program(struct zink_context *ctx, bool is_compute) { - struct zink_shader *shaders[PIPE_SHADER_TYPES]; - - /* build array in pipeline order */ - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) - shaders[tgsi_processor_to_shader_stage(i)] = stages[i]; + struct zink_program *pg = rzalloc_size(NULL, is_compute ? 
sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program)); + if (!pg) + return NULL; + + pipe_reference_init(&pg->reference, 1); + u_rwlock_init(&pg->pipeline_cache_lock); + util_queue_fence_init(&pg->cache_fence); + pg->is_compute = is_compute; + pg->ctx = ctx; + return (void*)pg; +} +static void +assign_io(struct zink_screen *screen, + nir_shader *shaders[ZINK_GFX_SHADER_COUNT]) +{ for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) { - nir_shader *producer = shaders[i]->nir; - for (unsigned j = i + 1; j < ZINK_SHADER_COUNT; i++, j++) { - struct zink_shader *consumer = shaders[j]; + nir_shader *producer = shaders[i]; + for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) { + nir_shader *consumer = shaders[j]; if (!consumer) continue; - if (!prog->nir[producer->info.stage]) - prog->nir[producer->info.stage] = nir_shader_clone(prog, producer); - if (!prog->nir[j]) - prog->nir[j] = nir_shader_clone(prog, consumer->nir); - zink_compiler_assign_io(prog->nir[producer->info.stage], prog->nir[j]); + zink_compiler_assign_io(screen, producer, consumer); i = j; break; } } } +void +zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs) +{ + if (!p_atomic_dec_zero(&libs->refcount)) + return; + + simple_mtx_destroy(&libs->lock); + set_foreach_remove(&libs->libs, he) { + struct zink_gfx_library_key *gkey = (void*)he->key; + VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL); + FREE(gkey); + } + ralloc_free(libs->libs.table); + FREE(libs); +} + +static struct zink_gfx_lib_cache * +create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs) +{ + struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache); + libs->stages_present = prog->stages_present; + if (generated_tcs) + libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + simple_mtx_init(&libs->lock, mtx_plain); + if (generated_tcs) + _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs); + else + 
_mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib); + return libs; +} + +static struct zink_gfx_lib_cache * +find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog) +{ + unsigned stages_present = prog->stages_present; + bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated; + if (generated_tcs) + stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + unsigned idx = zink_program_cache_stages(stages_present); + struct set *ht = &screen->pipeline_libs[idx]; + const uint32_t hash = prog->gfx_hash; + + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + bool found = false; + struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found); + struct zink_gfx_lib_cache *libs; + if (found) { + libs = (void*)entry->key; + } else { + libs = create_lib_cache(prog, generated_tcs); + memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders)); + entry->key = libs; + unsigned refs = 0; + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) { + simple_mtx_lock(&prog->shaders[i]->lock); + util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs); + simple_mtx_unlock(&prog->shaders[i]->lock); + refs++; + } + } + p_atomic_set(&libs->refcount, refs); + } + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + return libs; +} + struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, - struct zink_shader *stages[ZINK_SHADER_COUNT], - unsigned vertices_per_patch) + struct zink_shader **stages, + unsigned vertices_per_patch, + uint32_t gfx_hash) { struct zink_screen *screen = zink_screen(ctx->base.screen); - struct zink_gfx_program *prog = rzalloc(NULL, struct zink_gfx_program); + struct zink_gfx_program *prog = create_program(ctx, false); if (!prog) goto fail; - pipe_reference_init(&prog->base.reference, 1); + 
prog->gfx_hash = gfx_hash; + prog->base.removed = true; + prog->optimal_keys = screen->optimal_keys; + + nir_shader *nir[ZINK_GFX_SHADER_COUNT]; - for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { - list_inithead(&prog->shader_cache[i][0]); - list_inithead(&prog->shader_cache[i][1]); + prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] && + prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags; + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + util_dynarray_init(&prog->shader_cache[i][0][0], prog); + util_dynarray_init(&prog->shader_cache[i][0][1], prog); + util_dynarray_init(&prog->shader_cache[i][1][0], prog); + util_dynarray_init(&prog->shader_cache[i][1][1], prog); if (stages[i]) { prog->shaders[i] = stages[i]; prog->stages_present |= BITFIELD_BIT(i); + if (i != MESA_SHADER_FRAGMENT) + prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated; + prog->needs_inlining |= prog->shaders[i]->needs_inlining; + nir[i] = zink_shader_deserialize(screen, stages[i]); + } else { + nir[i] = NULL; } } - if (stages[PIPE_SHADER_TESS_EVAL] && !stages[PIPE_SHADER_TESS_CTRL]) { - prog->shaders[PIPE_SHADER_TESS_EVAL]->generated = - prog->shaders[PIPE_SHADER_TESS_CTRL] = - zink_shader_tcs_create(screen, stages[PIPE_SHADER_VERTEX], vertices_per_patch); - prog->stages_present |= BITFIELD_BIT(PIPE_SHADER_TESS_CTRL); + if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { + prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs = + prog->shaders[MESA_SHADER_TESS_CTRL] = + zink_shader_tcs_create(screen, nir[MESA_SHADER_TESS_EVAL], vertices_per_patch, &nir[MESA_SHADER_TESS_CTRL]); + prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); } + prog->stages_remaining = prog->stages_present; - assign_io(prog, prog->shaders); + assign_io(screen, nir); + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + if (nir[i]) + zink_shader_serialize_blob(nir[i], &prog->blobs[i]); + ralloc_free(nir[i]); + } - if (stages[PIPE_SHADER_GEOMETRY]) - prog->last_vertex_stage = 
stages[PIPE_SHADER_GEOMETRY]; - else if (stages[PIPE_SHADER_TESS_EVAL]) - prog->last_vertex_stage = stages[PIPE_SHADER_TESS_EVAL]; - else - prog->last_vertex_stage = stages[PIPE_SHADER_VERTEX]; - for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) { - _mesa_hash_table_init(&prog->pipelines[i], prog, NULL, equals_gfx_pipeline_state); - /* only need first 3/4 for point/line/tri/patch */ - if (screen->info.have_EXT_extended_dynamic_state && - i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3)) - break; + if (stages[MESA_SHADER_GEOMETRY]) + prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY]; + else if (stages[MESA_SHADER_TESS_EVAL]) + prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL]; + else + prog->last_vertex_stage = stages[MESA_SHADER_VERTEX]; + + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); + /* only need first 3/4 for point/line/tri/patch */ + if (screen->info.have_EXT_extended_dynamic_state && + i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 
4 : 3)) + break; + } } + if (screen->optimal_keys) + prog->libs = find_or_create_lib_cache(screen, prog); + if (prog->libs) + p_atomic_inc(&prog->libs->refcount); + struct mesa_sha1 sctx; _mesa_sha1_init(&sctx); - for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { if (prog->shaders[i]) { simple_mtx_lock(&prog->shaders[i]->lock); _mesa_set_add(prog->shaders[i]->programs, prog); @@ -367,11 +1164,11 @@ zink_create_gfx_program(struct zink_context *ctx, } } _mesa_sha1_final(&sctx, prog->base.sha1); + p_atomic_dec(&prog->base.reference.count); - if (!screen->descriptor_program_init(ctx, &prog->base)) + if (!zink_descriptor_program_init(ctx, &prog->base)) goto fail; - zink_screen_get_pipeline_cache(screen, &prog->base); return prog; fail: @@ -380,130 +1177,260 @@ fail: return NULL; } +/* Creates a replacement, optimized zink_gfx_program for this set of separate shaders, which will + * be swapped in in place of the fast-linked separable program once it's done compiling. + */ +static void +create_linked_separable_job(void *data, void *gdata, int thread_index) +{ + struct zink_gfx_program *prog = data; + prog->full_prog = zink_create_gfx_program(prog->base.ctx, prog->shaders, 0, prog->gfx_hash); + /* add an ownership ref */ + zink_gfx_program_reference(zink_screen(prog->base.ctx->base.screen), NULL, prog->full_prog); + precompile_job(prog->full_prog, gdata, thread_index); +} + +struct zink_gfx_program * +create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + bool is_separate = true; + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) + is_separate &= !stages[i] || stages[i]->info.separate_shader; + /* filter cases that need real pipelines */ + if (!is_separate || + /* TODO: maybe try variants? 
grimace */ + !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || + !zink_can_use_pipeline_libs(ctx)) + return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash); + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + /* ensure async shader creation is done */ + if (stages[i]) { + util_queue_fence_wait(&stages[i]->precompile.fence); + if (!stages[i]->precompile.obj.mod) + return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash); + } + } + + struct zink_gfx_program *prog = create_program(ctx, false); + if (!prog) + goto fail; + + prog->is_separable = true; + prog->gfx_hash = ctx->gfx_hash; + prog->base.uses_shobj = screen->info.have_EXT_shader_object; + + prog->stages_remaining = prog->stages_present = ctx->shader_stages; + memcpy(prog->shaders, stages, sizeof(prog->shaders)); + prog->last_vertex_stage = ctx->last_vertex_stage; + + if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { + prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs; + prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + } + + if (!screen->info.have_EXT_shader_object) { + prog->libs = create_lib_cache(prog, false); + /* this libs cache is owned by the program */ + p_atomic_set(&prog->libs->refcount, 1); + } + + unsigned refs = 0; + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + if (prog->shaders[i]) { + simple_mtx_lock(&prog->shaders[i]->lock); + _mesa_set_add(prog->shaders[i]->programs, prog); + simple_mtx_unlock(&prog->shaders[i]->lock); + if (screen->info.have_EXT_shader_object) { + if (!prog->objects[i]) + prog->objects[i] = prog->shaders[i]->precompile.obj.obj; + } + refs++; + } + } + /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are + * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here. 
+ */ + p_atomic_add(&prog->base.reference.count, refs - 1); + + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); + /* only need first 3/4 for point/line/tri/patch */ + if (screen->info.have_EXT_extended_dynamic_state && + i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3)) + break; + } + } + + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl) + continue; + int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1; + prog->base.dd.binding_usage |= BITFIELD_BIT(idx); + prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl; + /* guarantee a null dsl if previous stages don't have descriptors */ + if (prog->shaders[i]->precompile.dsl) + prog->base.num_dsl = idx + 1; + prog->base.dd.bindless |= prog->shaders[i]->bindless; + } + if (prog->base.dd.bindless) { + prog->base.num_dsl = screen->compact_descriptors ? 
ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES; + prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout; + } + prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); + + prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; + + if (!screen->info.have_EXT_shader_object) { + VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl}; + struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); + if (!gkey) { + mesa_loge("ZINK: failed to allocate gkey!"); + goto fail; + } + gkey->optimal_key = prog->last_variant_hash; + assert(gkey->optimal_key); + gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false); + _mesa_set_add(&prog->libs->libs, gkey); + } + + if (!(zink_debug & ZINK_DEBUG_NOOPT)) + util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0); + + return prog; +fail: + if (prog) + zink_destroy_gfx_program(screen, prog); + return NULL; +} + static uint32_t -hash_compute_pipeline_state(const void *key) +hash_compute_pipeline_state_local_size(const void *key) { const struct zink_compute_pipeline_state *state = key; uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash)); - if (state->use_local_size) - hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash); + hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash); return hash; } -void -zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3]) +static uint32_t +hash_compute_pipeline_state(const void *key) { - struct zink_shader *zs = comp->shader; - bool use_local_size = !(zs->nir->info.workgroup_size[0] || - 
zs->nir->info.workgroup_size[1] || - zs->nir->info.workgroup_size[2]); - if (ctx->compute_pipeline_state.use_local_size != use_local_size) - ctx->compute_pipeline_state.dirty = true; - ctx->compute_pipeline_state.use_local_size = use_local_size; + const struct zink_compute_pipeline_state *state = key; + return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash)); +} - if (ctx->compute_pipeline_state.use_local_size) { +void +zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info) +{ + if (comp->use_local_size) { for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) { - if (ctx->compute_pipeline_state.local_size[i] != block[i]) + if (ctx->compute_pipeline_state.local_size[i] != info->block[i]) ctx->compute_pipeline_state.dirty = true; - ctx->compute_pipeline_state.local_size[i] = block[i]; + ctx->compute_pipeline_state.local_size[i] = info->block[i]; } - } else - ctx->compute_pipeline_state.local_size[0] = - ctx->compute_pipeline_state.local_size[1] = - ctx->compute_pipeline_state.local_size[2] = 0; + } + if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) { + ctx->compute_pipeline_state.dirty = true; + ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem; + } } static bool equals_compute_pipeline_state(const void *a, const void *b) { - return memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) == 0; + const struct zink_compute_pipeline_state *sa = a; + const struct zink_compute_pipeline_state *sb = b; + return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) && + sa->module == sb->module; } -struct zink_compute_program * -zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader) +static bool +equals_compute_pipeline_state_local_size(const void *a, const void *b) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - struct 
zink_compute_program *comp = rzalloc(NULL, struct zink_compute_program); - if (!comp) - goto fail; + const struct zink_compute_pipeline_state *sa = a; + const struct zink_compute_pipeline_state *sb = b; + return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) && + !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) && + sa->module == sb->module; +} - pipe_reference_init(&comp->base.reference, 1); - comp->base.is_compute = true; +static void +precompile_compute_job(void *data, void *gdata, int thread_index) +{ + struct zink_compute_program *comp = data; + struct zink_screen *screen = gdata; - comp->module = CALLOC_STRUCT(zink_shader_module); + comp->shader = zink_shader_create(screen, comp->nir); + comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module); assert(comp->module); - comp->module->shader = zink_shader_compile(screen, shader, shader->nir, NULL); - assert(comp->module->shader); - - comp->pipelines = _mesa_hash_table_create(NULL, hash_compute_pipeline_state, - equals_compute_pipeline_state); - - _mesa_set_add(shader->programs, comp); - comp->shader = shader; - memcpy(comp->base.sha1, shader->base.sha1, sizeof(shader->base.sha1)); - - if (!screen->descriptor_program_init(ctx, &comp->base)) - goto fail; - - zink_screen_get_pipeline_cache(screen, &comp->base); - return comp; - -fail: - if (comp) - zink_destroy_compute_program(screen, comp); - return NULL; + comp->module->shobj = false; + comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base); + /* comp->nir will be freed by zink_shader_compile */ + comp->nir = NULL; + assert(comp->module->obj.spirv); + util_dynarray_init(&comp->shader_cache[0], comp); + util_dynarray_init(&comp->shader_cache[1], comp); + + struct mesa_sha1 sha1_ctx; + _mesa_sha1_init(&sha1_ctx); + _mesa_sha1_update(&sha1_ctx, comp->shader->blob.data, comp->shader->blob.size); + _mesa_sha1_final(&sha1_ctx, comp->base.sha1); + + 
zink_descriptor_program_init(comp->base.ctx, &comp->base); + + zink_screen_get_pipeline_cache(screen, &comp->base, true); + if (comp->base.can_precompile) + comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL); + if (comp->base_pipeline) + zink_screen_update_pipeline_cache(screen, &comp->base, true); } -uint32_t -zink_program_get_descriptor_usage(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type) +static struct zink_compute_program * +create_compute_program(struct zink_context *ctx, nir_shader *nir) { - struct zink_shader *zs = NULL; - switch (stage) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_TESS_CTRL: - case PIPE_SHADER_TESS_EVAL: - case PIPE_SHADER_GEOMETRY: - case PIPE_SHADER_FRAGMENT: - zs = ctx->gfx_stages[stage]; - break; - case PIPE_SHADER_COMPUTE: { - zs = ctx->compute_stage; - break; - } - default: - unreachable("unknown shader type"); - } - if (!zs) - return 0; - switch (type) { - case ZINK_DESCRIPTOR_TYPE_UBO: - return zs->ubos_used; - case ZINK_DESCRIPTOR_TYPE_SSBO: - return zs->ssbos_used; - case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: - return BITSET_TEST_RANGE(zs->nir->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1); - case ZINK_DESCRIPTOR_TYPE_IMAGE: - return zs->nir->info.images_used; - default: - unreachable("unknown descriptor type!"); - } - return 0; + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_compute_program *comp = create_program(ctx, true); + if (!comp) + return NULL; + simple_mtx_init(&comp->cache_lock, mtx_plain); + comp->scratch_size = nir->scratch_size; + comp->nir = nir; + comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms; + + comp->use_local_size = !(nir->info.workgroup_size[0] || + nir->info.workgroup_size[1] || + nir->info.workgroup_size[2]); + comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem; + comp->base.can_precompile = !comp->use_local_size && + (screen->info.have_EXT_non_seamless_cube_map || 
!zink_shader_has_cubes(nir)) && + (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS)); + _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ? + equals_compute_pipeline_state_local_size : + equals_compute_pipeline_state); + if (zink_debug & ZINK_DEBUG_NOBGC) + precompile_compute_job(comp, screen, 0); + else + util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence, + precompile_compute_job, NULL, 0); + return comp; } bool -zink_program_descriptor_is_buffer(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned i) +zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i) { struct zink_shader *zs = NULL; switch (stage) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_TESS_CTRL: - case PIPE_SHADER_TESS_EVAL: - case PIPE_SHADER_GEOMETRY: - case PIPE_SHADER_FRAGMENT: + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_FRAGMENT: zs = ctx->gfx_stages[stage]; break; - case PIPE_SHADER_COMPUTE: { - zs = ctx->compute_stage; + case MESA_SHADER_COMPUTE: { + zs = ctx->curr_compute->shader; break; } default: @@ -518,6 +1445,8 @@ static unsigned get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type) { switch (type) { + case ZINK_DESCRIPTOR_TYPE_UNIFORMS: + return !!zs->has_uniforms; case ZINK_DESCRIPTOR_TYPE_UBO: case ZINK_DESCRIPTOR_TYPE_SSBO: return zs->num_bindings[type]; @@ -531,15 +1460,15 @@ get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type) } unsigned -zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type, bool is_compute) +zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type) { unsigned num_bindings = 0; - if (is_compute) { + if (pg->is_compute) { struct zink_compute_program 
*comp = (void*)pg; return get_num_bindings(comp->shader, type); } struct zink_gfx_program *prog = (void*)pg; - for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) { + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { if (prog->shaders[i]) num_bindings += get_num_bindings(prog->shaders[i], type); } @@ -547,182 +1476,109 @@ zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descrip } unsigned -zink_program_num_bindings(const struct zink_program *pg, bool is_compute) +zink_program_num_bindings(const struct zink_program *pg) { unsigned num_bindings = 0; - for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) - num_bindings += zink_program_num_bindings_typed(pg, i, is_compute); + for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) + num_bindings += zink_program_num_bindings_typed(pg, i); return num_bindings; } +static void +deinit_program(struct zink_screen *screen, struct zink_program *pg) +{ + util_queue_fence_wait(&pg->cache_fence); + if (pg->layout) + VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL); + + if (pg->pipeline_cache) + VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL); + u_rwlock_destroy(&pg->pipeline_cache_lock); + zink_descriptor_program_deinit(screen, pg); +} + void zink_destroy_gfx_program(struct zink_screen *screen, struct zink_gfx_program *prog) { - if (prog->base.layout) - VKSCR(DestroyPipelineLayout)(screen->dev, prog->base.layout, NULL); - - for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { - if (prog->shaders[i]) { - _mesa_set_remove_key(prog->shaders[i]->programs, prog); - prog->shaders[i] = NULL; - } - destroy_shader_cache(screen, &prog->shader_cache[i][0]); - destroy_shader_cache(screen, &prog->shader_cache[i][1]); - ralloc_free(prog->nir[i]); - } - - unsigned max_idx = ARRAY_SIZE(prog->pipelines); + unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]); if (screen->info.have_EXT_extended_dynamic_state) { /* only need first 3/4 for point/line/tri/patch */ if ((prog->stages_present & - 
(BITFIELD_BIT(PIPE_SHADER_TESS_EVAL) | BITFIELD_BIT(PIPE_SHADER_GEOMETRY))) == - BITFIELD_BIT(PIPE_SHADER_TESS_EVAL)) + (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) == + BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) max_idx = 4; else max_idx = 3; max_idx++; } - for (int i = 0; i < max_idx; ++i) { - hash_table_foreach(&prog->pipelines[i], entry) { - struct gfx_pipeline_cache_entry *pc_entry = entry->data; + if (prog->is_separable) + zink_gfx_program_reference(screen, &prog->full_prog, NULL); + for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) { + for (int i = 0; i < max_idx; ++i) { + hash_table_foreach(&prog->pipelines[r][i], entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; + + util_queue_fence_wait(&pc_entry->fence); + VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); + VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL); + free(pc_entry); + } + } + } + + deinit_program(screen, &prog->base); - VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); - free(pc_entry); + for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + if (prog->shaders[i]) { + _mesa_set_remove_key(prog->shaders[i]->programs, prog); + prog->shaders[i] = NULL; + } + if (!prog->is_separable) { + destroy_shader_cache(screen, &prog->shader_cache[i][0][0]); + destroy_shader_cache(screen, &prog->shader_cache[i][0][1]); + destroy_shader_cache(screen, &prog->shader_cache[i][1][0]); + destroy_shader_cache(screen, &prog->shader_cache[i][1][1]); + blob_finish(&prog->blobs[i]); } } - if (prog->base.pipeline_cache) - VKSCR(DestroyPipelineCache)(screen->dev, prog->base.pipeline_cache, NULL); - screen->descriptor_program_deinit(screen, &prog->base); + if (prog->libs) + zink_gfx_lib_cache_unref(screen, prog->libs); ralloc_free(prog); } void zink_destroy_compute_program(struct zink_screen *screen, - struct zink_compute_program *comp) + struct zink_compute_program *comp) { - if (comp->base.layout) - 
VKSCR(DestroyPipelineLayout)(screen->dev, comp->base.layout, NULL); + deinit_program(screen, &comp->base); + + assert(comp->shader); + assert(!comp->shader->spirv); + + zink_shader_free(screen, comp->shader); - if (comp->shader) - _mesa_set_remove_key(comp->shader->programs, comp); + destroy_shader_cache(screen, &comp->shader_cache[0]); + destroy_shader_cache(screen, &comp->shader_cache[1]); - hash_table_foreach(comp->pipelines, entry) { + hash_table_foreach(&comp->pipelines, entry) { struct compute_pipeline_cache_entry *pc_entry = entry->data; VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); free(pc_entry); } - _mesa_hash_table_destroy(comp->pipelines, NULL); - VKSCR(DestroyShaderModule)(screen->dev, comp->module->shader, NULL); - free(comp->module); - if (comp->base.pipeline_cache) - VKSCR(DestroyPipelineCache)(screen->dev, comp->base.pipeline_cache, NULL); - screen->descriptor_program_deinit(screen, &comp->base); + VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL); + zink_destroy_shader_module(screen, comp->module); ralloc_free(comp); } -static unsigned -get_pipeline_idx(bool have_EXT_extended_dynamic_state, enum pipe_prim_type mode, VkPrimitiveTopology vkmode) -{ - /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT specifies that the topology state in - * VkPipelineInputAssemblyStateCreateInfo only specifies the topology class, - * and the specific topology order and adjacency must be set dynamically - * with vkCmdSetPrimitiveTopologyEXT before any drawing commands. 
- */ - if (have_EXT_extended_dynamic_state) { - if (mode == PIPE_PRIM_PATCHES) - return 3; - switch (u_reduced_prim(mode)) { - case PIPE_PRIM_POINTS: - return 0; - case PIPE_PRIM_LINES: - return 1; - default: - return 2; - } - } - return vkmode; -} - - -VkPipeline -zink_get_gfx_pipeline(struct zink_context *ctx, - struct zink_gfx_program *prog, - struct zink_gfx_pipeline_state *state, - enum pipe_prim_type mode) +ALWAYS_INLINE static bool +compute_can_shortcut(const struct zink_compute_program *comp) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - const bool have_EXT_vertex_input_dynamic_state = screen->info.have_EXT_vertex_input_dynamic_state; - const bool have_EXT_extended_dynamic_state = screen->info.have_EXT_extended_dynamic_state; - - VkPrimitiveTopology vkmode = zink_primitive_topology(mode); - const unsigned idx = get_pipeline_idx(screen->info.have_EXT_extended_dynamic_state, mode, vkmode); - assert(idx <= ARRAY_SIZE(prog->pipelines)); - if (!state->dirty && !state->modules_changed && - (have_EXT_vertex_input_dynamic_state || !ctx->vertex_state_changed) && - idx == state->idx) - return state->pipeline; - - struct hash_entry *entry = NULL; - - if (state->dirty) { - if (state->pipeline) //avoid on first hash - state->final_hash ^= state->hash; - state->hash = hash_gfx_pipeline_state(state); - state->final_hash ^= state->hash; - state->dirty = false; - } - if (!have_EXT_vertex_input_dynamic_state && ctx->vertex_state_changed) { - if (state->pipeline) - state->final_hash ^= state->vertex_hash; - if (!have_EXT_extended_dynamic_state) { - uint32_t hash = 0; - /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */ - uint32_t vertex_buffers_enabled_mask = state->vertex_buffers_enabled_mask; - hash = XXH32(&vertex_buffers_enabled_mask, sizeof(uint32_t), hash); - - for (unsigned i = 0; i < state->element_state->num_bindings; i++) { - struct pipe_vertex_buffer *vb = ctx->vertex_buffers + 
ctx->element_state->binding_map[i]; - state->vertex_strides[i] = vb->buffer.resource ? vb->stride : 0; - hash = XXH32(&state->vertex_strides[i], sizeof(uint32_t), hash); - } - state->vertex_hash = hash ^ state->element_state->hash; - } else - state->vertex_hash = state->element_state->hash; - state->final_hash ^= state->vertex_hash; - } - state->modules_changed = false; - ctx->vertex_state_changed = false; - - entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[idx], state->final_hash, state); - - if (!entry) { - util_queue_fence_wait(&prog->base.cache_fence); - VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, - state, vkmode); - if (pipeline == VK_NULL_HANDLE) - return VK_NULL_HANDLE; - - zink_screen_update_pipeline_cache(screen, &prog->base); - struct gfx_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(gfx_pipeline_cache_entry); - if (!pc_entry) - return VK_NULL_HANDLE; - - memcpy(&pc_entry->state, state, sizeof(*state)); - pc_entry->pipeline = pipeline; - - entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[idx], state->final_hash, pc_entry, pc_entry); - assert(entry); - } - - struct gfx_pipeline_cache_entry *cache_entry = entry->data; - state->pipeline = cache_entry->pipeline; - state->idx = idx; - return state->pipeline; + return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless; } VkPipeline @@ -731,98 +1587,213 @@ zink_get_compute_pipeline(struct zink_screen *screen, struct zink_compute_pipeline_state *state) { struct hash_entry *entry = NULL; + struct compute_pipeline_cache_entry *cache_entry; - if (!state->dirty) + if (!state->dirty && !state->module_changed) return state->pipeline; if (state->dirty) { - state->hash = hash_compute_pipeline_state(state); + if (state->pipeline) //avoid on first hash + state->final_hash ^= state->hash; + if (comp->use_local_size) + state->hash = hash_compute_pipeline_state_local_size(state); + else + state->hash = hash_compute_pipeline_state(state); state->dirty = 
false; + state->final_hash ^= state->hash; } - entry = _mesa_hash_table_search_pre_hashed(comp->pipelines, state->hash, state); + + util_queue_fence_wait(&comp->base.cache_fence); + if (comp->base_pipeline && compute_can_shortcut(comp)) { + state->pipeline = comp->base_pipeline; + return state->pipeline; + } + entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state); if (!entry) { - util_queue_fence_wait(&comp->base.cache_fence); + simple_mtx_lock(&comp->cache_lock); + entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state); + if (entry) { + simple_mtx_unlock(&comp->cache_lock); + goto out; + } VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state); - if (pipeline == VK_NULL_HANDLE) + if (pipeline == VK_NULL_HANDLE) { + simple_mtx_unlock(&comp->cache_lock); return VK_NULL_HANDLE; + } + + zink_screen_update_pipeline_cache(screen, &comp->base, false); + if (compute_can_shortcut(comp)) { + simple_mtx_unlock(&comp->cache_lock); + /* don't add base pipeline to cache */ + state->pipeline = comp->base_pipeline = pipeline; + return state->pipeline; + } struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry); - if (!pc_entry) + if (!pc_entry) { + simple_mtx_unlock(&comp->cache_lock); return VK_NULL_HANDLE; + } memcpy(&pc_entry->state, state, sizeof(*state)); pc_entry->pipeline = pipeline; - entry = _mesa_hash_table_insert_pre_hashed(comp->pipelines, state->hash, pc_entry, pc_entry); + entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry); assert(entry); + simple_mtx_unlock(&comp->cache_lock); } - - struct compute_pipeline_cache_entry *cache_entry = entry->data; +out: + cache_entry = entry->data; state->pipeline = cache_entry->pipeline; return state->pipeline; } -static inline void -bind_stage(struct zink_context *ctx, enum pipe_shader_type stage, - struct zink_shader *shader) +static void +bind_gfx_stage(struct 
zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader) { - if (shader && shader->nir->info.num_inlinable_uniforms) + /* RADV doesn't support binding pipelines in DGC */ + if (zink_screen(ctx->base.screen)->info.nv_dgc_props.maxGraphicsShaderGroupCount == 0) + zink_flush_dgc_if_enabled(ctx); + if (shader && shader->info.num_inlinable_uniforms) ctx->shader_has_inlinable_uniforms_mask |= 1 << stage; else ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage); - if (stage == PIPE_SHADER_COMPUTE) { - if (shader && shader != ctx->compute_stage) { - struct hash_entry *entry = _mesa_hash_table_search(&ctx->compute_program_cache, shader); - if (entry) { - ctx->compute_pipeline_state.dirty = true; - ctx->curr_compute = entry->data; - } else { - struct zink_compute_program *comp = zink_create_compute_program(ctx, shader); - _mesa_hash_table_insert(&ctx->compute_program_cache, comp->shader, comp); - ctx->compute_pipeline_state.dirty = true; - ctx->curr_compute = comp; - zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base); - } - } else if (!shader) - ctx->curr_compute = NULL; - ctx->compute_stage = shader; - zink_select_launch_grid(ctx); + if (ctx->gfx_stages[stage]) + ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; + + if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) { + ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY); + ctx->is_generated_gs_bound = false; + } + + ctx->gfx_stages[stage] = shader; + ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX]; + ctx->gfx_pipeline_state.modules_changed = true; + if (shader) { + ctx->shader_stages |= BITFIELD_BIT(stage); + ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; } else { - if (ctx->gfx_stages[stage]) - ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; - ctx->gfx_stages[stage] = shader; - ctx->gfx_dirty = ctx->gfx_stages[PIPE_SHADER_FRAGMENT] && ctx->gfx_stages[PIPE_SHADER_VERTEX]; 
- ctx->gfx_pipeline_state.modules_changed = true; - if (shader) { - ctx->shader_stages |= BITFIELD_BIT(stage); - ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; - } else { - ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE; - if (ctx->curr_program) - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; - ctx->curr_program = NULL; - ctx->shader_stages &= ~BITFIELD_BIT(stage); + ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE; + if (ctx->curr_program) + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + ctx->curr_program = NULL; + ctx->shader_stages &= ~BITFIELD_BIT(stage); + } +} + +static enum mesa_prim +gs_output_to_reduced_prim_type(struct shader_info *info) +{ + switch (info->gs.output_primitive) { + case MESA_PRIM_POINTS: + return MESA_PRIM_POINTS; + + case MESA_PRIM_LINES: + case MESA_PRIM_LINE_LOOP: + case MESA_PRIM_LINE_STRIP: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_LINE_STRIP_ADJACENCY: + return MESA_PRIM_LINES; + + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLE_FAN: + case MESA_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + return MESA_PRIM_TRIANGLES; + + default: + unreachable("unexpected output primitive type"); + } +} + +static enum mesa_prim +update_rast_prim(struct zink_shader *shader) +{ + struct shader_info *info = &shader->info; + if (info->stage == MESA_SHADER_GEOMETRY) + return gs_output_to_reduced_prim_type(info); + else if (info->stage == MESA_SHADER_TESS_EVAL) { + if (info->tess.point_mode) + return MESA_PRIM_POINTS; + else { + switch (info->tess._primitive_mode) { + case TESS_PRIMITIVE_ISOLINES: + return MESA_PRIM_LINES; + case TESS_PRIMITIVE_TRIANGLES: + case TESS_PRIMITIVE_QUADS: + return MESA_PRIM_TRIANGLES; + default: + return MESA_PRIM_COUNT; + } } } + return MESA_PRIM_COUNT; +} + +static void +unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader) +{ + if 
(prev_shader->non_fs.is_generated) + ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY); + + if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && + ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent == + prev_shader) { + bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL); + } } static void -bind_last_vertex_stage(struct zink_context *ctx) +bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader) { - enum pipe_shader_type old = ctx->last_vertex_stage ? pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage) : PIPE_SHADER_TYPES; - if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) - ctx->last_vertex_stage = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; - else if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]) - ctx->last_vertex_stage = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]; + if (prev_shader && stage < MESA_SHADER_GEOMETRY) + unbind_generated_gs(ctx, stage, prev_shader); + + gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES; + if (ctx->gfx_stages[MESA_SHADER_GEOMETRY]) + ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY]; + else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL]) + ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL]; else - ctx->last_vertex_stage = ctx->gfx_stages[PIPE_SHADER_VERTEX]; - enum pipe_shader_type current = ctx->last_vertex_stage ? pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage) : PIPE_SHADER_VERTEX; + ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX]; + gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX; + + /* update rast_prim */ + ctx->gfx_pipeline_state.shader_rast_prim = + ctx->last_vertex_stage ? 
update_rast_prim(ctx->last_vertex_stage) : + MESA_PRIM_COUNT; + if (old != current) { - if (old != PIPE_SHADER_TYPES) { - memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base)); - ctx->dirty_shader_stages |= BITFIELD_BIT(old); + if (!zink_screen(ctx->base.screen)->optimal_keys) { + if (old != MESA_SHADER_STAGES) { + memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base)); + ctx->dirty_gfx_stages |= BITFIELD_BIT(old); + } else { + /* always unset vertex shader values when changing to a non-vs last stage */ + memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base)); + } + } + + unsigned num_viewports = ctx->vp_state.num_viewports; + struct zink_screen *screen = zink_screen(ctx->base.screen); + /* number of enabled viewports is based on whether last vertex stage writes viewport index */ + if (ctx->last_vertex_stage) { + if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK)) + ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS); + else + ctx->vp_state.num_viewports = 1; + } else { + ctx->vp_state.num_viewports = 1; + } + ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports; + if (!screen->info.have_EXT_extended_dynamic_state) { + if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports; } ctx->last_vertex_stage_dirty = true; } @@ -833,21 +1804,42 @@ zink_bind_vs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); - if (!cso && !ctx->gfx_stages[PIPE_SHADER_VERTEX]) + if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX]) return; - void *prev = ctx->gfx_stages[PIPE_SHADER_VERTEX]; - bind_stage(ctx, PIPE_SHADER_VERTEX, cso); + struct 
zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX]; + bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso); + bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader); if (cso) { struct zink_shader *zs = cso; - ctx->shader_reads_drawid = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); - ctx->shader_reads_basevertex = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); + ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID); + ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); } else { ctx->shader_reads_drawid = false; ctx->shader_reads_basevertex = false; } - if (ctx->last_vertex_stage == prev) - ctx->last_vertex_stage = cso; +} + +/* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count: + * in GL, samples==1 means ignore gl_SampleMask[] + * in VK, gl_SampleMask[] is never ignored + */ +void +zink_update_fs_key_samples(struct zink_context *ctx) +{ + if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT]) + return; + if (zink_shader_uses_samples(ctx->gfx_stages[MESA_SHADER_FRAGMENT])) { + bool samples = zink_get_fs_base_key(ctx)->samples; + if (samples != (ctx->fb_state.samples > 1)) + zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1; + } +} +void zink_update_gs_key_rectangular_line(struct zink_context *ctx) +{ + bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular; + if (line_rectangular != ctx->rast_state->base.line_rectangular) + zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular; } static void @@ -855,18 +1847,42 @@ zink_bind_fs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); - if (!cso && !ctx->gfx_stages[PIPE_SHADER_FRAGMENT]) + if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT]) + return; + if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) { + ctx->saved_fs = cso; + 
zink_set_null_fs(ctx); return; - bind_stage(ctx, PIPE_SHADER_FRAGMENT, cso); + } + bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true; + unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0; + bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso); ctx->fbfetch_outputs = 0; if (cso) { - nir_shader *nir = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir; - if (nir->info.fs.uses_fbfetch_output) { - nir_foreach_shader_out_variable(var, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir) { - if (var->data.fb_fetch_output) - ctx->fbfetch_outputs |= BITFIELD_BIT(var->data.location - FRAG_RESULT_DATA0); - } + shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info; + bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0; + if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage && + writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) { + ctx->blend_state_changed = true; + ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C); + } + if (info->fs.uses_fbfetch_output) { + if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL))) + ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); + ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0; } + zink_update_fs_key_samples(ctx); + if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) { + if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output) + ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output; + } + zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false); + if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask && + 
!zink_screen(pctx->screen)->driver_workarounds.needs_zs_shader_swizzle) + zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask); + if (!ctx->track_renderpasses && !ctx->blitting) + ctx->rp_tc_info_updated = true; } zink_update_fbfetch(ctx); } @@ -876,25 +1892,17 @@ zink_bind_gs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); - if (!cso && !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) + if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY]) return; - bool had_points = ctx->gfx_stages[PIPE_SHADER_GEOMETRY] ? ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->nir->info.gs.output_primitive == GL_POINTS : false; - bind_stage(ctx, PIPE_SHADER_GEOMETRY, cso); - bind_last_vertex_stage(ctx); - if (cso) { - if (!had_points && ctx->last_vertex_stage->nir->info.gs.output_primitive == GL_POINTS) - ctx->gfx_pipeline_state.has_points++; - } else { - if (had_points) - ctx->gfx_pipeline_state.has_points--; - } + bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso); + bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL); } static void zink_bind_tcs_state(struct pipe_context *pctx, void *cso) { - bind_stage(zink_context(pctx), PIPE_SHADER_TESS_CTRL, cso); + bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso); } static void @@ -902,17 +1910,18 @@ zink_bind_tes_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); - if (!cso && !ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]) + if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL]) return; - if (!!ctx->gfx_stages[PIPE_SHADER_TESS_EVAL] != !!cso) { + if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) { if (!cso) { /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */ - if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]->generated) - ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = NULL; + if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs) + 
ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL; } } - bind_stage(ctx, PIPE_SHADER_TESS_EVAL, cso); - bind_last_vertex_stage(ctx); + struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL]; + bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso); + bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader); } static void * @@ -925,32 +1934,324 @@ zink_create_cs_state(struct pipe_context *pctx, else nir = (struct nir_shader *)shader->prog; - return zink_shader_create(zink_screen(pctx->screen), nir, NULL); + if (nir->info.uses_bindless) + zink_descriptors_init_bindless(zink_context(pctx)); + + return create_compute_program(zink_context(pctx), nir); } static void zink_bind_cs_state(struct pipe_context *pctx, void *cso) { - bind_stage(zink_context(pctx), PIPE_SHADER_COMPUTE, cso); + struct zink_context *ctx = zink_context(pctx); + struct zink_compute_program *comp = cso; + if (comp && comp->num_inlinable_uniforms) + ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE; + else + ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE); + + if (ctx->curr_compute) { + zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base); + ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; + ctx->compute_pipeline_state.module = VK_NULL_HANDLE; + ctx->compute_pipeline_state.module_hash = 0; + } + ctx->compute_pipeline_state.dirty = true; + ctx->curr_compute = comp; + if (comp && comp != ctx->curr_compute) { + ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash; + if (util_queue_fence_is_signalled(&comp->base.cache_fence)) + ctx->compute_pipeline_state.module = ctx->curr_compute->curr->obj.mod; + ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; + if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask) + ctx->compute_dirty = true; + } + zink_select_launch_grid(ctx); +} + +static void +zink_get_compute_state_info(struct pipe_context *pctx, 
void *cso, struct pipe_compute_state_object_info *info) +{ + struct zink_compute_program *comp = cso; + struct zink_screen *screen = zink_screen(pctx->screen); + + info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations; + info->private_memory = comp->scratch_size; + if (screen->info.props11.subgroupSize) { + info->preferred_simd_size = screen->info.props11.subgroupSize; + info->simd_sizes = info->preferred_simd_size; + } else { + // just guess it + info->preferred_simd_size = 64; + // only used for actual subgroup support + info->simd_sizes = 0; + } +} + +static void +zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso) +{ + struct zink_compute_program *comp = cso; + zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL); +} + +/* caller must lock prog->libs->lock */ +struct zink_gfx_library_key * +zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) +{ + struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); + if (!gkey) { + mesa_loge("ZINK: failed to allocate gkey!"); + return NULL; + } + + gkey->optimal_key = state->optimal_key; + assert(gkey->optimal_key); + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) + gkey->modules[i] = prog->objs[i].mod; + gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog); + _mesa_set_add(&prog->libs->libs, gkey); + return gkey; +} + +static const char * +print_exe_stages(VkShaderStageFlags stages) +{ + if (stages == VK_SHADER_STAGE_VERTEX_BIT) + return "VS"; + if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT)) + return "VS+GS"; + if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + return "VS+TCS+TES"; + if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT)) + return 
"VS+TCS+TES+GS"; + if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) + return "TCS"; + if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) + return "TES"; + if (stages == VK_SHADER_STAGE_GEOMETRY_BIT) + return "GS"; + if (stages == VK_SHADER_STAGE_FRAGMENT_BIT) + return "FS"; + if (stages == VK_SHADER_STAGE_COMPUTE_BIT) + return "CS"; + unreachable("unhandled combination of stages!"); +} + +static void +print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline, struct util_debug_callback *debug) +{ + VkPipelineInfoKHR pinfo = { + VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR, + NULL, + pipeline + }; + unsigned exe_count = 0; + VkPipelineExecutablePropertiesKHR props[10] = {0}; + for (unsigned i = 0; i < ARRAY_SIZE(props); i++) { + props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR; + props[i].pNext = NULL; + } + VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL); + VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props); + for (unsigned e = 0; e < exe_count; e++) { + VkPipelineExecutableInfoKHR info = { + VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR, + NULL, + pipeline, + e + }; + unsigned count = 0; + + struct u_memstream stream; + char *print_buf; + size_t print_buf_sz; + + if (!u_memstream_open(&stream, &print_buf, &print_buf_sz)) { + mesa_loge("ZINK: failed to open memstream!"); + return; + } + + FILE *f = u_memstream_get(&stream); + fprintf(f, "type: %s", props[e].name); + VkPipelineExecutableStatisticKHR *stats = NULL; + VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL); + stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR)); + if (!stats) { + mesa_loge("ZINK: failed to allocate stats!"); + return; + } + + for (unsigned i = 0; i < count; i++) + stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR; + VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats); + + for (unsigned i = 0; i < count; i++) { 
+ fprintf(f, ", "); + switch (stats[i].format) { + case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR: + fprintf(f, "%s: %u", stats[i].name, stats[i].value.b32); + break; + case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR: + fprintf(f, "%s: %" PRIi64, stats[i].name, stats[i].value.i64); + break; + case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR: + fprintf(f, "%s: %" PRIu64, stats[i].name, stats[i].value.u64); + break; + case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR: + fprintf(f, "%s: %g", stats[i].name, stats[i].value.f64); + break; + default: + unreachable("unknown statistic"); + } + } + + /* print_buf is only valid after flushing. */ + fflush(f); + util_debug_message(debug, SHADER_INFO, "%s\n", print_buf); + + u_memstream_close(&stream); + free(print_buf); + } +} + +static void +precompile_job(void *data, void *gdata, int thread_index) +{ + struct zink_screen *screen = gdata; + struct zink_gfx_program *prog = data; + + struct zink_gfx_pipeline_state state = {0}; + state.shader_keys_optimal.key.vs_base.last_vertex_stage = true; + state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard + state.optimal_key = state.shader_keys_optimal.key.val; + generate_gfx_program_modules_optimal(NULL, screen, prog, &state); + zink_screen_get_pipeline_cache(screen, &prog->base, true); + if (!screen->info.have_EXT_shader_object) { + simple_mtx_lock(&prog->libs->lock); + zink_create_pipeline_lib(screen, prog, &state); + simple_mtx_unlock(&prog->libs->lock); + } + zink_screen_update_pipeline_cache(screen, &prog->base, true); +} + +static void +precompile_separate_shader_job(void *data, void *gdata, int thread_index) +{ + struct zink_screen *screen = gdata; + struct zink_shader *zs = data; + + zs->precompile.obj = zink_shader_compile_separate(screen, zs); + if (!screen->info.have_EXT_shader_object) { + struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0}; + objs[zs->info.stage].mod = 
zs->precompile.obj.mod; + zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage); + } +} + +static void +zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_shader **zshaders = (struct zink_shader **)shaders; + if (shaders[MESA_SHADER_COMPUTE]) + return; + /* explicitly block sample shading: this needs full pipelines always */ + if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading) + return; + /* can't precompile fixedfunc */ + if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) { + /* handled directly from shader create */ + return; + } + unsigned hash = 0; + unsigned shader_stages = 0; + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + if (zshaders[i]) { + hash ^= zshaders[i]->hash; + shader_stages |= BITFIELD_BIT(i); + } + } + unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL); + unsigned tess = shader_stages & tess_stages; + /* can't do fixedfunc tes either */ + if (tess && !shaders[MESA_SHADER_TESS_EVAL]) + return; + struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)]; + simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + /* link can be called repeatedly with the same shaders: ignore */ + if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) { + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + return; + } + struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3, hash); + u_foreach_bit(i, shader_stages) + assert(prog->shaders[i]); + _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); + prog->base.removed = false; + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + if (zink_debug & ZINK_DEBUG_SHADERDB) { + struct zink_screen *screen = 
zink_screen(pctx->screen); + if (screen->optimal_keys) + generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); + else + generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state); + VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state, + ctx->gfx_pipeline_state.element_state->binding_map, + shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true, NULL); + print_pipeline_stats(screen, pipeline, &ctx->dbg); + VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL); + } else { + if (zink_screen(pctx->screen)->info.have_EXT_shader_object) + prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); + if (zink_debug & ZINK_DEBUG_NOBGC) + precompile_job(prog, pctx->screen, 0); + else + util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0); + } } void zink_delete_shader_state(struct pipe_context *pctx, void *cso) { - zink_shader_free(zink_context(pctx), cso); + zink_gfx_shader_free(zink_screen(pctx->screen), cso); } void * zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader) { + struct zink_screen *screen = zink_screen(pctx->screen); nir_shader *nir; if (shader->type != PIPE_SHADER_IR_NIR) nir = zink_tgsi_to_nir(pctx->screen, shader->tokens); else nir = (struct nir_shader *)shader->ir.nir; - return zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output); + if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) + zink_descriptor_util_init_fbfetch(zink_context(pctx)); + if (nir->info.uses_bindless) + zink_descriptors_init_bindless(zink_context(pctx)); + + void *ret = zink_shader_create(zink_screen(pctx->screen), nir); + + if (!(zink_debug & ZINK_DEBUG_NOPC)) { + if (nir->info.separate_shader && zink_descriptor_mode == 
ZINK_DESCRIPTOR_MODE_DB && + (screen->info.have_EXT_shader_object || + (screen->info.have_EXT_graphics_pipeline_library && (nir->info.stage == MESA_SHADER_FRAGMENT || nir->info.stage == MESA_SHADER_VERTEX)))) { + struct zink_shader *zs = ret; + /* sample shading can't precompile */ + if (nir->info.stage != MESA_SHADER_FRAGMENT || !nir->info.fs.uses_sample_shading) { + if (zink_debug & ZINK_DEBUG_NOBGC) + precompile_separate_shader_job(zs, screen, 0); + else + util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, precompile_separate_shader_job, NULL, 0); + } + } + } + ralloc_free(nir); + + return ret; } static void @@ -968,6 +2269,7 @@ zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_sha return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit); } + void zink_program_init(struct zink_context *ctx) { @@ -993,5 +2295,257 @@ zink_program_init(struct zink_context *ctx) ctx->base.create_compute_state = zink_create_cs_state; ctx->base.bind_compute_state = zink_bind_cs_state; - ctx->base.delete_compute_state = zink_delete_shader_state; + ctx->base.get_compute_state_info = zink_get_compute_state_info; + ctx->base.delete_compute_state = zink_delete_cs_shader_state; + + if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state) + _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic); + else + _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input); + if (zink_screen(ctx->base.screen)->have_full_ds3) + _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3); + else + _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output); + /* validate struct packing */ + STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t)); + STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) == + offsetof(struct 
zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input)); + STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) == + offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input)); + STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) == + offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input)); + + STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t)); + + /* no precompile at all */ + if (zink_debug & ZINK_DEBUG_NOPC) + return; + + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB) + ctx->base.link_shader = zink_link_gfx_shader; +} + +bool +zink_set_rasterizer_discard(struct zink_context *ctx, bool disable) +{ + bool value = disable ? false : (ctx->rast_state ? 
ctx->rast_state->base.rasterizer_discard : false); + bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value; + ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value; + if (!changed) + return false; + if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2) + ctx->gfx_pipeline_state.dirty |= true; + ctx->rasterizer_discard_changed = true; + return true; +} + +void +zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data, + struct util_queue_fence *fence, + pipe_driver_thread_func execute, + pipe_driver_thread_func cleanup, + const size_t job_size) +{ + struct zink_screen *screen = zink_screen(pscreen); + util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size); +} + +static bool +has_edge_flags(struct zink_context *ctx) +{ + switch(ctx->gfx_pipeline_state.gfx_prim_mode) { + case MESA_PRIM_POINTS: + case MESA_PRIM_LINE_STRIP: + case MESA_PRIM_LINE_STRIP_ADJACENCY: + case MESA_PRIM_LINES: + case MESA_PRIM_LINE_LOOP: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLE_FAN: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + case MESA_PRIM_QUAD_STRIP: + case MESA_PRIM_PATCHES: + return false; + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_QUADS: + case MESA_PRIM_POLYGON: + case MESA_PRIM_COUNT: + default: + break; + } + return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES || + ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) && + ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags; +} + +static enum zink_rast_prim +zink_rast_prim_for_pipe(enum mesa_prim prim) +{ + switch (prim) { + case MESA_PRIM_POINTS: + return ZINK_PRIM_POINTS; + case MESA_PRIM_LINES: + return ZINK_PRIM_LINES; + case MESA_PRIM_TRIANGLES: + default: + return ZINK_PRIM_TRIANGLES; + } +} + +static enum mesa_prim +zink_tess_prim_type(struct zink_shader *tess) +{ + if (tess->info.tess.point_mode) + return MESA_PRIM_POINTS; + else { + 
switch (tess->info.tess._primitive_mode) { + case TESS_PRIMITIVE_ISOLINES: + return MESA_PRIM_LINES; + case TESS_PRIMITIVE_TRIANGLES: + case TESS_PRIMITIVE_QUADS: + return MESA_PRIM_TRIANGLES; + default: + return MESA_PRIM_COUNT; + } + } +} + +static inline void +zink_add_inline_uniform(nir_shader *shader, int offset) +{ + shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset; + ++shader->info.num_inlinable_uniforms; +} + +static unsigned +encode_lower_pv_mode(enum mesa_prim prim_type) +{ + switch (prim_type) { + case MESA_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_QUAD_STRIP: + return ZINK_PVE_PRIMITIVE_TRISTRIP; + case MESA_PRIM_TRIANGLE_FAN: + return ZINK_PVE_PRIMITIVE_FAN; + default: + return ZINK_PVE_PRIMITIVE_SIMPLE; + } +} + +void +zink_set_primitive_emulation_keys(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + bool lower_line_stipple = false, lower_line_smooth = false; + unsigned lower_pv_mode = 0; + if (!screen->optimal_keys) { + lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES && + screen->driver_workarounds.no_linestipple && + ctx->rast_state->base.line_stipple_enable && + !ctx->num_so_targets; + + bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS && + screen->driconf.emulate_point_smooth && + ctx->rast_state->base.point_smooth; + if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) { + assert(zink_get_gs_key(ctx)->lower_line_stipple == + zink_get_fs_key(ctx)->lower_line_stipple); + zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple; + zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple; + } + + lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES && + screen->driver_workarounds.no_linesmooth && + ctx->rast_state->base.line_smooth && + !ctx->num_so_targets; + + if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) { + 
assert(zink_get_gs_key(ctx)->lower_line_smooth == + zink_get_fs_key(ctx)->lower_line_smooth); + zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth; + zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth; + } + + if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) { + zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth; + } + + lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last && + !screen->info.have_EXT_provoking_vertex; + if (lower_pv_mode) + lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode); + + if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode) + zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode; + } + + bool lower_edge_flags = has_edge_flags(ctx); + + bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS; + + bool lower_filled_quad = lower_quad_prim && + ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES; + + if (lower_line_stipple || lower_line_smooth || + lower_edge_flags || lower_quad_prim || + lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) { + enum pipe_shader_type prev_vertex_stage = + ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ? 
+ MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + enum zink_rast_prim zink_prim_type = + zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim); + + //when using transform feedback primitives must be tessellated + lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings; + + if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated && + ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) { + + if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) { + nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]); + nir_shader *nir; + if (lower_filled_quad) { + nir = zink_create_quads_emulation_gs( + &screen->nir_options, + prev_stage); + } else { + enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode; + if (prev_vertex_stage == MESA_SHADER_TESS_EVAL) + prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]); + nir = nir_create_passthrough_gs( + &screen->nir_options, + prev_stage, + prim, + ctx->gfx_pipeline_state.rast_prim, + lower_edge_flags, + lower_line_stipple || lower_quad_prim); + } + zink_lower_system_values_to_inlined_uniforms(nir); + + zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK); + zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1); + zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT); + ralloc_free(prev_stage); + struct zink_shader *shader = zink_shader_create(screen, nir); + shader->needs_inlining = true; + ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader; + shader->non_fs.is_generated = true; + shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage]; + shader->can_inline = true; + memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride)); 
+ } + + ctx->base.bind_gs_state(&ctx->base, + ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]); + ctx->is_generated_gs_bound = true; + } + + ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3, + (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags, + ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32, + ctx->gfx_pipeline_state.dyn_state3.pv_last}); + } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && + ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated) + ctx->base.bind_gs_state(&ctx->base, NULL); } diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index f4c65f6c7e2..b4b2bd21bcd 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -24,102 +24,19 @@ #ifndef ZINK_PROGRAM_H #define ZINK_PROGRAM_H -#include <vulkan/vulkan.h> - -#include "compiler/shader_enums.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "zink_context.h" -#include "zink_compiler.h" -#include "zink_shader_keys.h" +#include "zink_types.h" #ifdef __cplusplus extern "C" { #endif +#include "util/u_prim.h" -struct zink_screen; -struct zink_shader; -struct zink_gfx_pipeline_state; -struct zink_descriptor_set; - -struct hash_table; -struct set; -struct util_dynarray; - -struct zink_program; - -struct zink_gfx_push_constant { - unsigned draw_mode_is_indexed; - unsigned draw_id; - float default_inner_level[2]; - float default_outer_level[4]; -}; - -struct zink_cs_push_constant { - unsigned work_dim; -}; - -/* a shader module is used for directly reusing a shader module between programs, - * e.g., in the case where we're swapping out only one shader, - * allowing us to skip going through shader keys - */ -struct zink_shader_module { - struct list_head list; - VkShaderModule shader; - uint32_t hash; - bool default_variant; - uint8_t num_uniforms; - uint8_t key_size; - uint8_t key[0]; /* | key 
| uniforms | */ -}; - -struct zink_program { - struct pipe_reference reference; - unsigned char sha1[20]; - struct util_queue_fence cache_fence; - VkPipelineCache pipeline_cache; - size_t pipeline_cache_size; - struct zink_batch_usage *batch_uses; - bool is_compute; - - struct zink_program_descriptor_data *dd; - - VkPipelineLayout layout; - VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_TYPES + 1]; // one for each type + push - unsigned num_dsl; - - bool removed; +struct compute_pipeline_cache_entry { + struct zink_compute_pipeline_state state; + VkPipeline pipeline; }; #define ZINK_MAX_INLINED_VARIANTS 5 -struct zink_gfx_program { - struct zink_program base; - - uint32_t stages_present; //mask of stages present in this program - struct nir_shader *nir[ZINK_SHADER_COUNT]; - - struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here - - struct zink_shader *last_vertex_stage; - - struct list_head shader_cache[ZINK_SHADER_COUNT][2]; //normal, inline uniforms - unsigned inlined_variant_count[ZINK_SHADER_COUNT]; - - struct zink_shader *shaders[ZINK_SHADER_COUNT]; - struct hash_table pipelines[11]; // number of draw modes we support - uint32_t default_variant_hash; - uint32_t last_variant_hash; -}; - -struct zink_compute_program { - struct zink_program base; - - struct zink_shader_module *module; - struct zink_shader *shader; - struct hash_table *pipelines; -}; - static inline enum zink_descriptor_type zink_desc_type_from_vktype(VkDescriptorType type) { @@ -127,6 +44,8 @@ zink_desc_type_from_vktype(VkDescriptorType type) case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: return ZINK_DESCRIPTOR_TYPE_UBO; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: return ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW; @@ -141,44 +60,47 @@ zink_desc_type_from_vktype(VkDescriptorType type) } static inline 
VkPrimitiveTopology -zink_primitive_topology(enum pipe_prim_type mode) +zink_primitive_topology(enum mesa_prim mode) { switch (mode) { - case PIPE_PRIM_POINTS: + case MESA_PRIM_POINTS: return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - case PIPE_PRIM_LINES: + case MESA_PRIM_LINES: return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; - case PIPE_PRIM_LINE_STRIP: + case MESA_PRIM_LINE_STRIP: return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - case PIPE_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLE_STRIP: return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - case PIPE_PRIM_TRIANGLE_FAN: + case MESA_PRIM_TRIANGLE_FAN: return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: + case MESA_PRIM_LINE_STRIP_ADJACENCY: return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; - case PIPE_PRIM_LINES_ADJACENCY: + case MESA_PRIM_LINES_ADJACENCY: return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; - case PIPE_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY; - case PIPE_PRIM_PATCHES: + case MESA_PRIM_PATCHES: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + case MESA_PRIM_QUADS: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + default: - unreachable("unexpected enum pipe_prim_type"); + unreachable("unexpected enum mesa_prim"); } } @@ -188,38 +110,61 @@ void * zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader); unsigned -zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type, bool is_compute); +zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type); unsigned -zink_program_num_bindings(const struct zink_program *pg, bool is_compute); 
+zink_program_num_bindings(const struct zink_program *pg); bool -zink_program_descriptor_is_buffer(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned i); +zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i); + +void +zink_gfx_program_update(struct zink_context *ctx); +void +zink_gfx_program_update_optimal(struct zink_context *ctx); + +struct zink_gfx_library_key * +zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state); +uint32_t hash_gfx_output(const void *key); +uint32_t hash_gfx_output_ds3(const void *key); +uint32_t hash_gfx_input(const void *key); +uint32_t hash_gfx_input_dynamic(const void *key); + +void +zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry); void -zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog); +zink_program_finish(struct zink_context *ctx, struct zink_program *pg); + +static inline unsigned +get_primtype_idx(enum mesa_prim mode) +{ + if (mode == MESA_PRIM_PATCHES) + return 3; + switch (u_reduced_prim(mode)) { + case MESA_PRIM_POINTS: + return 0; + case MESA_PRIM_LINES: + return 1; + default: + return 2; + } +} struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, - struct zink_shader *stages[ZINK_SHADER_COUNT], - unsigned vertices_per_patch); + struct zink_shader **stages, + unsigned vertices_per_patch, + uint32_t gfx_hash); void zink_destroy_gfx_program(struct zink_screen *screen, struct zink_gfx_program *prog); - -VkPipeline -zink_get_gfx_pipeline(struct zink_context *ctx, - struct zink_gfx_program *prog, - struct zink_gfx_pipeline_state *state, - enum pipe_prim_type mode); - +void +zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs); void zink_program_init(struct zink_context *ctx); -uint32_t 
-zink_program_get_descriptor_usage(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type); - void debug_describe_zink_gfx_program(char* buf, const struct zink_gfx_program *ptr); @@ -240,11 +185,9 @@ zink_gfx_program_reference(struct zink_screen *screen, return ret; } -struct zink_compute_program * -zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader); void zink_destroy_compute_program(struct zink_screen *screen, - struct zink_compute_program *comp); + struct zink_compute_program *comp); void debug_describe_zink_compute_program(char* buf, const struct zink_compute_program *ptr); @@ -266,12 +209,30 @@ zink_compute_program_reference(struct zink_screen *screen, return ret; } +static inline bool +zink_program_reference(struct zink_screen *screen, + struct zink_program **dst, + struct zink_program *src) +{ + struct zink_program *pg = src ? src : dst ? *dst : NULL; + if (!pg) + return false; + if (pg->is_compute) { + struct zink_compute_program *comp = (struct zink_compute_program*)pg; + return zink_compute_program_reference(screen, &comp, NULL); + } else { + struct zink_gfx_program *prog = (struct zink_gfx_program*)pg; + return zink_gfx_program_reference(screen, &prog, NULL); + } +} + VkPipelineLayout -zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg); +zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags); void -zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3]); - +zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info); +void +zink_update_compute_program(struct zink_context *ctx); VkPipeline zink_get_compute_pipeline(struct zink_screen *screen, struct zink_compute_program *comp, @@ -283,58 +244,225 @@ 
zink_program_has_descriptors(const struct zink_program *pg) return pg->num_dsl > 0; } +static inline struct zink_fs_key_base * +zink_set_fs_base_key(struct zink_context *ctx) +{ + ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_FRAGMENT); + return zink_screen(ctx->base.screen)->optimal_keys ? + &ctx->gfx_pipeline_state.shader_keys_optimal.key.fs : + &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.base; +} + +static inline const struct zink_fs_key_base * +zink_get_fs_base_key(const struct zink_context *ctx) +{ + return zink_screen(ctx->base.screen)->optimal_keys ? + &ctx->gfx_pipeline_state.shader_keys_optimal.key.fs : + &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.base; +} + static inline struct zink_fs_key * zink_set_fs_key(struct zink_context *ctx) { - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_FRAGMENT); - return (struct zink_fs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_FRAGMENT]; + assert(!zink_screen(ctx->base.screen)->optimal_keys); + ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_FRAGMENT); + return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs; } static inline const struct zink_fs_key * -zink_get_fs_key(struct zink_context *ctx) +zink_get_fs_key(const struct zink_context *ctx) +{ + assert(!zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs; +} + +static inline struct zink_gs_key * +zink_set_gs_key(struct zink_context *ctx) +{ + ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_GEOMETRY); + assert(!zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_GEOMETRY].key.gs; +} + +static inline const struct zink_gs_key * +zink_get_gs_key(const struct zink_context *ctx) +{ + return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_GEOMETRY].key.gs; +} + +static inline bool +zink_set_tcs_key_patches(struct zink_context *ctx, uint8_t 
patch_vertices) { - return (const struct zink_fs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_FRAGMENT]; + struct zink_tcs_key *tcs = zink_screen(ctx->base.screen)->optimal_keys ? + &ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs : + &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs; + if (tcs->patch_vertices == patch_vertices) + return false; + ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + tcs->patch_vertices = patch_vertices; + return true; } +static inline const struct zink_tcs_key * +zink_get_tcs_key(const struct zink_context *ctx) +{ + return zink_screen(ctx->base.screen)->optimal_keys ? + &ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs : + &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs; +} + +void +zink_update_fs_key_samples(struct zink_context *ctx); + +void +zink_update_gs_key_rectangular_line(struct zink_context *ctx); + static inline struct zink_vs_key * zink_set_vs_key(struct zink_context *ctx) { - ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX); - return (struct zink_vs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX]; + ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_VERTEX); + assert(!zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs; } static inline const struct zink_vs_key * -zink_get_vs_key(struct zink_context *ctx) +zink_get_vs_key(const struct zink_context *ctx) { - return (const struct zink_vs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX]; + assert(!zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs; } static inline struct zink_vs_key_base * zink_set_last_vertex_key(struct zink_context *ctx) { ctx->last_vertex_stage_dirty = true; - return (struct zink_vs_key_base *)&ctx->gfx_pipeline_state.shader_keys.last_vertex; + return 
zink_screen(ctx->base.screen)->optimal_keys ? + &ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base : + &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base; } static inline const struct zink_vs_key_base * -zink_get_last_vertex_key(struct zink_context *ctx) +zink_get_last_vertex_key(const struct zink_context *ctx) { - return (const struct zink_vs_key_base *)&ctx->gfx_pipeline_state.shader_keys.last_vertex; + return zink_screen(ctx->base.screen)->optimal_keys ? + &ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base : + &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base; } static inline void zink_set_fs_point_coord_key(struct zink_context *ctx) { - const struct zink_fs_key *fs = zink_get_fs_key(ctx); - bool disable = !ctx->gfx_pipeline_state.has_points || !ctx->rast_state->base.sprite_coord_enable; + const struct zink_fs_key_base *fs = zink_get_fs_base_key(ctx); + bool disable = ctx->gfx_pipeline_state.rast_prim != MESA_PRIM_POINTS; uint8_t coord_replace_bits = disable ? 0 : ctx->rast_state->base.sprite_coord_enable; - bool coord_replace_yinvert = disable ? false : !!ctx->rast_state->base.sprite_coord_mode; - if (fs->coord_replace_bits != coord_replace_bits || fs->coord_replace_yinvert != coord_replace_yinvert) { - zink_set_fs_key(ctx)->coord_replace_bits = coord_replace_bits; - zink_set_fs_key(ctx)->coord_replace_yinvert = coord_replace_yinvert; + bool point_coord_yinvert = disable ? 
false : !!ctx->rast_state->base.sprite_coord_mode; + if (fs->coord_replace_bits != coord_replace_bits || fs->point_coord_yinvert != point_coord_yinvert) { + zink_set_fs_base_key(ctx)->coord_replace_bits = coord_replace_bits; + zink_set_fs_base_key(ctx)->point_coord_yinvert = point_coord_yinvert; } } +void +zink_set_primitive_emulation_keys(struct zink_context *ctx); + +void +zink_create_primitive_emulation_gs(struct zink_context *ctx); + +static inline const struct zink_shader_key_base * +zink_get_shader_key_base(const struct zink_context *ctx, gl_shader_stage pstage) +{ + assert(!zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.shader_keys.key[pstage].base; +} + +static inline struct zink_shader_key_base * +zink_set_shader_key_base(struct zink_context *ctx, gl_shader_stage pstage) +{ + ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage); + assert(!zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.shader_keys.key[pstage].base; +} + +static inline void +zink_set_zs_needs_shader_swizzle_key(struct zink_context *ctx, gl_shader_stage pstage, bool swizzle_update) +{ + if (!zink_screen(ctx->base.screen)->driver_workarounds.needs_zs_shader_swizzle) { + if (pstage != MESA_SHADER_FRAGMENT) + return; + const struct zink_fs_key_base *fs = zink_get_fs_base_key(ctx); + bool enable = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask & ctx->di.zs_swizzle[pstage].mask) > 0; + if (enable != fs->shadow_needs_shader_swizzle || (enable && swizzle_update)) + zink_set_fs_base_key(ctx)->shadow_needs_shader_swizzle = enable; + return; + } + bool enable = !!ctx->di.zs_swizzle[pstage].mask; + const struct zink_shader_key_base *key = zink_get_shader_key_base(ctx, pstage); + if (enable != key->needs_zs_shader_swizzle || (enable && swizzle_update)) + zink_set_shader_key_base(ctx, pstage)->needs_zs_shader_swizzle = enable; +} + +ALWAYS_INLINE static bool +zink_can_use_pipeline_libs(const 
struct zink_context *ctx) +{ + return + !ctx->gfx_pipeline_state.render_pass && + /* this is just terrible */ + !zink_get_fs_base_key(ctx)->shadow_needs_shader_swizzle && + /* TODO: is sample shading even possible to handle with GPL? */ + !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading && + !zink_get_fs_base_key(ctx)->fbfetch_ms && + !ctx->gfx_pipeline_state.force_persample_interp && + !ctx->gfx_pipeline_state.min_samples && + !ctx->is_generated_gs_bound; +} + +/* stricter requirements */ +ALWAYS_INLINE static bool +zink_can_use_shader_objects(const struct zink_context *ctx) +{ + return + !ctx->gfx_pipeline_state.render_pass && + ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) && + /* TODO: is sample shading even possible to handle with GPL? */ + !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading && + !ctx->gfx_pipeline_state.force_persample_interp && + !ctx->gfx_pipeline_state.min_samples && + !ctx->is_generated_gs_bound; +} + +bool +zink_set_rasterizer_discard(struct zink_context *ctx, bool disable); +void +zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data, + struct util_queue_fence *fence, + pipe_driver_thread_func execute, + pipe_driver_thread_func cleanup, + const size_t job_size); +equals_gfx_pipeline_state_func +zink_get_gfx_pipeline_eq_func(struct zink_screen *screen, struct zink_gfx_program *prog); + +/* determines whether the 'samples' shader key is valid */ +static inline bool +zink_shader_uses_samples(const struct zink_shader *zs) +{ + assert(zs->info.stage == MESA_SHADER_FRAGMENT); + return zs->uses_sample || zs->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); +} + +static inline uint32_t +zink_sanitize_optimal_key(struct zink_shader **shaders, uint32_t val) +{ + union zink_shader_key_optimal k; + if (shaders[MESA_SHADER_TESS_EVAL] && !shaders[MESA_SHADER_TESS_CTRL]) + k.val = val; + else + k.val = zink_shader_key_optimal_no_tcs(val); + if 
(!zink_shader_uses_samples(shaders[MESA_SHADER_FRAGMENT])) + k.fs.samples = false; + if (!(shaders[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1))) + k.fs.force_dual_color_blend = false; + return k.val; +} #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp new file mode 100644 index 00000000000..2cabc678660 --- /dev/null +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -0,0 +1,423 @@ +/* + * Copyright © 2022 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> + */ + + +/** + * this file is used to optimize pipeline state management + * pipeline state comparisons are the most significant cause of CPU overhead aside from descriptors, + * so more effort must be taken to reduce it by any means + */ +#include "zink_types.h" +#include "zink_pipeline.h" +#include "zink_program.h" +#include "zink_screen.h" + +/* runtime-optimized pipeline state hashing */ +template <zink_dynamic_state DYNAMIC_STATE> +static uint32_t +hash_gfx_pipeline_state(const void *key, struct zink_screen *screen) +{ + const struct zink_gfx_pipeline_state *state = (const struct zink_gfx_pipeline_state *)key; + uint32_t hash = _mesa_hash_data(key, screen->have_full_ds3 ? + offsetof(struct zink_gfx_pipeline_state, sample_mask) : + offsetof(struct zink_gfx_pipeline_state, hash)); + if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) + hash = XXH32(&state->dyn_state3, sizeof(state->dyn_state3), hash); + if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE3) + hash = XXH32(&state->dyn_state2, sizeof(state->dyn_state2), hash); + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) + return hash; + return XXH32(&state->dyn_state1, sizeof(state->dyn_state1), hash); +} + +template <bool HAS_DYNAMIC> +static unsigned +get_pipeline_idx(enum mesa_prim mode, VkPrimitiveTopology vkmode) +{ + /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY specifies that the topology state in + * VkPipelineInputAssemblyStateCreateInfo only specifies the topology class, + * and the specific topology order and adjacency must be set dynamically + * with vkCmdSetPrimitiveTopology before any drawing commands. 
+ */ + if (HAS_DYNAMIC) { + return get_primtype_idx(mode); + } + return vkmode; +} + +/* + VUID-vkCmdBindVertexBuffers2-pStrides-06209 + If pStrides is not NULL each element of pStrides must be either 0 or greater than or equal + to the maximum extent of all vertex input attributes fetched from the corresponding + binding, where the extent is calculated as the VkVertexInputAttributeDescription::offset + plus VkVertexInputAttributeDescription::format size + + * thus, if the stride doesn't meet the minimum requirement for a binding, + * disable the dynamic state here and use a fully-baked pipeline + */ +static bool +check_vertex_strides(struct zink_context *ctx) +{ + const struct zink_vertex_elements_state *ves = ctx->element_state; + for (unsigned i = 0; i < ves->hw_state.num_bindings; i++) { + const struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ves->hw_state.binding_map[i]; + unsigned stride = vb->buffer.resource ? ves->hw_state.b.strides[i] : 0; + if (stride && stride < ves->min_stride[i]) + return false; + } + return true; +} + +/* runtime-optimized function to recalc pipeline state and find a usable pipeline: + * in theory, zink supports many feature levels, + * but it's important to provide a more optimized codepath for drivers that support all the best features + */ +template <zink_dynamic_state DYNAMIC_STATE, bool HAVE_LIB> +VkPipeline +zink_get_gfx_pipeline(struct zink_context *ctx, + struct zink_gfx_program *prog, + struct zink_gfx_pipeline_state *state, + enum mesa_prim mode) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + bool uses_dynamic_stride = state->uses_dynamic_stride; + + VkPrimitiveTopology vkmode = zink_primitive_topology(mode); + const unsigned idx = screen->info.dynamic_state3_props.dynamicPrimitiveTopologyUnrestricted ? 
+ 0 : + get_pipeline_idx<DYNAMIC_STATE >= ZINK_DYNAMIC_STATE>(mode, vkmode); + assert(idx <= ARRAY_SIZE(prog->pipelines[0])); + if (!state->dirty && !state->modules_changed && + ((DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) && !ctx->vertex_state_changed) && + idx == state->idx) + return state->pipeline; + + struct hash_entry *entry = NULL; + + /* recalc the base pipeline state hash */ + if (state->dirty) { + if (state->pipeline) //avoid on first hash + state->final_hash ^= state->hash; + state->hash = hash_gfx_pipeline_state<DYNAMIC_STATE>(state, screen); + state->final_hash ^= state->hash; + state->dirty = false; + } + /* extra safety asserts for optimal path to catch refactoring bugs */ + if (prog->optimal_keys) { + ASSERTED const union zink_shader_key_optimal *opt = (union zink_shader_key_optimal*)&prog->last_variant_hash; + ASSERTED uint32_t sanitized = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + assert(opt->val == sanitized); + assert(state->optimal_key == sanitized); + } + /* recalc vertex state if missing optimal extensions */ + if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT && ctx->vertex_state_changed) { + if (state->pipeline) + state->final_hash ^= state->vertex_hash; + /* even if dynamic stride is available, it may not be usable with the current pipeline */ + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) + uses_dynamic_stride = check_vertex_strides(ctx); + if (!uses_dynamic_stride) { + uint32_t hash = 0; + /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */ + uint32_t vertex_buffers_enabled_mask = state->vertex_buffers_enabled_mask; + hash = XXH32(&vertex_buffers_enabled_mask, sizeof(uint32_t), hash); + + for (unsigned i = 0; i < state->element_state->num_bindings; i++) { + const unsigned buffer_id = ctx->element_state->hw_state.binding_map[i]; + struct pipe_vertex_buffer 
*vb = ctx->vertex_buffers + buffer_id; + state->vertex_strides[buffer_id] = vb->buffer.resource ? state->element_state->b.strides[i] : 0; + hash = XXH32(&state->vertex_strides[buffer_id], sizeof(uint32_t), hash); + } + state->vertex_hash = hash ^ state->element_state->hash; + } else + state->vertex_hash = state->element_state->hash; + state->final_hash ^= state->vertex_hash; + } + state->modules_changed = false; + state->uses_dynamic_stride = uses_dynamic_stride; + state->idx = idx; + ctx->vertex_state_changed = false; + + const int rp_idx = state->render_pass ? 1 : 0; + /* shortcut for reusing previous pipeline across program changes */ + if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) { + if (prog->last_finalized_hash[rp_idx][idx] == state->final_hash && + !prog->inline_variants && likely(prog->last_pipeline[rp_idx][idx]) && + /* this data is too big to compare in the fast-path */ + likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { + state->pipeline = prog->last_pipeline[rp_idx][idx]->pipeline; + return state->pipeline; + } + } + entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, state); + + if (!entry) { + /* always wait on async precompile/cache fence */ + util_queue_fence_wait(&prog->base.cache_fence); + struct zink_gfx_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(zink_gfx_pipeline_cache_entry); + if (!pc_entry) + return VK_NULL_HANDLE; + /* cache entries must have all state needed to construct pipelines + * TODO: maybe optimize this since all these values aren't actually needed + */ + memcpy(&pc_entry->state, state, sizeof(*state)); + pc_entry->state.rendering_info.pColorAttachmentFormats = pc_entry->state.rendering_formats; + pc_entry->prog = prog; + /* init the optimized background compile fence */ + util_queue_fence_init(&pc_entry->fence); + entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, 
pc_entry); + if (prog->base.uses_shobj && !prog->is_separable) { + memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs)); + zink_gfx_program_compile_queue(ctx, pc_entry); + } else if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) { + /* this is the graphics pipeline library path: find/construct all partial pipelines */ + simple_mtx_lock(&prog->libs->lock); + struct set_entry *he = _mesa_set_search(&prog->libs->libs, &ctx->gfx_pipeline_state.optimal_key); + struct zink_gfx_library_key *gkey; + if (he) { + gkey = (struct zink_gfx_library_key *)he->key; + } else { + assert(!prog->is_separable); + gkey = zink_create_pipeline_lib(screen, prog, &ctx->gfx_pipeline_state); + } + simple_mtx_unlock(&prog->libs->lock); + struct zink_gfx_input_key *ikey = DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT ? + zink_find_or_create_input_dynamic(ctx, vkmode) : + zink_find_or_create_input(ctx, vkmode); + struct zink_gfx_output_key *okey = DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && screen->have_full_ds3 ? + zink_find_or_create_output_ds3(ctx) : + zink_find_or_create_output(ctx); + /* partial pipelines are stored to the cache entry for async optimized pipeline compiles */ + pc_entry->gpl.ikey = ikey; + pc_entry->gpl.gkey = gkey; + pc_entry->gpl.okey = okey; + /* try to hit optimized compile cache first if possible */ + if (!prog->is_separable) + pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, &gkey->pipeline, 1, okey->pipeline, true, true); + if (!pc_entry->pipeline) { + /* create the non-optimized pipeline first using fast-linking to avoid stuttering */ + pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, &gkey->pipeline, 1, okey->pipeline, false, false); + if (!prog->is_separable) + /* trigger async optimized pipeline compile if this was the fast-linked unoptimized pipeline */ + zink_gfx_program_compile_queue(ctx, pc_entry); + } + } else { + /* optimize by default only when expecting precompiles in order to reduce 
stuttering */ + if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) + pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB, NULL); + else + pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, NULL, vkmode, !HAVE_LIB, NULL); + if (HAVE_LIB && !prog->is_separable) + /* trigger async optimized pipeline compile if this was an unoptimized pipeline */ + zink_gfx_program_compile_queue(ctx, pc_entry); + } + if (pc_entry->pipeline == VK_NULL_HANDLE) + return VK_NULL_HANDLE; + + zink_screen_update_pipeline_cache(screen, &prog->base, false); + } + + struct zink_gfx_pipeline_cache_entry *cache_entry = (struct zink_gfx_pipeline_cache_entry *)entry->data; + state->pipeline = cache_entry->pipeline; + /* update states for fastpath */ + if (DYNAMIC_STATE >= ZINK_DYNAMIC_VERTEX_INPUT) { + prog->last_finalized_hash[rp_idx][idx] = state->final_hash; + prog->last_pipeline[rp_idx][idx] = cache_entry; + } + return state->pipeline; +} + +/* runtime-optimized pipeline state comparisons */ +template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK> +static bool +equals_gfx_pipeline_state(const void *a, const void *b) +{ + const struct zink_gfx_pipeline_state *sa = (const struct zink_gfx_pipeline_state *)a; + const struct zink_gfx_pipeline_state *sb = (const struct zink_gfx_pipeline_state *)b; + if (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT) { + if (sa->uses_dynamic_stride != sb->uses_dynamic_stride) + return false; + } + if (DYNAMIC_STATE == ZINK_PIPELINE_NO_DYNAMIC_STATE || + (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT && !sa->uses_dynamic_stride)) { + if (sa->vertex_buffers_enabled_mask != sb->vertex_buffers_enabled_mask) + return false; + /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */ + uint32_t mask_a = sa->vertex_buffers_enabled_mask; + uint32_t mask_b = 
sb->vertex_buffers_enabled_mask; + while (mask_a || mask_b) { + unsigned idx_a = u_bit_scan(&mask_a); + unsigned idx_b = u_bit_scan(&mask_b); + if (sa->vertex_strides[idx_a] != sb->vertex_strides[idx_b]) + return false; + } + } + + /* each dynamic state extension has its own struct on the pipeline state to compare + * if all extensions are supported, none of them are accessed + */ + if (DYNAMIC_STATE == ZINK_PIPELINE_NO_DYNAMIC_STATE) { + if (memcmp(&sa->dyn_state1, &sb->dyn_state1, offsetof(struct zink_pipeline_dynamic_state1, depth_stencil_alpha_state))) + return false; + if (!!sa->dyn_state1.depth_stencil_alpha_state != !!sb->dyn_state1.depth_stencil_alpha_state || + (sa->dyn_state1.depth_stencil_alpha_state && + memcmp(sa->dyn_state1.depth_stencil_alpha_state, sb->dyn_state1.depth_stencil_alpha_state, + sizeof(struct zink_depth_stencil_alpha_hw_state)))) + return false; + } + if (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_STATE3) { + if (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_STATE2) { + if (memcmp(&sa->dyn_state2, &sb->dyn_state2, sizeof(sa->dyn_state2))) + return false; + } + if (memcmp(&sa->dyn_state3, &sb->dyn_state3, sizeof(sa->dyn_state3))) + return false; + } else if (DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_STATE2_PCP && + DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2_PCP && + DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_STATE3_PCP && + DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT_PCP && + (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) && + !(STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL))) { + if (sa->dyn_state2.vertices_per_patch != sb->dyn_state2.vertices_per_patch) + return false; + } + /* optimal keys are the fastest path: only a single uint32_t comparison for all shader module variants */ + if (STAGE_MASK & STAGE_MASK_OPTIMAL) { + if (sa->optimal_key != sb->optimal_key) + return false; + if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) { + if (sa->shadow != sb->shadow) + return false; + } + } else { + if (STAGE_MASK & 
BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) { + if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL]) + return false; + } + if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) { + if (sa->modules[MESA_SHADER_TESS_EVAL] != sb->modules[MESA_SHADER_TESS_EVAL]) + return false; + } + if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_GEOMETRY)) { + if (sa->modules[MESA_SHADER_GEOMETRY] != sb->modules[MESA_SHADER_GEOMETRY]) + return false; + } + if (sa->modules[MESA_SHADER_VERTEX] != sb->modules[MESA_SHADER_VERTEX]) + return false; + if (sa->modules[MESA_SHADER_FRAGMENT] != sb->modules[MESA_SHADER_FRAGMENT]) + return false; + } + /* the base pipeline state is a 12 byte comparison */ + return !memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash)); +} + +/* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */ +template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK> +static equals_gfx_pipeline_state_func +get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle) +{ + if (optimal_keys) { + if (shadow_needs_shader_swizzle) + return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL | STAGE_MASK_OPTIMAL_SHADOW>; + return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL>; + } + return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK>; +} + +template <zink_pipeline_dynamic_state DYNAMIC_STATE> +static equals_gfx_pipeline_state_func +get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys) +{ + bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0; + unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT); + if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) { + if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) + vertex_stages &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + } + if (vertex_stages & 
BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) { + if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT)) + /* all stages */ + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle); + if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY)) + /* tess only: includes generated tcs too */ + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle); + if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) + /* geom only */ + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle); + } + if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL))) + /* all stages but tcs */ + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle); + if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL))) + /* tess only: generated tcs */ + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle); + if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) + /* geom only */ + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle); + return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE, + BITFIELD_BIT(MESA_SHADER_VERTEX) | 
BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle); +} + +equals_gfx_pipeline_state_func +zink_get_gfx_pipeline_eq_func(struct zink_screen *screen, struct zink_gfx_program *prog) +{ + if (screen->info.have_EXT_extended_dynamic_state) { + if (screen->info.have_EXT_extended_dynamic_state2) { + if (screen->info.have_EXT_extended_dynamic_state3) { + if (screen->info.have_EXT_vertex_input_dynamic_state) { + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT_PCP>(prog, screen->optimal_keys); + else + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT>(prog, screen->optimal_keys); + } else { + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE3_PCP>(prog, screen->optimal_keys); + else + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE3>(prog, screen->optimal_keys); + } + } + if (screen->info.have_EXT_vertex_input_dynamic_state) { + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2_PCP>(prog, screen->optimal_keys); + else + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2>(prog, screen->optimal_keys); + } else { + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE2_PCP>(prog, screen->optimal_keys); + else + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE2>(prog, screen->optimal_keys); + } + } + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE>(prog, screen->optimal_keys); + } + return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_NO_DYNAMIC_STATE>(prog, screen->optimal_keys); +} diff --git a/src/gallium/drivers/zink/zink_public.h 
b/src/gallium/drivers/zink/zink_public.h index a5a4f6bca42..cb3bf6e7daf 100644 --- a/src/gallium/drivers/zink/zink_public.h +++ b/src/gallium/drivers/zink/zink_public.h @@ -29,7 +29,7 @@ struct sw_winsys; struct pipe_screen_config; struct pipe_screen * -zink_create_screen(struct sw_winsys *winsys); +zink_create_screen(struct sw_winsys *winsys, const struct pipe_screen_config *config); struct pipe_screen * zink_drm_create_screen(int fd, const struct pipe_screen_config *config); diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c index 8b8d1cc44c3..589d8288293 100644 --- a/src/gallium/drivers/zink/zink_query.c +++ b/src/gallium/drivers/zink/zink_query.c @@ -1,55 +1,82 @@ #include "zink_query.h" #include "zink_context.h" -#include "zink_fence.h" +#include "zink_clear.h" +#include "zink_program.h" #include "zink_resource.h" #include "zink_screen.h" -#include "util/hash_table.h" -#include "util/set.h" #include "util/u_dump.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#if defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_PPC_64) || defined(PIPE_ARCH_AARCH64) || defined(PIPE_ARCH_MIPS64) -#define NUM_QUERIES 5000 -#else #define NUM_QUERIES 500 -#endif + +#define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0) + +struct zink_query_pool { + struct list_head list; + VkQueryType vk_query_type; + VkQueryPipelineStatisticFlags pipeline_stats; + VkQueryPool query_pool; + unsigned last_range; + unsigned refcount; +}; struct zink_query_buffer { struct list_head list; unsigned num_results; - struct pipe_resource *buffer; - struct pipe_resource *xfb_buffers[PIPE_MAX_VERTEX_STREAMS - 1]; + struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS]; +}; + +struct zink_vk_query { + struct zink_query_pool *pool; + unsigned query_id; + bool needs_reset; + bool started; + uint32_t refcount; +}; + +struct zink_query_start { + union { + struct { + bool have_gs; + bool have_xfb; + bool was_line_loop; + }; + uint32_t data; + }; + struct 
zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS]; }; struct zink_query { struct threaded_query base; enum pipe_query_type type; - VkQueryPool query_pool; - VkQueryPool xfb_query_pool[PIPE_MAX_VERTEX_STREAMS - 1]; //stream 0 is in the base pool - unsigned curr_query, last_start; + /* Everytime the gallium query needs + * another vulkan query, add a new start. + */ + struct util_dynarray starts; + unsigned start_offset; VkQueryType vkqtype; unsigned index; bool precise; - bool xfb_running; - bool xfb_overflow; bool active; /* query is considered active by vk */ bool needs_reset; /* query is considered active by vk and cannot be destroyed */ bool dead; /* query should be destroyed when its fence finishes */ bool needs_update; /* query needs to update its qbos */ + bool needs_rast_discard_workaround; /* query needs discard disabled */ + bool suspended; + bool started_in_rp; //needs to be stopped in rp struct list_head active_list; struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */ - bool have_gs[NUM_QUERIES]; /* geometry shaders use GEOMETRY_SHADER_PRIMITIVES_BIT */ - bool have_xfb[NUM_QUERIES]; /* xfb was active during this query */ + bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */ - struct zink_batch_usage *batch_id; //batch that the query was started in + struct zink_batch_usage *batch_uses; //batch that the query was started in struct list_head buffers; union { @@ -61,34 +88,20 @@ struct zink_query { bool predicate_dirty; }; -static void -update_qbo(struct zink_context *ctx, struct zink_query *q); -static void -reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q); +static const struct pipe_driver_query_info zink_specific_queries[] = { + {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }}, +}; -static inline unsigned -get_num_results(enum pipe_query_type query_type) +static inline int +get_num_starts(struct zink_query *q) { - switch (query_type) { - case 
PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - case PIPE_QUERY_TIME_ELAPSED: - case PIPE_QUERY_TIMESTAMP: - case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: - return 1; - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_PRIMITIVES_EMITTED: - return 2; - default: - debug_printf("unknown query: %s\n", - util_str_query_type(query_type, true)); - unreachable("zink: unknown query type"); - } + return util_dynarray_num_elements(&q->starts, struct zink_query_start); } +static void +update_query_id(struct zink_context *ctx, struct zink_query *q); + + static VkQueryPipelineStatisticFlags pipeline_statistic_convert(enum pipe_statistics_query_index idx) { @@ -110,6 +123,164 @@ pipeline_statistic_convert(enum pipe_statistics_query_index idx) } static void +begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index, + VkQueryControlFlags flags) +{ + struct zink_batch *batch = &ctx->batch; + if (!vkq->started) { + VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, + vkq->pool->query_pool, + vkq->query_id, + flags, + index); + vkq->started = true; + } +} + +static void +end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index) +{ + struct zink_batch *batch = &ctx->batch; + if (vkq->started) { + VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, + vkq->pool->query_pool, + vkq->query_id, index); + vkq->started = false; + } +} + +static void +reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq) +{ + struct zink_batch *batch = &ctx->batch; + if (vkq->needs_reset) { + VKCTX(CmdResetQueryPool)(batch->state->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1); + batch->state->has_barriers = true; + } + vkq->needs_reset = false; +} + +void +zink_context_destroy_query_pools(struct zink_context *ctx) +{ + struct zink_screen *screen = 
zink_screen(ctx->base.screen); + list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) { + VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL); + list_del(&pool->list); + FREE(pool); + } +} + +static struct zink_query_pool * +find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx) +{ + VkQueryPipelineStatisticFlags pipeline_stats = 0; + if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) + pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT; + else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE) + pipeline_stats = pipeline_statistic_convert(q->index); + + VkQueryType vk_query_type = q->vkqtype; + /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */ + if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) { + vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT; + pipeline_stats = 0; + } + + struct zink_screen *screen = zink_screen(ctx->base.screen); + list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) { + if (pool->vk_query_type == vk_query_type) { + if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { + if (pool->pipeline_stats == pipeline_stats) + return pool; + } else + return pool; + } + } + + struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool); + if (!new_pool) + return NULL; + + new_pool->vk_query_type = vk_query_type; + new_pool->pipeline_stats = pipeline_stats; + + VkQueryPoolCreateInfo pool_create = {0}; + pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + pool_create.queryType = vk_query_type; + pool_create.queryCount = NUM_QUERIES; + pool_create.pipelineStatistics = pipeline_stats; + + VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool); + if (status != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateQueryPool failed 
(%s)", vk_Result_to_str(status)); + FREE(new_pool); + return NULL; + } + + list_addtail(&new_pool->list, &ctx->query_pools); + return new_pool; +} + +static void +update_qbo(struct zink_context *ctx, struct zink_query *q); +static void +reset_qbos(struct zink_context *ctx, struct zink_query *q); + + +static bool +is_emulated_primgen(const struct zink_query *q) +{ + return q->type == PIPE_QUERY_PRIMITIVES_GENERATED && + q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT; +} + +static inline unsigned +get_num_query_pools(struct zink_query *q) +{ + if (is_emulated_primgen(q)) + return 2; + return 1; +} + +static inline unsigned +get_num_queries(struct zink_query *q) +{ + if (is_emulated_primgen(q)) + return 2; + if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) + return PIPE_MAX_VERTEX_STREAMS; + return 1; +} + +static inline unsigned +get_num_results(struct zink_query *q) +{ + if (q->type < PIPE_QUERY_DRIVER_SPECIFIC && + q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) + return 1; + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: + return 1; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_PRIMITIVES_EMITTED: + return 2; + default: + debug_printf("unknown query: %s\n", + util_str_query_type(q->type, true)); + unreachable("zink: unknown query type"); + } +} + +static void timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp) { /* The number of valid bits in a timestamp value is determined by @@ -123,11 +294,11 @@ timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp) * can be obtained from VkPhysicalDeviceLimits::timestampPeriod * - 17.5. 
Timestamp Queries */ - *timestamp *= screen->info.props.limits.timestampPeriod; + *timestamp *= (double)screen->info.props.limits.timestampPeriod; } static VkQueryType -convert_query_type(unsigned query_type, bool *precise) +convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise) { *precise = false; switch (query_type) { @@ -140,8 +311,11 @@ convert_query_type(unsigned query_type, bool *precise) case PIPE_QUERY_TIME_ELAPSED: case PIPE_QUERY_TIMESTAMP: return VK_QUERY_TYPE_TIMESTAMP; - case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: case PIPE_QUERY_PRIMITIVES_GENERATED: + return screen->info.have_EXT_primitives_generated_query ? + VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT : + VK_QUERY_TYPE_PIPELINE_STATISTICS; + case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: return VK_QUERY_TYPE_PIPELINE_STATISTICS; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: @@ -157,7 +331,7 @@ convert_query_type(unsigned query_type, bool *precise) static bool needs_stats_list(struct zink_query *query) { - return query->type == PIPE_QUERY_PRIMITIVES_GENERATED || + return is_emulated_primgen(query) || query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE; } @@ -183,22 +357,6 @@ is_bool_query(struct zink_query *query) query->type == PIPE_QUERY_GPU_FINISHED; } -static void -qbo_sync_from_prev(struct zink_context *ctx, struct zink_query *query, unsigned id_offset, unsigned last_start) -{ - assert(id_offset); - - struct zink_query_buffer *prev = list_last_entry(&query->buffers, struct zink_query_buffer, list); - unsigned result_size = get_num_results(query->type) * sizeof(uint64_t); - /* this is get_buffer_offset() but without the zink_query object */ - unsigned qbo_offset = last_start * get_num_results(query->type) * sizeof(uint64_t); - query->curr_query = id_offset; - query->curr_qbo->num_results = id_offset; - zink_copy_buffer(ctx, zink_resource(query->curr_qbo->buffer), 
zink_resource(prev->buffer), 0, - qbo_offset, - id_offset * result_size); -} - static bool qbo_append(struct pipe_screen *screen, struct zink_query *query) { @@ -207,60 +365,69 @@ qbo_append(struct pipe_screen *screen, struct zink_query *query) struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer); if (!qbo) return false; - qbo->buffer = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER, - PIPE_USAGE_STAGING, - /* this is the maximum possible size of the results in a given buffer */ - NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t)); - if (!qbo->buffer) - goto fail; - if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) { - /* need separate xfb buffer */ - qbo->xfb_buffers[0] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER, - PIPE_USAGE_STAGING, - /* this is the maximum possible size of the results in a given buffer */ - NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t)); - if (!qbo->xfb_buffers[0]) + int num_buffers = get_num_queries(query); + + for (unsigned i = 0; i < num_buffers; i++) { + qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER, + PIPE_USAGE_STAGING, + /* this is the maximum possible size of the results in a given buffer */ + (query->type == PIPE_QUERY_TIMESTAMP ? 
1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t)); + if (!qbo->buffers[i]) goto fail; - } else if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - /* need to monitor all xfb streams */ - for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) { - /* need separate xfb buffer */ - qbo->xfb_buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER, - PIPE_USAGE_STAGING, - /* this is the maximum possible size of the results in a given buffer */ - NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t)); - if (!qbo->xfb_buffers[i]) - goto fail; - } } list_addtail(&qbo->list, &query->buffers); return true; fail: - pipe_resource_reference(&qbo->buffer, NULL); - for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) - pipe_resource_reference(&qbo->xfb_buffers[i], NULL); + for (unsigned i = 0; i < num_buffers; i++) + pipe_resource_reference(&qbo->buffers[i], NULL); FREE(qbo); return false; } static void -destroy_query(struct zink_screen *screen, struct zink_query *query) +unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool) +{ + if (!pool || --pool->refcount) + return; + util_dynarray_append(&ctx->batch.state->dead_querypools, VkQueryPool, pool->query_pool); + if (list_is_linked(&pool->list)) + list_del(&pool->list); + FREE(pool); +} + +static void +unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq) +{ + if (!vkq) + return; + unref_vk_pool(ctx, vkq->pool); + vkq->refcount--; + if (vkq->refcount == 0) + FREE(vkq); +} + +static void +destroy_query(struct zink_context *ctx, struct zink_query *query) { - assert(zink_screen_usage_check_completion(screen, query->batch_id)); - if (query->query_pool) - VKSCR(DestroyQueryPool)(screen->dev, query->query_pool, NULL); + ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen); + assert(zink_screen_usage_check_completion(screen, query->batch_uses)); struct zink_query_buffer *qbo, *next; + + struct zink_query_start *starts = query->starts.data; + 
unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start); + for (unsigned j = 0; j < num_starts; j++) { + for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) { + unref_vk_query(ctx, starts[j].vkq[i]); + } + } + + util_dynarray_fini(&query->starts); LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) { - pipe_resource_reference(&qbo->buffer, NULL); - for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) - pipe_resource_reference(&qbo->xfb_buffers[i], NULL); + for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++) + pipe_resource_reference(&qbo->buffers[i], NULL); FREE(qbo); } - for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) { - if (query->xfb_query_pool[i]) - VKSCR(DestroyQueryPool)(screen->dev, query->xfb_query_pool[i], NULL); - } pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL); FREE(query); } @@ -272,13 +439,66 @@ reset_qbo(struct zink_query *q) q->curr_qbo->num_results = 0; } +static void +query_pool_get_range(struct zink_context *ctx, struct zink_query *q) +{ + bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP; + struct zink_query_start *start; + int num_queries = get_num_queries(q); + if (!is_timestamp || get_num_starts(q) == 0) { + size_t size = q->starts.capacity; + start = util_dynarray_grow(&q->starts, struct zink_query_start, 1); + if (size != q->starts.capacity) { + /* when resizing, always zero the new data to avoid garbage */ + uint8_t *data = q->starts.data; + memset(data + size, 0, q->starts.capacity - size); + } + } else { + start = util_dynarray_top_ptr(&q->starts, struct zink_query_start); + } + start->data = 0; + + unsigned num_pools = get_num_query_pools(q); + for (unsigned i = 0; i < num_queries; i++) { + int pool_idx = num_pools > 1 ? i : 0; + /* try and find the active query for this */ + struct zink_vk_query *vkq; + int xfb_idx = num_queries == 4 ? 
i : q->index; + if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT || + (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) { + vkq = ctx->curr_xfb_queries[xfb_idx]; + vkq->refcount++; + vkq->pool->refcount++; + } else { + struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx); + if (pool->last_range == NUM_QUERIES) { + list_del(&pool->list); + pool = find_or_allocate_qp(ctx, q, pool_idx); + } + vkq = CALLOC_STRUCT(zink_vk_query); + if (!vkq) { + mesa_loge("ZINK: failed to allocate vkq!"); + return; + } + + pool->refcount++; + vkq->refcount = 1; + vkq->needs_reset = true; + vkq->pool = pool; + vkq->started = false; + vkq->query_id = pool->last_range++; + } + unref_vk_query(ctx, start->vkq[i]); + start->vkq[i] = vkq; + } +} + static struct pipe_query * zink_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) { struct zink_screen *screen = zink_screen(pctx->screen); struct zink_query *query = CALLOC_STRUCT(zink_query); - VkQueryPoolCreateInfo pool_create = {0}; if (!query) return NULL; @@ -286,50 +506,37 @@ zink_create_query(struct pipe_context *pctx, query->index = index; query->type = query_type; - if (query->type == PIPE_QUERY_GPU_FINISHED) + + if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC) return (struct pipe_query *)query; - query->vkqtype = convert_query_type(query_type, &query->precise); + + if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) + return (struct pipe_query *)query; + query->vkqtype = convert_query_type(screen, query_type, &query->precise); if (query->vkqtype == -1) return NULL; + util_dynarray_init(&query->starts, NULL); + assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION); - query->curr_query = 0; + /* use emulated path for drivers without full support */ + if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index && + !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams) + query->vkqtype = 
VK_QUERY_TYPE_PIPELINE_STATISTICS; - pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - pool_create.queryType = query->vkqtype; - pool_create.queryCount = NUM_QUERIES; - if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) - pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | - VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT; - else if (query_type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE) - pool_create.pipelineStatistics = pipeline_statistic_convert(index); - - VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->query_pool); - if (status != VK_SUCCESS) - goto fail; - if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) { - /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */ - pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - pool_create.queryType = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT; - pool_create.queryCount = NUM_QUERIES; - - status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[0]); - if (status != VK_SUCCESS) - goto fail; - } else if (query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - /* need to monitor all xfb streams */ - for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) { - status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[i]); - if (status != VK_SUCCESS) - goto fail; - } + if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) { + query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard; + } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) { + query->needs_rast_discard_workaround = true; } + if (!qbo_append(pctx->screen, query)) goto fail; struct zink_batch *batch = &zink_context(pctx)->batch; batch->has_work = true; query->needs_reset = true; + query->predicate_dirty = true; if (query->type == PIPE_QUERY_TIMESTAMP) { query->active = true; /* defer 
pool reset until end_query since we're guaranteed to be threadsafe then */ @@ -337,7 +544,7 @@ zink_create_query(struct pipe_context *pctx, } return (struct pipe_query *)query; fail: - destroy_query(screen, query); + destroy_query(zink_context(pctx), query); return NULL; } @@ -345,37 +552,39 @@ static void zink_destroy_query(struct pipe_context *pctx, struct pipe_query *q) { - struct zink_screen *screen = zink_screen(pctx->screen); struct zink_query *query = (struct zink_query *)q; /* only destroy if this query isn't active on any batches, * otherwise just mark dead and wait */ - if (query->batch_id) { - p_atomic_set(&query->dead, true); + if (query->batch_uses) { + query->dead = true; return; } - destroy_query(screen, query); + destroy_query(zink_context(pctx), query); } void -zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query) +zink_prune_query(struct zink_batch_state *bs, struct zink_query *query) { - if (!zink_batch_usage_matches(query->batch_id, bs)) + if (!zink_batch_usage_matches(query->batch_uses, bs)) return; - query->batch_id = NULL; - if (p_atomic_read(&query->dead)) - destroy_query(screen, query); + query->batch_uses = NULL; + if (query->dead) + destroy_query(bs->ctx, query); } static void check_query_results(struct zink_query *query, union pipe_query_result *result, - int num_results, uint64_t *results, uint64_t *xfb_results) + int num_starts, uint64_t *results, uint64_t *xfb_results) { uint64_t last_val = 0; - int result_size = get_num_results(query->type); - for (int i = 0; i < num_results * result_size; i += result_size) { + int result_size = get_num_results(query); + int idx = 0; + util_dynarray_foreach(&query->starts, struct zink_query_start, start) { + unsigned i = idx * result_size; + idx++; switch (query->type) { case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: @@ -396,11 +605,13 @@ check_query_results(struct zink_query *query, union pipe_query_result *result, 
result->u64 += results[i]; break; case PIPE_QUERY_PRIMITIVES_GENERATED: - if (query->have_xfb[query->last_start + i / 2] || query->index) + if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) + result->u64 += results[i]; + else if (start->have_xfb || query->index) result->u64 += xfb_results[i + 1]; else - /* if a given draw had a geometry shader, we need to use the second result */ - result->u64 += results[i + query->have_gs[query->last_start + i / 2]]; + /* if a given draw had a geometry shader, we need to use the first result */ + result->u64 += results[i + !start->have_gs]; break; case PIPE_QUERY_PRIMITIVES_EMITTED: /* A query pool created with this type will capture 2 integers - @@ -417,11 +628,18 @@ check_query_results(struct zink_query *query, union pipe_query_result *result, * for the specified vertex stream output from the last vertex processing stage. * - from VK_EXT_transform_feedback spec */ - if (query->have_xfb[query->last_start + i / 2]) + if (start->have_xfb) result->b |= results[i] != results[i + 1]; break; case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: - result->u64 += results[i]; + switch (query->index) { + case PIPE_STAT_QUERY_IA_VERTICES: + result->u64 += start->was_line_loop ? 
results[i] / 2 : results[i]; + break; + default: + result->u64 += results[i]; + break; + } break; default: @@ -450,60 +668,54 @@ get_query_result(struct pipe_context *pctx, util_query_clear_result(result, query->type); - int num_results = query->curr_query - query->last_start; - int result_size = get_num_results(query->type) * sizeof(uint64_t); + int num_starts = get_num_starts(query); + /* no results: return zero */ + if (!num_starts) + return true; + int result_size = get_num_results(query) * sizeof(uint64_t); + int num_maps = get_num_queries(query); struct zink_query_buffer *qbo; - struct pipe_transfer *xfer; + struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 }; LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) { - uint64_t *xfb_results = NULL; - uint64_t *results; - bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT; - results = pipe_buffer_map_range(pctx, qbo->buffer, 0, - (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer); - if (!results) { - if (wait) - debug_printf("zink: qbo read failed!"); - return false; - } - struct pipe_transfer *xfb_xfer = NULL; - if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) { - xfb_results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[0], 0, - qbo->num_results * result_size, flags, &xfb_xfer); - if (!xfb_results) { + uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL }; + bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP; + if (!qbo->num_results) + continue; + + for (unsigned i = 0; i < num_maps; i++) { + results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0, + (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]); + if (!results[i]) { if (wait) - debug_printf("zink: xfb qbo read failed!"); - pipe_buffer_unmap(pctx, xfer); - return false; + debug_printf("zink: qbo read failed!"); + goto fail; } } - check_query_results(query, result, is_timestamp ? 
1 : qbo->num_results, results, xfb_results); - pipe_buffer_unmap(pctx, xfer); - if (xfb_xfer) - pipe_buffer_unmap(pctx, xfb_xfer); if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers) && !result->b; i++) { - uint64_t *results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[i], - 0, - qbo->num_results * result_size, flags, &xfer); - if (!results) { - if (wait) - debug_printf("zink: qbo read failed!"); - return false; - } - check_query_results(query, result, num_results, results, xfb_results); - pipe_buffer_unmap(pctx, xfer); + for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) { + check_query_results(query, result, num_starts, results[i], NULL); } - /* if overflow is detected we can stop */ - if (result->b) - break; - } + } else + check_query_results(query, result, num_starts, results[0], results[1]); + + for (unsigned i = 0 ; i < num_maps; i++) + pipe_buffer_unmap(pctx, xfer[i]); + + /* if overflow is detected we can stop */ + if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b) + break; } if (is_time_query(query)) timestamp_to_nanoseconds(screen, &result->u64); return true; +fail: + for (unsigned i = 0 ; i < num_maps; i++) + if (xfer[i]) + pipe_buffer_unmap(pctx, xfer[i]); + return false; } static void @@ -512,7 +724,7 @@ force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_qu struct pipe_context *pctx = &ctx->base; unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? 
sizeof(uint32_t) : sizeof(uint64_t); struct zink_query *query = (struct zink_query*)pquery; - union pipe_query_result result; + union pipe_query_result result = {0}; if (query->needs_update) update_qbo(ctx, query); @@ -534,14 +746,14 @@ force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_qu u32 = result.b; else u32 = MIN2(limit, result.u64); - pipe_buffer_write(pctx, pres, offset, result_size, &u32); + tc_buffer_write(pctx, pres, offset, result_size, &u32); } else { uint64_t u64; if (is_bool_query(query)) u64 = result.b; else u64 = result.u64; - pipe_buffer_write(pctx, pres, offset, result_size, &u64); + tc_buffer_write(pctx, pres, offset, result_size, &u64); } } @@ -552,49 +764,50 @@ copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, { struct zink_batch *batch = &ctx->batch; unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t); - unsigned base_result_size = get_num_results(query->type) * type_size; + unsigned base_result_size = get_num_results(query) * type_size; unsigned result_size = base_result_size * num_results; if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) result_size += type_size; + + bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results); + zink_batch_no_rp(ctx); /* if it's a single query that doesn't need special handling, we can copy it and be done */ zink_batch_reference_resource_rw(batch, res, true); - zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0); + res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT; + res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size); assert(query_id < NUM_QUERIES); + res->obj->unordered_read = res->obj->unordered_write = false; VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, 
res->obj->buffer, - offset, 0, flags); + offset, base_result_size, flags); + zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker); } static void copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags) { - copy_pool_results_to_buffer(ctx, query, query->query_pool, query->last_start, res, offset, num_results, flags); + struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); + copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags); } + static void -reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q) +reset_query_range(struct zink_context *ctx, struct zink_query *q) +{ + int num_queries = get_num_queries(q); + struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start); + for (unsigned i = 0; i < num_queries; i++) { + reset_vk_query_pool(ctx, start->vkq[i]); + } +} + +static void +reset_qbos(struct zink_context *ctx, struct zink_query *q) { - unsigned last_start = q->last_start; - unsigned id_offset = q->curr_query - q->last_start; - /* This command must only be called outside of a render pass instance - * - * - vkCmdResetQueryPool spec - */ - zink_batch_no_rp(ctx); if (q->needs_update) update_qbo(ctx, q); - VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->query_pool, 0, NUM_QUERIES); - if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED) - VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[0], 0, NUM_QUERIES); - else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) - VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[i], 0, NUM_QUERIES); - } - memset(q->have_gs, 0, sizeof(q->have_gs)); - memset(q->have_xfb, 0, sizeof(q->have_xfb)); - q->last_start = q->curr_query = 0; q->needs_reset = 
false; /* create new qbo for non-timestamp queries: * timestamp queries should never need more than 2 entries in the qbo @@ -605,51 +818,63 @@ reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query reset_qbo(q); else debug_printf("zink: qbo alloc failed on reset!"); - if (id_offset) - qbo_sync_from_prev(ctx, q, id_offset, last_start); } static inline unsigned -get_buffer_offset(struct zink_query *q, struct pipe_resource *pres, unsigned query_id) +get_buffer_offset(struct zink_query *q) { - return (query_id - q->last_start) * get_num_results(q->type) * sizeof(uint64_t); + return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t); } static void update_qbo(struct zink_context *ctx, struct zink_query *q) { struct zink_query_buffer *qbo = q->curr_qbo; - unsigned offset = 0; - uint32_t query_id = q->curr_query - 1; - bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP || q->type == PIPE_QUERY_TIMESTAMP_DISJOINT; + unsigned num_starts = get_num_starts(q); + struct zink_query_start *starts = q->starts.data; + bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP; /* timestamp queries just write to offset 0 always */ - if (!is_timestamp) - offset = get_buffer_offset(q, qbo->buffer, query_id); - copy_pool_results_to_buffer(ctx, q, q->query_pool, query_id, zink_resource(qbo->buffer), - offset, - 1, VK_QUERY_RESULT_64_BIT); - - if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED || - q->type == PIPE_QUERY_PRIMITIVES_GENERATED || - q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) { - copy_pool_results_to_buffer(ctx, q, - q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool, - query_id, - zink_resource(qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer), - get_buffer_offset(q, qbo->xfb_buffers[0] ? 
qbo->xfb_buffers[0] : qbo->buffer, query_id), - 1, VK_QUERY_RESULT_64_BIT); - } - - else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) { - copy_pool_results_to_buffer(ctx, q, q->xfb_query_pool[i], query_id, zink_resource(qbo->xfb_buffers[i]), - get_buffer_offset(q, qbo->xfb_buffers[i], query_id), - 1, VK_QUERY_RESULT_64_BIT); + int num_queries = get_num_queries(q); + unsigned num_results = qbo->num_results; + for (unsigned i = 0; i < num_queries; i++) { + unsigned start_offset = q->start_offset; + while (start_offset < num_starts) { + unsigned num_merged_copies = 0; + VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool; + unsigned base_id = starts[start_offset].vkq[i]->query_id; + /* iterate over all the starts to see how many can be merged */ + for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) { + if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies) + break; + } + assert(num_merged_copies); + unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t); + unsigned offset = is_timestamp ? 0 : cur_offset; + copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id, + zink_resource(qbo->buffers[i]), + offset, + num_merged_copies, + /* + there is an implicit execution dependency from + each such query command to all query commands previously submitted to the same queue. There + is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not + include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before + the results of vkCmdEndQuery are available. + + * - Chapter 18. 
Queries + */ + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + if (!is_timestamp) + q->curr_qbo->num_results += num_merged_copies; + start_offset += num_merged_copies; } } + q->start_offset += q->curr_qbo->num_results - num_results; + + + if (is_timestamp) + q->curr_qbo->num_results = 1; - if (!is_timestamp) - q->curr_qbo->num_results++; q->needs_update = false; } @@ -658,53 +883,82 @@ begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_quer { VkQueryControlFlags flags = 0; + if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC) + return; + + if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->batch.in_rp) { + /* refuse to start CS queries in renderpasses */ + if (!list_is_linked(&q->active_list)) + list_addtail(&q->active_list, &ctx->suspended_queries); + q->suspended = true; + return; + } + + zink_flush_dgc_if_enabled(ctx); + + update_query_id(ctx, q); q->predicate_dirty = true; if (q->needs_reset) - reset_pool(ctx, batch, q); - assert(q->curr_query < NUM_QUERIES); + reset_qbos(ctx, q); + reset_query_range(ctx, q); q->active = true; batch->has_work = true; + + struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start); if (q->type == PIPE_QUERY_TIME_ELAPSED) { - VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, q->query_pool, q->curr_query); - q->curr_query++; - update_qbo(ctx, q); - zink_batch_usage_set(&q->batch_id, batch->state); - _mesa_set_add(batch->state->active_queries, q); + VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id); + if (!batch->in_rp) + update_qbo(ctx, q); + zink_batch_usage_set(&q->batch_uses, batch->state); + _mesa_set_add(&batch->state->active_queries, q); } /* ignore the rest of begin_query for timestamps */ if (is_time_query(q)) return; + + /* A query must either begin 
and end inside the same subpass of a render pass + instance, or must both begin and end outside of a render pass instance + (i.e. contain entire render pass instances). + - 18.2. Query Operation + */ + q->started_in_rp = ctx->batch.in_rp; + if (q->precise) flags |= VK_QUERY_CONTROL_PRECISE_BIT; + if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED || - q->type == PIPE_QUERY_PRIMITIVES_GENERATED || + is_emulated_primgen(q) || q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) { - VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, - q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool, - q->curr_query, - flags, - q->index); - q->xfb_running = true; + struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0]; + assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq); + ctx->curr_xfb_queries[q->index] = vkq; + + begin_vk_query_indexed(ctx, vkq, q->index, flags); } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, - q->query_pool, - q->curr_query, - flags, - 0); - for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) - VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, - q->xfb_query_pool[i], - q->curr_query, - flags, - i + 1); - q->xfb_running = true; + for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) { + assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]); + ctx->curr_xfb_queries[i] = start->vkq[i]; + + begin_vk_query_indexed(ctx, start->vkq[i], i, flags); + } + } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) { + begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags); + } + if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) + VKCTX(CmdBeginQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags); + if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES) { + 
assert(!ctx->vertices_query); + ctx->vertices_query = q; } - if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) - VKCTX(CmdBeginQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query, flags); if (needs_stats_list(q)) list_addtail(&q->stats_list, &ctx->primitives_generated_queries); - zink_batch_usage_set(&q->batch_id, batch->state); - _mesa_set_add(batch->state->active_queries, q); + zink_batch_usage_set(&q->batch_uses, batch->state); + _mesa_set_add(&batch->state->active_queries, q); + if (q->needs_rast_discard_workaround) { + ctx->primitives_generated_active = true; + if (zink_set_rasterizer_discard(ctx, true)) + zink_set_null_fs(ctx); + } } static bool @@ -715,11 +969,28 @@ zink_begin_query(struct pipe_context *pctx, struct zink_context *ctx = zink_context(pctx); struct zink_batch *batch = &ctx->batch; - query->last_start = query->curr_query; /* drop all past results */ reset_qbo(query); - begin_query(ctx, batch, query); + if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION) + ctx->occlusion_query_active = true; + if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS) + ctx->fs_query_active = true; + + query->predicate_dirty = true; + + util_dynarray_clear(&query->starts); + query->start_offset = 0; + + if (batch->in_rp) { + begin_query(ctx, batch, query); + } else { + /* never directly start queries out of renderpass, always defer */ + list_addtail(&query->active_list, &ctx->suspended_queries); + query->suspended = true; + if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) + ctx->primitives_generated_suspended = query->needs_rast_discard_workaround; + } return true; } @@ -727,46 +998,59 @@ zink_begin_query(struct pipe_context *pctx, static void update_query_id(struct zink_context *ctx, struct zink_query *q) { - if (++q->curr_query == NUM_QUERIES) { - /* always reset on start; this ensures we can actually submit the batch that the current query is on */ - 
q->needs_reset = true; - } + query_pool_get_range(ctx, q); ctx->batch.has_work = true; - - if (ctx->batch.in_rp) - q->needs_update = true; - else - update_qbo(ctx, q); + q->has_draws = false; } static void end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q) { + if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC) + return; + + zink_flush_dgc_if_enabled(ctx); + ASSERTED struct zink_query_buffer *qbo = q->curr_qbo; assert(qbo); assert(!is_time_query(q)); q->active = false; + assert(q->started_in_rp == batch->in_rp); + struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start); + if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED || - q->type == PIPE_QUERY_PRIMITIVES_GENERATED || - q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) { - VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, - q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool, - q->curr_query, q->index); - } + is_emulated_primgen(q) || + q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) { + struct zink_vk_query *vkq = start->vkq[1] ? 
start->vkq[1] : start->vkq[0]; + end_vk_query_indexed(ctx, vkq, q->index); + ctx->curr_xfb_queries[q->index] = NULL; + } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->query_pool, q->curr_query, 0); - for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) { - VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->xfb_query_pool[i], q->curr_query, i + 1); + for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) { + end_vk_query_indexed(ctx, start->vkq[i], i); + ctx->curr_xfb_queries[i] = NULL; } + } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) { + end_vk_query_indexed(ctx, start->vkq[0], q->index); } - if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && !is_time_query(q)) - VKCTX(CmdEndQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query); + if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && + q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q)) + VKCTX(CmdEndQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id); + + if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && + q->index == PIPE_STAT_QUERY_IA_VERTICES) + ctx->vertices_query = NULL; if (needs_stats_list(q)) list_delinit(&q->stats_list); - update_query_id(ctx, q); + q->needs_update = true; + if (q->needs_rast_discard_workaround) { + ctx->primitives_generated_active = false; + if (zink_set_rasterizer_discard(ctx, false)) + zink_set_null_fs(ctx); + } } static bool @@ -777,6 +1061,9 @@ zink_end_query(struct pipe_context *pctx, struct zink_query *query = (struct zink_query *)q; struct zink_batch *batch = &ctx->batch; + if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC) + return true; + if (query->type == PIPE_QUERY_GPU_FINISHED) { pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED); return true; @@ -785,18 +1072,41 @@ zink_end_query(struct pipe_context *pctx, /* FIXME: this can be called 
from a thread, but it needs to write to the cmdbuf */ threaded_context_unwrap_sync(pctx); - if (needs_stats_list(query)) + if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION) + ctx->occlusion_query_active = true; + if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS) + ctx->fs_query_active = true; + + bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active); + if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) + ctx->primitives_generated_suspended = false; + + if (list_is_linked(&query->stats_list)) list_delinit(&query->stats_list); + if (query->suspended) { + list_delinit(&query->active_list); + query->suspended = false; + } if (is_time_query(query)) { + update_query_id(ctx, query); if (query->needs_reset) - reset_pool(ctx, batch, query); + reset_qbos(ctx, query); + reset_query_range(ctx, query); + struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - query->query_pool, query->curr_query); - zink_batch_usage_set(&query->batch_id, batch->state); - _mesa_set_add(batch->state->active_queries, query); - update_query_id(ctx, query); - } else if (query->active) + start->vkq[0]->pool->query_pool, start->vkq[0]->query_id); + zink_batch_usage_set(&query->batch_uses, batch->state); + _mesa_set_add(&batch->state->active_queries, query); + query->needs_update = true; + } else if (query->active) { + /* this should be a tc-optimized query end that doesn't split a renderpass */ + if (!query->started_in_rp) + zink_batch_no_rp(ctx); end_query(ctx, batch, query); + } + + if (unset_null_fs) + zink_set_null_fs(ctx); return true; } @@ -810,69 +1120,154 @@ zink_get_query_result(struct pipe_context *pctx, struct zink_query *query = (void*)q; struct zink_context *ctx = zink_context(pctx); + if (query->type == 
PIPE_QUERY_TIMESTAMP_DISJOINT) { + result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0; + result->timestamp_disjoint.disjoint = false; + return true; + } + if (query->type == PIPE_QUERY_GPU_FINISHED) { struct pipe_screen *screen = pctx->screen; result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx, - query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0); + query->fence, wait ? OS_TIMEOUT_INFINITE : 0); return result->b; } - if (query->needs_update) + if (query->type == ZINK_QUERY_RENDER_PASSES) { + result->u64 = ctx->hud.render_passes; + ctx->hud.render_passes = 0; + return true; + } + + if (query->needs_update) { + assert(!ctx->tc || !threaded_query(q)->flushed); update_qbo(ctx, query); + } - if (zink_batch_usage_is_unflushed(query->batch_id)) { + if (zink_batch_usage_is_unflushed(query->batch_uses)) { if (!threaded_query(q)->flushed) pctx->flush(pctx, NULL, 0); if (!wait) return false; - } else if (!threaded_query(q)->flushed && - /* timeline drivers can wait during buffer map */ - !zink_screen(pctx->screen)->info.have_KHR_timeline_semaphore) - zink_batch_usage_check_completion(ctx, query->batch_id); + } return get_query_result(pctx, q, wait, result); } -void -zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch) +static void +suspend_query(struct zink_context *ctx, struct zink_query *query) { - set_foreach(batch->state->active_queries, entry) { + /* if a query isn't active here then we don't need to reactivate it on the next batch */ + if (query->active && !is_time_query(query)) + end_query(ctx, &ctx->batch, query); + if (query->needs_update && !ctx->batch.in_rp) + update_qbo(ctx, query); +} + +static void +suspend_queries(struct zink_context *ctx, bool rp_only) +{ + set_foreach(&ctx->batch.state->active_queries, entry) { struct zink_query *query = (void*)entry->key; - /* if a query isn't active here then we don't need to reactivate it on the next batch */ + if 
(query->suspended || (rp_only && !query->started_in_rp)) + continue; if (query->active && !is_time_query(query)) { - end_query(ctx, batch, query); /* the fence is going to steal the set off the batch, so we have to copy * the active queries onto a list */ list_addtail(&query->active_list, &ctx->suspended_queries); + query->suspended = true; + if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) + ctx->primitives_generated_suspended = query->needs_rast_discard_workaround; } - if (query->needs_update) - update_qbo(ctx, query); - if (query->last_start && query->curr_query > NUM_QUERIES / 2) - reset_pool(ctx, batch, query); + suspend_query(ctx, query); } } void +zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch) +{ + suspend_queries(ctx, false); +} + +void zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch) { struct zink_query *query, *next; LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) { - begin_query(ctx, batch, query); list_delinit(&query->active_list); + query->suspended = false; + if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) + ctx->primitives_generated_suspended = false; + if (query->needs_update && !ctx->batch.in_rp) + update_qbo(ctx, query); + begin_query(ctx, batch, query); + } +} + +void +zink_resume_cs_query(struct zink_context *ctx) +{ + struct zink_query *query, *next; + LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) { + if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) { + list_delinit(&query->active_list); + query->suspended = false; + begin_query(ctx, &ctx->batch, query); + } } } void +zink_query_renderpass_suspend(struct zink_context *ctx) +{ + suspend_queries(ctx, true); +} + +void zink_query_update_gs_states(struct zink_context *ctx) { struct zink_query *query; + bool suspendall = false; + bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY]; + bool have_xfb = !!ctx->num_so_targets; + 
LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) { - assert(query->curr_query < ARRAY_SIZE(query->have_gs)); + struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); assert(query->active); - query->have_gs[query->curr_query] = !!ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; - query->have_xfb[query->curr_query] = !!ctx->num_so_targets; + if (query->has_draws) { + if (last_start->have_gs != have_gs || + last_start->have_xfb != have_xfb) { + suspendall = true; + } + } + } + + if (ctx->vertices_query) { + query = ctx->vertices_query; + struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); + assert(query->active); + if (last_start->was_line_loop != ctx->was_line_loop) { + suspendall = true; + } + } + if (suspendall) { + zink_suspend_queries(ctx, &ctx->batch); + zink_resume_queries(ctx, &ctx->batch); + } + + LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) { + struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); + last_start->have_gs = have_gs; + last_start->have_xfb = have_xfb; + query->has_draws = true; + } + if (ctx->vertices_query) { + query = ctx->vertices_query; + struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); + last_start->was_line_loop = ctx->was_line_loop; + query->has_draws = true; } } @@ -880,19 +1275,29 @@ static void zink_set_active_query_state(struct pipe_context *pctx, bool enable) { struct zink_context *ctx = zink_context(pctx); + /* unordered blits already disable queries */ + if (ctx->unordered_blitting) + return; ctx->queries_disabled = !enable; struct zink_batch *batch = &ctx->batch; if (ctx->queries_disabled) zink_suspend_queries(ctx, batch); - else + else if (ctx->batch.in_rp) zink_resume_queries(ctx, batch); } void +zink_query_sync(struct zink_context *ctx, struct zink_query *query) +{ + if 
(query->needs_update) + update_qbo(ctx, query); +} + +void zink_start_conditional_render(struct zink_context *ctx) { - if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering)) + if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active) return; struct zink_batch *batch = &ctx->batch; VkConditionalRenderingFlagsEXT begin_flags = 0; @@ -902,32 +1307,22 @@ zink_start_conditional_render(struct zink_context *ctx) begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer; begin_info.flags = begin_flags; + ctx->render_condition.query->predicate->obj->unordered_read = false; VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info); zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false); + ctx->render_condition.active = true; } void zink_stop_conditional_render(struct zink_context *ctx) { + zink_flush_dgc_if_enabled(ctx); struct zink_batch *batch = &ctx->batch; zink_clear_apply_conditionals(ctx); - if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering)) + if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active) return; VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf); -} - -bool -zink_check_conditional_render(struct zink_context *ctx) -{ - if (!ctx->render_condition_active) - return true; - assert(ctx->render_condition.query); - - union pipe_query_result result; - zink_get_query_result(&ctx->base, (struct pipe_query*)ctx->render_condition.query, true, &result); - return is_bool_query(ctx->render_condition.query) ? 
- ctx->render_condition.inverted != result.b : - ctx->render_condition.inverted != !!result.u64; + ctx->render_condition.active = false; } static void @@ -941,12 +1336,12 @@ zink_render_condition(struct pipe_context *pctx, zink_batch_no_rp(ctx); VkQueryResultFlagBits flags = 0; + zink_flush_dgc_if_enabled(ctx); if (query == NULL) { /* force conditional clears if they exist */ if (ctx->clears_enabled && !ctx->batch.in_rp) zink_batch_rp(ctx); - if (ctx->batch.in_rp) - zink_stop_conditional_render(ctx); + zink_stop_conditional_render(ctx); ctx->render_condition_active = false; ctx->render_condition.query = NULL; return; @@ -969,14 +1364,21 @@ zink_render_condition(struct pipe_context *pctx, flags |= VK_QUERY_RESULT_WAIT_BIT; flags |= VK_QUERY_RESULT_64_BIT; - int num_results = query->curr_query - query->last_start; - if (query->type != PIPE_QUERY_PRIMITIVES_GENERATED && - !is_so_overflow_query(query)) { - copy_results_to_buffer(ctx, query, res, 0, num_results, flags); + int num_results = get_num_starts(query); + if (num_results) { + if (!is_emulated_primgen(query) && + !is_so_overflow_query(query) && + num_results == 1) { + copy_results_to_buffer(ctx, query, res, 0, num_results, flags); + } else { + /* these need special handling */ + force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0); + } } else { - /* these need special handling */ - force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0); + uint64_t zero = 0; + tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero); } + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT); query->predicate_dirty = false; } ctx->render_condition.inverted = condition; @@ -989,7 +1391,7 @@ zink_render_condition(struct pipe_context *pctx, static void zink_get_query_result_resource(struct pipe_context *pctx, struct pipe_query *pquery, - bool wait, + enum pipe_query_flags flags, enum pipe_query_value_type 
result_type, int index, struct pipe_resource *pres, @@ -1001,8 +1403,15 @@ zink_get_query_result_resource(struct pipe_context *pctx, struct zink_resource *res = zink_resource(pres); unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t); VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT; - unsigned num_queries = query->curr_query - query->last_start; - unsigned query_id = query->last_start; + unsigned num_queries = get_num_starts(query); + + /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */ + uint64_t u64[4] = {0}; + unsigned src_offset = result_size * get_num_results(query); + if (!num_queries) { + tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset); + return; + } if (index == -1) { /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data @@ -1014,31 +1423,45 @@ zink_get_query_result_resource(struct pipe_context *pctx, */ VkQueryResultFlags flag = is_time_query(query) ? 
0 : VK_QUERY_RESULT_PARTIAL_BIT; - if (zink_batch_usage_check_completion(ctx, query->batch_id)) { - uint64_t u64[2] = {0}; - if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, 2 * result_size, u64, - 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag) == VK_SUCCESS) { - pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + result_size); + if (zink_batch_usage_check_completion(ctx, query->batch_uses)) { + struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start); + unsigned query_id = start->vkq[0]->query_id; + VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1, + sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag); + if (result == VK_SUCCESS) { + tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset); return; + } else { + mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result)); } } - struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, result_size * 2); + struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size); copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag); - zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size, result_size); + zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size); pipe_resource_reference(&staging, NULL); return; } + /* + there is an implicit execution dependency from + each such query command to all query commands previously submitted to the same queue. 
There + is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not + include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before + the results of vkCmdEndQuery are available. + + * - Chapter 18. Queries + */ + size_flags |= VK_QUERY_RESULT_WAIT_BIT; if (!is_time_query(query) && !is_bool_query(query)) { - if (num_queries == 1 && query->type != PIPE_QUERY_PRIMITIVES_GENERATED && + if (num_queries == 1 && !is_emulated_primgen(query) && query->type != PIPE_QUERY_PRIMITIVES_EMITTED && !is_bool_query(query)) { if (size_flags == VK_QUERY_RESULT_64_BIT) { if (query->needs_update) update_qbo(ctx, query); /* internal qbo always writes 64bit value so we can just direct copy */ - zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffer), offset, - get_buffer_offset(query, query->curr_qbo->buffer, query->last_start), + zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset, + get_buffer_offset(query), result_size); } else /* have to do a new copy for 32bit */ @@ -1055,16 +1478,33 @@ zink_get_query_result_resource(struct pipe_context *pctx, force_cpu_read(ctx, pquery, result_type, pres, offset); } -static uint64_t -zink_get_timestamp(struct pipe_context *pctx) +uint64_t +zink_get_timestamp(struct pipe_screen *pscreen) { - struct zink_screen *screen = zink_screen(pctx->screen); + struct zink_screen *screen = zink_screen(pscreen); uint64_t timestamp, deviation; - assert(screen->info.have_EXT_calibrated_timestamps); - VkCalibratedTimestampInfoEXT cti = {0}; - cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; - cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; - VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, ×tamp, &deviation); + if (screen->info.have_EXT_calibrated_timestamps) { + VkCalibratedTimestampInfoEXT cti = {0}; + cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; + VkResult result = 
VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, ×tamp, &deviation); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result)); + } + } else { + zink_screen_lock_context(screen); + struct pipe_context *pctx = &screen->copy_context->base; + struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0); + if (!pquery) + return 0; + union pipe_query_result result = {0}; + pctx->begin_query(pctx, pquery); + pctx->end_query(pctx, pquery); + pctx->get_query_result(pctx, pquery, true, &result); + pctx->destroy_query(pctx, pquery); + zink_screen_unlock_context(screen); + timestamp = result.u64; + } timestamp_to_nanoseconds(screen, ×tamp); return timestamp; } @@ -1084,5 +1524,32 @@ zink_context_query_init(struct pipe_context *pctx) pctx->get_query_result_resource = zink_get_query_result_resource; pctx->set_active_query_state = zink_set_active_query_state; pctx->render_condition = zink_render_condition; - pctx->get_timestamp = zink_get_timestamp; +} + +int +zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_group_info *info) +{ + if (!info) + return 1; + + assert(index == 0); + info->name = "Zink counters"; + info->max_active_queries = ARRAY_SIZE(zink_specific_queries); + info->num_queries = ARRAY_SIZE(zink_specific_queries); + + return 1; +} + +int +zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info) +{ + if (!info) + return ARRAY_SIZE(zink_specific_queries); + + assert(index < ARRAY_SIZE(zink_specific_queries)); + *info = zink_specific_queries[index]; + + return 1; } diff --git a/src/gallium/drivers/zink/zink_query.h b/src/gallium/drivers/zink/zink_query.h index 73fd31eeda7..2b96a72c700 100644 --- a/src/gallium/drivers/zink/zink_query.h +++ b/src/gallium/drivers/zink/zink_query.h @@ -25,13 +25,9 @@ #define ZINK_QUERY_H #include <stdbool.h> +#include <inttypes.h> +#include 
"zink_types.h" -struct zink_batch; -struct zink_batch_state; -struct zink_context; -struct zink_fence; -struct zink_query; -struct zink_screen; #ifdef __cplusplus extern "C" { #endif @@ -43,7 +39,15 @@ void zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch); void -zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query); +zink_query_renderpass_suspend(struct zink_context *ctx); + +void +zink_resume_cs_query(struct zink_context *ctx); + +void +zink_prune_query(struct zink_batch_state *bs, struct zink_query *query); +void +zink_query_sync(struct zink_context *ctx, struct zink_query *query); void zink_query_update_gs_states(struct zink_context *ctx); @@ -54,8 +58,19 @@ zink_start_conditional_render(struct zink_context *ctx); void zink_stop_conditional_render(struct zink_context *ctx); -bool -zink_check_conditional_render(struct zink_context *ctx); +void +zink_context_destroy_query_pools(struct zink_context *ctx); +uint64_t +zink_get_timestamp(struct pipe_screen *pscreen); + +int +zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_group_info *info); + +int +zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_render_pass.c b/src/gallium/drivers/zink/zink_render_pass.c index 132dcd0d278..d2b907ba78b 100644 --- a/src/gallium/drivers/zink/zink_render_pass.c +++ b/src/gallium/drivers/zink/zink_render_pass.c @@ -21,63 +21,125 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "zink_context.h" +#include "zink_clear.h" +#include "zink_framebuffer.h" +#include "zink_kopper.h" +#include "zink_query.h" #include "zink_render_pass.h" - +#include "zink_resource.h" #include "zink_screen.h" +#include "zink_surface.h" #include "util/u_memory.h" #include "util/u_string.h" +#include "util/u_blitter.h" + +static VkAttachmentLoadOp +get_rt_loadop(const struct zink_rt_attrib *rt, bool clear) +{ + return clear ? VK_ATTACHMENT_LOAD_OP_CLEAR : + /* TODO: need replicate EXT */ + //rt->resolve || rt->invalid ? + rt->invalid ? + VK_ATTACHMENT_LOAD_OP_DONT_CARE : + VK_ATTACHMENT_LOAD_OP_LOAD; +} + +static VkImageLayout +get_color_rt_layout(const struct zink_rt_attrib *rt) +{ + return rt->feedback_loop ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; +} + +static VkImageLayout +get_zs_rt_layout(const struct zink_rt_attrib *rt) +{ + bool has_clear = rt->clear_color || rt->clear_stencil; + if (rt->feedback_loop) + return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + return rt->needs_write || has_clear ? 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; +} static VkRenderPass -create_render_pass(struct zink_screen *screen, struct zink_render_pass_state *state, struct zink_render_pass_pipeline_state *pstate) +create_render_pass2(struct zink_screen *screen, struct zink_render_pass_state *state, struct zink_render_pass_pipeline_state *pstate) { - VkAttachmentReference color_refs[PIPE_MAX_COLOR_BUFS], zs_ref; - VkAttachmentReference input_attachments[PIPE_MAX_COLOR_BUFS]; - VkAttachmentDescription attachments[PIPE_MAX_COLOR_BUFS + 1]; + VkAttachmentReference2 color_refs[PIPE_MAX_COLOR_BUFS], color_resolves[PIPE_MAX_COLOR_BUFS], zs_ref, zs_resolve; + VkAttachmentReference2 input_attachments[PIPE_MAX_COLOR_BUFS]; + VkAttachmentDescription2 attachments[2 * (PIPE_MAX_COLOR_BUFS + 1)]; VkPipelineStageFlags dep_pipeline = 0; VkAccessFlags dep_access = 0; unsigned input_count = 0; + const unsigned cresolve_offset = state->num_cbufs + state->have_zsbuf; + const unsigned zsresolve_offset = cresolve_offset + state->num_cresolves; pstate->num_attachments = state->num_cbufs; + pstate->num_cresolves = state->num_cresolves; + pstate->num_zsresolves = state->num_zsresolves; + pstate->fbfetch = 0; + pstate->msaa_samples = state->msaa_samples; for (int i = 0; i < state->num_cbufs; i++) { struct zink_rt_attrib *rt = state->rts + i; + attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; + attachments[i].pNext = NULL; attachments[i].flags = 0; pstate->attachments[i].format = attachments[i].format = rt->format; pstate->attachments[i].samples = attachments[i].samples = rt->samples; - attachments[i].loadOp = rt->clear_color ? VK_ATTACHMENT_LOAD_OP_CLEAR : - state->swapchain_init && rt->swapchain ? - VK_ATTACHMENT_LOAD_OP_DONT_CARE : - VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[i].loadOp = get_rt_loadop(rt, rt->clear_color); + + /* TODO: need replicate EXT */ + //attachments[i].storeOp = rt->resolve ? 
VK_ATTACHMENT_STORE_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE; attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; /* if layout changes are ever handled here, need VkAttachmentSampleLocationsEXT */ - VkImageLayout layout = rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkImageLayout layout = get_color_rt_layout(rt); attachments[i].initialLayout = layout; attachments[i].finalLayout = layout; + color_refs[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; + color_refs[i].pNext = NULL; color_refs[i].attachment = i; color_refs[i].layout = layout; + color_refs[i].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; dep_pipeline |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - if (rt->fbfetch) - memcpy(&input_attachments[input_count++], &color_refs[i], sizeof(VkAttachmentReference)); + if (rt->fbfetch) { + memcpy(&input_attachments[input_count++], &color_refs[i], sizeof(VkAttachmentReference2)); + dep_pipeline |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + dep_access |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + pstate->fbfetch = 1; + } dep_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; if (attachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) dep_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + + if (rt->resolve) { + memcpy(&attachments[cresolve_offset + i], &attachments[i], sizeof(VkAttachmentDescription2)); + attachments[cresolve_offset + i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[cresolve_offset + i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[cresolve_offset + i].samples = 1; + memcpy(&color_resolves[i], &color_refs[i], sizeof(VkAttachmentReference2)); + color_resolves[i].attachment = cresolve_offset + i; + if (attachments[cresolve_offset + i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + dep_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + } } int num_attachments = state->num_cbufs; if 
(state->have_zsbuf) { struct zink_rt_attrib *rt = state->rts + state->num_cbufs; - bool has_clear = rt->clear_color || rt->clear_stencil; - VkImageLayout write_layout = rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - VkImageLayout layout = rt->needs_write || has_clear ? write_layout : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + VkImageLayout layout = get_zs_rt_layout(rt); + attachments[num_attachments].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; + attachments[num_attachments].pNext = NULL; attachments[num_attachments].flags = 0; pstate->attachments[num_attachments].format = attachments[num_attachments].format = rt->format; pstate->attachments[num_attachments].samples = attachments[num_attachments].samples = rt->samples; - attachments[num_attachments].loadOp = rt->clear_color ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[num_attachments].loadOp = get_rt_loadop(rt, rt->clear_color); + attachments[num_attachments].stencilLoadOp = get_rt_loadop(rt, rt->clear_stencil); + /* TODO: need replicate EXT */ + //attachments[num_attachments].storeOp = rt->resolve ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE; + //attachments[num_attachments].stencilStoreOp = rt->resolve ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE; attachments[num_attachments].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[num_attachments].stencilLoadOp = rt->clear_stencil ? 
VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; attachments[num_attachments].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; /* if layout changes are ever handled here, need VkAttachmentSampleLocationsEXT */ attachments[num_attachments].initialLayout = layout; @@ -90,36 +152,93 @@ create_render_pass(struct zink_screen *screen, struct zink_render_pass_state *st attachments[num_attachments].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; - zs_ref.attachment = num_attachments++; + zs_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; + zs_ref.pNext = NULL; + zs_ref.attachment = num_attachments; zs_ref.layout = layout; + if (rt->resolve) { + memcpy(&attachments[zsresolve_offset], &attachments[num_attachments], sizeof(VkAttachmentDescription2)); + attachments[zsresolve_offset].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[zsresolve_offset].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[zsresolve_offset].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[zsresolve_offset].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[zsresolve_offset].samples = 1; + memcpy(&zs_resolve, &zs_ref, sizeof(VkAttachmentReference2)); + zs_resolve.attachment = zsresolve_offset; + if (attachments[zsresolve_offset].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || + attachments[zsresolve_offset].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + } + num_attachments++; pstate->num_attachments++; } + pstate->color_read = (dep_access & VK_ACCESS_COLOR_ATTACHMENT_READ_BIT) > 0; + pstate->depth_read = (dep_access & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT) > 0; + pstate->depth_write = (dep_access & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT) > 0; - VkSubpassDependency deps[] = { - [0] = {VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, VK_DEPENDENCY_BY_REGION_BIT}, - [1] = {0, VK_SUBPASS_EXTERNAL, dep_pipeline, 
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, VK_DEPENDENCY_BY_REGION_BIT} + if (!screen->info.have_KHR_synchronization2) + dep_pipeline = MAX2(dep_pipeline, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + + VkDependencyFlags flag = screen->info.have_KHR_synchronization2 ? VK_DEPENDENCY_BY_REGION_BIT : 0; + VkSubpassDependency2 deps[] = { + {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, flag, 0}, + {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, VK_SUBPASS_EXTERNAL, dep_pipeline, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, flag, 0} + }; + VkPipelineStageFlags input_dep = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + //if (zs_fbfetch) input_dep |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + VkAccessFlags input_access = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + //if (zs_fbfetch) input_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + VkSubpassDependency2 fbfetch_deps[] = { + {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, flag, 0}, + {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, 0, dep_pipeline, input_dep, dep_access, input_access, flag, 0}, + {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, VK_SUBPASS_EXTERNAL, dep_pipeline, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, flag, 0} }; - VkSubpassDescription subpass = {0}; + VkSubpassDescription2 subpass = {0}; + if (pstate->fbfetch && screen->info.have_EXT_rasterization_order_attachment_access) + subpass.flags |= VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT; + VkSubpassDescriptionDepthStencilResolve zsresolve; + subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = state->num_cbufs; subpass.pColorAttachments = color_refs; subpass.pDepthStencilAttachment = state->have_zsbuf ? 
&zs_ref : NULL; subpass.inputAttachmentCount = input_count; subpass.pInputAttachments = input_attachments; + if (state->num_cresolves) + subpass.pResolveAttachments = color_resolves; + if (state->num_zsresolves) { + subpass.pNext = &zsresolve; + zsresolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE; + zsresolve.pNext = NULL; + zsresolve.depthResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + zsresolve.stencilResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + zsresolve.pDepthStencilResolveAttachment = &zs_resolve; + } else + subpass.pNext = NULL; + + VkMultisampledRenderToSingleSampledInfoEXT msrtss = { + VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + &subpass.pNext, + VK_TRUE, + state->msaa_samples, + }; + if (state->msaa_samples) + subpass.pNext = &msrtss; - VkRenderPassCreateInfo rpci = {0}; - rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - rpci.attachmentCount = num_attachments; + VkRenderPassCreateInfo2 rpci = {0}; + rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2; + rpci.attachmentCount = num_attachments + state->num_cresolves + state->num_zsresolves; rpci.pAttachments = attachments; rpci.subpassCount = 1; rpci.pSubpasses = &subpass; - rpci.dependencyCount = 2; - rpci.pDependencies = deps; + rpci.dependencyCount = input_count ? 3 : 2; + rpci.pDependencies = input_count ? 
fbfetch_deps : deps; VkRenderPass render_pass; - if (VKSCR(CreateRenderPass)(screen->dev, &rpci, NULL, &render_pass) != VK_SUCCESS) { - debug_printf("vkCreateRenderPass failed\n"); + VkResult result = VKSCR(CreateRenderPass2)(screen->dev, &rpci, NULL, &render_pass); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateRenderPass2 failed (%s)", vk_Result_to_str(result)); return VK_NULL_HANDLE; } @@ -135,7 +254,7 @@ zink_create_render_pass(struct zink_screen *screen, if (!rp) goto fail; - rp->render_pass = create_render_pass(screen, state, pstate); + rp->render_pass = create_render_pass2(screen, state, pstate); if (!rp->render_pass) goto fail; memcpy(&rp->state, state, sizeof(struct zink_render_pass_state)); @@ -156,26 +275,622 @@ zink_destroy_render_pass(struct zink_screen *screen, } VkImageLayout -zink_render_pass_attachment_get_barrier_info(const struct zink_render_pass *rp, unsigned idx, +zink_render_pass_attachment_get_barrier_info(const struct zink_rt_attrib *rt, bool color, VkPipelineStageFlags *pipeline, VkAccessFlags *access) { *access = 0; - assert(idx < rp->state.num_rts); - const struct zink_rt_attrib *rt = &rp->state.rts[idx]; - if (idx < rp->state.num_cbufs) { + if (color) { *pipeline = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; *access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - if (!rt->clear_color && (!rp->state.swapchain_init || !rt->swapchain)) + if (!rt->clear_color && !rt->invalid) *access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; - return rt->fbfetch ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + return get_color_rt_layout(rt); } - assert(rp->state.have_zsbuf); *pipeline = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - if (!rp->state.rts[idx].clear_color && !rp->state.rts[idx].clear_stencil) + if (!rt->clear_color && !rt->clear_stencil) *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; - if (!rp->state.rts[idx].clear_color && !rp->state.rts[idx].clear_stencil && !rp->state.rts[idx].needs_write) - return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; - *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - return rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + if (rt->clear_color || rt->clear_stencil || rt->needs_write) + *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + return get_zs_rt_layout(rt); +} + +VkImageLayout +zink_tc_renderpass_info_parse(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned idx, VkPipelineStageFlags *pipeline, VkAccessFlags *access) +{ + if (idx < PIPE_MAX_COLOR_BUFS) { + *pipeline = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + *access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + if (info->cbuf_load & BITFIELD_BIT(idx)) + *access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + return (ctx->feedback_loops & BITFIELD_BIT(idx)) ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : + (info->cbuf_fbfetch & BITFIELD_BIT(idx)) ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } else { + *access = 0; + if (info->zsbuf_load || info->zsbuf_read_dsa) + *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + if (info->zsbuf_clear | info->zsbuf_clear_partial | info->zsbuf_write_fs | info->zsbuf_write_dsa) + *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + assert(*access); + *pipeline = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + if (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) + return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + return (info->zsbuf_clear | info->zsbuf_clear_partial | info->zsbuf_write_fs | info->zsbuf_write_dsa) ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + } +} + +static size_t +rp_state_size(const struct zink_render_pass_pipeline_state *pstate) +{ + return offsetof(struct zink_render_pass_pipeline_state, attachments) + + sizeof(pstate->attachments[0]) * pstate->num_attachments; +} + +static uint32_t +hash_rp_state(const void *key) +{ + const struct zink_render_pass_pipeline_state *s = key; + return _mesa_hash_data(key, rp_state_size(s)); +} + +static bool +equals_rp_state(const void *a, const void *b) +{ + return !memcmp(a, b, rp_state_size(a)); +} + +static uint32_t +hash_render_pass_state(const void *key) +{ + struct zink_render_pass_state* s = (struct zink_render_pass_state*)key; + return _mesa_hash_data(key, offsetof(struct zink_render_pass_state, rts) + sizeof(s->rts[0]) * s->num_rts); +} + +static bool +equals_render_pass_state(const void *a, const void *b) +{ + const struct zink_render_pass_state *s_a = a, *s_b = b; + if (s_a->num_rts != s_b->num_rts) + return false; + return memcmp(a, b, offsetof(struct zink_render_pass_state, rts) + sizeof(s_a->rts[0]) * s_a->num_rts) == 0; +} + +void +zink_init_zs_attachment(struct zink_context *ctx, struct zink_rt_attrib *rt) +{ + const struct pipe_framebuffer_state *fb = 
&ctx->fb_state; + struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture); + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; + struct zink_surface *transient = zink_transient_surface(fb->zsbuf); + rt->format = zsbuf->format; + rt->samples = MAX3(transient ? transient->base.nr_samples : 0, fb->zsbuf->texture->nr_samples, 1); + rt->clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && + !zink_fb_clear_first_needs_explicit(fb_clear) && + (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH); + rt->clear_stencil = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && + !zink_fb_clear_first_needs_explicit(fb_clear) && + (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL); + const uint64_t outputs_written = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? + ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written : 0; + bool needs_write_z = (ctx->dsa_state && ctx->dsa_state->hw_state.depth_write) || + outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH); + needs_write_z |= transient || rt->clear_color || + (zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH)); + + bool needs_write_s = (ctx->dsa_state && (util_writes_stencil(&ctx->dsa_state->base.stencil[0]) || util_writes_stencil(&ctx->dsa_state->base.stencil[1]))) || + rt->clear_stencil || (outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) || + (zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL)); + rt->needs_write = needs_write_z | needs_write_s; + rt->invalid = !zsbuf->valid; + rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) > 0; +} + +void +zink_tc_init_zs_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, struct zink_rt_attrib *rt) +{ + const struct pipe_framebuffer_state *fb = &ctx->fb_state; + struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture); + 
struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; + struct zink_surface *transient = zink_transient_surface(fb->zsbuf); + rt->format = zsbuf->format; + rt->samples = MAX3(transient ? transient->base.nr_samples : 0, fb->zsbuf->texture->nr_samples, 1); + rt->clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && + !zink_fb_clear_first_needs_explicit(fb_clear) && + (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH); + rt->clear_stencil = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && + !zink_fb_clear_first_needs_explicit(fb_clear) && + (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL); + rt->needs_write = info->zsbuf_clear | info->zsbuf_clear_partial | info->zsbuf_write_fs | info->zsbuf_write_dsa; + rt->invalid = !zsbuf->valid; + rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) > 0; +} + +void +zink_init_color_attachment(struct zink_context *ctx, unsigned i, struct zink_rt_attrib *rt) +{ + const struct pipe_framebuffer_state *fb = &ctx->fb_state; + struct pipe_surface *psurf = fb->cbufs[i]; + if (psurf) { + struct zink_surface *surf = zink_csurface(psurf); + struct zink_surface *transient = zink_transient_surface(psurf); + rt->format = surf->info.format[0]; + rt->samples = MAX3(transient ? 
transient->base.nr_samples : 0, psurf->texture->nr_samples, 1); + rt->clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]); + rt->invalid = !zink_resource(psurf->texture)->valid; + rt->fbfetch = (ctx->fbfetch_outputs & BITFIELD_BIT(i)) > 0; + rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(i)) > 0; + } else { + memset(rt, 0, sizeof(struct zink_rt_attrib)); + rt->format = VK_FORMAT_R8G8B8A8_UNORM; + rt->samples = fb->samples; + } +} + +void +zink_tc_init_color_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned i, struct zink_rt_attrib *rt) +{ + const struct pipe_framebuffer_state *fb = &ctx->fb_state; + struct pipe_surface *psurf = fb->cbufs[i]; + if (psurf) { + struct zink_surface *surf = zink_csurface(psurf); + struct zink_surface *transient = zink_transient_surface(psurf); + rt->format = surf->info.format[0]; + rt->samples = MAX3(transient ? transient->base.nr_samples : 0, psurf->texture->nr_samples, 1); + rt->clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]); + rt->invalid = !zink_resource(psurf->texture)->valid; + rt->fbfetch = (info->cbuf_fbfetch & BITFIELD_BIT(i)) > 0; + rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(i)) > 0; + } else { + memset(rt, 0, sizeof(struct zink_rt_attrib)); + rt->format = VK_FORMAT_R8G8B8A8_UNORM; + rt->samples = fb->samples; + } +} + +static struct zink_render_pass * +get_render_pass(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + const struct pipe_framebuffer_state *fb = &ctx->fb_state; + struct zink_render_pass_state state = {0}; + uint32_t clears = 0; + bool have_zsbuf = fb->zsbuf && zink_is_zsbuf_used(ctx); + bool use_tc_info = !ctx->blitting && ctx->track_renderpasses; + state.samples = fb->samples > 0; + + for (int i = 0; i < fb->nr_cbufs; i++) { + if (use_tc_info) + zink_tc_init_color_attachment(ctx, &ctx->dynamic_fb.tc_info, i, 
&state.rts[i]); + else + zink_init_color_attachment(ctx, i, &state.rts[i]); + struct pipe_surface *surf = fb->cbufs[i]; + if (surf) { + clears |= !!state.rts[i].clear_color ? PIPE_CLEAR_COLOR0 << i : 0; + struct zink_surface *transient = zink_transient_surface(surf); + if (transient) { + state.num_cresolves++; + state.rts[i].resolve = true; + if (!state.rts[i].clear_color) + state.msaa_expand_mask |= BITFIELD_BIT(i); + } else { + state.rts[i].resolve = false; + } + } + state.num_rts++; + } + state.msaa_samples = screen->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ? + ctx->gfx_pipeline_state.rast_samples + 1 : 0; + state.num_cbufs = fb->nr_cbufs; + assert(!state.num_cresolves || state.num_cbufs == state.num_cresolves); + + if (have_zsbuf) { + if (use_tc_info) + zink_tc_init_zs_attachment(ctx, &ctx->dynamic_fb.tc_info, &state.rts[fb->nr_cbufs]); + else + zink_init_zs_attachment(ctx, &state.rts[fb->nr_cbufs]); + struct zink_surface *transient = zink_transient_surface(fb->zsbuf); + if (transient) { + state.num_zsresolves = 1; + state.rts[fb->nr_cbufs].resolve = true; + } + if (state.rts[fb->nr_cbufs].clear_color) + clears |= PIPE_CLEAR_DEPTH; + if (state.rts[fb->nr_cbufs].clear_stencil) + clears |= PIPE_CLEAR_STENCIL; + state.num_rts++; + } + state.have_zsbuf = have_zsbuf; + assert(clears == ctx->rp_clears_enabled); + state.clears = clears; + uint32_t hash = hash_render_pass_state(&state); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->render_pass_cache, hash, + &state); + struct zink_render_pass *rp; + if (entry) { + rp = entry->data; + assert(rp->state.clears == clears); + } else { + struct zink_render_pass_pipeline_state pstate; + pstate.samples = state.samples; + rp = zink_create_render_pass(screen, &state, &pstate); + if (!_mesa_hash_table_insert_pre_hashed(ctx->render_pass_cache, hash, &rp->state, rp)) + return NULL; + bool found = false; + struct set_entry *cache_entry = 
_mesa_set_search_or_add(&ctx->render_pass_state_cache, &pstate, &found); + struct zink_render_pass_pipeline_state *ppstate; + if (!found) { + cache_entry->key = ralloc(ctx, struct zink_render_pass_pipeline_state); + ppstate = (void*)cache_entry->key; + memcpy(ppstate, &pstate, rp_state_size(&pstate)); + ppstate->id = ctx->render_pass_state_cache.entries; + } + ppstate = (void*)cache_entry->key; + rp->pipeline_state = ppstate->id; + } + return rp; +} + +/* check whether the active rp needs to be split to replace it with rp2 */ +static bool +rp_must_change(const struct zink_render_pass *rp, const struct zink_render_pass *rp2, bool in_rp) +{ + if (rp == rp2) + return false; + unsigned num_cbufs = rp->state.num_cbufs; + if (rp->pipeline_state != rp2->pipeline_state) { + /* if any core attrib bits are different, must split */ + if (rp->state.val != rp2->state.val) + return true; + for (unsigned i = 0; i < num_cbufs; i++) { + const struct zink_rt_attrib *rt = &rp->state.rts[i]; + const struct zink_rt_attrib *rt2 = &rp2->state.rts[i]; + /* if layout changed, must split */ + if (get_color_rt_layout(rt) != get_color_rt_layout(rt2)) + return true; + } + } + if (rp->state.have_zsbuf) { + const struct zink_rt_attrib *rt = &rp->state.rts[num_cbufs]; + const struct zink_rt_attrib *rt2 = &rp2->state.rts[num_cbufs]; + /* if zs layout has gone from read-only to read-write, split renderpass */ + if (get_zs_rt_layout(rt) == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL && + get_zs_rt_layout(rt2) == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) + return true; + } + /* any other change doesn't require splitting a renderpass */ + return !in_rp; +} + +static void +setup_framebuffer(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass; + + zink_update_vk_sample_locations(ctx); + + if (ctx->rp_changed || ctx->rp_layout_changed || (!ctx->batch.in_rp && ctx->rp_loadop_changed)) { + 
/* 0. ensure no stale pointers are set */ + ctx->gfx_pipeline_state.next_render_pass = NULL; + /* 1. calc new rp */ + rp = get_render_pass(ctx); + /* 2. evaluate whether to use new rp */ + if (ctx->gfx_pipeline_state.render_pass) { + /* 2a. if previous rp exists, check whether new rp MUST be used */ + bool must_change = rp_must_change(ctx->gfx_pipeline_state.render_pass, rp, ctx->batch.in_rp); + ctx->fb_changed |= must_change; + if (!must_change) + /* 2b. if non-essential attribs have changed, store for later use and continue on */ + ctx->gfx_pipeline_state.next_render_pass = rp; + } else { + /* 2c. no previous rp in use, use this one */ + ctx->fb_changed = true; + } + } else if (ctx->gfx_pipeline_state.next_render_pass) { + /* previous rp was calculated but deferred: use it */ + assert(!ctx->batch.in_rp); + rp = ctx->gfx_pipeline_state.next_render_pass; + ctx->gfx_pipeline_state.next_render_pass = NULL; + ctx->fb_changed = true; + } + if (rp->pipeline_state != ctx->gfx_pipeline_state.rp_state) { + ctx->gfx_pipeline_state.rp_state = rp->pipeline_state; + ctx->gfx_pipeline_state.dirty = true; + } + + ctx->rp_loadop_changed = false; + ctx->rp_layout_changed = false; + ctx->rp_changed = false; + + if (zink_render_update_swapchain(ctx)) + zink_render_fixup_swapchain(ctx); + + if (!ctx->fb_changed) + return; + + zink_update_framebuffer_state(ctx); + zink_init_framebuffer(screen, ctx->framebuffer, rp); + ctx->fb_changed = false; + ctx->gfx_pipeline_state.render_pass = rp; + zink_batch_no_rp(ctx); +} + +static bool +prep_fb_attachments(struct zink_context *ctx, VkImageView *att) +{ + bool have_zsbuf = ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx); + const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!have_zsbuf; + unsigned num_resolves = 0; + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.cbufs[i]); + if (transient) { + 
att[i] = zink_prep_fb_attachment(ctx, transient, i); + att[i + cresolve_offset] = zink_prep_fb_attachment(ctx, surf, i); + num_resolves++; + } else { + att[i] = zink_prep_fb_attachment(ctx, surf, i); + if (!att[i]) + /* dead swapchain */ + return false; + } + } + if (have_zsbuf) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.zsbuf); + if (transient) { + att[ctx->fb_state.nr_cbufs] = zink_prep_fb_attachment(ctx, transient, ctx->fb_state.nr_cbufs); + att[cresolve_offset + num_resolves] = zink_prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs); + } else { + att[ctx->fb_state.nr_cbufs] = zink_prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs); + } + } + return true; +} + +static unsigned +begin_render_pass(struct zink_context *ctx) +{ + struct zink_batch *batch = &ctx->batch; + struct pipe_framebuffer_state *fb_state = &ctx->fb_state; + + VkRenderPassBeginInfo rpbi = {0}; + rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rpbi.renderPass = ctx->gfx_pipeline_state.render_pass->render_pass; + rpbi.renderArea.offset.x = 0; + rpbi.renderArea.offset.y = 0; + rpbi.renderArea.extent.width = fb_state->width; + rpbi.renderArea.extent.height = fb_state->height; + + if (ctx->fb_state.cbufs[0]) { + struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture); + if (zink_is_swapchain(res)) { + if (res->use_damage) + rpbi.renderArea = res->damage; + } + } + + VkClearValue clears[PIPE_MAX_COLOR_BUFS + 1] = {0}; + unsigned clear_buffers = 0; + uint32_t clear_validate = 0; + for (int i = 0; i < fb_state->nr_cbufs; i++) { + /* these are no-ops */ + if (!fb_state->cbufs[i] || !zink_fb_clear_enabled(ctx, i)) + continue; + /* these need actual clear calls inside the rp */ + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0); + if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) { + clear_buffers |= (PIPE_CLEAR_COLOR0 << i); + if 
(zink_fb_clear_count(&ctx->fb_clears[i]) < 2 || + zink_fb_clear_element_needs_explicit(clear)) + continue; + } + /* we now know there's one clear that can be done here */ + memcpy(&clears[i].color, &clear->color, sizeof(float) * 4); + rpbi.clearValueCount = i + 1; + clear_validate |= PIPE_CLEAR_COLOR0 << i; + assert(ctx->framebuffer->rp->state.clears); + } + if (fb_state->zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) { + struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); + if (!zink_fb_clear_element_needs_explicit(clear)) { + clears[fb_state->nr_cbufs].depthStencil.depth = clear->zs.depth; + clears[fb_state->nr_cbufs].depthStencil.stencil = clear->zs.stencil; + rpbi.clearValueCount = fb_state->nr_cbufs + 1; + clear_validate |= clear->zs.bits; + assert(ctx->framebuffer->rp->state.clears); + } + if (zink_fb_clear_needs_explicit(fb_clear)) { + for (int j = !zink_fb_clear_element_needs_explicit(clear); + (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear); + j++) + clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits; + } + } + assert(clear_validate == ctx->framebuffer->rp->state.clears); + rpbi.pClearValues = &clears[0]; + rpbi.framebuffer = ctx->framebuffer->fb; + + assert(ctx->gfx_pipeline_state.render_pass && ctx->framebuffer); + + VkRenderPassAttachmentBeginInfo infos; + VkImageView att[2 * (PIPE_MAX_COLOR_BUFS + 1)]; + infos.sType = VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO; + infos.pNext = NULL; + infos.attachmentCount = ctx->framebuffer->state.num_attachments; + infos.pAttachments = att; + if (!prep_fb_attachments(ctx, att)) + return 0; + ctx->zsbuf_unused = !zink_is_zsbuf_used(ctx); + /* this can be set if fbfetch is activated */ + ctx->rp_changed = false; +#ifndef NDEBUG + bool zsbuf_used = ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx); + const unsigned 
cresolve_offset = ctx->fb_state.nr_cbufs + !!zsbuf_used; + unsigned num_cresolves = 0; + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { + if (ctx->fb_state.cbufs[i]) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.cbufs[i]); + if (surf->base.format == ctx->fb_state.cbufs[i]->format) { + if (transient) { + num_cresolves++; + assert(zink_resource(transient->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage); + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[cresolve_offset].usage); + } else { + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage); + } + } + } + } + if (ctx->gfx_pipeline_state.render_pass->state.have_zsbuf) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.zsbuf); + if (transient) { + assert(zink_resource(transient->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage); + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[cresolve_offset + num_cresolves].usage); + } else { + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage); + } + } +#endif + rpbi.pNext = &infos; + + VKCTX(CmdBeginRenderPass)(batch->state->cmdbuf, &rpbi, VK_SUBPASS_CONTENTS_INLINE); + batch->in_rp = true; + return clear_buffers; +} + +unsigned +zink_begin_render_pass(struct zink_context *ctx) +{ + setup_framebuffer(ctx); + if (ctx->batch.in_rp) + return 0; + + if (ctx->framebuffer->rp->state.msaa_expand_mask) { + uint32_t rp_state = ctx->gfx_pipeline_state.rp_state; + struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass; + struct zink_framebuffer *fb = ctx->framebuffer; + bool blitting = ctx->blitting; + + u_foreach_bit(i, 
ctx->framebuffer->rp->state.msaa_expand_mask) { + struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i]; + /* skip replicate blit if the image will be full-cleared */ + if ((i == PIPE_MAX_COLOR_BUFS && (ctx->rp_clears_enabled & PIPE_CLEAR_DEPTHSTENCIL)) || + (ctx->rp_clears_enabled >> 2) & BITFIELD_BIT(i)) { + csurf->transient_init |= zink_fb_clear_full_exists(ctx, i); + } + if (csurf->transient_init) + continue; + struct pipe_surface *dst_view = (struct pipe_surface*)csurf->transient; + assert(dst_view); + struct pipe_sampler_view src_templ, *src_view; + struct pipe_resource *src = ctx->fb_state.cbufs[i]->texture; + struct pipe_box dstbox; + + u_box_3d(0, 0, 0, ctx->fb_state.width, ctx->fb_state.height, + 1 + dst_view->u.tex.last_layer - dst_view->u.tex.first_layer, &dstbox); + + util_blitter_default_src_texture(ctx->blitter, &src_templ, src, ctx->fb_state.cbufs[i]->u.tex.level); + src_view = ctx->base.create_sampler_view(&ctx->base, src, &src_templ); + + zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES); + ctx->blitting = false; + zink_blit_barriers(ctx, zink_resource(src), zink_resource(dst_view->texture), true); + ctx->blitting = true; + unsigned clear_mask = i == PIPE_MAX_COLOR_BUFS ? 
+ (BITFIELD_MASK(PIPE_MAX_COLOR_BUFS) << 2) : + (PIPE_CLEAR_DEPTHSTENCIL | ((BITFIELD_MASK(PIPE_MAX_COLOR_BUFS) & ~BITFIELD_BIT(i)) << 2)); + unsigned clears_enabled = ctx->clears_enabled & clear_mask; + unsigned rp_clears_enabled = ctx->rp_clears_enabled & clear_mask; + ctx->clears_enabled &= ~clear_mask; + ctx->rp_clears_enabled &= ~clear_mask; + util_blitter_blit_generic(ctx->blitter, dst_view, &dstbox, + src_view, &dstbox, ctx->fb_state.width, ctx->fb_state.height, + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + false, false, 0); + ctx->clears_enabled = clears_enabled; + ctx->rp_clears_enabled = rp_clears_enabled; + ctx->blitting = false; + if (blitting) { + zink_blit_barriers(ctx, NULL, zink_resource(dst_view->texture), true); + zink_blit_barriers(ctx, NULL, zink_resource(src), true); + } + ctx->blitting = blitting; + pipe_sampler_view_reference(&src_view, NULL); + csurf->transient_init = true; + } + ctx->rp_layout_changed = ctx->rp_loadop_changed = false; + ctx->fb_changed = ctx->rp_changed = false; + ctx->gfx_pipeline_state.rp_state = rp_state; + ctx->gfx_pipeline_state.render_pass = rp; + /* manually re-set fb: depth buffer may have been eliminated */ + ctx->framebuffer = fb; + ctx->framebuffer->rp = rp; + } + assert(ctx->gfx_pipeline_state.render_pass); + return begin_render_pass(ctx); +} + +void +zink_end_render_pass(struct zink_context *ctx) +{ + if (ctx->batch.in_rp) { + VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf); + + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) { + struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i]; + if (csurf) + csurf->transient_init = true; + } + } + ctx->batch.in_rp = false; +} + +bool +zink_init_render_pass(struct zink_context *ctx) +{ + _mesa_set_init(&ctx->render_pass_state_cache, ctx, hash_rp_state, equals_rp_state); + ctx->render_pass_cache = _mesa_hash_table_create(NULL, + hash_render_pass_state, + equals_render_pass_state); + return !!ctx->render_pass_cache; +} + +void 
+zink_render_fixup_swapchain(struct zink_context *ctx) +{ + if ((ctx->swapchain_size.width || ctx->swapchain_size.height)) { + unsigned old_w = ctx->fb_state.width; + unsigned old_h = ctx->fb_state.height; + ctx->fb_state.width = ctx->swapchain_size.width; + ctx->fb_state.height = ctx->swapchain_size.height; + ctx->dynamic_fb.info.renderArea.extent.width = MIN2(ctx->dynamic_fb.info.renderArea.extent.width, ctx->fb_state.width); + ctx->dynamic_fb.info.renderArea.extent.height = MIN2(ctx->dynamic_fb.info.renderArea.extent.height, ctx->fb_state.height); + zink_kopper_fixup_depth_buffer(ctx); + if (ctx->fb_state.width != old_w || ctx->fb_state.height != old_h) + ctx->scissor_changed = true; + if (ctx->framebuffer) + zink_update_framebuffer_state(ctx); + ctx->swapchain_size.width = ctx->swapchain_size.height = 0; + } +} + +bool +zink_render_update_swapchain(struct zink_context *ctx) +{ + bool has_swapchain = false; + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) { + if (!ctx->fb_state.cbufs[i]) + continue; + struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[i]->texture); + if (zink_is_swapchain(res)) { + has_swapchain = true; + if (zink_kopper_acquire(ctx, res, UINT64_MAX)) + zink_surface_swapchain_update(ctx, zink_csurface(ctx->fb_state.cbufs[i])); + } + } + return has_swapchain; } diff --git a/src/gallium/drivers/zink/zink_render_pass.h b/src/gallium/drivers/zink/zink_render_pass.h index 38efbc6a5b7..3d5bd417ab1 100644 --- a/src/gallium/drivers/zink/zink_render_pass.h +++ b/src/gallium/drivers/zink/zink_render_pass.h @@ -24,52 +24,7 @@ #ifndef ZINK_RENDERPASS_H #define ZINK_RENDERPASS_H -#include <vulkan/vulkan.h> - -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -struct zink_screen; - -struct zink_rt_attrib { - VkFormat format; - VkSampleCountFlagBits samples; - bool clear_color; - bool clear_stencil; - bool fbfetch; - union { - bool swapchain; - bool needs_write; - }; -}; - -struct zink_render_pass_state { - uint8_t num_cbufs : 4; /* 
PIPE_MAX_COLOR_BUFS = 8 */ - uint8_t have_zsbuf : 1; - bool samples; //for fs samplemask - bool swapchain_init; - struct zink_rt_attrib rts[PIPE_MAX_COLOR_BUFS + 1]; - unsigned num_rts; - uint32_t clears; //for extra verification and update flagging -}; - -struct zink_pipeline_rt { - VkFormat format; - VkSampleCountFlagBits samples; -}; - -struct zink_render_pass_pipeline_state { - uint32_t num_attachments:31; - bool samples:1; //for fs samplemask - struct zink_pipeline_rt attachments[PIPE_MAX_COLOR_BUFS + 1]; - unsigned id; -}; - -struct zink_render_pass { - VkRenderPass render_pass; - struct zink_render_pass_state state; - unsigned pipeline_state; -}; +#include "zink_types.h" struct zink_render_pass * zink_create_render_pass(struct zink_screen *screen, @@ -80,6 +35,28 @@ void zink_destroy_render_pass(struct zink_screen *screen, struct zink_render_pass *rp); + +unsigned +zink_begin_render_pass(struct zink_context *ctx); +void +zink_end_render_pass(struct zink_context *ctx); + VkImageLayout -zink_render_pass_attachment_get_barrier_info(const struct zink_render_pass *rp, unsigned idx, VkPipelineStageFlags *pipeline, VkAccessFlags *access); +zink_render_pass_attachment_get_barrier_info(const struct zink_rt_attrib *rt, bool color, VkPipelineStageFlags *pipeline, VkAccessFlags *access); +VkImageLayout +zink_tc_renderpass_info_parse(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned idx, VkPipelineStageFlags *pipeline, VkAccessFlags *access); +bool +zink_init_render_pass(struct zink_context *ctx); +bool +zink_render_update_swapchain(struct zink_context *ctx); +void +zink_render_fixup_swapchain(struct zink_context *ctx); +void +zink_init_zs_attachment(struct zink_context *ctx, struct zink_rt_attrib *rt); +void +zink_init_color_attachment(struct zink_context *ctx, unsigned i, struct zink_rt_attrib *rt); +void +zink_tc_init_zs_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, struct zink_rt_attrib *rt); +void 
+zink_tc_init_color_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned i, struct zink_rt_attrib *rt); #endif diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index 81cd735a8d6..16b3b0413c6 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -24,17 +24,19 @@ #include "zink_resource.h" #include "zink_batch.h" +#include "zink_clear.h" #include "zink_context.h" #include "zink_fence.h" +#include "zink_format.h" #include "zink_program.h" #include "zink_screen.h" +#include "zink_kopper.h" #ifdef VK_USE_PLATFORM_METAL_EXT #include "QuartzCore/CAMetalLayer.h" #endif -#include "vulkan/wsi/wsi_common.h" -#include "util/slab.h" +#include "vk_format.h" #include "util/u_blitter.h" #include "util/u_debug.h" #include "util/format/u_format.h" @@ -43,14 +45,13 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/os_file.h" -#include "frontend/sw_winsys.h" +#include "frontend/winsys_handle.h" -#ifndef _WIN32 +#if !defined(__APPLE__) #define ZINK_USE_DMABUF #endif -#ifdef ZINK_USE_DMABUF -#include <xf86drm.h> +#if defined(ZINK_USE_DMABUF) && !defined(_WIN32) #include "drm-uapi/drm_fourcc.h" #else /* these won't actually be used */ @@ -58,17 +59,126 @@ #define DRM_FORMAT_MOD_LINEAR 0 #endif +#ifdef __APPLE__ +#include "MoltenVK/mvk_vulkan.h" +// Source of MVK_VERSION +#include "MoltenVK/mvk_config.h" +#endif /* __APPLE__ */ + +#define ZINK_EXTERNAL_MEMORY_HANDLE 999 + + + +struct zink_debug_mem_entry { + uint32_t count; + uint64_t size; + const char *name; +}; + +static const char * +zink_debug_mem_add(struct zink_screen *screen, uint64_t size, const char *name) +{ + assert(name); + + simple_mtx_lock(&screen->debug_mem_lock); + struct hash_entry *entry = _mesa_hash_table_search(screen->debug_mem_sizes, name); + struct zink_debug_mem_entry *debug_bos; + + if (!entry) { + debug_bos = calloc(1, sizeof(struct zink_debug_mem_entry)); + 
debug_bos->name = strdup(name); + _mesa_hash_table_insert(screen->debug_mem_sizes, debug_bos->name, debug_bos); + } else { + debug_bos = (struct zink_debug_mem_entry *) entry->data; + } + + debug_bos->count++; + debug_bos->size += align(size, 4096); + simple_mtx_unlock(&screen->debug_mem_lock); + + return debug_bos->name; +} + +static void +zink_debug_mem_del(struct zink_screen *screen, struct zink_bo *bo) +{ + simple_mtx_lock(&screen->debug_mem_lock); + struct hash_entry *entry = _mesa_hash_table_search(screen->debug_mem_sizes, bo->name); + /* If we're finishing the BO, it should have been added already */ + assert(entry); + + struct zink_debug_mem_entry *debug_bos = entry->data; + debug_bos->count--; + debug_bos->size -= align(zink_bo_get_size(bo), 4096); + if (!debug_bos->count) { + _mesa_hash_table_remove(screen->debug_mem_sizes, entry); + free((void*)debug_bos->name); + free(debug_bos); + } + simple_mtx_unlock(&screen->debug_mem_lock); +} + +static int +debug_bos_count_compare(const void *in_a, const void *in_b) +{ + struct zink_debug_mem_entry *a = *(struct zink_debug_mem_entry **)in_a; + struct zink_debug_mem_entry *b = *(struct zink_debug_mem_entry **)in_b; + return a->count - b->count; +} + +void +zink_debug_mem_print_stats(struct zink_screen *screen) +{ + simple_mtx_lock(&screen->debug_mem_lock); + + /* Put the HT's sizes data in an array so we can sort by number of allocations. 
*/ + struct util_dynarray dyn; + util_dynarray_init(&dyn, NULL); + + uint32_t size = 0; + uint32_t count = 0; + hash_table_foreach(screen->debug_mem_sizes, entry) + { + struct zink_debug_mem_entry *debug_bos = entry->data; + util_dynarray_append(&dyn, struct zink_debug_mem_entry *, debug_bos); + size += debug_bos->size / 1024; + count += debug_bos->count; + } + + qsort(dyn.data, + util_dynarray_num_elements(&dyn, struct zink_debug_mem_entry *), + sizeof(struct zink_debug_mem_entryos_entry *), debug_bos_count_compare); + + util_dynarray_foreach(&dyn, struct zink_debug_mem_entry *, entryp) + { + struct zink_debug_mem_entry *debug_bos = *entryp; + mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count, + (long long) (debug_bos->size / 1024)); + } + + mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024)); + + util_dynarray_fini(&dyn); + + simple_mtx_unlock(&screen->debug_mem_lock); +} static bool equals_ivci(const void *a, const void *b) { - return memcmp(a, b, sizeof(VkImageViewCreateInfo)) == 0; + const uint8_t *pa = a; + const uint8_t *pb = b; + size_t offset = offsetof(VkImageViewCreateInfo, flags); + return memcmp(pa + offset, pb + offset, sizeof(VkImageViewCreateInfo) - offset) == 0; } static bool equals_bvci(const void *a, const void *b) { - return memcmp(a, b, sizeof(VkBufferViewCreateInfo)) == 0; + const uint8_t *pa = a; + const uint8_t *pb = b; + size_t offset = offsetof(VkBufferViewCreateInfo, flags); + return memcmp(pa + offset, pb + offset, sizeof(VkBufferViewCreateInfo) - offset) == 0; } static void @@ -86,16 +196,35 @@ void zink_destroy_resource_object(struct zink_screen *screen, struct zink_resource_object *obj) { if (obj->is_buffer) { - util_dynarray_foreach(&obj->tmp, VkBuffer, buffer) - VKSCR(DestroyBuffer)(screen->dev, *buffer, NULL); - VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL); + while (util_dynarray_contains(&obj->views, VkBufferView)) + VKSCR(DestroyBufferView)(screen->dev, 
util_dynarray_pop(&obj->views, VkBufferView), NULL); } else { + while (util_dynarray_contains(&obj->views, VkImageView)) + VKSCR(DestroyImageView)(screen->dev, util_dynarray_pop(&obj->views, VkImageView), NULL); + } + if (!obj->dt && zink_debug & ZINK_DEBUG_MEM) + zink_debug_mem_del(screen, obj->bo); + util_dynarray_fini(&obj->views); + for (unsigned i = 0; i < ARRAY_SIZE(obj->copies); i++) + util_dynarray_fini(&obj->copies[i]); + if (obj->is_buffer) { + VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL); + VKSCR(DestroyBuffer)(screen->dev, obj->storage_buffer, NULL); + } else if (obj->dt) { + zink_kopper_displaytarget_destroy(screen, obj->dt); + } else if (!obj->is_aux) { VKSCR(DestroyImage)(screen->dev, obj->image, NULL); + } else { +#if defined(ZINK_USE_DMABUF) && !defined(_WIN32) + close(obj->handle); +#endif } - util_dynarray_fini(&obj->tmp); - zink_descriptor_set_refs_clear(&obj->desc_set_refs, obj); - zink_bo_unref(screen, obj->bo); + simple_mtx_destroy(&obj->view_lock); + if (obj->dt) { + FREE(obj->bo); //this is a dummy struct + } else + zink_bo_unref(screen, obj->bo); FREE(obj); } @@ -108,15 +237,19 @@ zink_resource_destroy(struct pipe_screen *pscreen, if (pres->target == PIPE_BUFFER) { util_range_destroy(&res->valid_buffer_range); util_idalloc_mt_free(&screen->buffer_ids, res->base.buffer_id_unique); + assert(!_mesa_hash_table_num_entries(&res->bufferview_cache)); simple_mtx_destroy(&res->bufferview_mtx); - } else + ralloc_free(res->bufferview_cache.table); + } else { + assert(!_mesa_hash_table_num_entries(&res->surface_cache)); simple_mtx_destroy(&res->surface_mtx); + ralloc_free(res->surface_cache.table); + } /* no need to do anything for the caches, these objects own the resource lifetimes */ zink_resource_object_reference(screen, &res->obj, NULL); - zink_resource_object_reference(screen, &res->scanout_obj, NULL); threaded_resource_deinit(pres); - FREE(res); + FREE_CL(res); } static VkImageAspectFlags @@ -147,39 +280,71 @@ create_bci(struct 
zink_screen *screen, const struct pipe_resource *templ, unsign bci.flags = 0; assert(bci.size > 0); - bci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - - bci.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT | - VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT; + if (bind & ZINK_BIND_DESCRIPTOR) { + /* gallium sizes are all uint32_t, while the total size of this buffer may exceed that limit */ + bci.usage = 0; + bci.usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT; + } else { + bci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + + bci.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT; + } + if (screen->info.have_KHR_buffer_device_address) + bci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; if (bind & PIPE_BIND_SHADER_IMAGE) bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + if (bind & PIPE_BIND_QUERY_BUFFER) + bci.usage |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - bci.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT; + bci.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT | VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; return bci; } -static bool +typedef enum { + USAGE_FAIL_NONE, + USAGE_FAIL_ERROR, + USAGE_FAIL_SUBOPTIMAL, +} usage_fail; + +static usage_fail check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, uint64_t 
modifier) { VkImageFormatProperties image_props; VkResult ret; + bool optimalDeviceAccess = true; assert(modifier == DRM_FORMAT_MOD_INVALID || (VKSCR(GetPhysicalDeviceImageFormatProperties2) && screen->info.have_EXT_image_drm_format_modifier)); if (VKSCR(GetPhysicalDeviceImageFormatProperties2)) { VkImageFormatProperties2 props2; props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2; props2.pNext = NULL; + VkSamplerYcbcrConversionImageFormatProperties ycbcr_props; + ycbcr_props.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES; + ycbcr_props.pNext = NULL; + if (screen->info.have_KHR_sampler_ycbcr_conversion) + props2.pNext = &ycbcr_props; + VkHostImageCopyDevicePerformanceQueryEXT hic = { + VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT, + props2.pNext, + }; + if (screen->info.have_EXT_host_image_copy && ici->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) + props2.pNext = &hic; VkPhysicalDeviceImageFormatInfo2 info; info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2; + /* possibly VkImageFormatListCreateInfo */ + info.pNext = ici->pNext; info.format = ici->format; info.type = ici->imageType; info.tiling = ici->tiling; @@ -189,47 +354,84 @@ check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, uint64_t modifier) VkPhysicalDeviceImageDrmFormatModifierInfoEXT mod_info; if (modifier != DRM_FORMAT_MOD_INVALID) { mod_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT; - mod_info.pNext = NULL; + mod_info.pNext = info.pNext; mod_info.drmFormatModifier = modifier; mod_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; mod_info.queueFamilyIndexCount = 0; + mod_info.pQueueFamilyIndices = NULL; info.pNext = &mod_info; - } else - info.pNext = NULL; + } ret = VKSCR(GetPhysicalDeviceImageFormatProperties2)(screen->pdev, &info, &props2); + /* this is using VK_IMAGE_CREATE_EXTENDED_USAGE_BIT and can't be validated */ + if (vk_format_aspects(ici->format) & VK_IMAGE_ASPECT_PLANE_1_BIT) + 
ret = VK_SUCCESS; image_props = props2.imageFormatProperties; + if (screen->info.have_EXT_host_image_copy && ici->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) + optimalDeviceAccess = hic.optimalDeviceAccess; } else ret = VKSCR(GetPhysicalDeviceImageFormatProperties)(screen->pdev, ici->format, ici->imageType, ici->tiling, ici->usage, ici->flags, &image_props); - return ret == VK_SUCCESS; + if (ret != VK_SUCCESS) + return USAGE_FAIL_ERROR; + if (ici->extent.depth > image_props.maxExtent.depth || + ici->extent.height > image_props.maxExtent.height || + ici->extent.width > image_props.maxExtent.width) + return USAGE_FAIL_ERROR; + if (ici->mipLevels > image_props.maxMipLevels) + return USAGE_FAIL_ERROR; + if (ici->arrayLayers > image_props.maxArrayLayers) + return USAGE_FAIL_ERROR; + if (!(ici->samples & image_props.sampleCounts)) + return USAGE_FAIL_ERROR; + if (!optimalDeviceAccess) + return USAGE_FAIL_SUBOPTIMAL; + return USAGE_FAIL_NONE; } static VkImageUsageFlags -get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats, const struct pipe_resource *templ, unsigned bind) +get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags2 feats, const struct pipe_resource *templ, unsigned bind, bool *need_extended) { VkImageUsageFlags usage = 0; - /* sadly, gallium doesn't let us know if it'll ever need this, so we have to assume */ - if (feats & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT) - usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - if (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT) - usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT && (bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - - if ((templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample) && - (bind & PIPE_BIND_SHADER_IMAGE)) { - if (feats & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) + bool is_planar = 
util_format_get_num_planes(templ->format) > 1; + *need_extended = false; + + if (bind & ZINK_BIND_TRANSIENT) + usage |= VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT; + else { + /* sadly, gallium doesn't let us know if it'll ever need this, so we have to assume */ + if (is_planar || (feats & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) + usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + if (is_planar || (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) + usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + + if ((is_planar || (feats & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) && (bind & PIPE_BIND_SHADER_IMAGE)) { + assert(templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample); usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } } if (bind & PIPE_BIND_RENDER_TARGET) { if (feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) { usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - if ((bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) + if (!(bind & ZINK_BIND_TRANSIENT) && (bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; - } else + if (!(bind & ZINK_BIND_TRANSIENT) && screen->info.have_EXT_attachment_feedback_loop_layout) + usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; + } else { + /* trust that gallium isn't going to give us anything wild */ + *need_extended = true; + return 0; + } + } else if ((bind & PIPE_BIND_SAMPLER_VIEW) && !util_format_is_depth_or_stencil(templ->format)) { + if (!(feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + /* ensure we can u_blitter this later */ + *need_extended = true; return 0; + } + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } if (bind & PIPE_BIND_DEPTH_STENCIL) { @@ -237,6 +439,8 @@ get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; else return 0; + 
if (screen->info.have_EXT_attachment_feedback_loop_layout && !(bind & ZINK_BIND_TRANSIENT)) + usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT; /* this is unlikely to occur and has been included for completeness */ } else if (bind & PIPE_BIND_SAMPLER_VIEW && !(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)) { if (feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) @@ -245,11 +449,12 @@ get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats return 0; } - if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - usage |= VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT; - if (bind & PIPE_BIND_STREAM_OUTPUT) usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + + if (screen->info.have_EXT_host_image_copy && feats & VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT) + usage |= VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; + return usage; } @@ -265,50 +470,142 @@ find_modifier_feats(const struct zink_modifier_prop *prop, uint64_t modifier, ui return 0; } +/* check HIC optimalness */ +static bool +suboptimal_check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, uint64_t *mod) +{ + usage_fail fail = check_ici(screen, ici, *mod); + if (!fail) + return true; + if (fail == USAGE_FAIL_SUBOPTIMAL) { + ici->usage &= ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; + fail = check_ici(screen, ici, *mod); + if (!fail) + return true; + } + return false; +} + +/* If the driver can't do mutable with this ICI, then try again after removing mutable (and + * thus also the list of formats we might might mutate to) + */ +static bool +double_check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, VkImageUsageFlags usage, uint64_t *mod) +{ + if (!usage) + return false; + + ici->usage = usage; + + if (suboptimal_check_ici(screen, ici, mod)) + return true; + usage_fail fail = check_ici(screen, ici, *mod); + if (!fail) + return true; + if (fail == USAGE_FAIL_SUBOPTIMAL) { + ici->usage &= ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; + fail = check_ici(screen, ici, *mod); + if (!fail) + return true; + } + 
const void *pNext = ici->pNext; + if (pNext) { + VkBaseOutStructure *prev = NULL; + VkBaseOutStructure *fmt_list = NULL; + vk_foreach_struct(strct, (void*)ici->pNext) { + if (strct->sType == VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO) { + fmt_list = strct; + if (prev) { + prev->pNext = strct->pNext; + } else { + ici->pNext = strct->pNext; + } + fmt_list->pNext = NULL; + break; + } + prev = strct; + } + ici->flags &= ~VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + if (suboptimal_check_ici(screen, ici, mod)) + return true; + fmt_list->pNext = (void*)ici->pNext; + ici->pNext = fmt_list; + ici->flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } + return false; +} + static VkImageUsageFlags -get_image_usage(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count, const uint64_t *modifiers, uint64_t *mod) +get_image_usage(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count, uint64_t *modifiers, uint64_t *mod) { VkImageTiling tiling = ici->tiling; + bool need_extended = false; *mod = DRM_FORMAT_MOD_INVALID; if (modifiers_count) { bool have_linear = false; const struct zink_modifier_prop *prop = &screen->modifier_props[templ->format]; assert(tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT); + bool found = false; + uint64_t good_mod = 0; + VkImageUsageFlags good_usage = 0; for (unsigned i = 0; i < modifiers_count; i++) { if (modifiers[i] == DRM_FORMAT_MOD_LINEAR) { have_linear = true; + if (!screen->info.have_EXT_image_drm_format_modifier) + break; continue; } VkFormatFeatureFlags feats = find_modifier_feats(prop, modifiers[i], mod); if (feats) { - VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind); - if (usage) { - ici->usage = usage; - if (check_ici(screen, ici, *mod)) - return usage; + VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended); + 
assert(!need_extended); + if (double_check_ici(screen, ici, usage, mod)) { + if (!found) { + found = true; + good_mod = modifiers[i]; + good_usage = usage; + } + } else { + modifiers[i] = DRM_FORMAT_MOD_LINEAR; } } } + if (found) { + *mod = good_mod; + return good_usage; + } /* only try linear if no other options available */ if (have_linear) { VkFormatFeatureFlags feats = find_modifier_feats(prop, DRM_FORMAT_MOD_LINEAR, mod); if (feats) { - VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind); - if (usage) { - ici->usage = usage; - if (check_ici(screen, ici, *mod)) - return usage; - } + VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended); + assert(!need_extended); + if (double_check_ici(screen, ici, usage, mod)) + return usage; } } - } else - { - VkFormatProperties props = screen->format_props[templ->format]; - VkFormatFeatureFlags feats = tiling == VK_IMAGE_TILING_LINEAR ? props.linearTilingFeatures : props.optimalTilingFeatures; - VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind); - if (usage) { - ici->usage = usage; - if (check_ici(screen, ici, *mod)) + } else { + struct zink_format_props props = screen->format_props[templ->format]; + VkFormatFeatureFlags2 feats = tiling == VK_IMAGE_TILING_LINEAR ? 
props.linearTilingFeatures : props.optimalTilingFeatures; + if (ici->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) + feats = UINT32_MAX; + VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended); + if (need_extended) { + ici->flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + feats = UINT32_MAX; + usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended); + } + if (double_check_ici(screen, ici, usage, mod)) + return usage; + if (util_format_is_depth_or_stencil(templ->format)) { + if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) { + usage &= ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + if (double_check_ici(screen, ici, usage, mod)) + return usage; + } + } else if (!(templ->bind & PIPE_BIND_RENDER_TARGET)) { + usage &= ~VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if (double_check_ici(screen, ici, usage, mod)) return usage; } } @@ -317,24 +614,114 @@ get_image_usage(struct zink_screen *screen, VkImageCreateInfo *ici, const struct } static uint64_t -create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, bool dmabuf, unsigned bind, unsigned modifiers_count, const uint64_t *modifiers, bool *success) +eval_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count, uint64_t *modifiers, bool *success) +{ + /* sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the following conditions is true: + * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT + * + * 44.1.1. 
Supported Sample Counts + */ + bool want_cube = ici->samples == 1 && + (templ->target == PIPE_TEXTURE_CUBE || + templ->target == PIPE_TEXTURE_CUBE_ARRAY || + (templ->target == PIPE_TEXTURE_2D_ARRAY && ici->extent.width == ici->extent.height && ici->arrayLayers >= 6)); + + if (ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) + modifiers_count = 0; + + bool first = true; + bool tried[2] = {0}; + uint64_t mod = DRM_FORMAT_MOD_INVALID; +retry: + while (!ici->usage) { + if (!first) { + switch (ici->tiling) { + case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: + ici->tiling = VK_IMAGE_TILING_OPTIMAL; + modifiers_count = 0; + break; + case VK_IMAGE_TILING_OPTIMAL: + ici->tiling = VK_IMAGE_TILING_LINEAR; + break; + case VK_IMAGE_TILING_LINEAR: + if (bind & PIPE_BIND_LINEAR) { + *success = false; + return DRM_FORMAT_MOD_INVALID; + } + ici->tiling = VK_IMAGE_TILING_OPTIMAL; + break; + default: + unreachable("unhandled tiling mode"); + } + if (tried[ici->tiling]) { + if (ici->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) { + *success = false; + return DRM_FORMAT_MOD_INVALID; + } + ici->flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + tried[0] = false; + tried[1] = false; + first = true; + goto retry; + } + } + ici->usage = get_image_usage(screen, ici, templ, bind, modifiers_count, modifiers, &mod); + first = false; + if (ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) + tried[ici->tiling] = true; + } + if (want_cube) { + ici->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + if ((get_image_usage(screen, ici, templ, bind, modifiers_count, modifiers, &mod) & ici->usage) != ici->usage) + ici->flags &= ~VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + } + + *success = true; + return mod; +} + +static void +init_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count) { ici->sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ici->pNext = NULL; - ici->flags = modifiers_count || 
dmabuf || bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DEPTH_STENCIL) ? 0 : VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + /* pNext may already be set */ + if (util_format_get_num_planes(templ->format) > 1) + ici->flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; + else if (bind & ZINK_BIND_MUTABLE) + ici->flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + else + ici->flags = 0; + if (ici->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) + /* unset VkImageFormatListCreateInfo if mutable */ + ici->pNext = NULL; + else if (ici->pNext) + /* add mutable if VkImageFormatListCreateInfo */ + ici->flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; ici->usage = 0; ici->queueFamilyIndexCount = 0; + ici->pQueueFamilyIndices = NULL; + + /* assume we're going to be doing some CompressedTexSubImage */ + if (util_format_is_compressed(templ->format) && (ici->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && + !vk_find_struct_const(ici->pNext, IMAGE_FORMAT_LIST_CREATE_INFO)) + ici->flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT; + + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) + ici->flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; + bool need_2D = false; switch (templ->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D_ARRAY: - ici->imageType = VK_IMAGE_TYPE_1D; + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) + need_2D |= screen->need_2D_sparse; + if (util_format_is_depth_or_stencil(templ->format)) + need_2D |= screen->need_2D_zs; + ici->imageType = need_2D ? 
VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D; break; case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: - ici->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - FALLTHROUGH; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_RECT: @@ -343,8 +730,10 @@ create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe case PIPE_TEXTURE_3D: ici->imageType = VK_IMAGE_TYPE_3D; - if (bind & PIPE_BIND_RENDER_TARGET) + if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) ici->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; + if (screen->info.have_EXT_image_2d_view_of_3d) + ici->flags |= VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT; break; case PIPE_BUFFER: @@ -366,265 +755,208 @@ create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe ici->mipLevels = templ->last_level + 1; ici->arrayLayers = MAX2(templ->array_size, 1); ici->samples = templ->nr_samples ? templ->nr_samples : VK_SAMPLE_COUNT_1_BIT; - ici->tiling = modifiers_count ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : bind & PIPE_BIND_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + ici->tiling = screen->info.have_EXT_image_drm_format_modifier && modifiers_count ? + VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : + bind & (PIPE_BIND_LINEAR | ZINK_BIND_DMABUF) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + /* XXX: does this have perf implications anywhere? hopefully not */ + if (ici->samples == VK_SAMPLE_COUNT_1_BIT && + screen->info.have_EXT_multisampled_render_to_single_sampled && + ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) + ici->flags |= VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT; ici->sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ici->initialLayout = dmabuf ? 
VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; - - if (templ->target == PIPE_TEXTURE_CUBE || - templ->target == PIPE_TEXTURE_CUBE_ARRAY || - (templ->target == PIPE_TEXTURE_2D_ARRAY && - ici->extent.width == ici->extent.height && - ici->arrayLayers >= 6)) { - VkImageFormatProperties props; - if (vkGetPhysicalDeviceImageFormatProperties(screen->pdev, ici->format, - ici->imageType, ici->tiling, - ici->usage, ici->flags | - VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, - &props) == VK_SUCCESS) { - if (props.sampleCounts & ici->samples) - ici->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - } - } + ici->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; if (templ->target == PIPE_TEXTURE_CUBE) ici->arrayLayers *= 6; - - if (templ->usage == PIPE_USAGE_STAGING && - templ->format != PIPE_FORMAT_B4G4R4A4_UNORM && - templ->format != PIPE_FORMAT_B4G4R4A4_UINT) - ici->tiling = VK_IMAGE_TILING_LINEAR; - - bool first = true; - bool tried[2] = {0}; - uint64_t mod = DRM_FORMAT_MOD_INVALID; - while (!ici->usage) { - if (!first) { - switch (ici->tiling) { - case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: - ici->tiling = VK_IMAGE_TILING_OPTIMAL; - modifiers_count = 0; - break; - case VK_IMAGE_TILING_OPTIMAL: - ici->tiling = VK_IMAGE_TILING_LINEAR; - break; - case VK_IMAGE_TILING_LINEAR: - if (bind & PIPE_BIND_LINEAR) { - *success = false; - return DRM_FORMAT_MOD_INVALID; - } - ici->tiling = VK_IMAGE_TILING_OPTIMAL; - break; - default: - unreachable("unhandled tiling mode"); - } - if (tried[ici->tiling]) { - *success = false; - return DRM_FORMAT_MOD_INVALID; - } - } - ici->usage = get_image_usage(screen, ici, templ, bind, modifiers_count, modifiers, &mod); - first = false; - if (ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) - tried[ici->tiling] = true; - } - - *success = true; - return mod; } -static struct zink_resource_object * -resource_object_create(struct zink_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle, bool *optimal_tiling, - const 
uint64_t *modifiers, int modifiers_count) +static inline bool +create_sampler_conversion(VkImageCreateInfo ici, struct zink_screen *screen, + struct zink_resource_object *obj) { - struct zink_resource_object *obj = CALLOC_STRUCT(zink_resource_object); - if (!obj) - return NULL; - - VkMemoryRequirements reqs; - VkMemoryPropertyFlags flags; - bool need_dedicated = false; - VkExternalMemoryHandleTypeFlags export_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; - - VkExternalMemoryHandleTypeFlags external = 0; - if (whandle) { - if (whandle->type == WINSYS_HANDLE_TYPE_FD) - external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - else - unreachable("unknown handle type"); + if (obj->vkfeats & VK_FORMAT_FEATURE_DISJOINT_BIT) + ici.flags |= VK_IMAGE_CREATE_DISJOINT_BIT; + VkSamplerYcbcrConversionCreateInfo sycci = {0}; + sycci.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO; + sycci.pNext = NULL; + sycci.format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + sycci.ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709; + sycci.ycbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL; + sycci.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + sycci.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + sycci.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + sycci.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + if (!obj->vkfeats || (obj->vkfeats & VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) { + sycci.xChromaOffset = VK_CHROMA_LOCATION_COSITED_EVEN; + sycci.yChromaOffset = VK_CHROMA_LOCATION_COSITED_EVEN; + } else { + assert(obj->vkfeats & VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT); + sycci.xChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + sycci.yChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; } + sycci.chromaFilter = VK_FILTER_LINEAR; + sycci.forceExplicitReconstruction = VK_FALSE; + VkResult res = VKSCR(CreateSamplerYcbcrConversion)(screen->dev, &sycci, NULL, &obj->sampler_conversion); + if (res != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateSamplerYcbcrConversion failed"); + 
return false; + } + return true; +} - /* TODO: remove linear for wsi */ - bool scanout = templ->bind & PIPE_BIND_SCANOUT; - bool shared = templ->bind & PIPE_BIND_SHARED; - if (shared && screen->info.have_EXT_external_memory_dma_buf) - export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - - pipe_reference_init(&obj->reference, 1); - util_dynarray_init(&obj->tmp, NULL); - util_dynarray_init(&obj->desc_set_refs.refs, NULL); - if (templ->target == PIPE_BUFFER) { - VkBufferCreateInfo bci = create_bci(screen, templ, templ->bind); +static const VkImageAspectFlags plane_aspects[] = { + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, +}; - if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &obj->buffer) != VK_SUCCESS) { - debug_printf("vkCreateBuffer failed\n"); - goto fail1; +static inline bool +get_image_memory_requirement(struct zink_screen *screen, struct zink_resource_object *obj, + unsigned num_planes, VkMemoryRequirements *reqs) +{ + bool need_dedicated = false; + if (VKSCR(GetImageMemoryRequirements2)) { + VkMemoryRequirements2 req2; + req2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + VkImageMemoryRequirementsInfo2 info2; + info2.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2; + info2.pNext = NULL; + info2.image = obj->image; + VkMemoryDedicatedRequirements ded; + ded.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + ded.pNext = NULL; + req2.pNext = &ded; + VkImagePlaneMemoryRequirementsInfo plane; + plane.sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO; + plane.pNext = NULL; + if (num_planes > 1) + info2.pNext = &plane; + unsigned offset = 0; + for (unsigned i = 0; i < num_planes; i++) { + assert(i < ARRAY_SIZE(plane_aspects)); + plane.planeAspect = plane_aspects[i]; + VKSCR(GetImageMemoryRequirements2)(screen->dev, &info2, &req2); + if (!i) + reqs->alignment = req2.memoryRequirements.alignment; + obj->plane_offsets[i] = offset; + offset += req2.memoryRequirements.size; + 
reqs->size += req2.memoryRequirements.size; + reqs->memoryTypeBits |= req2.memoryRequirements.memoryTypeBits; + need_dedicated |= ded.prefersDedicatedAllocation || ded.requiresDedicatedAllocation; } - - VKSCR(GetBufferMemoryRequirements)(screen->dev, obj->buffer, &reqs); - if (templ->usage == PIPE_USAGE_STAGING) - flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - else if (templ->usage == PIPE_USAGE_STREAM) - flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - else if (templ->usage == PIPE_USAGE_IMMUTABLE) - flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - else - flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - obj->is_buffer = true; - obj->transfer_dst = true; } else { - bool winsys_modifier = shared && whandle && whandle->modifier != DRM_FORMAT_MOD_INVALID; - const uint64_t *ici_modifiers = winsys_modifier ? &whandle->modifier : modifiers; - unsigned ici_modifier_count = winsys_modifier ? 1 : modifiers_count; - bool success = false; - VkImageCreateInfo ici; - uint64_t mod = create_ici(screen, &ici, templ, !!external, templ->bind, ici_modifier_count, ici_modifiers, &success); - VkExternalMemoryImageCreateInfo emici; - VkImageDrmFormatModifierExplicitCreateInfoEXT idfmeci; - VkImageDrmFormatModifierListCreateInfoEXT idfmlci; - if (!success) - goto fail1; - - if (shared || external) { - emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; - emici.pNext = NULL; - emici.handleTypes = export_types; - ici.pNext = &emici; - - assert(ici.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT || mod != DRM_FORMAT_MOD_INVALID); - if (winsys_modifier && ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { - assert(mod == whandle->modifier); - idfmeci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT; - idfmeci.pNext = ici.pNext; - idfmeci.drmFormatModifier = mod; - - /* TODO: store these values from other planes in their - * respective zink_resource, and walk the 
next-pointers to - * build up the planar array here instead. - */ - assert(util_format_get_num_planes(templ->format) == 1); - idfmeci.drmFormatModifierPlaneCount = 1; - VkSubresourceLayout plane_layout = { - .offset = whandle->offset, - .size = 0, - .rowPitch = whandle->stride, - .arrayPitch = 0, - .depthPitch = 0, - }; - idfmeci.pPlaneLayouts = &plane_layout; - - ici.pNext = &idfmeci; - } else if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { - idfmlci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT; - idfmlci.pNext = ici.pNext; - idfmlci.drmFormatModifierCount = modifiers_count; - idfmlci.pDrmFormatModifiers = modifiers; - ici.pNext = &idfmlci; - } else if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) { - // TODO: remove for wsi - if (!external) - ici.pNext = NULL; - scanout = false; - shared = false; - } - } - - if (optimal_tiling) - *optimal_tiling = ici.tiling == VK_IMAGE_TILING_OPTIMAL; - - if (ici.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) - obj->transfer_dst = true; + VKSCR(GetImageMemoryRequirements)(screen->dev, obj->image, reqs); + } + return need_dedicated; +} - if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) - obj->modifier_aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; +static inline VkFormatFeatureFlags +get_format_feature_flags(VkImageCreateInfo ici, struct zink_screen *screen, const struct pipe_resource *templ) +{ + VkFormatFeatureFlags feats = 0; + switch (ici.tiling) { + case VK_IMAGE_TILING_LINEAR: + feats = screen->format_props[templ->format].linearTilingFeatures; + break; + case VK_IMAGE_TILING_OPTIMAL: + feats = screen->format_props[templ->format].optimalTilingFeatures; + break; + case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: + feats = VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM; + /* + If is tiling then VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, the value of + imageCreateFormatFeatures is found by calling vkGetPhysicalDeviceFormatProperties2 + with VkImageFormatProperties::format equal to 
VkImageCreateInfo::format and with + VkDrmFormatModifierPropertiesListEXT chained into VkImageFormatProperties2; by + collecting all members of the returned array + VkDrmFormatModifierPropertiesListEXT::pDrmFormatModifierProperties + whose drmFormatModifier belongs to imageCreateDrmFormatModifiers; and by taking the bitwise + intersection, over the collected array members, of drmFormatModifierTilingFeatures. + (The resultant imageCreateFormatFeatures may be empty). + * -Chapter 12. Resource Creation + */ + for (unsigned i = 0; i < screen->modifier_props[templ->format].drmFormatModifierCount; i++) + feats &= screen->modifier_props[templ->format].pDrmFormatModifierProperties[i].drmFormatModifierTilingFeatures; + break; + default: + unreachable("unknown tiling"); + } + return feats; +} - struct wsi_image_create_info image_wsi_info = { - VK_STRUCTURE_TYPE_WSI_IMAGE_CREATE_INFO_MESA, - NULL, - .scanout = true, - }; +#if !defined(_WIN32) + #define ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT +#else + #define ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT +#endif - if ((screen->needs_mesa_wsi || screen->needs_mesa_flush_wsi) && scanout) { - image_wsi_info.pNext = ici.pNext; - ici.pNext = &image_wsi_info; - } - VkResult result = VKSCR(CreateImage)(screen->dev, &ici, NULL, &obj->image); - if (result != VK_SUCCESS) { - debug_printf("vkCreateImage failed\n"); - goto fail1; - } +struct mem_alloc_info { + struct winsys_handle *whandle; + VkMemoryPropertyFlags flags; + enum zink_alloc_flag aflags; + bool need_dedicated; + bool shared; + const void *user_mem; + VkExternalMemoryHandleTypeFlags external; + VkExternalMemoryHandleTypeFlags export_types; +}; - if (VKSCR(GetImageMemoryRequirements2)) { - VkMemoryRequirements2 req2; - req2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - VkImageMemoryRequirementsInfo2 info2; - info2.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2; - 
info2.pNext = NULL; - info2.image = obj->image; - VkMemoryDedicatedRequirements ded; - ded.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; - ded.pNext = NULL; - req2.pNext = &ded; - VKSCR(GetImageMemoryRequirements2)(screen->dev, &info2, &req2); - memcpy(&reqs, &req2.memoryRequirements, sizeof(VkMemoryRequirements)); - need_dedicated = ded.prefersDedicatedAllocation || ded.requiresDedicatedAllocation; +static inline bool +get_export_flags(struct zink_screen *screen, const struct pipe_resource *templ, struct mem_alloc_info *alloc_info) +{ + bool needs_export = (templ->bind & (ZINK_BIND_VIDEO | ZINK_BIND_DMABUF)) != 0; + if (alloc_info->whandle) { + if (alloc_info->whandle->type == WINSYS_HANDLE_TYPE_FD || + alloc_info->whandle->type == ZINK_EXTERNAL_MEMORY_HANDLE) + needs_export |= true; + else + unreachable("unknown handle type"); + } + if (needs_export) { + if (alloc_info->whandle && alloc_info->whandle->type == ZINK_EXTERNAL_MEMORY_HANDLE) { + alloc_info->external = ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT; + } else if (screen->info.have_EXT_external_memory_dma_buf) { + alloc_info->external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + alloc_info->export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; } else { - VKSCR(GetImageMemoryRequirements)(screen->dev, obj->image, &reqs); + return false; } - if (templ->usage == PIPE_USAGE_STAGING && ici.tiling == VK_IMAGE_TILING_LINEAR) - flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - else - flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - - obj->vkflags = ici.flags; - obj->vkusage = ici.usage; } - obj->alignment = reqs.alignment; + if (alloc_info->user_mem) { + assert(!alloc_info->whandle); + alloc_info->external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + alloc_info->export_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + } + /* we may export WINSYS_HANDLE_TYPE_FD handle which is dma-buf */ + if (templ->bind & PIPE_BIND_SHARED && 
screen->info.have_EXT_external_memory_dma_buf) + alloc_info->export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + return true; +} - if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT || templ->usage == PIPE_USAGE_DYNAMIC) - flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - else if (!(flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && - templ->usage == PIPE_USAGE_STAGING) - flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +enum resource_object_create_result { + roc_success, + roc_success_early_return, + roc_fail_and_free_object, + roc_fail_and_cleanup_object, + roc_fail_and_cleanup_all +}; +static inline enum resource_object_create_result +allocate_bo(struct zink_screen *screen, const struct pipe_resource *templ, + VkMemoryRequirements *reqs, struct zink_resource_object *obj, + struct mem_alloc_info *alloc_info) +{ VkMemoryAllocateInfo mai; - enum zink_alloc_flag aflags = templ->flags & PIPE_RESOURCE_FLAG_SPARSE ? ZINK_ALLOC_SPARSE : 0; mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; mai.pNext = NULL; - mai.allocationSize = reqs.size; - enum zink_heap heap = zink_heap_from_domain_flags(flags, aflags); - mai.memoryTypeIndex = screen->heap_map[heap]; - if (unlikely(!(reqs.memoryTypeBits & BITFIELD_BIT(mai.memoryTypeIndex)))) { - /* not valid based on reqs; demote to more compatible type */ - switch (heap) { - case ZINK_HEAP_DEVICE_LOCAL_VISIBLE: - heap = ZINK_HEAP_DEVICE_LOCAL; - break; - case ZINK_HEAP_HOST_VISIBLE_CACHED: - heap = ZINK_HEAP_HOST_VISIBLE_COHERENT; - break; - default: - break; - } - mai.memoryTypeIndex = screen->heap_map[heap]; - assert(reqs.memoryTypeBits & BITFIELD_BIT(mai.memoryTypeIndex)); + mai.allocationSize = reqs->size; + enum zink_heap heap = zink_heap_from_domain_flags(alloc_info->flags, alloc_info->aflags); + if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) { + if (!(vk_domain_from_heap(heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + heap = zink_heap_from_domain_flags(alloc_info->flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 
alloc_info->aflags); } - VkMemoryType mem_type = screen->info.mem_props.memoryTypes[mai.memoryTypeIndex]; - obj->coherent = mem_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) - obj->host_visible = mem_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - VkMemoryDedicatedAllocateInfo ded_alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = mai.pNext, @@ -632,85 +964,595 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t .buffer = VK_NULL_HANDLE, }; - if (screen->info.have_KHR_dedicated_allocation && need_dedicated) { + if (screen->info.have_KHR_dedicated_allocation && alloc_info->need_dedicated) { ded_alloc_info.pNext = mai.pNext; mai.pNext = &ded_alloc_info; } VkExportMemoryAllocateInfo emai; - if (templ->bind & PIPE_BIND_SHARED && shared) { + if ((templ->bind & ZINK_BIND_VIDEO) || ((templ->bind & PIPE_BIND_SHARED) && alloc_info->shared) || (templ->bind & ZINK_BIND_DMABUF)) { emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; - emai.handleTypes = export_types; + emai.handleTypes = alloc_info->export_types; emai.pNext = mai.pNext; mai.pNext = &emai; + obj->exportable = true; } +#ifdef ZINK_USE_DMABUF + +#if !defined(_WIN32) VkImportMemoryFdInfoKHR imfi = { VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, NULL, }; - if (whandle) { + if (alloc_info->whandle) { imfi.pNext = NULL; - imfi.handleType = external; - imfi.fd = os_dupfd_cloexec(whandle->handle); + imfi.handleType = alloc_info->external; + imfi.fd = os_dupfd_cloexec(alloc_info->whandle->handle); if (imfi.fd < 0) { mesa_loge("ZINK: failed to dup dmabuf fd: %s\n", strerror(errno)); - goto fail1; + return roc_fail_and_cleanup_object; } imfi.pNext = mai.pNext; mai.pNext = &imfi; } - - struct wsi_memory_allocate_info memory_wsi_info = { - VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA, +#else + VkImportMemoryWin32HandleInfoKHR imfi = { + 
VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR, NULL, }; - if (screen->needs_mesa_wsi && scanout) { - memory_wsi_info.implicit_sync = true; + if (alloc_info->whandle) { + HANDLE source_target = GetCurrentProcess(); + HANDLE out_handle; + + bool result = DuplicateHandle(source_target, alloc_info->whandle->handle, source_target, &out_handle, 0, false, DUPLICATE_SAME_ACCESS); - memory_wsi_info.pNext = mai.pNext; - mai.pNext = &memory_wsi_info; + if (!result || !out_handle) { + mesa_loge("ZINK: failed to DuplicateHandle with winerr: %08x\n", (int)GetLastError()); + return roc_fail_and_cleanup_object; + } + + imfi.pNext = NULL; + imfi.handleType = alloc_info->external; + imfi.handle = out_handle; + + imfi.pNext = mai.pNext; + mai.pNext = &imfi; } +#endif - unsigned alignment = MAX2(reqs.alignment, 256); +#endif + + VkImportMemoryHostPointerInfoEXT imhpi = { + VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + NULL, + }; + if (alloc_info->user_mem) { + imhpi.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + imhpi.pHostPointer = (void*)alloc_info->user_mem; + imhpi.pNext = mai.pNext; + mai.pNext = &imhpi; + } + + unsigned alignment = MAX2(reqs->alignment, 256); if (templ->usage == PIPE_USAGE_STAGING && obj->is_buffer) alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment); obj->alignment = alignment; - obj->bo = zink_bo(zink_bo_create(screen, reqs.size, alignment, heap, mai.pNext ? 
ZINK_ALLOC_NO_SUBALLOC : 0, mai.pNext)); - if (!obj->bo) - goto fail2; - if (aflags == ZINK_ALLOC_SPARSE) { + + if (zink_mem_type_idx_from_types(screen, heap, reqs->memoryTypeBits) == UINT32_MAX) { + /* not valid based on reqs; demote to more compatible type */ + switch (heap) { + case ZINK_HEAP_DEVICE_LOCAL_VISIBLE: + heap = ZINK_HEAP_DEVICE_LOCAL; + break; + case ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED: + heap = ZINK_HEAP_HOST_VISIBLE_COHERENT; + break; + default: + break; + } + assert(zink_mem_type_idx_from_types(screen, heap, reqs->memoryTypeBits) != UINT32_MAX); + } + + while (1) { + /* iterate over all available memory types to reduce chance of oom */ + for (unsigned i = 0; !obj->bo && i < screen->heap_count[heap]; i++) { + if (!(reqs->memoryTypeBits & BITFIELD_BIT(screen->heap_map[heap][i]))) + continue; + + mai.memoryTypeIndex = screen->heap_map[heap][i]; + obj->bo = zink_bo(zink_bo_create(screen, reqs->size, alignment, heap, mai.pNext ? ZINK_ALLOC_NO_SUBALLOC : 0, mai.memoryTypeIndex, mai.pNext)); + } + + if (obj->bo || heap != ZINK_HEAP_DEVICE_LOCAL_VISIBLE) + break; + + /* demote BAR allocations to a different heap on failure to avoid oom */ + if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT || templ->usage == PIPE_USAGE_DYNAMIC) + heap = ZINK_HEAP_HOST_VISIBLE_COHERENT; + else + heap = ZINK_HEAP_DEVICE_LOCAL; + }; + + return obj->bo ? 
roc_success : roc_fail_and_cleanup_object; +} + +static inline bool +update_alloc_info_flags(struct zink_screen *screen, const struct pipe_resource *templ, + VkMemoryRequirements *reqs, struct mem_alloc_info *alloc_info) +{ + if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT || templ->usage == PIPE_USAGE_DYNAMIC) + alloc_info->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + else if (!(alloc_info->flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + templ->usage == PIPE_USAGE_STAGING) + alloc_info->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (templ->bind & ZINK_BIND_TRANSIENT) + alloc_info->flags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + + if (alloc_info->user_mem) { + VkExternalMemoryHandleTypeFlagBits handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + VkMemoryHostPointerPropertiesEXT memory_host_pointer_properties = {0}; + memory_host_pointer_properties.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT; + memory_host_pointer_properties.pNext = NULL; + VkResult res = VKSCR(GetMemoryHostPointerPropertiesEXT)(screen->dev, handle_type, alloc_info->user_mem, &memory_host_pointer_properties); + if (res != VK_SUCCESS) { + mesa_loge("ZINK: vkGetMemoryHostPointerPropertiesEXT failed"); + return false; + } + reqs->memoryTypeBits &= memory_host_pointer_properties.memoryTypeBits; + alloc_info->flags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + + alloc_info->aflags = templ->flags & PIPE_RESOURCE_FLAG_SPARSE ? 
ZINK_ALLOC_SPARSE : 0; + return true; +} + +static inline void +update_obj_info(struct zink_screen *screen, struct zink_resource_object *obj, + const struct pipe_resource *templ, struct mem_alloc_info *alloc_info) +{ + if (alloc_info->aflags == ZINK_ALLOC_SPARSE) { obj->size = templ->width0; } else { obj->offset = zink_bo_get_offset(obj->bo); obj->size = zink_bo_get_size(obj->bo); } - if (templ->target == PIPE_BUFFER) { - if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) - if (VKSCR(BindBufferMemory)(screen->dev, obj->buffer, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) - goto fail3; + obj->coherent = screen->info.mem_props.memoryTypes[obj->bo->base.base.placement].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) { + obj->host_visible = screen->info.mem_props.memoryTypes[obj->bo->base.base.placement].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } +} + +static inline void +debug_resource_mem(struct zink_resource_object *obj, const struct pipe_resource *templ, struct zink_screen *screen) +{ + char buf[4096]; + unsigned idx = 0; + if (obj->is_buffer) { + size_t size = (size_t)DIV_ROUND_UP(obj->size, 1024); + if (templ->bind == PIPE_BIND_QUERY_BUFFER && templ->usage == PIPE_USAGE_STAGING) //internal qbo + idx += snprintf(buf, sizeof(buf), "QBO(%zu)", size); + else + idx += snprintf(buf, sizeof(buf), "BUF(%zu)", size); + } else { + idx += snprintf(buf, sizeof(buf), "IMG(%s:%ux%ux%u)", util_format_short_name(templ->format), templ->width0, templ->height0, templ->depth0); + } + /* + zink_vkflags_func flag_func = obj->is_buffer ? (zink_vkflags_func)vk_BufferCreateFlagBits_to_str : (zink_vkflags_func)vk_ImageCreateFlagBits_to_str; + zink_vkflags_func usage_func = obj->is_buffer ? 
(zink_vkflags_func)vk_BufferUsageFlagBits_to_str : (zink_vkflags_func)vk_ImageUsageFlagBits_to_str; + if (obj->vkflags) { + buf[idx++] = '['; + idx += zink_string_vkflags_unroll(&buf[idx], sizeof(buf) - idx, obj->vkflags, flag_func); + buf[idx++] = ']'; + } + if (obj->vkusage) { + buf[idx++] = '['; + idx += zink_string_vkflags_unroll(&buf[idx], sizeof(buf) - idx, obj->vkusage, usage_func); + buf[idx++] = ']'; + } + */ + buf[idx] = 0; + obj->bo->name = zink_debug_mem_add(screen, obj->size, buf); +} + +static inline enum resource_object_create_result +allocate_bo_and_update_obj(struct zink_screen *screen, const struct pipe_resource *templ, + VkMemoryRequirements *reqs, struct zink_resource_object *obj, + struct mem_alloc_info *alloc_info) +{ + if (!update_alloc_info_flags(screen, templ, reqs, alloc_info)) + return roc_fail_and_free_object; + + enum resource_object_create_result retval = allocate_bo(screen, templ, reqs, obj, alloc_info); + assert(retval != roc_success_early_return); + if (retval != roc_success) + return retval; + + update_obj_info(screen, obj, templ, alloc_info); + + if (zink_debug & ZINK_DEBUG_MEM) + debug_resource_mem(obj, templ, screen); + return roc_success; +} + +static inline enum resource_object_create_result +create_buffer(struct zink_screen *screen, struct zink_resource_object *obj, + const struct pipe_resource *templ, uint64_t *modifiers, + int modifiers_count, struct mem_alloc_info *alloc_info) +{ + VkBufferCreateInfo bci = create_bci(screen, templ, templ->bind); + VkExternalMemoryBufferCreateInfo embci; + VkMemoryRequirements reqs = {0}; + + embci.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO; + if (alloc_info->external) { + embci.pNext = bci.pNext; + embci.handleTypes = alloc_info->export_types; + bci.pNext = &embci; + } + + if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &obj->buffer) != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateBuffer failed"); + return roc_fail_and_free_object; + } + + if (!(templ->bind & 
(PIPE_BIND_SHADER_IMAGE | ZINK_BIND_DESCRIPTOR))) { + bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &obj->storage_buffer) != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateBuffer failed"); + VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL); + return roc_fail_and_free_object; + } + } + + if (modifiers_count) { + assert(modifiers_count == 3); + /* this is the DGC path because there's no other way to pass mem bits and I don't wanna copy/paste everything around */ + reqs.size = modifiers[0]; + reqs.alignment = modifiers[1]; + reqs.memoryTypeBits = modifiers[2]; } else { - if (VKSCR(BindImageMemory)(screen->dev, obj->image, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) - goto fail3; + VKSCR(GetBufferMemoryRequirements)(screen->dev, obj->buffer, &reqs); } - return obj; -fail3: - zink_bo_unref(screen, obj->bo); + if (templ->usage == PIPE_USAGE_STAGING) + alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + else if (templ->usage == PIPE_USAGE_STREAM) + alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + else if (templ->usage == PIPE_USAGE_IMMUTABLE) + alloc_info->flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + obj->is_buffer = true; + obj->transfer_dst = true; + obj->vkflags = bci.flags; + obj->vkusage = bci.usage; + + enum resource_object_create_result retval = allocate_bo_and_update_obj(screen, templ, &reqs, obj, alloc_info); + assert(retval != roc_success_early_return); + if (retval != roc_success) + return retval; + + if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) { + if (VKSCR(BindBufferMemory)(screen->dev, obj->buffer, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) { + mesa_loge("ZINK: vkBindBufferMemory failed"); + return roc_fail_and_cleanup_all ; + } + if (obj->storage_buffer && 
VKSCR(BindBufferMemory)(screen->dev, obj->storage_buffer, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) { + mesa_loge("ZINK: vkBindBufferMemory failed"); + return roc_fail_and_cleanup_all; + } + } + return roc_success; +} -fail2: - if (templ->target == PIPE_BUFFER) - VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL); +static inline enum resource_object_create_result +create_image(struct zink_screen *screen, struct zink_resource_object *obj, + const struct pipe_resource *templ, bool *linear, + uint64_t *modifiers, int modifiers_count, + struct mem_alloc_info *alloc_info) +{ + VkMemoryRequirements reqs = {0}; + bool winsys_modifier = (alloc_info->export_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) && + alloc_info->whandle && + alloc_info->whandle->modifier != DRM_FORMAT_MOD_INVALID; + uint64_t *ici_modifiers = winsys_modifier ? &alloc_info->whandle->modifier : modifiers; + unsigned ici_modifier_count = winsys_modifier ? 1 : modifiers_count; + VkImageCreateInfo ici; + enum pipe_format srgb = PIPE_FORMAT_NONE; + /* we often need to be able to mutate between srgb and linear, but we don't need general + * image view/shader image format compatibility (that path means losing fast clears or compression on some hardware). + */ + if (!(templ->bind & ZINK_BIND_MUTABLE)) { + srgb = util_format_is_srgb(templ->format) ? util_format_linear(templ->format) : util_format_srgb(templ->format); + /* why do these helpers have different default return values? 
*/ + if (srgb == templ->format) + srgb = PIPE_FORMAT_NONE; + } + VkFormat formats[2]; + VkImageFormatListCreateInfo format_list; + if (srgb) { + formats[0] = zink_get_format(screen, templ->format); + formats[1] = zink_get_format(screen, srgb); + /* only use format list if both formats have supported vk equivalents */ + if (formats[0] && formats[1]) { + format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO; + format_list.pNext = NULL; + format_list.viewFormatCount = 2; + format_list.pViewFormats = formats; + ici.pNext = &format_list; + } else { + ici.pNext = NULL; + } + } else { + ici.pNext = NULL; + } + init_ici(screen, &ici, templ, templ->bind, ici_modifier_count); + + bool success = false; + uint64_t mod = eval_ici(screen, &ici, templ, templ->bind, ici_modifier_count, ici_modifiers, &success); + if (ici.format == VK_FORMAT_A8_UNORM_KHR && !success) { + ici.format = zink_get_format(screen, zink_format_get_emulated_alpha(templ->format)); + mod = eval_ici(screen, &ici, templ, templ->bind, ici_modifier_count, ici_modifiers, &success); + } + if (!success) + return roc_fail_and_free_object; + + if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && srgb && + util_format_get_nr_components(srgb) == 4 && + !(ici.flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) { + mesa_loge("zink: refusing to create possibly-srgb dmabuf due to missing driver support: %s not supported!", util_format_name(srgb)); + return roc_fail_and_free_object; + } + VkExternalMemoryImageCreateInfo emici; + VkImageDrmFormatModifierExplicitCreateInfoEXT idfmeci; + VkImageDrmFormatModifierListCreateInfoEXT idfmlci; + VkSubresourceLayout plane_layouts[4]; + VkSubresourceLayout plane_layout = { + .offset = alloc_info->whandle ? alloc_info->whandle->offset : 0, + .size = 0, + .rowPitch = alloc_info->whandle ? 
alloc_info->whandle->stride : 0, + .arrayPitch = 0, + .depthPitch = 0, + }; + + obj->render_target = (ici.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) != 0; + + if (alloc_info->shared || alloc_info->external) { + emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + emici.pNext = ici.pNext; + emici.handleTypes = alloc_info->export_types; + ici.pNext = &emici; + + assert(ici.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT || mod != DRM_FORMAT_MOD_INVALID); + if (alloc_info->whandle && ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + assert(mod == alloc_info->whandle->modifier || !winsys_modifier); + idfmeci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT; + idfmeci.pNext = ici.pNext; + idfmeci.drmFormatModifier = mod; + idfmeci.drmFormatModifierPlaneCount = obj->plane_count; + + plane_layouts[0] = plane_layout; + struct pipe_resource *pnext = templ->next; + for (unsigned i = 1; i < obj->plane_count; i++, pnext = pnext->next) { + struct zink_resource *next = zink_resource(pnext); + obj->plane_offsets[i] = plane_layouts[i].offset = next->obj->plane_offsets[i]; + obj->plane_strides[i] = plane_layouts[i].rowPitch = next->obj->plane_strides[i]; + plane_layouts[i].size = 0; + plane_layouts[i].arrayPitch = 0; + plane_layouts[i].depthPitch = 0; + } + idfmeci.pPlaneLayouts = plane_layouts; + + ici.pNext = &idfmeci; + } else if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + idfmlci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT; + idfmlci.pNext = ici.pNext; + idfmlci.drmFormatModifierCount = modifiers_count; + idfmlci.pDrmFormatModifiers = modifiers; + ici.pNext = &idfmlci; + } else if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) { + alloc_info->shared = false; + } + } else if (alloc_info->user_mem) { + emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + emici.pNext = ici.pNext; + emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + ici.pNext = 
&emici; + } + + if (linear) + *linear = ici.tiling == VK_IMAGE_TILING_LINEAR; + + if (ici.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) + obj->transfer_dst = true; + +#if defined(ZINK_USE_DMABUF) && !defined(_WIN32) + if (obj->is_aux) { + obj->modifier = mod; + obj->modifier_aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT << alloc_info->whandle->plane; + obj->plane_offsets[alloc_info->whandle->plane] = alloc_info->whandle->offset; + obj->plane_strides[alloc_info->whandle->plane] = alloc_info->whandle->stride; + obj->handle = os_dupfd_cloexec(alloc_info->whandle->handle); + if (obj->handle < 0) { + mesa_loge("ZINK: failed to dup dmabuf fd: %s\n", strerror(errno)); + return roc_fail_and_free_object; + } + return roc_success_early_return; + } +#endif + + obj->vkfeats = get_format_feature_flags(ici, screen, templ);; + if (util_format_is_yuv(templ->format)) { + if (!create_sampler_conversion(ici, screen, obj)) + return roc_fail_and_free_object; + } else if (alloc_info->whandle) { + obj->plane_strides[alloc_info->whandle->plane] = alloc_info->whandle->stride; + } + + VkResult result = VKSCR(CreateImage)(screen->dev, &ici, NULL, &obj->image); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateImage failed (%s)", vk_Result_to_str(result)); + return roc_fail_and_free_object; + } + + if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + VkImageDrmFormatModifierPropertiesEXT modprops = {0}; + modprops.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT; + result = VKSCR(GetImageDrmFormatModifierPropertiesEXT)(screen->dev, obj->image, &modprops); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkGetImageDrmFormatModifierPropertiesEXT failed"); + return roc_fail_and_free_object; + } + obj->modifier = modprops.drmFormatModifier; + unsigned num_dmabuf_planes = screen->base.get_dmabuf_modifier_planes(&screen->base, obj->modifier, templ->format); + obj->modifier_aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; + if (num_dmabuf_planes > 1) + 
obj->modifier_aspect |= VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT; + if (num_dmabuf_planes > 2) + obj->modifier_aspect |= VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; + if (num_dmabuf_planes > 3) + obj->modifier_aspect |= VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT; + assert(num_dmabuf_planes <= 4); + } + + unsigned num_planes = util_format_get_num_planes(templ->format); + alloc_info->need_dedicated = get_image_memory_requirement(screen, obj, num_planes, &reqs); + if (templ->usage == PIPE_USAGE_STAGING && ici.tiling == VK_IMAGE_TILING_LINEAR) + alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; else - VKSCR(DestroyImage)(screen->dev, obj->image, NULL); -fail1: - FREE(obj); - return NULL; + alloc_info->flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + obj->vkflags = ici.flags; + obj->vkusage = ici.usage; + + enum resource_object_create_result retval = allocate_bo_and_update_obj(screen, templ, &reqs, obj, alloc_info); + assert(retval != roc_success_early_return); + if (retval != roc_success) + return retval; + + if (num_planes > 1) { + VkBindImageMemoryInfo infos[3]; + VkBindImagePlaneMemoryInfo planes[3]; + for (unsigned i = 0; i < num_planes; i++) { + infos[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + infos[i].image = obj->image; + infos[i].memory = zink_bo_get_mem(obj->bo); + infos[i].memoryOffset = obj->plane_offsets[i]; + if (templ->bind & ZINK_BIND_VIDEO) { + infos[i].pNext = &planes[i]; + planes[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO; + planes[i].pNext = NULL; + planes[i].planeAspect = plane_aspects[i]; + } + } + if (VKSCR(BindImageMemory2)(screen->dev, num_planes, infos) != VK_SUCCESS) { + mesa_loge("ZINK: vkBindImageMemory2 failed"); + return roc_fail_and_cleanup_all; + } + } else { + if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) + if (VKSCR(BindImageMemory)(screen->dev, obj->image, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) { + mesa_loge("ZINK: vkBindImageMemory failed"); + return roc_fail_and_cleanup_all; + } + } + + 
return roc_success; +} + +static struct zink_resource_object * +resource_object_create(struct zink_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle, bool *linear, + uint64_t *modifiers, int modifiers_count, const void *loader_private, const void *user_mem) +{ + struct zink_resource_object *obj = CALLOC_STRUCT(zink_resource_object); + unsigned max_level = 0; + if (!obj) + return NULL; + simple_mtx_init(&obj->view_lock, mtx_plain); + util_dynarray_init(&obj->views, NULL); + u_rwlock_init(&obj->copy_lock); + obj->unordered_read = true; + obj->unordered_write = true; + obj->unsync_access = true; + obj->last_dt_idx = obj->dt_idx = UINT32_MAX; //TODO: unionize + + struct mem_alloc_info alloc_info = { + .whandle = whandle, + .need_dedicated = false, + .export_types = ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT, + .shared = templ->bind & PIPE_BIND_SHARED, + .user_mem = user_mem + }; + + /* figure out aux plane count */ + if (whandle && whandle->plane >= util_format_get_num_planes(whandle->format)) + obj->is_aux = true; + struct pipe_resource *pnext = templ->next; + for (obj->plane_count = 1; pnext; obj->plane_count++, pnext = pnext->next) { + struct zink_resource *next = zink_resource(pnext); + if (!next->obj->is_aux) + break; + } + + if (!get_export_flags(screen, templ, &alloc_info)) { + /* can't export anything, fail early */ + return NULL; + } + + pipe_reference_init(&obj->reference, 1); + if (loader_private) { + obj->bo = CALLOC_STRUCT(zink_bo); + if (!obj->bo) { + mesa_loge("ZINK: failed to allocate obj->bo!"); + return NULL; + } + + obj->transfer_dst = true; + return obj; + } + + enum resource_object_create_result create_result; + if (templ->target == PIPE_BUFFER) { + max_level = 1; + create_result = create_buffer(screen, obj, templ, modifiers, modifiers_count, &alloc_info); + } else { + max_level = templ->last_level + 1; + create_result = create_image(screen, obj, templ, linear, modifiers, modifiers_count, + &alloc_info); + } + + 
switch (create_result) { + case roc_success: + for (unsigned i = 0; i < max_level; i++) + util_dynarray_init(&obj->copies[i], NULL); + FALLTHROUGH; + case roc_success_early_return: + return obj; + + case roc_fail_and_cleanup_all: + zink_bo_unref(screen, obj->bo); + FALLTHROUGH; + case roc_fail_and_cleanup_object: + if (templ->target == PIPE_BUFFER) { + VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL); + VKSCR(DestroyBuffer)(screen->dev, obj->storage_buffer, NULL); + } else + VKSCR(DestroyImage)(screen->dev, obj->image, NULL); + FALLTHROUGH; + case roc_fail_and_free_object: + FREE(obj); + return NULL; + default: + unreachable("Invalid create object result code"); + } } static struct pipe_resource * @@ -718,53 +1560,52 @@ resource_create(struct pipe_screen *pscreen, const struct pipe_resource *templ, struct winsys_handle *whandle, unsigned external_usage, - const uint64_t *modifiers, int modifiers_count) + const uint64_t *modifiers, int modifiers_count, + const void *loader_private, const void *user_mem) { struct zink_screen *screen = zink_screen(pscreen); - struct zink_resource *res = CALLOC_STRUCT(zink_resource); + struct zink_resource *res = CALLOC_STRUCT_CL(zink_resource); + + if (!res) { + mesa_loge("ZINK: failed to allocate res!"); + return NULL; + } - if (modifiers_count > 0) { + if (modifiers_count > 0 && screen->info.have_EXT_image_drm_format_modifier) { /* for rebinds */ res->modifiers_count = modifiers_count; res->modifiers = mem_dup(modifiers, modifiers_count * sizeof(uint64_t)); if (!res->modifiers) { - FREE(res); + FREE_CL(res); return NULL; } - /* TODO: remove this when multi-plane modifiers are supported */ - const struct zink_modifier_prop *prop = &screen->modifier_props[templ->format]; - for (unsigned i = 0; i < modifiers_count; i++) { - for (unsigned j = 0; j < prop->drmFormatModifierCount; j++) { - if (prop->pDrmFormatModifierProperties[j].drmFormatModifier == modifiers[i]) { - if 
(prop->pDrmFormatModifierProperties[j].drmFormatModifierPlaneCount != 1) - res->modifiers[i] = DRM_FORMAT_MOD_INVALID; - break; - } - } - } } res->base.b = *templ; - threaded_resource_init(&res->base.b); + bool allow_cpu_storage = (templ->target == PIPE_BUFFER) && + (templ->width0 < 0x1000); + threaded_resource_init(&res->base.b, allow_cpu_storage); pipe_reference_init(&res->base.b.reference, 1); res->base.b.screen = pscreen; - bool optimal_tiling = false; + bool linear = false; struct pipe_resource templ2 = *templ; - unsigned scanout_flags = templ->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED); - if (!(templ->bind & PIPE_BIND_LINEAR)) - templ2.bind &= ~scanout_flags; - res->obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling, NULL, 0); + if (templ2.flags & PIPE_RESOURCE_FLAG_SPARSE && + (util_res_sample_count(templ) == 1 || screen->info.feats.features.shaderStorageImageMultisample)) + templ2.bind |= PIPE_BIND_SHADER_IMAGE; + res->obj = resource_object_create(screen, &templ2, whandle, &linear, res->modifiers, res->modifiers_count, loader_private, user_mem); if (!res->obj) { free(res->modifiers); - FREE(res); + FREE_CL(res); return NULL; } + res->queue = VK_QUEUE_FAMILY_IGNORED; res->internal_format = templ->format; if (templ->target == PIPE_BUFFER) { util_range_init(&res->valid_buffer_range); + res->base.b.bind |= PIPE_BIND_SHADER_IMAGE; if (!screen->resizable_bar && templ->width0 >= 8196) { /* We don't want to evict buffers from VRAM by mapping them for CPU access, * because they might never be moved back again. 
If a buffer is large enough, @@ -775,39 +1616,90 @@ resource_create(struct pipe_screen *pscreen, */ res->base.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY; } + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB || zink_debug & ZINK_DEBUG_DGC) + zink_resource_get_address(screen, res); } else { + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) + res->base.b.bind |= PIPE_BIND_SHADER_IMAGE; + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) { + uint32_t count = 1; + VKSCR(GetImageSparseMemoryRequirements)(screen->dev, res->obj->image, &count, &res->sparse); + res->base.b.nr_sparse_levels = res->sparse.imageMipTailFirstLod; + } res->format = zink_get_format(screen, templ->format); - res->dmabuf_acquire = whandle && whandle->type == WINSYS_HANDLE_TYPE_FD; - res->layout = res->dmabuf_acquire ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; - res->optimal_tiling = optimal_tiling; + if (templ->target == PIPE_TEXTURE_1D || templ->target == PIPE_TEXTURE_1D_ARRAY) { + res->need_2D = (screen->need_2D_zs && util_format_is_depth_or_stencil(templ->format)) || + (screen->need_2D_sparse && (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)); + } + res->dmabuf = whandle && whandle->type == WINSYS_HANDLE_TYPE_FD; + if (res->dmabuf) + res->queue = VK_QUEUE_FAMILY_FOREIGN_EXT; + res->layout = res->dmabuf ? 
VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; + res->linear = linear; res->aspect = aspect_from_format(templ->format); - if (scanout_flags && optimal_tiling) { - // TODO: remove for wsi - templ2 = res->base.b; - templ2.bind = scanout_flags | PIPE_BIND_LINEAR; - res->scanout_obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling, res->modifiers, res->modifiers_count); - assert(!optimal_tiling); + } + + if (loader_private) { + if (templ->bind & PIPE_BIND_DISPLAY_TARGET) { + /* backbuffer */ + res->obj->dt = zink_kopper_displaytarget_create(screen, + res->base.b.bind, + res->base.b.format, + templ->width0, + templ->height0, + 64, loader_private, + &res->dt_stride); + if (!res->obj->dt) { + mesa_loge("zink: could not create swapchain"); + FREE(res->obj); + free(res->modifiers); + FREE_CL(res); + return NULL; + } + struct kopper_displaytarget *cdt = res->obj->dt; + if (cdt->swapchain->num_acquires) { + /* this should be a reused swapchain after a MakeCurrent dance that deleted the original resource */ + for (unsigned i = 0; i < cdt->swapchain->num_images; i++) { + if (!cdt->swapchain->images[i].acquired) + continue; + res->obj->dt_idx = i; + res->obj->image = cdt->swapchain->images[i].image; + res->layout = cdt->swapchain->images[i].layout; + } + } + } else { + /* frontbuffer */ + struct zink_resource *back = (void*)loader_private; + struct kopper_displaytarget *cdt = back->obj->dt; + cdt->refcount++; + assert(back->obj->dt); + res->obj->dt = back->obj->dt; } + struct kopper_displaytarget *cdt = res->obj->dt; + if (zink_kopper_has_srgb(cdt)) + res->obj->vkflags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + if (cdt->swapchain->scci.flags == VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR) + res->obj->vkflags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; + res->obj->vkusage = cdt->swapchain->scci.imageUsage; + res->base.b.bind |= PIPE_BIND_DISPLAY_TARGET; + res->linear = false; + res->swapchain = true; } - if 
(screen->winsys && (templ->bind & PIPE_BIND_DISPLAY_TARGET)) { - struct sw_winsys *winsys = screen->winsys; - res->dt = winsys->displaytarget_create(screen->winsys, - res->base.b.bind, - res->base.b.format, - templ->width0, - templ->height0, - 64, NULL, - &res->dt_stride); + if (!res->obj->host_visible) { + res->base.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY; + res->base.allow_cpu_storage = false; } if (res->obj->is_buffer) { res->base.buffer_id_unique = util_idalloc_mt_alloc(&screen->buffer_ids); - _mesa_hash_table_init(&res->bufferview_cache, screen, NULL, equals_bvci); + _mesa_hash_table_init(&res->bufferview_cache, NULL, NULL, equals_bvci); simple_mtx_init(&res->bufferview_mtx, mtx_plain); } else { - _mesa_hash_table_init(&res->surface_cache, screen, NULL, equals_ivci); + _mesa_hash_table_init(&res->surface_cache, NULL, NULL, equals_ivci); simple_mtx_init(&res->surface_mtx, mtx_plain); } + if (res->obj->exportable) + res->base.b.bind |= ZINK_BIND_DMABUF; return &res->base.b; } @@ -815,14 +1707,68 @@ static struct pipe_resource * zink_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *templ) { - return resource_create(pscreen, templ, NULL, 0, NULL, 0); + return resource_create(pscreen, templ, NULL, 0, NULL, 0, NULL, NULL); } static struct pipe_resource * zink_resource_create_with_modifiers(struct pipe_screen *pscreen, const struct pipe_resource *templ, const uint64_t *modifiers, int modifiers_count) { - return resource_create(pscreen, templ, NULL, 0, modifiers, modifiers_count); + return resource_create(pscreen, templ, NULL, 0, modifiers, modifiers_count, NULL, NULL); +} + +static struct pipe_resource * +zink_resource_create_drawable(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + const void *loader_private) +{ + return resource_create(pscreen, templ, NULL, 0, NULL, 0, loader_private, NULL); +} + +static bool +add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned bind) +{ + struct 
zink_screen *screen = zink_screen(ctx->base.screen); + assert((res->base.b.bind & bind) == 0); + res->base.b.bind |= bind; + struct zink_resource_object *old_obj = res->obj; + if (bind & ZINK_BIND_DMABUF && !res->modifiers_count && screen->info.have_EXT_image_drm_format_modifier) { + res->modifiers_count = 1; + res->modifiers = malloc(res->modifiers_count * sizeof(uint64_t)); + if (!res->modifiers) { + mesa_loge("ZINK: failed to allocate res->modifiers!"); + return false; + } + + res->modifiers[0] = DRM_FORMAT_MOD_LINEAR; + } + struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->linear, res->modifiers, res->modifiers_count, NULL, NULL); + if (!new_obj) { + debug_printf("new backing resource alloc failed!\n"); + res->base.b.bind &= ~bind; + return false; + } + struct zink_resource staging = *res; + staging.obj = old_obj; + staging.all_binds = 0; + res->layout = VK_IMAGE_LAYOUT_UNDEFINED; + res->obj = new_obj; + res->queue = VK_QUEUE_FAMILY_IGNORED; + for (unsigned i = 0; i <= res->base.b.last_level; i++) { + struct pipe_box box; + u_box_3d(0, 0, 0, + u_minify(res->base.b.width0, i), + u_minify(res->base.b.height0, i), res->base.b.array_size, &box); + box.depth = util_num_layers(&res->base.b, i); + ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box); + } + if (old_obj->exportable) { + simple_mtx_lock(&ctx->batch.state->exportable_lock); + _mesa_set_remove_key(&ctx->batch.state->dmabuf_exports, &staging); + simple_mtx_unlock(&ctx->batch.state->exportable_lock); + } + zink_resource_object_reference(screen, &old_obj, NULL); + return true; } static bool @@ -837,14 +1783,37 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx, { struct zink_screen *screen = zink_screen(pscreen); struct zink_resource *res = zink_resource(pres); - //TODO: remove for wsi - struct zink_resource_object *obj = res->scanout_obj ? 
res->scanout_obj : res->obj; - VkImageAspectFlags aspect = obj->modifier_aspect ? obj->modifier_aspect : res->aspect; + struct zink_resource_object *obj = res->obj; struct winsys_handle whandle; + VkImageAspectFlags aspect; + if (obj->modifier_aspect) { + switch (plane) { + case 0: + aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; + break; + case 1: + aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT; + break; + case 2: + aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; + break; + case 3: + aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT; + break; + default: + unreachable("how many planes you got in this thing?"); + } + } else if (res->obj->sampler_conversion) { + aspect = VK_IMAGE_ASPECT_PLANE_0_BIT; + } else { + aspect = res->aspect; + } switch (param) { case PIPE_RESOURCE_PARAM_NPLANES: - /* not yet implemented */ - *value = 1; + if (screen->info.have_EXT_image_drm_format_modifier) + *value = screen->base.get_dmabuf_modifier_planes(&screen->base, obj->modifier, res->internal_format); + else + *value = 1; break; case PIPE_RESOURCE_PARAM_STRIDE: { @@ -872,16 +1841,7 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx, } case PIPE_RESOURCE_PARAM_MODIFIER: { - *value = DRM_FORMAT_MOD_INVALID; - if (!screen->info.have_EXT_image_drm_format_modifier) - return false; - if (!res->modifiers) - return false; - VkImageDrmFormatModifierPropertiesEXT prop; - prop.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT; - prop.pNext = NULL; - if (VKSCR(GetImageDrmFormatModifierPropertiesEXT)(screen->dev, obj->image, &prop) == VK_SUCCESS) - *value = prop.drmFormatModifier; + *value = obj->modifier; break; } @@ -900,13 +1860,15 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx, break; } - case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED: + return false; case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS: + case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED: case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD: { +#ifdef ZINK_USE_DMABUF 
memset(&whandle, 0, sizeof(whandle)); if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED) whandle.type = WINSYS_HANDLE_TYPE_SHARED; - else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS) + if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS) whandle.type = WINSYS_HANDLE_TYPE_KMS; else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD) whandle.type = WINSYS_HANDLE_TYPE_FD; @@ -914,8 +1876,16 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx, if (!pscreen->resource_get_handle(pscreen, pctx, pres, &whandle, handle_usage)) return false; +#ifdef _WIN32 + *value = (uintptr_t)whandle.handle; +#else *value = whandle.handle; +#endif break; +#else + (void)whandle; + return false; +#endif } } return true; @@ -928,34 +1898,76 @@ zink_resource_get_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle, unsigned usage) { + if (tex->target == PIPE_BUFFER) + tc_buffer_disable_cpu_storage(tex); if (whandle->type == WINSYS_HANDLE_TYPE_FD || whandle->type == WINSYS_HANDLE_TYPE_KMS) { #ifdef ZINK_USE_DMABUF struct zink_resource *res = zink_resource(tex); struct zink_screen *screen = zink_screen(pscreen); - //TODO: remove for wsi - struct zink_resource_object *obj = res->scanout_obj ? 
res->scanout_obj : res->obj; + struct zink_resource_object *obj = res->obj; + +#if !defined(_WIN32) + if (whandle->type == WINSYS_HANDLE_TYPE_KMS && screen->drm_fd == -1) { + whandle->handle = -1; + } else { + if (!res->obj->exportable) { + assert(!zink_resource_usage_is_unflushed(res)); + if (!screen->info.have_EXT_image_drm_format_modifier) { + static bool warned = false; + warn_missing_feature(warned, "EXT_image_drm_format_modifier"); + return false; + } + unsigned bind = ZINK_BIND_DMABUF; + if (!(res->base.b.bind & PIPE_BIND_SHARED)) + bind |= PIPE_BIND_SHARED; + zink_screen_lock_context(screen); + if (!add_resource_bind(screen->copy_context, res, bind)) { + zink_screen_unlock_context(screen); + return false; + } + if (res->all_binds) + p_atomic_inc(&screen->image_rebind_counter); + screen->copy_context->base.flush(&screen->copy_context->base, NULL, 0); + zink_screen_unlock_context(screen); + obj = res->obj; + } - VkMemoryGetFdInfoKHR fd_info = {0}; - int fd; - fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + VkMemoryGetFdInfoKHR fd_info = {0}; + int fd; + fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info.memory = zink_bo_get_mem(obj->bo); + if (whandle->type == WINSYS_HANDLE_TYPE_FD) + fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + else + fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + VkResult result = VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkGetMemoryFdKHR failed"); + return false; + } + if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { + uint32_t h; + bool ret = zink_bo_get_kms_handle(screen, obj->bo, fd, &h); + close(fd); + if (!ret) + return false; + fd = h; + } + + whandle->handle = fd; + } +#else + VkMemoryGetWin32HandleInfoKHR handle_info = {0}; + HANDLE handle; + handle_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; //TODO: remove for wsi - fd_info.memory = zink_bo_get_mem(obj->bo); - if (whandle->type 
== WINSYS_HANDLE_TYPE_FD) - fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - else - fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; - VkResult result = VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd); + handle_info.memory = zink_bo_get_mem(obj->bo); + handle_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + VkResult result = VKSCR(GetMemoryWin32HandleKHR)(screen->dev, &handle_info, &handle); if (result != VK_SUCCESS) return false; - if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { - uint32_t h; - bool success = drmPrimeFDToHandle(screen->drm_fd, fd, &h) == 0; - close(fd); - if (!success) - return false; - fd = h; - } - whandle->handle = fd; + whandle->handle = handle; +#endif uint64_t value; zink_resource_get_param(pscreen, context, tex, 0, 0, 0, PIPE_RESOURCE_PARAM_MODIFIER, 0, &value); whandle->modifier = value; @@ -981,23 +1993,118 @@ zink_resource_from_handle(struct pipe_screen *pscreen, !zink_screen(pscreen)->info.have_EXT_image_drm_format_modifier) return NULL; - /* ignore any AUX planes, as well as planar formats */ - if (templ->format == PIPE_FORMAT_NONE || - util_format_get_num_planes(templ->format) != 1) - return NULL; + struct pipe_resource templ2 = *templ; + if (templ->format == PIPE_FORMAT_NONE) + templ2.format = whandle->format; - uint64_t modifier = DRM_FORMAT_MOD_INVALID; - int modifier_count = 0; - if (whandle->modifier != DRM_FORMAT_MOD_INVALID) { + uint64_t modifier = DRM_FORMAT_MOD_LINEAR; + int modifier_count = 1; + if (whandle->modifier != DRM_FORMAT_MOD_INVALID) modifier = whandle->modifier; - modifier_count = 1; + else { + if (!zink_screen(pscreen)->driver_workarounds.can_do_invalid_linear_modifier) { + mesa_loge("zink: display server doesn't support DRI3 modifiers and driver can't handle INVALID<->LINEAR!"); + return NULL; + } + whandle->modifier = modifier; + } + templ2.bind |= ZINK_BIND_DMABUF; + struct pipe_resource *pres = resource_create(pscreen, &templ2, whandle, usage, 
&modifier, modifier_count, NULL, NULL); + if (pres) { + struct zink_resource *res = zink_resource(pres); + if (pres->target != PIPE_BUFFER) + res->valid = true; + else + tc_buffer_disable_cpu_storage(pres); + res->internal_format = whandle->format; } - return resource_create(pscreen, templ, whandle, usage, &modifier, modifier_count); + return pres; #else return NULL; #endif } +static struct pipe_resource * +zink_resource_from_user_memory(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + void *user_memory) +{ + struct zink_screen *screen = zink_screen(pscreen); + VkDeviceSize alignMask = screen->info.ext_host_mem_props.minImportedHostPointerAlignment - 1; + + /* Validate the user_memory pointer and fail early. + * minImportedHostPointerAlignment is required to be POT */ + if (((uintptr_t)user_memory) & alignMask) + return NULL; + + return resource_create(pscreen, templ, NULL, 0, NULL, 0, NULL, user_memory); +} + +struct zink_memory_object { + struct pipe_memory_object b; + struct winsys_handle whandle; +}; + +static struct pipe_memory_object * +zink_memobj_create_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle, bool dedicated) +{ + struct zink_memory_object *memobj = CALLOC_STRUCT(zink_memory_object); + if (!memobj) + return NULL; + memcpy(&memobj->whandle, whandle, sizeof(struct winsys_handle)); + memobj->whandle.type = ZINK_EXTERNAL_MEMORY_HANDLE; + +#ifdef ZINK_USE_DMABUF + +#if !defined(_WIN32) + memobj->whandle.handle = os_dupfd_cloexec(whandle->handle); +#else + HANDLE source_target = GetCurrentProcess(); + HANDLE out_handle; + + DuplicateHandle(source_target, whandle->handle, source_target, &out_handle, 0, false, DUPLICATE_SAME_ACCESS); + memobj->whandle.handle = out_handle; + +#endif /* _WIN32 */ +#endif /* ZINK_USE_DMABUF */ + + return (struct pipe_memory_object *)memobj; +} + +static void +zink_memobj_destroy(struct pipe_screen *pscreen, struct pipe_memory_object *pmemobj) +{ +#ifdef ZINK_USE_DMABUF + struct 
zink_memory_object *memobj = (struct zink_memory_object *)pmemobj; + +#if !defined(_WIN32) + close(memobj->whandle.handle); +#else + CloseHandle(memobj->whandle.handle); +#endif /* _WIN32 */ +#endif /* ZINK_USE_DMABUF */ + + FREE(pmemobj); +} + +static struct pipe_resource * +zink_resource_from_memobj(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + struct pipe_memory_object *pmemobj, + uint64_t offset) +{ + struct zink_memory_object *memobj = (struct zink_memory_object *)pmemobj; + + struct pipe_resource *pres = resource_create(pscreen, templ, &memobj->whandle, 0, NULL, 0, NULL, NULL); + if (pres) { + if (pres->target != PIPE_BUFFER) + zink_resource(pres)->valid = true; + else + tc_buffer_disable_cpu_storage(pres); + } + return pres; +} + static bool invalidate_buffer(struct zink_context *ctx, struct zink_resource *res) { @@ -1008,7 +2115,10 @@ invalidate_buffer(struct zink_context *ctx, struct zink_resource *res) if (res->base.b.flags & PIPE_RESOURCE_FLAG_SPARSE) return false; - if (res->valid_buffer_range.start > res->valid_buffer_range.end) + struct pipe_box box; + u_box_3d(0, 0, 0, res->base.b.width0, 0, 0, &box); + if (res->valid_buffer_range.start > res->valid_buffer_range.end && + !zink_resource_copy_box_intersects(res, 0, &box)) return false; if (res->so_valid) @@ -1020,17 +2130,19 @@ invalidate_buffer(struct zink_context *ctx, struct zink_resource *res) if (!zink_resource_has_usage(res)) return false; - struct zink_resource_object *old_obj = res->obj; - struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL, NULL, 0); + struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL, NULL, 0, NULL, 0); if (!new_obj) { - debug_printf("new backing resource alloc failed!"); + debug_printf("new backing resource alloc failed!\n"); return false; } + bool needs_bda = !!res->obj->bda; /* this ref must be transferred before rebind or else BOOM */ 
zink_batch_reference_resource_move(&ctx->batch, res); res->obj = new_obj; + res->queue = VK_QUEUE_FAMILY_IGNORED; + if (needs_bda) + zink_resource_get_address(screen, res); zink_resource_rebind(ctx, res); - zink_descriptor_set_refs_clear(&old_obj->desc_set_refs, old_obj); return true; } @@ -1040,6 +2152,12 @@ zink_resource_invalidate(struct pipe_context *pctx, struct pipe_resource *pres) { if (pres->target == PIPE_BUFFER) invalidate_buffer(zink_context(pctx), zink_resource(pres)); + else { + struct zink_resource *res = zink_resource(pres); + if (res->valid && res->fb_bind_count) + zink_context(pctx)->rp_loadop_changed = true; + res->valid = false; + } } static void @@ -1058,13 +2176,9 @@ zink_transfer_copy_bufimage(struct zink_context *ctx, if (buf2img) box.x = trans->offset; - if (dst->obj->transfer_dst) - zink_copy_image_buffer(ctx, dst, src, trans->base.b.level, buf2img ? x : 0, - box.y, box.z, trans->base.b.level, &box, trans->base.b.usage); - else - util_blitter_copy_texture(ctx->blitter, &dst->base.b, trans->base.b.level, - x, box.y, box.z, &src->base.b, - 0, &box); + assert(dst->obj->transfer_dst); + zink_copy_image_buffer(ctx, dst, src, trans->base.b.level, buf2img ? 
x : 0, + box.y, box.z, trans->base.b.level, &box, trans->base.b.usage); } ALWAYS_INLINE static void @@ -1117,15 +2231,14 @@ create_transfer(struct zink_context *ctx, struct pipe_resource *pres, unsigned u struct zink_transfer *trans; if (usage & PIPE_MAP_THREAD_SAFE) - trans = malloc(sizeof(*trans)); + trans = calloc(1, sizeof(*trans)); else if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) - trans = slab_alloc(&ctx->transfer_pool_unsync); + trans = slab_zalloc(&ctx->transfer_pool_unsync); else - trans = slab_alloc(&ctx->transfer_pool); + trans = slab_zalloc(&ctx->transfer_pool); if (!trans) return NULL; - memset(trans, 0, sizeof(*trans)); pipe_resource_reference(&trans->base.b.resource, pres); trans->base.b.usage = usage; @@ -1170,7 +2283,8 @@ zink_buffer_map(struct pipe_context *pctx, * in which case it can be mapped unsynchronized. */ if (!(usage & (PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) && usage & PIPE_MAP_WRITE && !res->base.is_shared && - !util_ranges_intersect(&res->valid_buffer_range, box->x, box->x + box->width)) { + !util_ranges_intersect(&res->valid_buffer_range, box->x, box->x + box->width) && + !zink_resource_copy_box_intersects(res, 0, box)) { usage |= PIPE_MAP_UNSYNCHRONIZED; } @@ -1204,6 +2318,7 @@ zink_buffer_map(struct pipe_context *pctx, } } + unsigned map_offset = box->x; if (usage & PIPE_MAP_DISCARD_RANGE && (!res->obj->host_visible || !(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT)))) { @@ -1225,13 +2340,13 @@ zink_buffer_map(struct pipe_context *pctx, mgr = ctx->tc->base.stream_uploader; else mgr = ctx->base.stream_uploader; - u_upload_alloc(mgr, 0, box->width + box->x, + u_upload_alloc(mgr, 0, box->width, screen->info.props.limits.minMemoryMapAlignment, &offset, (struct pipe_resource **)&trans->staging_res, (void **)&ptr); res = zink_resource(trans->staging_res); - trans->offset = offset + box->x; + trans->offset = offset; usage |= PIPE_MAP_UNSYNCHRONIZED; - ptr = ((uint8_t *)ptr) + box->x; + ptr = ((uint8_t 
*)ptr); } else { /* At this point, the buffer is always idle (we checked it above). */ usage |= PIPE_MAP_UNSYNCHRONIZED; @@ -1243,30 +2358,45 @@ zink_buffer_map(struct pipe_context *pctx, if (!zink_resource_usage_check_completion(screen, res, ZINK_RESOURCE_ACCESS_WRITE)) goto success; usage |= PIPE_MAP_UNSYNCHRONIZED; - } else if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && - (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) && res->base.b.usage != PIPE_USAGE_STAGING) || !res->obj->host_visible)) { - assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE))); - if (!res->obj->host_visible || !(usage & PIPE_MAP_ONCE)) { - trans->offset = box->x % screen->info.props.limits.minMemoryMapAlignment; + } else if (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) && + ((screen->info.mem_props.memoryTypes[res->obj->bo->base.base.placement].propertyFlags & VK_STAGING_RAM) != VK_STAGING_RAM)) || + !res->obj->host_visible) { + /* any read, non-HV write, or unmappable that reaches this point needs staging */ + if ((usage & PIPE_MAP_READ) || !res->obj->host_visible || res->base.b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY) { +overwrite: + trans->offset = box->x % MAX2(screen->info.props.limits.minMemoryMapAlignment, 1 << MIN_SLAB_ORDER); trans->staging_res = pipe_buffer_create(&screen->base, PIPE_BIND_LINEAR, PIPE_USAGE_STAGING, box->width + trans->offset); if (!trans->staging_res) goto fail; struct zink_resource *staging_res = zink_resource(trans->staging_res); - zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width); + if (usage & (PIPE_MAP_THREAD_SAFE | PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_THREADED_UNSYNC)) { + assert(ctx != screen->copy_context); + /* this map can't access the passed context: use the copy context */ + zink_screen_lock_context(screen); + ctx = screen->copy_context; + } + if (usage & PIPE_MAP_READ) + zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width); res = staging_res; usage &= 
~PIPE_MAP_UNSYNCHRONIZED; - ptr = map_resource(screen, res); - ptr = ((uint8_t *)ptr) + trans->offset; + map_offset = trans->offset; } } if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { - if (usage & PIPE_MAP_WRITE) + if (usage & PIPE_MAP_WRITE) { + if (!(usage & PIPE_MAP_READ)) { + zink_resource_usage_try_wait(ctx, res, ZINK_RESOURCE_ACCESS_RW); + if (zink_resource_has_unflushed_usage(res)) + goto overwrite; + } zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_RW); - else + } else zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE); res->obj->access = 0; res->obj->access_stage = 0; + res->obj->last_write = 0; + zink_resource_copies_reset(res); } if (!ptr) { @@ -1279,7 +2409,7 @@ zink_buffer_map(struct pipe_context *pctx, ptr = map_resource(screen, res); if (!ptr) goto fail; - ptr = ((uint8_t *)ptr) + box->x; + ptr = ((uint8_t *)ptr) + map_offset; } if (!res->obj->coherent @@ -1296,6 +2426,7 @@ zink_buffer_map(struct pipe_context *pctx, VkDeviceSize offset = res->obj->offset + trans->offset; VkMappedMemoryRange range = zink_resource_init_mem_range(screen, res->obj, offset, size); if (VKSCR(InvalidateMappedMemoryRanges)(screen->dev, 1, &range) != VK_SUCCESS) { + mesa_loge("ZINK: vkInvalidateMappedMemoryRanges failed"); zink_bo_unmap(screen, res->obj->bo); goto fail; } @@ -1303,14 +2434,17 @@ zink_buffer_map(struct pipe_context *pctx, trans->base.b.usage = usage; if (usage & PIPE_MAP_WRITE) util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width); - if ((usage & PIPE_MAP_PERSISTENT) && !(usage & PIPE_MAP_COHERENT)) - res->obj->persistent_maps++; success: + /* ensure the copy context gets unlocked */ + if (ctx == screen->copy_context) + zink_screen_unlock_context(screen); *transfer = &trans->base.b; return ptr; fail: + if (ctx == screen->copy_context) + zink_screen_unlock_context(screen); destroy_transfer(ctx, trans); return NULL; } @@ -1331,15 +2465,20 @@ zink_image_map(struct pipe_context *pctx, return NULL; trans->base.b.level 
= level; + if (zink_is_swapchain(res)) + /* this is probably a multi-chain which has already been acquired */ + zink_kopper_acquire(ctx, res, 0); void *ptr; - if (usage & PIPE_MAP_WRITE && !(usage & PIPE_MAP_READ)) - /* this is like a blit, so we can potentially dump some clears or maybe we have to */ - zink_fb_clears_apply_or_discard(ctx, pres, zink_rect_from_box(box), false); - else if (usage & PIPE_MAP_READ) - /* if the map region intersects with any clears then we have to apply them */ - zink_fb_clears_apply_region(ctx, pres, zink_rect_from_box(box)); - if (res->optimal_tiling || !res->obj->host_visible) { + if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { + if (usage & PIPE_MAP_WRITE && !(usage & PIPE_MAP_READ)) + /* this is like a blit, so we can potentially dump some clears or maybe we have to */ + zink_fb_clears_apply_or_discard(ctx, pres, zink_rect_from_box(box), false); + else if (usage & PIPE_MAP_READ) + /* if the map region intersects with any clears then we have to apply them */ + zink_fb_clears_apply_region(ctx, pres, zink_rect_from_box(box)); + } + if (!res->linear || !res->obj->host_visible) { enum pipe_format format = pres->format; if (usage & PIPE_MAP_DEPTH_ONLY) format = util_format_get_depth_only(pres->format); @@ -1351,6 +2490,7 @@ zink_image_map(struct pipe_context *pctx, box->height); struct pipe_resource templ = *pres; + templ.next = NULL; templ.format = format; templ.usage = usage & PIPE_MAP_READ ? 
PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; templ.target = PIPE_BUFFER; @@ -1368,6 +2508,7 @@ zink_image_map(struct pipe_context *pctx, struct zink_resource *staging_res = zink_resource(trans->staging_res); if (usage & PIPE_MAP_READ) { + assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)); /* force multi-context sync */ if (zink_resource_usage_is_unflushed_write(res)) zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE); @@ -1378,18 +2519,19 @@ zink_image_map(struct pipe_context *pctx, ptr = map_resource(screen, staging_res); } else { - assert(!res->optimal_tiling); + assert(res->linear); ptr = map_resource(screen, res); if (!ptr) goto fail; if (zink_resource_has_usage(res)) { + assert(!(usage & PIPE_MAP_UNSYNCHRONIZED)); if (usage & PIPE_MAP_WRITE) zink_fence_wait(pctx); else zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE); } VkImageSubresource isr = { - res->obj->modifier_aspect ? res->obj->modifier_aspect : res->aspect, + res->modifiers ? res->obj->modifier_aspect : res->aspect, level, 0 }; @@ -1410,17 +2552,24 @@ zink_image_map(struct pipe_context *pctx, if (!res->obj->coherent) { VkDeviceSize size = (VkDeviceSize)box->width * box->height * desc->block.bits / 8; VkMappedMemoryRange range = zink_resource_init_mem_range(screen, res->obj, res->obj->offset + offset, size); - VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range); + if (VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range) != VK_SUCCESS) { + mesa_loge("ZINK: vkFlushMappedMemoryRanges failed"); + } } ptr = ((uint8_t *)ptr) + offset; } if (!ptr) goto fail; + if (usage & PIPE_MAP_WRITE) { + if (!res->valid && res->fb_bind_count) { + assert(!(usage & PIPE_MAP_UNSYNCHRONIZED)); + ctx->rp_loadop_changed = true; + } + res->valid = true; + } if (sizeof(void*) == 4) trans->base.b.usage |= ZINK_MAP_TEMPORARY; - if ((usage & PIPE_MAP_PERSISTENT) && !(usage & PIPE_MAP_COHERENT)) - res->obj->persistent_maps++; *transfer = &trans->base.b; return ptr; @@ -1431,6 +2580,110 @@ fail: } static 
void +zink_image_subdata(struct pipe_context *pctx, + struct pipe_resource *pres, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + uintptr_t layer_stride) +{ + struct zink_screen *screen = zink_screen(pctx->screen); + struct zink_context *ctx = zink_context(pctx); + struct zink_resource *res = zink_resource(pres); + + /* flush clears to avoid subdata conflict */ + if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC) && + (res->obj->vkusage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)) + zink_fb_clears_apply_or_discard(ctx, pres, zink_rect_from_box(box), false); + /* only use HIC if supported on image and no pending usage */ + while (res->obj->vkusage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT && + zink_resource_usage_check_completion(screen, res, ZINK_RESOURCE_ACCESS_RW)) { + /* uninit images are always supported */ + bool change_layout = res->layout == VK_IMAGE_LAYOUT_UNDEFINED || res->layout == VK_IMAGE_LAYOUT_PREINITIALIZED; + if (!change_layout) { + /* image in some other layout: test for support */ + bool can_copy_layout = false; + for (unsigned i = 0; i < screen->info.hic_props.copyDstLayoutCount; i++) { + if (screen->info.hic_props.pCopyDstLayouts[i] == res->layout) { + can_copy_layout = true; + break; + } + } + /* some layouts don't permit HIC copies */ + if (!can_copy_layout) + break; + } + bool is_arrayed = false; + switch (pres->target) { + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + is_arrayed = true; + break; + default: break; + } + /* recalc strides into texel strides because HIC spec is insane */ + unsigned vk_stride = util_format_get_stride(pres->format, 1); + stride /= vk_stride; + unsigned vk_layer_stride = util_format_get_2d_size(pres->format, stride, 1) * vk_stride; + layer_stride /= vk_layer_stride; + + VkHostImageLayoutTransitionInfoEXT t = { + VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT, + NULL, + res->obj->image, + 
res->layout, + /* GENERAL support is guaranteed */ + VK_IMAGE_LAYOUT_GENERAL, + {res->aspect, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS} + }; + /* only pre-transition uninit images to avoid thrashing */ + if (change_layout) { + VKSCR(TransitionImageLayoutEXT)(screen->dev, 1, &t); + res->layout = VK_IMAGE_LAYOUT_GENERAL; + } + VkMemoryToImageCopyEXT region = { + VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT, + NULL, + data, + stride, + layer_stride, + {res->aspect, level, is_arrayed ? box->z : 0, is_arrayed ? box->depth : 1}, + {box->x, box->y, is_arrayed ? 0 : box->z}, + {box->width, box->height, is_arrayed ? 1 : box->depth} + }; + VkCopyMemoryToImageInfoEXT copy = { + VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT, + NULL, + 0, + res->obj->image, + res->layout, + 1, + ®ion + }; + VKSCR(CopyMemoryToImageEXT)(screen->dev, ©); + if (change_layout && screen->can_hic_shader_read && !pres->last_level && !box->x && !box->y && !box->z && + box->width == pres->width0 && box->height == pres->height0 && + ((is_arrayed && box->depth == pres->array_size) || (!is_arrayed && box->depth == pres->depth0))) { + /* assume full copy single-mip images use shader read access */ + t.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + t.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + VKSCR(TransitionImageLayoutEXT)(screen->dev, 1, &t); + res->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + /* assume multi-mip where further subdata calls may happen */ + } + /* make sure image is marked as having data */ + res->valid = true; + return; + } + /* fallback case for per-resource unsupported or device-level unsupported */ + u_default_texture_subdata(pctx, pres, level, usage, box, data, stride, layer_stride); +} + +static void zink_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -1443,47 +2696,282 @@ zink_transfer_flush_region(struct pipe_context *pctx, struct zink_screen *screen = zink_screen(pctx->screen); struct 
zink_resource *m = trans->staging_res ? zink_resource(trans->staging_res) : res; - ASSERTED VkDeviceSize size, offset; + ASSERTED VkDeviceSize size, src_offset, dst_offset = 0; if (m->obj->is_buffer) { size = box->width; - offset = trans->offset; + src_offset = box->x + (trans->staging_res ? trans->offset : ptrans->box.x); + dst_offset = box->x + ptrans->box.x; } else { size = (VkDeviceSize)box->width * box->height * util_format_get_blocksize(m->base.b.format); - offset = trans->offset + + src_offset = trans->offset + box->z * trans->depthPitch + util_format_get_2d_size(m->base.b.format, trans->base.b.stride, box->y) + util_format_get_stride(m->base.b.format, box->x); - assert(offset + size <= res->obj->size); + assert(src_offset + size <= res->obj->size); } if (!m->obj->coherent) { VkMappedMemoryRange range = zink_resource_init_mem_range(screen, m->obj, m->obj->offset, m->obj->size); - VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range); + if (VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range) != VK_SUCCESS) { + mesa_loge("ZINK: vkFlushMappedMemoryRanges failed"); + } } if (trans->staging_res) { struct zink_resource *staging_res = zink_resource(trans->staging_res); if (ptrans->resource->target == PIPE_BUFFER) - zink_copy_buffer(ctx, res, staging_res, box->x, offset, box->width); + zink_copy_buffer(ctx, res, staging_res, dst_offset, src_offset, size); else zink_transfer_copy_bufimage(ctx, res, staging_res, trans); } } } +/* used to determine whether to emit a TRANSFER_DST barrier on copies */ +bool +zink_resource_copy_box_intersects(struct zink_resource *res, unsigned level, const struct pipe_box *box) +{ + /* if there are no valid copy rects tracked, this needs a barrier */ + if (!res->obj->copies_valid) + return true; + /* untracked huge miplevel */ + if (level >= ARRAY_SIZE(res->obj->copies)) + return true; + u_rwlock_rdlock(&res->obj->copy_lock); + struct pipe_box *b = res->obj->copies[level].data; + unsigned num_boxes = 
util_dynarray_num_elements(&res->obj->copies[level], struct pipe_box); + bool (*intersect)(const struct pipe_box *, const struct pipe_box *); + /* determine intersection function based on dimensionality */ + switch (res->base.b.target) { + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + intersect = u_box_test_intersection_1d; + break; + + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D: + intersect = u_box_test_intersection_2d; + break; + + default: + intersect = u_box_test_intersection_3d; + break; + } + /* if any of the tracked boxes intersect with this one, a barrier is needed */ + bool ret = false; + for (unsigned i = 0; i < num_boxes; i++) { + if (intersect(box, b + i)) { + ret = true; + break; + } + } + u_rwlock_rdunlock(&res->obj->copy_lock); + /* no intersection = no barrier */ + return ret; +} + +/* track a new region for TRANSFER_DST barrier emission */ +void +zink_resource_copy_box_add(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box) +{ + u_rwlock_wrlock(&res->obj->copy_lock); + if (res->obj->copies_valid) { + struct pipe_box *b = res->obj->copies[level].data; + unsigned num_boxes = util_dynarray_num_elements(&res->obj->copies[level], struct pipe_box); + for (unsigned i = 0; i < num_boxes; i++) { + switch (res->base.b.target) { + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + /* no-op included region */ + if (b[i].x <= box->x && b[i].x + b[i].width >= box->x + box->width) + goto out; + + /* try to merge adjacent regions */ + if (b[i].x == box->x + box->width) { + b[i].x -= box->width; + b[i].width += box->width; + goto out; + } + if (b[i].x + b[i].width == box->x) { + b[i].width += box->width; + goto out; + } + + /* try to merge into region */ + if (box->x <= b[i].x && box->x + box->width >= b[i].x + b[i].width) { + *b = *box; + goto out; + } + break; + + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D: + /* no-op included region */ + if (b[i].x <= box->x && b[i].x + b[i].width >= box->x + box->width && + 
b[i].y <= box->y && b[i].y + b[i].height >= box->y + box->height) + goto out; + + /* try to merge adjacent regions */ + if (b[i].y == box->y && b[i].height == box->height) { + if (b[i].x == box->x + box->width) { + b[i].x -= box->width; + b[i].width += box->width; + goto out; + } + if (b[i].x + b[i].width == box->x) { + b[i].width += box->width; + goto out; + } + } else if (b[i].x == box->x && b[i].width == box->width) { + if (b[i].y == box->y + box->height) { + b[i].y -= box->height; + b[i].height += box->height; + goto out; + } + if (b[i].y + b[i].height == box->y) { + b[i].height += box->height; + goto out; + } + } + + /* try to merge into region */ + if (box->x <= b[i].x && box->x + box->width >= b[i].x + b[i].width && + box->y <= b[i].y && box->y + box->height >= b[i].y + b[i].height) { + *b = *box; + goto out; + } + break; + + default: + /* no-op included region */ + if (b[i].x <= box->x && b[i].x + b[i].width >= box->x + box->width && + b[i].y <= box->y && b[i].y + b[i].height >= box->y + box->height && + b[i].z <= box->z && b[i].z + b[i].depth >= box->z + box->depth) + goto out; + + /* try to merge adjacent regions */ + if (b[i].z == box->z && b[i].depth == box->depth) { + if (b[i].y == box->y && b[i].height == box->height) { + if (b[i].x == box->x + box->width) { + b[i].x -= box->width; + b[i].width += box->width; + goto out; + } + if (b[i].x + b[i].width == box->x) { + b[i].width += box->width; + goto out; + } + } else if (b[i].x == box->x && b[i].width == box->width) { + if (b[i].y == box->y + box->height) { + b[i].y -= box->height; + b[i].height += box->height; + goto out; + } + if (b[i].y + b[i].height == box->y) { + b[i].height += box->height; + goto out; + } + } + } else if (b[i].x == box->x && b[i].width == box->width) { + if (b[i].y == box->y && b[i].height == box->height) { + if (b[i].z == box->z + box->depth) { + b[i].z -= box->depth; + b[i].depth += box->depth; + goto out; + } + if (b[i].z + b[i].depth == box->z) { + b[i].depth += box->depth; + 
goto out; + } + } else if (b[i].z == box->z && b[i].depth == box->depth) { + if (b[i].y == box->y + box->height) { + b[i].y -= box->height; + b[i].height += box->height; + goto out; + } + if (b[i].y + b[i].height == box->y) { + b[i].height += box->height; + goto out; + } + } + } else if (b[i].y == box->y && b[i].height == box->height) { + if (b[i].z == box->z && b[i].depth == box->depth) { + if (b[i].x == box->x + box->width) { + b[i].x -= box->width; + b[i].width += box->width; + goto out; + } + if (b[i].x + b[i].width == box->x) { + b[i].width += box->width; + goto out; + } + } else if (b[i].x == box->x && b[i].width == box->width) { + if (b[i].z == box->z + box->depth) { + b[i].z -= box->depth; + b[i].depth += box->depth; + goto out; + } + if (b[i].z + b[i].depth == box->z) { + b[i].depth += box->depth; + goto out; + } + } + } + + /* try to merge into region */ + if (box->x <= b[i].x && box->x + box->width >= b[i].x + b[i].width && + box->y <= b[i].y && box->y + box->height >= b[i].y + b[i].height && + box->z <= b[i].z && box->z + box->depth >= b[i].z + b[i].depth) + goto out; + + break; + } + } + } + util_dynarray_append(&res->obj->copies[level], struct pipe_box, *box); + if (!res->copies_warned && util_dynarray_num_elements(&res->obj->copies[level], struct pipe_box) > 100) { + perf_debug(ctx, "zink: PERF WARNING! > 100 copy boxes detected for %p\n", res); + mesa_logw("zink: PERF WARNING! > 100 copy boxes detected for %p\n", res); + res->copies_warned = true; + } + res->obj->copies_valid = true; +out: + u_rwlock_wrunlock(&res->obj->copy_lock); +} + +void +zink_resource_copies_reset(struct zink_resource *res) +{ + if (!res->obj->copies_valid) + return; + u_rwlock_wrlock(&res->obj->copy_lock); + unsigned max_level = res->base.b.target == PIPE_BUFFER ? 
1 : (res->base.b.last_level + 1); + if (res->base.b.target == PIPE_BUFFER) { + /* flush transfer regions back to valid range on reset */ + struct pipe_box *b = res->obj->copies[0].data; + unsigned num_boxes = util_dynarray_num_elements(&res->obj->copies[0], struct pipe_box); + for (unsigned i = 0; i < num_boxes; i++) + util_range_add(&res->base.b, &res->valid_buffer_range, b[i].x, b[i].x + b[i].width); + } + for (unsigned i = 0; i < max_level; i++) + util_dynarray_clear(&res->obj->copies[i]); + res->obj->copies_valid = false; + res->obj->copies_need_reset = false; + u_rwlock_wrunlock(&res->obj->copy_lock); +} + static void transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans) { struct zink_context *ctx = zink_context(pctx); - struct zink_resource *res = zink_resource(ptrans->resource); struct zink_transfer *trans = (struct zink_transfer *)ptrans; if (!(trans->base.b.usage & (PIPE_MAP_FLUSH_EXPLICIT | PIPE_MAP_COHERENT))) { - zink_transfer_flush_region(pctx, ptrans, &ptrans->box); + /* flush_region is relative to the mapped region: use only the extents */ + struct pipe_box box = ptrans->box; + box.x = box.y = box.z = 0; + zink_transfer_flush_region(pctx, ptrans, &box); } - if ((trans->base.b.usage & PIPE_MAP_PERSISTENT) && !(trans->base.b.usage & PIPE_MAP_COHERENT)) - res->obj->persistent_maps--; - if (trans->staging_res) pipe_resource_reference(&trans->staging_res, NULL); pipe_resource_reference(&trans->base.b.resource, NULL); @@ -1500,6 +2988,16 @@ do_transfer_unmap(struct zink_screen *screen, struct zink_transfer *trans) unmap_resource(screen, res); } +void +zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans) +{ + struct zink_screen *screen = zink_screen(pscreen); + struct zink_transfer *trans = (struct zink_transfer *)ptrans; + if (trans->base.b.usage & PIPE_MAP_ONCE && !trans->staging_res) + do_transfer_unmap(screen, trans); + transfer_unmap(NULL, ptrans); +} + static void zink_buffer_unmap(struct pipe_context 
*pctx, struct pipe_transfer *ptrans) { @@ -1555,79 +3053,50 @@ zink_resource_get_separate_stencil(struct pipe_resource *pres) } -VkBuffer -zink_resource_tmp_buffer(struct zink_screen *screen, struct zink_resource *res, unsigned offset_add, unsigned add_binds, unsigned *offset_out) +static bool +resource_object_add_bind(struct zink_context *ctx, struct zink_resource *res, unsigned bind) { - VkBufferCreateInfo bci = create_bci(screen, &res->base.b, res->base.b.bind | add_binds); - VkDeviceSize size = bci.size - offset_add; - VkDeviceSize offset = offset_add; - if (offset_add) { - assert(bci.size > offset_add); - - align_offset_size(res->obj->alignment, &offset, &size, bci.size); + /* base resource already has the cap */ + if (res->base.b.bind & bind) + return true; + if (res->obj->is_buffer) { + unreachable("zink: all buffers should have this bit"); + return true; } - bci.size = size; + assert(!res->obj->dt); + zink_fb_clears_apply_region(ctx, &res->base.b, (struct u_rect){0, res->base.b.width0, 0, res->base.b.height0}); + bool ret = add_resource_bind(ctx, res, bind); + if (ret) + zink_resource_rebind(ctx, res); - VkBuffer buffer; - if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &buffer) != VK_SUCCESS) - return VK_NULL_HANDLE; - VKSCR(BindBufferMemory)(screen->dev, buffer, zink_bo_get_mem(res->obj->bo), res->obj->offset + offset); - if (offset_out) - *offset_out = offset_add - offset; - return buffer; + return ret; } bool zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource *res) { - struct zink_screen *screen = zink_screen(ctx->base.screen); - /* base resource already has the cap */ - if (res->base.b.bind & PIPE_BIND_SHADER_IMAGE) - return true; - if (res->obj->is_buffer) { - if (res->base.b.bind & PIPE_BIND_SHADER_IMAGE) - return true; - - VkBuffer buffer = zink_resource_tmp_buffer(screen, res, 0, PIPE_BIND_SHADER_IMAGE, NULL); - if (!buffer) - return false; - util_dynarray_append(&res->obj->tmp, VkBuffer, res->obj->buffer); - 
res->obj->buffer = buffer; - res->base.b.bind |= PIPE_BIND_SHADER_IMAGE; - } else { - zink_fb_clears_apply_region(ctx, &res->base.b, (struct u_rect){0, res->base.b.width0, 0, res->base.b.height0}); - zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0); - res->base.b.bind |= PIPE_BIND_SHADER_IMAGE; - struct zink_resource_object *old_obj = res->obj; - struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->optimal_tiling, res->modifiers, res->modifiers_count); - if (!new_obj) { - debug_printf("new backing resource alloc failed!"); - res->base.b.bind &= ~PIPE_BIND_SHADER_IMAGE; - return false; - } - struct zink_resource staging = *res; - staging.obj = old_obj; - bool needs_unref = true; - if (zink_resource_has_usage(res)) { - zink_batch_reference_resource_move(&ctx->batch, res); - needs_unref = false; - } - res->obj = new_obj; - zink_descriptor_set_refs_clear(&old_obj->desc_set_refs, old_obj); - for (unsigned i = 0; i <= res->base.b.last_level; i++) { - struct pipe_box box = {0, 0, 0, - u_minify(res->base.b.width0, i), - u_minify(res->base.b.height0, i), res->base.b.array_size}; - box.depth = util_num_layers(&res->base.b, i); - ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box); - } - if (needs_unref) - zink_resource_object_reference(screen, &old_obj, NULL); - } + return resource_object_add_bind(ctx, res, PIPE_BIND_SHADER_IMAGE); +} - zink_resource_rebind(ctx, res); +bool +zink_resource_object_init_mutable(struct zink_context *ctx, struct zink_resource *res) +{ + return resource_object_add_bind(ctx, res, ZINK_BIND_MUTABLE); +} - return true; +VkDeviceAddress +zink_resource_get_address(struct zink_screen *screen, struct zink_resource *res) +{ + assert(res->obj->is_buffer); + if (!res->obj->bda) { + VkBufferDeviceAddressInfo info = { + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + NULL, + res->obj->buffer + }; + res->obj->bda = 
VKSCR(GetBufferDeviceAddress)(screen->dev, &info); + } + return res->obj->bda; } void @@ -1650,17 +3119,17 @@ zink_resource_setup_transfer_layouts(struct zink_context *ctx, struct zink_resou * VK_IMAGE_LAYOUT_GENERAL. And since this isn't a present-related * operation, VK_IMAGE_LAYOUT_GENERAL seems most appropriate. */ - zink_resource_image_barrier(ctx, src, + zink_screen(ctx->base.screen)->image_barrier(ctx, src, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); } else { - zink_resource_image_barrier(ctx, src, + zink_screen(ctx->base.screen)->image_barrier(ctx, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - zink_resource_image_barrier(ctx, dst, + zink_screen(ctx->base.screen)->image_barrier(ctx, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -1717,15 +3186,28 @@ bool zink_screen_resource_init(struct pipe_screen *pscreen) { struct zink_screen *screen = zink_screen(pscreen); - pscreen->resource_create = zink_resource_create; + pscreen->resource_create = u_transfer_helper_resource_create; pscreen->resource_create_with_modifiers = zink_resource_create_with_modifiers; - pscreen->resource_destroy = zink_resource_destroy; - pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, true, true, false, false); - - if (screen->info.have_KHR_external_memory_fd) { + pscreen->resource_create_drawable = zink_resource_create_drawable; + pscreen->resource_destroy = u_transfer_helper_resource_destroy; + pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, + U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL | + U_TRANSFER_HELPER_INTERLEAVE_IN_PLACE | + U_TRANSFER_HELPER_MSAA_MAP | + (!screen->have_D24_UNORM_S8_UINT ? 
U_TRANSFER_HELPER_Z24_IN_Z32F : 0)); + + if (screen->info.have_KHR_external_memory_fd || screen->info.have_KHR_external_memory_win32) { pscreen->resource_get_handle = zink_resource_get_handle; pscreen->resource_from_handle = zink_resource_from_handle; } + if (screen->info.have_EXT_external_memory_host) { + pscreen->resource_from_user_memory = zink_resource_from_user_memory; + } + if (screen->instance_info.have_KHR_external_memory_capabilities) { + pscreen->memobj_create_from_handle = zink_memobj_create_from_handle; + pscreen->memobj_destroy = zink_memobj_destroy; + pscreen->resource_from_memobj = zink_resource_from_memobj; + } pscreen->resource_get_param = zink_resource_get_param; return true; } @@ -1735,11 +3217,11 @@ zink_context_resource_init(struct pipe_context *pctx) { pctx->buffer_map = zink_buffer_map; pctx->buffer_unmap = zink_buffer_unmap; - pctx->texture_map = u_transfer_helper_deinterleave_transfer_map; - pctx->texture_unmap = u_transfer_helper_deinterleave_transfer_unmap; + pctx->texture_map = u_transfer_helper_transfer_map; + pctx->texture_unmap = u_transfer_helper_transfer_unmap; pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; pctx->buffer_subdata = zink_buffer_subdata; - pctx->texture_subdata = u_default_texture_subdata; + pctx->texture_subdata = zink_image_subdata; pctx->invalidate_resource = zink_resource_invalidate; } diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h index 98520404fb0..c7185d32125 100644 --- a/src/gallium/drivers/zink/zink_resource.h +++ b/src/gallium/drivers/zink/zink_resource.h @@ -24,143 +24,26 @@ #ifndef ZINK_RESOURCE_H #define ZINK_RESOURCE_H -struct pipe_screen; -struct sw_displaytarget; -struct zink_batch; -struct zink_context; -struct zink_bo; - -#include "util/hash_table.h" -#include "util/simple_mtx.h" -#include "util/u_transfer.h" -#include "util/u_range.h" -#include "util/u_dynarray.h" -#include "util/u_threaded_context.h" - -#include "zink_batch.h" 
-#include "zink_descriptors.h" - -#include <vulkan/vulkan.h> +#include "zink_types.h" #define ZINK_MAP_TEMPORARY (PIPE_MAP_DRV_PRV << 0) - -struct mem_key { - unsigned seen_count; - struct { - unsigned heap_index; - VkMemoryRequirements reqs; - } key; -}; - -struct zink_resource_object { - struct pipe_reference reference; - - VkPipelineStageFlagBits access_stage; - VkAccessFlags access; - bool unordered_barrier; - - unsigned persistent_maps; //if nonzero, requires vkFlushMappedMemoryRanges during batch use - struct zink_descriptor_refs desc_set_refs; - - struct zink_batch_usage *reads; - struct zink_batch_usage *writes; - - struct util_dynarray tmp; - - union { - VkBuffer buffer; - VkImage image; - }; - - VkSampleLocationsInfoEXT zs_evaluate; - bool needs_zs_evaluate; - - bool storage_init; //layout was set for image - bool transfer_dst; - bool is_buffer; - VkImageAspectFlags modifier_aspect; - - struct zink_bo *bo; - VkDeviceSize offset, size, alignment; - VkImageCreateFlags vkflags; - VkImageUsageFlags vkusage; - - bool host_visible; - bool coherent; -}; - -struct zink_resource { - struct threaded_resource base; - - enum pipe_format internal_format:16; - - struct zink_resource_object *obj; - struct zink_resource_object *scanout_obj; //TODO: remove for wsi - bool scanout_obj_init; - union { - struct { - struct util_range valid_buffer_range; - uint32_t vbo_bind_mask : PIPE_MAX_ATTRIBS; - uint8_t ubo_bind_count[2]; - uint8_t so_bind_count; - bool so_valid; - uint32_t ubo_bind_mask[PIPE_SHADER_TYPES]; - uint32_t ssbo_bind_mask[PIPE_SHADER_TYPES]; - }; - struct { - VkFormat format; - VkImageLayout layout; - VkImageAspectFlags aspect; - bool optimal_tiling; - uint8_t fb_binds; - }; - }; - uint32_t sampler_binds[PIPE_SHADER_TYPES]; - uint16_t image_bind_count[2]; //gfx, compute - uint16_t write_bind_count[2]; //gfx, compute - union { - uint16_t bind_count[2]; //gfx, compute - uint32_t all_binds; - }; - - union { - struct { - struct hash_table bufferview_cache; - 
simple_mtx_t bufferview_mtx; - }; - struct { - struct hash_table surface_cache; - simple_mtx_t surface_mtx; - }; - }; - - bool dmabuf_acquire; - struct sw_displaytarget *dt; - unsigned dt_stride; - - uint8_t modifiers_count; - uint64_t *modifiers; -}; - -struct zink_transfer { - struct threaded_transfer base; - struct pipe_resource *staging_res; - unsigned offset; - unsigned depthPitch; -}; - -static inline struct zink_resource * -zink_resource(struct pipe_resource *r) -{ - return (struct zink_resource *)r; -} +#define ZINK_BIND_DESCRIPTOR (1u << 27) +#define ZINK_BIND_MUTABLE (1u << 28) +#define ZINK_BIND_DMABUF (1u << 29) +#define ZINK_BIND_TRANSIENT (1u << 30) //transient fb attachment +#define ZINK_BIND_VIDEO (1u << 31) + +#ifdef __cplusplus +extern "C" { +#endif bool zink_screen_resource_init(struct pipe_screen *pscreen); void zink_context_resource_init(struct pipe_context *pctx); - +void +zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans); void zink_get_depth_stencil_resources(struct pipe_resource *res, struct zink_resource **out_z, @@ -189,20 +72,36 @@ zink_resource_object_reference(struct zink_screen *screen, if (dst) *dst = src; } -VkBuffer -zink_resource_tmp_buffer(struct zink_screen *screen, struct zink_resource *res, unsigned offset_add, unsigned add_binds, unsigned *offset); - bool zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource *res); +bool +zink_resource_object_init_mutable(struct zink_context *ctx, struct zink_resource *res); -static inline bool +VkDeviceAddress +zink_resource_get_address(struct zink_screen *screen, struct zink_resource *res); + +static ALWAYS_INLINE bool zink_resource_has_binds(const struct zink_resource *res) { return res->all_binds > 0; } -#ifndef __cplusplus +static ALWAYS_INLINE bool +zink_is_swapchain(const struct zink_resource *res) +{ + return res->swapchain; +} + +bool +zink_resource_copy_box_intersects(struct zink_resource *res, unsigned level, const struct 
pipe_box *box); +void +zink_resource_copy_box_add(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box); +void +zink_resource_copies_reset(struct zink_resource *res); + +#include "zink_batch.h" #include "zink_bo.h" +#include "zink_kopper.h" static inline bool zink_resource_usage_is_unflushed(const struct zink_resource *res) @@ -213,7 +112,7 @@ zink_resource_usage_is_unflushed(const struct zink_resource *res) static inline bool zink_resource_usage_is_unflushed_write(const struct zink_resource *res) { - return zink_batch_usage_is_unflushed(res->obj->bo->writes); + return zink_batch_usage_is_unflushed(res->obj->bo->writes.u); } @@ -241,6 +140,18 @@ zink_resource_usage_check_completion(struct zink_screen *screen, struct zink_res return zink_bo_usage_check_completion(screen, res->obj->bo, access); } +static inline bool +zink_resource_usage_check_completion_fast(struct zink_screen *screen, struct zink_resource *res, enum zink_resource_access access) +{ + return zink_bo_usage_check_completion_fast(screen, res->obj->bo, access); +} + +static inline void +zink_resource_usage_try_wait(struct zink_context *ctx, struct zink_resource *res, enum zink_resource_access access) +{ + zink_bo_usage_try_wait(ctx, res->obj->bo, access); +} + static inline void zink_resource_usage_wait(struct zink_context *ctx, struct zink_resource *res, enum zink_resource_access access) { @@ -251,6 +162,7 @@ static inline void zink_resource_usage_set(struct zink_resource *res, struct zink_batch_state *bs, bool write) { zink_bo_usage_set(res->obj->bo, bs, write); + res->obj->unsync_access = false; } static inline bool @@ -259,5 +171,31 @@ zink_resource_object_usage_unset(struct zink_resource_object *obj, struct zink_b return zink_bo_usage_unset(obj->bo, bs); } +static inline void +zink_batch_resource_usage_set(struct zink_batch *batch, struct zink_resource *res, bool write, bool is_buffer) +{ + if (!is_buffer) { + if (res->obj->dt) { + VkSemaphore acquire = 
zink_kopper_acquire_submit(zink_screen(batch->state->ctx->base.screen), res); + if (acquire) + util_dynarray_append(&batch->state->acquires, VkSemaphore, acquire); + } + if (write) { + if (!res->valid && res->fb_bind_count) + batch->state->ctx->rp_loadop_changed = true; + res->valid = true; + } + } + zink_resource_usage_set(res, batch->state, write); + + batch->has_work = true; +} + +void +zink_debug_mem_print_stats(struct zink_screen *screen); + +#ifdef __cplusplus +} #endif + #endif diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index b40f1e39387..ae33ac21f11 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -23,44 +23,75 @@ #include "zink_screen.h" +#include "zink_kopper.h" #include "zink_compiler.h" #include "zink_context.h" -#include "zink_device_info.h" #include "zink_descriptors.h" #include "zink_fence.h" +#include "vk_format.h" #include "zink_format.h" #include "zink_framebuffer.h" -#include "zink_instance.h" #include "zink_program.h" #include "zink_public.h" +#include "zink_query.h" #include "zink_resource.h" +#include "zink_state.h" #include "nir_to_spirv/nir_to_spirv.h" // for SPIRV_VERSION -#include "os/os_process.h" #include "util/u_debug.h" -#include "util/format/u_format.h" -#include "util/hash_table.h" +#include "util/u_dl.h" #include "util/os_file.h" -#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_screen.h" #include "util/u_string.h" +#include "util/perf/u_trace.h" #include "util/u_transfer_helper.h" +#include "util/hex.h" #include "util/xmlconfig.h" #include "util/u_cpu_detect.h" -#include "frontend/sw_winsys.h" +#ifdef HAVE_LIBDRM +#include <xf86drm.h> +#include <fcntl.h> +#include <sys/stat.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif +#endif + +static int num_screens = 0; +bool zink_tracing = false; #if DETECT_OS_WINDOWS #include <io.h> +#define 
VK_LIBNAME "vulkan-1.dll" #else #include <unistd.h> +#if DETECT_OS_APPLE +#define VK_LIBNAME "libvulkan.1.dylib" +#elif DETECT_OS_ANDROID +#define VK_LIBNAME "libvulkan.so" +#else +#define VK_LIBNAME "libvulkan.so.1" +#endif #endif -#if defined(__APPLE__) +#ifdef __APPLE__ +#include "MoltenVK/mvk_vulkan.h" // Source of MVK_VERSION -#include "MoltenVK/vk_mvk_moltenvk.h" +#include "MoltenVK/mvk_config.h" +#define VK_NO_PROTOTYPES +#include "MoltenVK/mvk_deprecated_api.h" +#include "MoltenVK/mvk_private_api.h" +#endif /* __APPLE__ */ + +#ifdef HAVE_LIBDRM +#include "drm-uapi/dma-buf.h" +#include <xf86drm.h> #endif static const struct debug_named_value @@ -69,6 +100,25 @@ zink_debug_options[] = { { "spirv", ZINK_DEBUG_SPIRV, "Dump SPIR-V during program compile" }, { "tgsi", ZINK_DEBUG_TGSI, "Dump TGSI during program compile" }, { "validation", ZINK_DEBUG_VALIDATION, "Dump Validation layer output" }, + { "vvl", ZINK_DEBUG_VALIDATION, "Dump Validation layer output" }, + { "sync", ZINK_DEBUG_SYNC, "Force synchronization before draws/dispatches" }, + { "compact", ZINK_DEBUG_COMPACT, "Use only 4 descriptor sets" }, + { "noreorder", ZINK_DEBUG_NOREORDER, "Do not reorder command streams" }, + { "gpl", ZINK_DEBUG_GPL, "Force using Graphics Pipeline Library for all shaders" }, + { "shaderdb", ZINK_DEBUG_SHADERDB, "Do stuff to make shader-db work" }, + { "rp", ZINK_DEBUG_RP, "Enable renderpass tracking/optimizations" }, + { "norp", ZINK_DEBUG_NORP, "Disable renderpass tracking/optimizations" }, + { "map", ZINK_DEBUG_MAP, "Track amount of mapped VRAM" }, + { "flushsync", ZINK_DEBUG_FLUSHSYNC, "Force synchronous flushes/presents" }, + { "noshobj", ZINK_DEBUG_NOSHOBJ, "Disable EXT_shader_object" }, + { "optimal_keys", ZINK_DEBUG_OPTIMAL_KEYS, "Debug/use optimal_keys" }, + { "noopt", ZINK_DEBUG_NOOPT, "Disable async optimized pipeline compiles" }, + { "nobgc", ZINK_DEBUG_NOBGC, "Disable all async pipeline compiles" }, + { "dgc", ZINK_DEBUG_DGC, "Use DGC (driver testing only)" }, + { 
"mem", ZINK_DEBUG_MEM, "Debug memory allocations" }, + { "quiet", ZINK_DEBUG_QUIET, "Suppress warnings" }, + { "ioopt", ZINK_DEBUG_IOOPT, "Optimize IO" }, + { "nopc", ZINK_DEBUG_NOPC, "No precompilation" }, DEBUG_NAMED_VALUE_END }; @@ -82,17 +132,18 @@ static const struct debug_named_value zink_descriptor_options[] = { { "auto", ZINK_DESCRIPTOR_MODE_AUTO, "Automatically detect best mode" }, { "lazy", ZINK_DESCRIPTOR_MODE_LAZY, "Don't cache, do least amount of updates" }, - { "nofallback", ZINK_DESCRIPTOR_MODE_NOFALLBACK, "Cache, never use lazy fallback" }, - { "notemplates", ZINK_DESCRIPTOR_MODE_NOTEMPLATES, "Cache, but disable templated updates" }, + { "db", ZINK_DESCRIPTOR_MODE_DB, "Use descriptor buffers" }, DEBUG_NAMED_VALUE_END }; DEBUG_GET_ONCE_FLAGS_OPTION(zink_descriptor_mode, "ZINK_DESCRIPTORS", zink_descriptor_options, ZINK_DESCRIPTOR_MODE_AUTO) +enum zink_descriptor_mode zink_descriptor_mode; + static const char * zink_get_vendor(struct pipe_screen *pscreen) { - return "Collabora Ltd"; + return "Mesa"; } static const char * @@ -108,23 +159,85 @@ static const char * zink_get_name(struct pipe_screen *pscreen) { struct zink_screen *screen = zink_screen(pscreen); + const char *driver_name = vk_DriverId_to_str(screen->info.driver_props.driverID) + strlen("VK_DRIVER_ID_"); static char buf[1000]; - snprintf(buf, sizeof(buf), "zink (%s)", screen->info.props.deviceName); + snprintf(buf, sizeof(buf), "zink Vulkan %d.%d(%s (%s))", + VK_VERSION_MAJOR(screen->info.device_version), + VK_VERSION_MINOR(screen->info.device_version), + screen->info.props.deviceName, + strstr(vk_DriverId_to_str(screen->info.driver_props.driverID), "VK_DRIVER_ID_") ? 
driver_name : "Driver Unknown" + ); return buf; } +static void +zink_get_driver_uuid(struct pipe_screen *pscreen, char *uuid) +{ + struct zink_screen *screen = zink_screen(pscreen); + if (screen->vk_version >= VK_MAKE_VERSION(1,2,0)) { + memcpy(uuid, screen->info.props11.driverUUID, VK_UUID_SIZE); + } else { + memcpy(uuid, screen->info.deviceid_props.driverUUID, VK_UUID_SIZE); + } +} + +static void +zink_get_device_uuid(struct pipe_screen *pscreen, char *uuid) +{ + struct zink_screen *screen = zink_screen(pscreen); + if (screen->vk_version >= VK_MAKE_VERSION(1,2,0)) { + memcpy(uuid, screen->info.props11.deviceUUID, VK_UUID_SIZE); + } else { + memcpy(uuid, screen->info.deviceid_props.deviceUUID, VK_UUID_SIZE); + } +} + +static void +zink_get_device_luid(struct pipe_screen *pscreen, char *luid) +{ + struct zink_screen *screen = zink_screen(pscreen); + if (screen->info.have_vulkan12) { + memcpy(luid, screen->info.props11.deviceLUID, VK_LUID_SIZE); + } else { + memcpy(luid, screen->info.deviceid_props.deviceLUID, VK_LUID_SIZE); + } +} + static uint32_t -hash_framebuffer_state(const void *key) +zink_get_device_node_mask(struct pipe_screen *pscreen) +{ + struct zink_screen *screen = zink_screen(pscreen); + if (screen->info.have_vulkan12) { + return screen->info.props11.deviceNodeMask; + } else { + return screen->info.deviceid_props.deviceNodeMask; + } +} + +static void +zink_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned max_threads) { - struct zink_framebuffer_state* s = (struct zink_framebuffer_state*)key; - return _mesa_hash_data(key, offsetof(struct zink_framebuffer_state, attachments) + sizeof(s->attachments[0]) * s->num_attachments); + struct zink_screen *screen = zink_screen(pscreen); + util_queue_adjust_num_threads(&screen->cache_get_thread, max_threads, false); } static bool -equals_framebuffer_state(const void *a, const void *b) +zink_is_parallel_shader_compilation_finished(struct pipe_screen *screen, void *shader, enum pipe_shader_type 
shader_type) { - struct zink_framebuffer_state *s = (struct zink_framebuffer_state*)a; - return memcmp(a, b, offsetof(struct zink_framebuffer_state, attachments) + sizeof(s->attachments[0]) * s->num_attachments) == 0; + if (shader_type == MESA_SHADER_COMPUTE) { + struct zink_program *pg = shader; + return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence); + } + + struct zink_shader *zs = shader; + if (!util_queue_fence_is_signalled(&zs->precompile.fence)) + return false; + bool finished = true; + set_foreach(zs->programs, entry) { + struct zink_gfx_program *prog = (void*)entry->key; + finished &= util_queue_fence_is_signalled(&prog->base.cache_fence); + } + return finished; } static VkDeviceSize @@ -139,19 +252,87 @@ get_video_mem(struct zink_screen *screen) return size; } -static void +/** + * Creates the disk cache used by mesa/st frontend for caching the GLSL -> NIR + * path. + * + * The output that gets stored in the frontend's cache is the result of + * zink_shader_finalize(). So, our sha1 cache key here needs to include + * everything that would change the NIR we generate from a given set of GLSL + * source, including our driver build, the Vulkan device and driver (which could + * affect the pipe caps we show the frontend), and any debug flags that change + * codegen. + * + * This disk cache also gets used by zink itself for storing its output from NIR + * -> SPIRV translation. + */ +static bool disk_cache_init(struct zink_screen *screen) { + if (zink_debug & ZINK_DEBUG_SHADERDB) + return true; + #ifdef ENABLE_SHADER_CACHE - static char buf[1000]; - snprintf(buf, sizeof(buf), "zink_%x04x", screen->info.props.vendorID); + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + +#ifdef HAVE_DL_ITERATE_PHDR + /* Hash in the zink driver build. 
*/ + const struct build_id_note *note = + build_id_find_nhdr_for_addr(disk_cache_init); + unsigned build_id_len = build_id_length(note); + assert(note && build_id_len == 20); /* sha1 */ + _mesa_sha1_update(&ctx, build_id_data(note), build_id_len); +#endif + + /* Hash in the Vulkan pipeline cache UUID to identify the combination of + * vulkan device and driver (or any inserted layer that would invalidate our + * cached pipelines). + * + * "Although they have identical descriptions, VkPhysicalDeviceIDProperties + * ::deviceUUID may differ from + * VkPhysicalDeviceProperties2::pipelineCacheUUID. The former is intended to + * identify and correlate devices across API and driver boundaries, while the + * latter is used to identify a compatible device and driver combination to + * use when serializing and de-serializing pipeline state." + */ + _mesa_sha1_update(&ctx, screen->info.props.pipelineCacheUUID, VK_UUID_SIZE); + + /* Hash in our debug flags that affect NIR generation as of finalize_nir */ + unsigned shader_debug_flags = zink_debug & ZINK_DEBUG_COMPACT; + _mesa_sha1_update(&ctx, &shader_debug_flags, sizeof(shader_debug_flags)); + + /* Some of the driconf options change shaders. Let's just hash the whole + * thing to not forget any (especially as options get added). + */ + _mesa_sha1_update(&ctx, &screen->driconf, sizeof(screen->driconf)); - screen->disk_cache = disk_cache_create(buf, screen->info.props.deviceName, 0); - if (screen->disk_cache) { - util_queue_init(&screen->cache_put_thread, "zcq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen); - util_queue_init(&screen->cache_get_thread, "zcfq", 8, 4, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen); + /* EXT_shader_object causes different descriptor layouts for separate shaders */ + _mesa_sha1_update(&ctx, &screen->info.have_EXT_shader_object, sizeof(screen->info.have_EXT_shader_object)); + + /* Finish the sha1 and format it as text. 
*/ + unsigned char sha1[20]; + _mesa_sha1_final(&ctx, sha1); + + char cache_id[20 * 2 + 1]; + mesa_bytes_to_hex(cache_id, sha1, 20); + + screen->disk_cache = disk_cache_create("zink", cache_id, 0); + + if (!screen->disk_cache) + return true; + + if (!util_queue_init(&screen->cache_put_thread, "zcq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen)) { + mesa_loge("zink: Failed to create disk cache queue\n"); + + disk_cache_destroy(screen->disk_cache); + screen->disk_cache = NULL; + + return false; } #endif + + return true; } @@ -161,30 +342,45 @@ cache_put_job(void *data, void *gdata, int thread_index) struct zink_program *pg = data; struct zink_screen *screen = gdata; size_t size = 0; - if (VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, NULL) != VK_SUCCESS) + u_rwlock_rdlock(&pg->pipeline_cache_lock); + VkResult result = VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, NULL); + if (result != VK_SUCCESS) { + u_rwlock_rdunlock(&pg->pipeline_cache_lock); + mesa_loge("ZINK: vkGetPipelineCacheData failed (%s)", vk_Result_to_str(result)); return; - if (pg->pipeline_cache_size == size) + } + if (pg->pipeline_cache_size == size) { + u_rwlock_rdunlock(&pg->pipeline_cache_lock); return; + } void *pipeline_data = malloc(size); - if (!pipeline_data) + if (!pipeline_data) { + u_rwlock_rdunlock(&pg->pipeline_cache_lock); return; - if (VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, pipeline_data) == VK_SUCCESS) { + } + result = VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, pipeline_data); + u_rwlock_rdunlock(&pg->pipeline_cache_lock); + if (result == VK_SUCCESS) { pg->pipeline_cache_size = size; cache_key key; disk_cache_compute_key(screen->disk_cache, pg->sha1, sizeof(pg->sha1), key); disk_cache_put_nocopy(screen->disk_cache, key, pipeline_data, size, NULL); + } else { + mesa_loge("ZINK: vkGetPipelineCacheData failed (%s)", vk_Result_to_str(result)); } } void -zink_screen_update_pipeline_cache(struct 
zink_screen *screen, struct zink_program *pg) +zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread) { - util_queue_fence_init(&pg->cache_fence); - if (!screen->disk_cache) + if (!screen->disk_cache || !pg->pipeline_cache) return; - util_queue_add_job(&screen->cache_put_thread, pg, NULL, cache_put_job, NULL, 0); + if (in_thread) + cache_put_job(pg, screen, 0); + else if (util_queue_fence_is_signalled(&pg->cache_fence)) + util_queue_add_job(&screen->cache_put_thread, pg, &pg->cache_fence, cache_put_job, NULL, 0); } static void @@ -196,7 +392,7 @@ cache_get_job(void *data, void *gdata, int thread_index) VkPipelineCacheCreateInfo pcci; pcci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; pcci.pNext = NULL; - pcci.flags = screen->info.have_EXT_pipeline_creation_cache_control ? VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT : 0; + pcci.flags = screen->info.have_EXT_pipeline_creation_cache_control ? VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT : 0; pcci.initialDataSize = 0; pcci.pInitialData = NULL; @@ -204,18 +400,24 @@ cache_get_job(void *data, void *gdata, int thread_index) disk_cache_compute_key(screen->disk_cache, pg->sha1, sizeof(pg->sha1), key); pcci.pInitialData = disk_cache_get(screen->disk_cache, key, &pg->pipeline_cache_size); pcci.initialDataSize = pg->pipeline_cache_size; - VKSCR(CreatePipelineCache)(screen->dev, &pcci, NULL, &pg->pipeline_cache); + + VkResult res = VKSCR(CreatePipelineCache)(screen->dev, &pcci, NULL, &pg->pipeline_cache); + if (res != VK_SUCCESS) { + mesa_loge("ZINK: vkCreatePipelineCache failed (%s)", vk_Result_to_str(res)); + } free((void*)pcci.pInitialData); } void -zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg) +zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread) { - util_queue_fence_init(&pg->cache_fence); if (!screen->disk_cache) return; - 
util_queue_add_job(&screen->cache_get_thread, pg, &pg->cache_fence, cache_get_job, NULL, 0); + if (in_thread) + cache_get_job(pg, screen, 0); + else + util_queue_add_job(&screen->cache_get_thread, pg, &pg->cache_fence, cache_get_job, NULL, 0); } static int @@ -231,7 +433,7 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, switch (param) { case PIPE_COMPUTE_CAP_ADDRESS_BITS: - RET((uint32_t []){ 32 }); + RET((uint32_t []){ 64 }); case PIPE_COMPUTE_CAP_IR_TARGET: if (ret) @@ -262,13 +464,21 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t []) { 1 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t []) { screen->info.props11.subgroupSize }); case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: - case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: - case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint64_t []) { screen->clamp_video_mem }); + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + RET((uint64_t []) { screen->total_video_mem }); + + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + // no way in vulkan to retrieve this information. + RET((uint32_t []) { 1 }); + + case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: // XXX: I think these are for Clover... 
@@ -279,63 +489,131 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, } } +static uint32_t +get_smallest_buffer_heap(struct zink_screen *screen) +{ + enum zink_heap heaps[] = { + ZINK_HEAP_DEVICE_LOCAL, + ZINK_HEAP_DEVICE_LOCAL_VISIBLE, + ZINK_HEAP_HOST_VISIBLE_COHERENT, + ZINK_HEAP_HOST_VISIBLE_COHERENT + }; + unsigned size = UINT32_MAX; + for (unsigned i = 0; i < ARRAY_SIZE(heaps); i++) { + for (unsigned j = 0; j < screen->heap_count[i]; j++) { + unsigned heap_idx = screen->info.mem_props.memoryTypes[screen->heap_map[i][j]].heapIndex; + size = MIN2(screen->info.mem_props.memoryHeaps[heap_idx].size, size); + } + } + return size; +} + +static inline bool +have_fp32_filter_linear(struct zink_screen *screen) +{ + const VkFormat fp32_formats[] = { + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_D32_SFLOAT, + }; + for (int i = 0; i < ARRAY_SIZE(fp32_formats); ++i) { + VkFormatProperties props; + VKSCR(GetPhysicalDeviceFormatProperties)(screen->pdev, + fp32_formats[i], + &props); + if (((props.linearTilingFeatures | props.optimalTilingFeatures) & + (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) == + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) { + return false; + } + } + return true; +} + static int zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { struct zink_screen *screen = zink_screen(pscreen); switch (param) { + case PIPE_CAP_NULL_TEXTURES: + return screen->info.rb_image_feats.robustImageAccess; + case PIPE_CAP_TEXRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARTIAL_STRIDE: + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return screen->info.feats.features.samplerAnisotropy; case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART: return 1; case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: { - uint32_t modes = BITFIELD_BIT(PIPE_PRIM_LINE_STRIP) | - BITFIELD_BIT(PIPE_PRIM_TRIANGLE_STRIP) | - 
BITFIELD_BIT(PIPE_PRIM_LINE_STRIP_ADJACENCY) | - BITFIELD_BIT(PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY); + uint32_t modes = BITFIELD_BIT(MESA_PRIM_LINE_STRIP) | + BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) | + BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) | + BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY); if (screen->have_triangle_fans) - modes |= BITFIELD_BIT(PIPE_PRIM_TRIANGLE_FAN); + modes |= BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN); if (screen->info.have_EXT_primitive_topology_list_restart) { - modes |= BITFIELD_BIT(PIPE_PRIM_POINTS) | - BITFIELD_BIT(PIPE_PRIM_LINES) | - BITFIELD_BIT(PIPE_PRIM_TRIANGLES) | - BITFIELD_BIT(PIPE_PRIM_TRIANGLES_ADJACENCY); + modes |= BITFIELD_BIT(MESA_PRIM_POINTS) | + BITFIELD_BIT(MESA_PRIM_LINES) | + BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) | + BITFIELD_BIT(MESA_PRIM_TRIANGLES) | + BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY); if (screen->info.list_restart_feats.primitiveTopologyPatchListRestart) - modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES); + modes |= BITFIELD_BIT(MESA_PRIM_PATCHES); } return modes; } case PIPE_CAP_SUPPORTED_PRIM_MODES: { - uint32_t modes = BITFIELD_MASK(PIPE_PRIM_MAX); - modes &= ~BITFIELD_BIT(PIPE_PRIM_QUADS); - modes &= ~BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP); - modes &= ~BITFIELD_BIT(PIPE_PRIM_POLYGON); - modes &= ~BITFIELD_BIT(PIPE_PRIM_LINE_LOOP); + uint32_t modes = BITFIELD_MASK(MESA_PRIM_COUNT); if (!screen->have_triangle_fans) - modes &= ~BITFIELD_BIT(PIPE_PRIM_TRIANGLE_FAN); + modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS); + modes &= ~BITFIELD_BIT(MESA_PRIM_QUAD_STRIP); + modes &= ~BITFIELD_BIT(MESA_PRIM_POLYGON); + modes &= ~BITFIELD_BIT(MESA_PRIM_LINE_LOOP); + if (!screen->have_triangle_fans) + modes &= ~BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN); return modes; } case PIPE_CAP_FBFETCH: return 1; - + case PIPE_CAP_FBFETCH_COHERENT: + return screen->info.have_EXT_rasterization_order_attachment_access; + + case PIPE_CAP_MEMOBJ: + return screen->instance_info.have_KHR_external_memory_capabilities && 
(screen->info.have_KHR_external_memory_fd || screen->info.have_KHR_external_memory_win32); + case PIPE_CAP_FENCE_SIGNAL: + return screen->info.have_KHR_external_semaphore_fd || screen->info.have_KHR_external_semaphore_win32; + case PIPE_CAP_NATIVE_FENCE_FD: + return screen->instance_info.have_KHR_external_semaphore_capabilities && screen->info.have_KHR_external_semaphore_fd; + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + return screen->info.have_EXT_external_memory_host; + + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + return screen->info.have_vulkan11 || screen->info.have_KHR_maintenance2; + + case PIPE_CAP_VALIDATE_ALL_DIRTY_STATES: + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_QUERY_MEMORY_INFO: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: - case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: - case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_SHADER_ARRAY_COMPONENTS: case PIPE_CAP_QUERY_BUFFER_OBJECT: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_TEXTURE_QUERY_SAMPLES: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_GL_SPIRV: @@ -343,20 +621,36 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0: case PIPE_CAP_PACKED_UNIFORMS: - case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_SHADER_PACK_HALF_FLOAT: + case PIPE_CAP_CULL_DISTANCE_NOCOMBINE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_LOAD_CONSTBUF: + case 
PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_ALLOW_GLTHREAD_BUFFER_SUBDATA_OPT: return 1; - case PIPE_CAP_DRAW_PARAMETERS: - return screen->info.feats11.shaderDrawParameters || screen->info.have_KHR_shader_draw_parameters; + case PIPE_CAP_DRAW_VERTEX_STATE: + return screen->info.have_EXT_vertex_input_dynamic_state; - case PIPE_CAP_TGSI_VOTE: - return screen->spirv_version >= SPIRV_VERSION(1, 3); + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return screen->vk_version >= VK_MAKE_VERSION(1,2,0); + case PIPE_CAP_SHADER_GROUP_VOTE: + if (screen->info.have_vulkan11 && + (screen->info.subgroup.supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) && + (screen->info.subgroup.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT)) + return true; + if (screen->info.have_EXT_shader_subgroup_vote) + return true; + return false; case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - return screen->info.have_EXT_provoking_vertex; + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: - return screen->info.have_KHR_sampler_mirror_clamp_to_edge; + return screen->info.have_KHR_sampler_mirror_clamp_to_edge || (screen->info.have_vulkan12 && screen->info.feats12.samplerMirrorClampToEdge); + + case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: + return 1; case PIPE_CAP_POLYGON_OFFSET_CLAMP: return screen->info.feats.features.depthBiasClamp; @@ -365,15 +659,26 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->info.feats.features.pipelineStatisticsQuery; case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: - return screen->info.feats.features.robustBufferAccess; + return screen->info.feats.features.robustBufferAccess && + (screen->info.rb2_feats.robustImageAccess2 || screen->driver_workarounds.lower_robustImageAccess2); case PIPE_CAP_MULTI_DRAW_INDIRECT: return screen->info.feats.features.multiDrawIndirect; + case PIPE_CAP_IMAGE_ATOMIC_FLOAT_ADD: + return (screen->info.have_EXT_shader_atomic_float && + screen->info.atomic_float_feats.shaderSharedFloat32AtomicAdd && + 
screen->info.atomic_float_feats.shaderBufferFloat32AtomicAdd); + case PIPE_CAP_SHADER_ATOMIC_INT64: + return (screen->info.have_KHR_shader_atomic_int64 && + screen->info.atomic_int_feats.shaderSharedInt64Atomics && + screen->info.atomic_int_feats.shaderBufferInt64Atomics); + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: return screen->info.have_KHR_draw_indirect_count; case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_DRAW_PARAMETERS: return (screen->info.have_vulkan12 && screen->info.feats11.shaderDrawParameters) || screen->info.have_KHR_shader_draw_parameters; @@ -383,8 +688,10 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_VERTEX_STREAMS: return screen->info.tf_props.maxTransformFeedbackStreams; + case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: + return screen->info.have_NV_compute_shader_derivatives; + case PIPE_CAP_INT64: - case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_DOUBLES: return 1; @@ -411,14 +718,22 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK: return screen->info.have_EXT_fragment_shader_interlock; - case PIPE_CAP_TGSI_CLOCK: + case PIPE_CAP_SHADER_CLOCK: return screen->info.have_KHR_shader_clock; - case PIPE_CAP_POINT_SPRITE: - return 1; + case PIPE_CAP_SHADER_BALLOT: + if (screen->info.props11.subgroupSize > 64) + return false; + if (screen->info.have_vulkan11 && + screen->info.subgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) + return true; + if (screen->info.have_EXT_shader_subgroup_ballot) + return true; + return false; - case PIPE_CAP_TGSI_BALLOT: - return screen->vk_version >= VK_MAKE_VERSION(1,2,0) && screen->info.props11.subgroupSize <= 64; + case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION: + return screen->spirv_version >= SPIRV_VERSION(1, 6) || + screen->info.have_EXT_shader_demote_to_helper_invocation; case PIPE_CAP_SAMPLE_SHADING: return screen->info.feats.features.sampleRateShading; @@ -426,20 +741,33 @@ zink_get_param(struct pipe_screen *pscreen, 
enum pipe_cap param) case PIPE_CAP_TEXTURE_SWIZZLE: return 1; + case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY: + return 1; + case PIPE_CAP_GL_CLAMP: return 0; - case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - /* This is also broken on the other AMD drivers for old HW, but - * there's no obvious way to test for that. + case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: + return 0; /* Assume that the vk driver is capable of moving imm arrays to some sort of constant storage on its own. */ + + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: { + enum pipe_quirk_texture_border_color_swizzle quirk = PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_ALPHA_NOT_W; + if (!screen->info.border_color_feats.customBorderColorWithoutFormat) + return quirk | PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO; + /* assume that if drivers don't implement this extension they either: + * - don't support custom border colors + * - handle things correctly + * - hate border color accuracy */ - if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV || - screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) - return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; - return 0; + if (screen->info.have_EXT_border_color_swizzle && + !screen->info.border_swizzle_feats.borderColorSwizzleFromImage) + return quirk | PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; + return quirk; + } case PIPE_CAP_MAX_TEXTURE_2D_SIZE: - return screen->info.props.limits.maxImageDimension2D; + return MIN2(screen->info.props.limits.maxImageDimension1D, + screen->info.props.limits.maxImageDimension2D); case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 1 + util_logbase2(screen->info.props.limits.maxImageDimension3D); case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: @@ -447,7 +775,6 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: - case PIPE_CAP_VERTEX_SHADER_SATURATE: return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: @@ -455,6 +782,9 
@@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: return screen->info.feats.features.independentBlend; + case PIPE_CAP_DITHERING: + return 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return screen->info.have_EXT_transform_feedback ? screen->info.tf_props.maxTransformFeedbackBuffers : 0; case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: @@ -465,13 +795,12 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->info.props.limits.maxImageArrayLayers; case PIPE_CAP_DEPTH_CLIP_DISABLE: - return screen->info.feats.features.depthClamp; + return screen->info.have_EXT_depth_clip_enable; case PIPE_CAP_SHADER_STENCIL_EXPORT: return screen->info.have_EXT_shader_stencil_export; - case PIPE_CAP_TGSI_INSTANCEID: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_VS_INSTANCEID: case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; @@ -497,11 +826,16 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->info.props.limits.minUniformBufferOffsetAlignment; case PIPE_CAP_QUERY_TIMESTAMP: - return screen->info.have_EXT_calibrated_timestamps && - screen->timestamp_valid_bits > 0; + return screen->timestamp_valid_bits > 0; + + case PIPE_CAP_QUERY_TIMESTAMP_BITS: + return screen->timestamp_valid_bits; + + case PIPE_CAP_TIMER_RESOLUTION: + return ceil(screen->info.props.limits.timestampPeriod); case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: - return screen->info.props.limits.minMemoryMapAlignment; + return 1 << MIN_SLAB_ORDER; case PIPE_CAP_CUBE_MAP_ARRAY: return screen->info.feats.features.imageCubeArray; @@ -510,14 +844,30 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: return 1; + case PIPE_CAP_BINDLESS_TEXTURE: + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && + (screen->info.db_props.maxDescriptorBufferBindings < 2 || screen->info.db_props.maxSamplerDescriptorBufferBindings < 2)) + return 0; + return 
screen->info.have_EXT_descriptor_indexing; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return screen->info.props.limits.minTexelBufferOffsetAlignment; - case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - return 1; + case PIPE_CAP_TEXTURE_TRANSFER_MODES: { + enum pipe_texture_transfer_mode mode = PIPE_TEXTURE_TRANSFER_BLIT; + if (!screen->is_cpu && + /* this needs substantial perf tuning */ + screen->info.driver_props.driverID != VK_DRIVER_ID_MESA_TURNIP && + screen->info.have_KHR_8bit_storage && + screen->info.have_KHR_16bit_storage && + screen->info.have_KHR_shader_float16_int8) + mode |= PIPE_TEXTURE_TRANSFER_COMPUTE; + return mode; + } - case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - return screen->info.props.limits.maxTexelBufferElements; + case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: + return MIN2(get_smallest_buffer_heap(screen), + screen->info.props.limits.maxTexelBufferElements); case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_NATIVE; /* unsure */ @@ -528,6 +878,9 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_IMAGE_LOAD_FORMATTED: return screen->info.feats.features.shaderStorageImageReadWithoutFormat; + case PIPE_CAP_IMAGE_STORE_FORMATTED: + return screen->info.feats.features.shaderStorageImageWriteWithoutFormat; + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: return 1; @@ -545,9 +898,13 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->info.props.limits.maxTexelGatherOffset; case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB: - return screen->vk_version >= VK_MAKE_VERSION(1,2,0) || screen->info.have_EXT_sampler_filter_minmax; + return screen->info.feats12.samplerFilterMinmax || screen->info.have_EXT_sampler_filter_minmax; - case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS: + case PIPE_CAP_INTEGER_MULTIPLY_32X16: + return screen->info.have_INTEL_shader_integer_functions2; + + case PIPE_CAP_FS_FINE_DERIVATIVE: return 1; case PIPE_CAP_VENDOR_ID: @@ -556,7 +913,7 @@ 
zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->info.props.deviceID; case PIPE_CAP_ACCELERATED: - return 1; + return !screen->is_cpu; case PIPE_CAP_VIDEO_MEMORY: return get_video_mem(screen) >> 20; case PIPE_CAP_UMA: @@ -568,14 +925,16 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SAMPLER_VIEW_TARGET: return 1; - case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: - case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: + case PIPE_CAP_VS_LAYER_VIEWPORT: + case PIPE_CAP_TES_LAYER_VIEWPORT: return screen->info.have_EXT_shader_viewport_index_layer || (screen->spirv_version >= SPIRV_VERSION(1, 5) && screen->info.feats12.shaderOutputLayer && screen->info.feats12.shaderOutputViewportIndex); case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + return have_fp32_filter_linear(screen); + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: return 1; @@ -592,15 +951,31 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->info.feats.features.shaderCullDistance; case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: - - return screen->info.feats.features.sparseBinding ? ZINK_SPARSE_BUFFER_PAGE_SIZE : 0; + return screen->info.feats.features.sparseResidencyBuffer ? ZINK_SPARSE_BUFFER_PAGE_SIZE : 0; + + /* Sparse texture */ + case PIPE_CAP_MAX_SPARSE_TEXTURE_SIZE: + return screen->info.feats.features.sparseResidencyImage2D ? + zink_get_param(pscreen, PIPE_CAP_MAX_TEXTURE_2D_SIZE) : 0; + case PIPE_CAP_MAX_SPARSE_3D_TEXTURE_SIZE: + return screen->info.feats.features.sparseResidencyImage3D ? + (1 << (zink_get_param(pscreen, PIPE_CAP_MAX_TEXTURE_3D_LEVELS) - 1)) : 0; + case PIPE_CAP_MAX_SPARSE_ARRAY_TEXTURE_LAYERS: + return screen->info.feats.features.sparseResidencyImage2D ? + zink_get_param(pscreen, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS) : 0; + case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS: + return screen->info.feats.features.sparseResidencyImage2D ? 
1 : 0; + case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY: + return screen->info.feats.features.sparseResidency2Samples && + screen->info.feats.features.shaderResourceResidency ? 1 : 0; + case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD: + return screen->info.feats.features.shaderResourceMinLod && + screen->info.feats.features.sparseResidency2Samples && + screen->info.feats.features.shaderResourceResidency ? 1 : 0; case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: return screen->info.props.limits.viewportSubPixelBits; - case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - return 0; /* not sure */ - case PIPE_CAP_MAX_GS_INVOCATIONS: return screen->info.props.limits.maxGeometryShaderInvocations; @@ -608,44 +983,51 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* gallium handles this automatically */ return 0; - case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: + case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT: /* 1<<27 is required by VK spec */ assert(screen->info.props.limits.maxStorageBufferRange >= 1 << 27); - /* but Gallium can't handle values that are too big, so clamp to VK spec minimum */ - return 1 << 27; + /* clamp to VK spec minimum */ + return MIN2(get_smallest_buffer_heap(screen), screen->info.props.limits.maxStorageBufferRange); - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: return 0; - case PIPE_CAP_NIR_COMPACT_ARRAYS: - return 1; - - case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_FS_POINT_IS_SYSVAL: return 1; case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED: return 1; + case PIPE_CAP_POINT_SIZE_FIXED: + return screen->info.have_KHR_maintenance5 ? 
PIPE_POINT_SIZE_LOWER_USER_ONLY : PIPE_POINT_SIZE_LOWER_ALWAYS; case PIPE_CAP_FLATSHADE: case PIPE_CAP_ALPHA_TEST: case PIPE_CAP_CLIP_PLANES: - case PIPE_CAP_POINT_SIZE_FIXED: case PIPE_CAP_TWO_SIDED_COLOR: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: - return screen->info.props.limits.maxTessellationControlPerVertexOutputComponents / 4; + return screen->info.props.limits.maxTessellationControlPerPatchOutputComponents / 4; case PIPE_CAP_MAX_VARYINGS: /* need to reserve up to 60 of our varying components and 16 slots for streamout */ return MIN2(screen->info.props.limits.maxVertexOutputComponents / 4 / 2, 16); case PIPE_CAP_DMABUF: - return screen->info.have_KHR_external_memory_fd && screen->info.have_EXT_external_memory_dma_buf && screen->info.have_EXT_queue_family_foreign; +#if defined(HAVE_LIBDRM) && (DETECT_OS_LINUX || DETECT_OS_BSD) + return screen->info.have_KHR_external_memory_fd && + screen->info.have_EXT_external_memory_dma_buf && + screen->info.have_EXT_queue_family_foreign + ? 
DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT + : 0; +#else + return 0; +#endif case PIPE_CAP_DEPTH_BOUNDS_TEST: return screen->info.feats.features.depthBounds; @@ -667,14 +1049,38 @@ zink_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) struct zink_screen *screen = zink_screen(pscreen); switch (param) { + case PIPE_CAPF_MIN_LINE_WIDTH: + case PIPE_CAPF_MIN_LINE_WIDTH_AA: + if (!screen->info.feats.features.wideLines) + return 1.0f; + return MAX2(screen->info.props.limits.lineWidthRange[0], 0.01); + + case PIPE_CAPF_MIN_POINT_SIZE: + case PIPE_CAPF_MIN_POINT_SIZE_AA: + if (!screen->info.feats.features.largePoints) + return 1.0f; + return MAX2(screen->info.props.limits.pointSizeRange[0], 0.01); + + + case PIPE_CAPF_LINE_WIDTH_GRANULARITY: + if (!screen->info.feats.features.wideLines) + return 0.1f; + return screen->info.props.limits.lineWidthGranularity; + + case PIPE_CAPF_POINT_SIZE_GRANULARITY: + if (!screen->info.feats.features.largePoints) + return 0.1f; + return screen->info.props.limits.pointSizeGranularity; + + case PIPE_CAPF_MAX_LINE_WIDTH: case PIPE_CAPF_MAX_LINE_WIDTH_AA: if (!screen->info.feats.features.wideLines) return 1.0f; return screen->info.props.limits.lineWidthRange[1]; - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_SIZE: + case PIPE_CAPF_MAX_POINT_SIZE_AA: if (!screen->info.feats.features.largePoints) return 1.0f; return screen->info.props.limits.pointSizeRange[1]; @@ -699,7 +1105,7 @@ zink_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) static int zink_get_shader_param(struct pipe_screen *pscreen, - enum pipe_shader_type shader, + gl_shader_stage shader, enum pipe_shader_cap param) { struct zink_screen *screen = zink_screen(pscreen); @@ -707,22 +1113,22 @@ zink_get_shader_param(struct pipe_screen *pscreen, switch (param) { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: switch (shader) { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + case 
MESA_SHADER_VERTEX: return INT_MAX; - case PIPE_SHADER_TESS_CTRL: - case PIPE_SHADER_TESS_EVAL: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: if (screen->info.feats.features.tessellationShader && screen->info.have_KHR_maintenance2) return INT_MAX; break; - case PIPE_SHADER_GEOMETRY: + case MESA_SHADER_GEOMETRY: if (screen->info.feats.features.geometryShader) return INT_MAX; break; - case PIPE_SHADER_COMPUTE: + case MESA_SHADER_COMPUTE: return INT_MAX; default: break; @@ -737,49 +1143,59 @@ zink_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_INPUTS: { uint32_t max = 0; switch (shader) { - case PIPE_SHADER_VERTEX: + case MESA_SHADER_VERTEX: max = MIN2(screen->info.props.limits.maxVertexInputAttributes, PIPE_MAX_ATTRIBS); break; - case PIPE_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_CTRL: max = screen->info.props.limits.maxTessellationControlPerVertexInputComponents / 4; break; - case PIPE_SHADER_TESS_EVAL: + case MESA_SHADER_TESS_EVAL: max = screen->info.props.limits.maxTessellationEvaluationInputComponents / 4; break; - case PIPE_SHADER_GEOMETRY: - max = screen->info.props.limits.maxGeometryInputComponents; + case MESA_SHADER_GEOMETRY: + max = screen->info.props.limits.maxGeometryInputComponents / 4; break; - case PIPE_SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: /* intel drivers report fewer components, but it's a value that's compatible * with what we need for GL, so we can still force a conformant value here */ - if (screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR || - screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) + if (screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || + screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || + (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_VENUS + && screen->info.props.vendorID == 0x8086)) return 32; max = screen->info.props.limits.maxFragmentInputComponents / 4; 
break; default: return 0; /* unsupported stage */ } + switch (shader) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + /* last vertex stage must support streamout, and this is capped in glsl compiler */ + return MIN2(max, MAX_VARYING); + default: break; + } return MIN2(max, 64); // prevent overflowing struct shader_info::inputs_read } case PIPE_SHADER_CAP_MAX_OUTPUTS: { uint32_t max = 0; switch (shader) { - case PIPE_SHADER_VERTEX: + case MESA_SHADER_VERTEX: max = screen->info.props.limits.maxVertexOutputComponents / 4; break; - case PIPE_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_CTRL: max = screen->info.props.limits.maxTessellationControlPerVertexOutputComponents / 4; break; - case PIPE_SHADER_TESS_EVAL: + case MESA_SHADER_TESS_EVAL: max = screen->info.props.limits.maxTessellationEvaluationOutputComponents / 4; break; - case PIPE_SHADER_GEOMETRY: + case MESA_SHADER_GEOMETRY: max = screen->info.props.limits.maxGeometryOutputComponents / 4; break; - case PIPE_SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: max = screen->info.props.limits.maxColorAttachments; break; default: @@ -788,11 +1204,12 @@ zink_get_shader_param(struct pipe_screen *pscreen, return MIN2(max, 64); // prevent overflowing struct shader_info::outputs_read/written } - case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE: /* At least 16384 is guaranteed by VK spec */ assert(screen->info.props.limits.maxUniformBufferRange >= 16384); /* but Gallium can't handle values that are too big */ - return MIN2(screen->info.props.limits.maxUniformBufferRange, 1 << 31); + return MIN3(get_smallest_buffer_heap(screen), + screen->info.props.limits.maxUniformBufferRange, BITFIELD_BIT(31)); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return MIN2(screen->info.props.limits.maxPerStageDescriptorUniformBuffers, @@ -805,22 +1222,23 @@ zink_get_shader_param(struct pipe_screen *pscreen, return 1; case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - + 
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: case PIPE_SHADER_CAP_INT64_ATOMICS: case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: return 0; /* not implemented */ case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: - return screen->info.feats11.uniformAndStorageBuffer16BitAccess || - (screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess); + //enabling this breaks GTF-GL46.gtf21.GL2Tests.glGetUniform.glGetUniform + //return screen->info.feats11.uniformAndStorageBuffer16BitAccess || + //(screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess); + return 0; case PIPE_SHADER_CAP_FP16_DERIVATIVES: - return screen->info.feats11.storageInputOutput16 || - (screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.storageInputOutput16); + return 0; //spirv requires 32bit derivative srcs and dests case PIPE_SHADER_CAP_FP16: return screen->info.feats12.shaderFloat16 || (screen->info.have_KHR_shader_float16_int8 && @@ -829,9 +1247,6 @@ zink_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_INT16: return screen->info.feats.features.shaderInt16; - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_NIR; - case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 0; /* not implemented */ @@ -841,28 +1256,20 @@ zink_get_shader_param(struct pipe_screen *pscreen, screen->info.props.limits.maxPerStageDescriptorSampledImages), PIPE_MAX_SAMPLERS); - case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: - return 0; /* not implemented */ - case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; /* no idea */ - case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: - return 0; - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: switch 
(shader) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_TESS_CTRL: - case PIPE_SHADER_TESS_EVAL: - case PIPE_SHADER_GEOMETRY: + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: if (!screen->info.feats.features.vertexPipelineStoresAndAtomics) return 0; break; - case PIPE_SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: if (!screen->info.feats.features.fragmentStoresAndAtomics) return 0; break; @@ -881,18 +1288,14 @@ zink_get_shader_param(struct pipe_screen *pscreen, if (screen->info.feats.features.shaderStorageImageExtendedFormats && screen->info.feats.features.shaderStorageImageWriteWithoutFormat) return MIN2(screen->info.props.limits.maxPerStageDescriptorStorageImages, - PIPE_MAX_SHADER_IMAGES); + ZINK_MAX_SHADER_IMAGES); return 0; - case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: - case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: - return 0; /* unsure */ - - case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 0; /* not implemented */ + case PIPE_SHADER_CAP_CONT_SUPPORTED: + return 1; } /* should only get here on unhandled cases */ @@ -916,6 +1319,23 @@ vk_sample_count_flags(uint32_t sample_count) } static bool +zink_is_compute_copy_faster(struct pipe_screen *pscreen, + enum pipe_format src_format, + enum pipe_format dst_format, + unsigned width, + unsigned height, + unsigned depth, + bool cpu) +{ + if (cpu) + /* very basic for now, probably even worse for some cases, + * but fixes lots of others + */ + return width * height * depth > 64 * 64; + return false; +} + +static bool zink_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, @@ -925,6 +1345,9 @@ zink_is_format_supported(struct pipe_screen *pscreen, { struct zink_screen *screen = zink_screen(pscreen); + if (storage_sample_count && 
!screen->info.feats.features.shaderStorageImageMultisample && bind & PIPE_BIND_SHADER_IMAGE) + return false; + if (format == PIPE_FORMAT_NONE) return screen->info.props.limits.framebufferNoAttachmentsSampleCounts & vk_sample_count_flags(sample_count); @@ -939,7 +1362,8 @@ zink_is_format_supported(struct pipe_screen *pscreen, return false; } - VkFormat vkformat = zink_get_format(screen, format); + /* always use superset to determine feature support */ + VkFormat vkformat = zink_get_format(screen, PIPE_FORMAT_A8_UNORM ? zink_format_get_emulated_alpha(format) : format); if (vkformat == VK_FORMAT_UNDEFINED) return false; @@ -984,9 +1408,80 @@ zink_is_format_supported(struct pipe_screen *pscreen, if (!(screen->info.props.limits.storageImageSampleCounts & sample_mask)) return false; } + VkResult ret; + VkImageFormatProperties image_props; + VkImageFormatProperties2 props2; + props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2; + props2.pNext = NULL; + VkPhysicalDeviceImageFormatInfo2 info; + info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2; + info.pNext = NULL; + info.format = vkformat; + info.flags = 0; + info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + info.tiling = VK_IMAGE_TILING_OPTIMAL; + switch (target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: { + bool need_2D = false; + if (util_format_is_depth_or_stencil(format)) + need_2D |= screen->need_2D_zs; + info.type = need_2D ? 
VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D; + break; + } + + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + FALLTHROUGH; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + info.type = VK_IMAGE_TYPE_2D; + break; + + case PIPE_TEXTURE_3D: + info.type = VK_IMAGE_TYPE_3D; + if (bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) + info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; + if (screen->info.have_EXT_image_2d_view_of_3d) + info.flags |= VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT; + break; + + default: + unreachable("unknown texture target"); + } + u_foreach_bit(b, bind) { + switch (1<<b) { + case PIPE_BIND_RENDER_TARGET: + info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + break; + case PIPE_BIND_DEPTH_STENCIL: + info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + break; + case PIPE_BIND_SAMPLER_VIEW: + info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + break; + } + } + + if (VKSCR(GetPhysicalDeviceImageFormatProperties2)) { + ret = VKSCR(GetPhysicalDeviceImageFormatProperties2)(screen->pdev, &info, &props2); + /* this is using VK_IMAGE_CREATE_EXTENDED_USAGE_BIT and can't be validated */ + if (vk_format_aspects(vkformat) & VK_IMAGE_ASPECT_PLANE_1_BIT) + ret = VK_SUCCESS; + image_props = props2.imageFormatProperties; + } else { + ret = VKSCR(GetPhysicalDeviceImageFormatProperties)(screen->pdev, vkformat, info.type, + info.tiling, info.usage, info.flags, &image_props); + } + if (ret != VK_SUCCESS) + return false; + if (!(sample_count & image_props.sampleCounts)) + return false; } - VkFormatProperties props = screen->format_props[format]; + struct zink_format_props props = screen->format_props[format]; if (target == PIPE_BUFFER) { if (bind & PIPE_BIND_VERTEX_BUFFER) { @@ -1041,14 +1536,23 @@ zink_is_format_supported(struct pipe_screen *pscreen, return false; } - if (util_format_is_compressed(format)) { - const struct util_format_description *desc = 
util_format_description(format); - if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC && - !screen->info.feats.features.textureCompressionBC) - return false; + return true; +} + +static void +zink_set_damage_region(struct pipe_screen *pscreen, struct pipe_resource *pres, unsigned int nrects, const struct pipe_box *rects) +{ + struct zink_resource *res = zink_resource(pres); + + for (unsigned i = 0; i < nrects; i++) { + int y = pres->height0 - rects[i].y - rects[i].height; + res->damage.extent.width = MAX2(res->damage.extent.width, rects[i].x + rects[i].width); + res->damage.extent.height = MAX2(res->damage.extent.height, y + rects[i].height); + res->damage.offset.x = MIN2(res->damage.offset.x, rects[i].x); + res->damage.offset.y = MIN2(res->damage.offset.y, y); } - return true; + res->use_damage = nrects > 0; } static void @@ -1056,95 +1560,207 @@ zink_destroy_screen(struct pipe_screen *pscreen) { struct zink_screen *screen = zink_screen(pscreen); +#ifdef HAVE_RENDERDOC_APP_H + if (screen->renderdoc_capture_all && p_atomic_dec_zero(&num_screens)) + screen->renderdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL); +#endif + + hash_table_foreach(&screen->dts, entry) + zink_kopper_deinit_displaytarget(screen, entry->data); + + if (screen->copy_context) + screen->copy_context->base.destroy(&screen->copy_context->base); + + struct zink_batch_state *bs = screen->free_batch_states; + while (bs) { + struct zink_batch_state *bs_next = bs->next; + zink_batch_state_destroy(screen, bs); + bs = bs_next; + } + if (VK_NULL_HANDLE != screen->debugUtilsCallbackHandle) { VKSCR(DestroyDebugUtilsMessengerEXT)(screen->instance, screen->debugUtilsCallbackHandle, NULL); } - if (!screen->info.have_KHR_imageless_framebuffer) { - hash_table_foreach(&screen->framebuffer_cache, entry) { - struct zink_framebuffer* fb = (struct zink_framebuffer*)entry->data; - zink_destroy_framebuffer(screen, fb); - } - simple_mtx_destroy(&screen->framebuffer_mtx); - } + 
util_vertex_state_cache_deinit(&screen->vertex_state_cache); + + if (screen->gfx_push_constant_layout) + VKSCR(DestroyPipelineLayout)(screen->dev, screen->gfx_push_constant_layout, NULL); u_transfer_helper_destroy(pscreen->transfer_helper); + if (util_queue_is_initialized(&screen->cache_get_thread)) { + util_queue_finish(&screen->cache_get_thread); + util_queue_destroy(&screen->cache_get_thread); + } #ifdef ENABLE_SHADER_CACHE - if (screen->disk_cache) { + if (screen->disk_cache && util_queue_is_initialized(&screen->cache_put_thread)) { util_queue_finish(&screen->cache_put_thread); - util_queue_finish(&screen->cache_get_thread); disk_cache_wait_for_idle(screen->disk_cache); util_queue_destroy(&screen->cache_put_thread); - util_queue_destroy(&screen->cache_get_thread); } #endif disk_cache_destroy(screen->disk_cache); + + /* we don't have an API to check if a set is already initialized */ + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs); i++) + if (screen->pipeline_libs[i].table) + _mesa_set_clear(&screen->pipeline_libs[i], NULL); + zink_bo_deinit(screen); util_live_shader_cache_deinit(&screen->shaders); + zink_descriptor_layouts_deinit(screen); + if (screen->sem) VKSCR(DestroySemaphore)(screen->dev, screen->sem, NULL); - if (screen->prev_sem) - VKSCR(DestroySemaphore)(screen->dev, screen->prev_sem, NULL); - if (screen->threaded) + if (screen->fence) + VKSCR(DestroyFence)(screen->dev, screen->fence, NULL); + + if (util_queue_is_initialized(&screen->flush_queue)) util_queue_destroy(&screen->flush_queue); - VKSCR(DestroyDevice)(screen->dev, NULL); - vkDestroyInstance(screen->instance, NULL); + while (util_dynarray_contains(&screen->semaphores, VkSemaphore)) + VKSCR(DestroySemaphore)(screen->dev, util_dynarray_pop(&screen->semaphores, VkSemaphore), NULL); + while (util_dynarray_contains(&screen->fd_semaphores, VkSemaphore)) + VKSCR(DestroySemaphore)(screen->dev, util_dynarray_pop(&screen->fd_semaphores, VkSemaphore), NULL); + if (screen->bindless_layout) + 
VKSCR(DestroyDescriptorSetLayout)(screen->dev, screen->bindless_layout, NULL); + + if (screen->dev) + VKSCR(DestroyDevice)(screen->dev, NULL); + + if (screen->instance) + VKSCR(DestroyInstance)(screen->instance, NULL); + util_idalloc_mt_fini(&screen->buffer_ids); + if (screen->loader_lib) + util_dl_close(screen->loader_lib); + if (screen->drm_fd != -1) close(screen->drm_fd); slab_destroy_parent(&screen->transfer_pool); + slab_destroy(&screen->present_mempool); ralloc_free(screen); + glsl_type_singleton_decref(); } -static void -choose_pdev(struct zink_screen *screen) +static int +zink_get_display_device(const struct zink_screen *screen, uint32_t pdev_count, + const VkPhysicalDevice *pdevs, int64_t dev_major, + int64_t dev_minor) { - uint32_t i, pdev_count; - VkPhysicalDevice *pdevs; - VkResult result = vkEnumeratePhysicalDevices(screen->instance, &pdev_count, NULL); - if (result != VK_SUCCESS) - return; + VkPhysicalDeviceDrmPropertiesEXT drm_props = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT, + }; + VkPhysicalDeviceProperties2 props = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &drm_props, + }; + + for (uint32_t i = 0; i < pdev_count; ++i) { + VKSCR(GetPhysicalDeviceProperties2)(pdevs[i], &props); + if (drm_props.renderMajor == dev_major && + drm_props.renderMinor == dev_minor) + return i; + } + + return -1; +} - assert(pdev_count > 0); +static int +zink_get_cpu_device_type(const struct zink_screen *screen, uint32_t pdev_count, + const VkPhysicalDevice *pdevs) +{ + VkPhysicalDeviceProperties props; - pdevs = malloc(sizeof(*pdevs) * pdev_count); - result = vkEnumeratePhysicalDevices(screen->instance, &pdev_count, pdevs); - assert(result == VK_SUCCESS); - assert(pdev_count > 0); + for (uint32_t i = 0; i < pdev_count; ++i) { + VKSCR(GetPhysicalDeviceProperties)(pdevs[i], &props); - VkPhysicalDeviceProperties *props = &screen->info.props; - for (i = 0; i < pdev_count; ++i) { - vkGetPhysicalDeviceProperties(pdevs[i], 
props); + /* if user wants cpu, only give them cpu */ + if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) + return i; + } -#ifdef ZINK_WITH_SWRAST_VK - char *use_lavapipe = getenv("ZINK_USE_LAVAPIPE"); - if (use_lavapipe) { - if (props->deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) { - screen->pdev = pdevs[i]; - screen->info.device_version = props->apiVersion; - break; - } - continue; + mesa_loge("ZINK: CPU device requested but none found!"); + + return -1; +} + +static void +choose_pdev(struct zink_screen *screen, int64_t dev_major, int64_t dev_minor) +{ + bool cpu = debug_get_bool_option("LIBGL_ALWAYS_SOFTWARE", false) || + debug_get_bool_option("D3D_ALWAYS_SOFTWARE", false); + + if (cpu || (dev_major > 0 && dev_major < 255)) { + uint32_t pdev_count; + int idx; + VkPhysicalDevice *pdevs; + VkResult result = VKSCR(EnumeratePhysicalDevices)(screen->instance, &pdev_count, NULL); + if (result != VK_SUCCESS) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumeratePhysicalDevices failed (%s)", vk_Result_to_str(result)); + return; } -#endif - if (props->deviceType != VK_PHYSICAL_DEVICE_TYPE_CPU) { - screen->pdev = pdevs[i]; - screen->info.device_version = props->apiVersion; - break; + + assert(pdev_count > 0); + + pdevs = malloc(sizeof(*pdevs) * pdev_count); + if (!pdevs) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to allocate pdevs!"); + return; + } + result = VKSCR(EnumeratePhysicalDevices)(screen->instance, &pdev_count, pdevs); + assert(result == VK_SUCCESS); + assert(pdev_count > 0); + + if (cpu) + idx = zink_get_cpu_device_type(screen, pdev_count, pdevs); + else + idx = zink_get_display_device(screen, pdev_count, pdevs, dev_major, + dev_minor); + + if (idx != -1) + /* valid cpu device */ + screen->pdev = pdevs[idx]; + + free(pdevs); + + if (idx == -1) + return; + + } else { + VkPhysicalDevice pdev; + unsigned pdev_count = 1; + VkResult result = VKSCR(EnumeratePhysicalDevices)(screen->instance, &pdev_count, &pdev); + if (result != 
VK_SUCCESS && result != VK_INCOMPLETE) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: vkEnumeratePhysicalDevices failed (%s)", vk_Result_to_str(result)); + return; } + screen->pdev = pdev; + } + VKSCR(GetPhysicalDeviceProperties)(screen->pdev, &screen->info.props); + + /* allow software rendering only if forced by the user */ + if (!cpu && screen->info.props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) { + screen->pdev = VK_NULL_HANDLE; + return; } - free(pdevs); + + screen->info.device_version = screen->info.props.apiVersion; /* runtime version is the lesser of the instance version and device version */ screen->vk_version = MIN2(screen->info.device_version, screen->instance_info.loader_version); /* calculate SPIR-V version based on VK version */ - if (screen->vk_version >= VK_MAKE_VERSION(1, 2, 0)) + if (screen->vk_version >= VK_MAKE_VERSION(1, 3, 0)) + screen->spirv_version = SPIRV_VERSION(1, 6); + else if (screen->vk_version >= VK_MAKE_VERSION(1, 2, 0)) screen->spirv_version = SPIRV_VERSION(1, 5); else if (screen->vk_version >= VK_MAKE_VERSION(1, 1, 0)) screen->spirv_version = SPIRV_VERSION(1, 3); @@ -1156,65 +1772,90 @@ static void update_queue_props(struct zink_screen *screen) { uint32_t num_queues; - vkGetPhysicalDeviceQueueFamilyProperties(screen->pdev, &num_queues, NULL); + VKSCR(GetPhysicalDeviceQueueFamilyProperties)(screen->pdev, &num_queues, NULL); assert(num_queues > 0); VkQueueFamilyProperties *props = malloc(sizeof(*props) * num_queues); - vkGetPhysicalDeviceQueueFamilyProperties(screen->pdev, &num_queues, props); + if (!props) { + mesa_loge("ZINK: failed to allocate props!"); + return; + } + + VKSCR(GetPhysicalDeviceQueueFamilyProperties)(screen->pdev, &num_queues, props); + bool found_gfx = false; + uint32_t sparse_only = UINT32_MAX; + screen->sparse_queue = UINT32_MAX; for (uint32_t i = 0; i < num_queues; i++) { if (props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { - screen->gfx_queue = i; + if (found_gfx) + continue; + screen->sparse_queue = 
screen->gfx_queue = i; screen->max_queues = props[i].queueCount; screen->timestamp_valid_bits = props[i].timestampValidBits; - break; - } + found_gfx = true; + } else if (props[i].queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) + sparse_only = i; } + if (sparse_only != UINT32_MAX) + screen->sparse_queue = sparse_only; free(props); } static void init_queue(struct zink_screen *screen) { - vkGetDeviceQueue(screen->dev, screen->gfx_queue, 0, &screen->queue); - if (screen->threaded && screen->max_queues > 1) - vkGetDeviceQueue(screen->dev, screen->gfx_queue, 1, &screen->thread_queue); + simple_mtx_init(&screen->queue_lock, mtx_plain); + VKSCR(GetDeviceQueue)(screen->dev, screen->gfx_queue, 0, &screen->queue); + if (screen->sparse_queue != screen->gfx_queue) + VKSCR(GetDeviceQueue)(screen->dev, screen->sparse_queue, 0, &screen->queue_sparse); else - screen->thread_queue = screen->queue; + screen->queue_sparse = screen->queue; } static void zink_flush_frontbuffer(struct pipe_screen *pscreen, - struct pipe_context *pcontext, + struct pipe_context *pctx, struct pipe_resource *pres, unsigned level, unsigned layer, void *winsys_drawable_handle, + unsigned nboxes, struct pipe_box *sub_box) { struct zink_screen *screen = zink_screen(pscreen); - struct sw_winsys *winsys = screen->winsys; struct zink_resource *res = zink_resource(pres); + struct zink_context *ctx = zink_context(pctx); + + /* if the surface is no longer a swapchain, this is a no-op */ + if (!zink_is_swapchain(res)) + return; + + ctx = zink_tc_context_unwrap(pctx, screen->threaded); + + if (!zink_kopper_acquired(res->obj->dt, res->obj->dt_idx)) { + /* swapbuffers to an undefined surface: acquire and present garbage */ + zink_kopper_acquire(ctx, res, UINT64_MAX); + ctx->needs_present = res; + /* set batch usage to submit acquire semaphore */ + zink_batch_resource_usage_set(&ctx->batch, res, true, false); + /* ensure the resource is set up to present garbage */ + ctx->base.flush_resource(&ctx->base, pres); + } - if 
(!winsys) - return; - void *map = winsys->displaytarget_map(winsys, res->dt, 0); - - if (map) { - struct pipe_transfer *transfer = NULL; - void *res_map = pipe_texture_map(pcontext, pres, level, layer, PIPE_MAP_READ, 0, 0, - u_minify(pres->width0, level), - u_minify(pres->height0, level), - &transfer); - if (res_map) { - util_copy_rect((ubyte*)map, pres->format, res->dt_stride, 0, 0, - transfer->box.width, transfer->box.height, - (const ubyte*)res_map, transfer->stride, 0, 0); - pipe_texture_unmap(pcontext, transfer); + /* handle any outstanding acquire submits (not just from above) */ + if (ctx->batch.swapchain || ctx->needs_present) { + ctx->batch.has_work = true; + pctx->flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); + if (ctx->last_batch_state && screen->threaded_submit) { + struct zink_batch_state *bs = ctx->last_batch_state; + util_queue_fence_wait(&bs->flush_completed); } - winsys->displaytarget_unmap(winsys, res->dt); } + res->use_damage = false; - winsys->displaytarget_display(winsys, res->dt, winsys_drawable_handle, sub_box); + /* always verify that this was acquired */ + assert(zink_kopper_acquired(res->obj->dt, res->obj->dt_idx)); + zink_kopper_present_queue(screen, res, nboxes, sub_box); } bool @@ -1229,13 +1870,15 @@ zink_is_depth_format_supported(struct zink_screen *screen, VkFormat format) static enum pipe_format emulate_x8(enum pipe_format format) { - /* convert missing X8 variants to A8 */ + /* convert missing Xn variants to An */ switch (format) { case PIPE_FORMAT_B8G8R8X8_UNORM: return PIPE_FORMAT_B8G8R8A8_UNORM; case PIPE_FORMAT_B8G8R8X8_SRGB: return PIPE_FORMAT_B8G8R8A8_SRGB; + case PIPE_FORMAT_R8G8B8X8_SRGB: + return PIPE_FORMAT_R8G8B8A8_SRGB; case PIPE_FORMAT_R8G8B8X8_SINT: return PIPE_FORMAT_R8G8B8A8_SINT; @@ -1244,6 +1887,20 @@ emulate_x8(enum pipe_format format) case PIPE_FORMAT_R8G8B8X8_UNORM: return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + return PIPE_FORMAT_R16G16B16A16_FLOAT; + case 
PIPE_FORMAT_R16G16B16X16_SINT: + return PIPE_FORMAT_R16G16B16A16_SINT; + case PIPE_FORMAT_R16G16B16X16_SNORM: + return PIPE_FORMAT_R16G16B16A16_SNORM; + case PIPE_FORMAT_R16G16B16X16_UNORM: + return PIPE_FORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R32G32B32X32_FLOAT: + return PIPE_FORMAT_R32G32B32A32_FLOAT; + case PIPE_FORMAT_R32G32B32X32_SINT: + return PIPE_FORMAT_R32G32B32A32_SINT; + default: return format; } @@ -1252,9 +1909,15 @@ emulate_x8(enum pipe_format format) VkFormat zink_get_format(struct zink_screen *screen, enum pipe_format format) { - VkFormat ret = zink_pipe_format_to_vk_format(emulate_x8(format)); + if (format == PIPE_FORMAT_A8_UNORM && !screen->driver_workarounds.missing_a8_unorm) + return VK_FORMAT_A8_UNORM_KHR; + else if (!screen->driver_workarounds.broken_l4a4 || format != PIPE_FORMAT_L4A4_UNORM) + format = zink_format_get_emulated_alpha(format); - if (format == PIPE_FORMAT_X32_S8X24_UINT) + VkFormat ret = vk_format_from_pipe_format(emulate_x8(format)); + + if (format == PIPE_FORMAT_X32_S8X24_UINT && + screen->have_D32_SFLOAT_S8_UINT) return VK_FORMAT_D32_SFLOAT_S8_UINT; if (format == PIPE_FORMAT_X24S8_UINT) @@ -1270,61 +1933,76 @@ zink_get_format(struct zink_screen *screen, enum pipe_format format) if (ret == VK_FORMAT_D24_UNORM_S8_UINT && !screen->have_D24_UNORM_S8_UINT) { - assert(zink_is_depth_format_supported(screen, - VK_FORMAT_D32_SFLOAT_S8_UINT)); + assert(screen->have_D32_SFLOAT_S8_UINT); return VK_FORMAT_D32_SFLOAT_S8_UINT; } - if ((ret == VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT && + if ((ret == VK_FORMAT_A4B4G4R4_UNORM_PACK16 && !screen->info.format_4444_feats.formatA4B4G4R4) || - (ret == VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT && + (ret == VK_FORMAT_A4R4G4B4_UNORM_PACK16 && !screen->info.format_4444_feats.formatA4R4G4B4)) return VK_FORMAT_UNDEFINED; + if (format == PIPE_FORMAT_R4A4_UNORM) + return VK_FORMAT_R4G4_UNORM_PACK8; + return ret; } void -zink_screen_init_descriptor_funcs(struct zink_screen *screen, bool fallback) -{ - if 
(screen->info.have_KHR_descriptor_update_template && - !fallback && - screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) { -#define LAZY(FUNC) screen->FUNC = zink_##FUNC##_lazy - LAZY(descriptor_program_init); - LAZY(descriptor_program_deinit); - LAZY(context_invalidate_descriptor_state); - LAZY(batch_descriptor_init); - LAZY(batch_descriptor_reset); - LAZY(batch_descriptor_deinit); - LAZY(descriptors_init); - LAZY(descriptors_deinit); - LAZY(descriptors_update); -#undef LAZY - } else { -#define DEFAULT(FUNC) screen->FUNC = zink_##FUNC - DEFAULT(descriptor_program_init); - DEFAULT(descriptor_program_deinit); - DEFAULT(context_invalidate_descriptor_state); - DEFAULT(batch_descriptor_init); - DEFAULT(batch_descriptor_reset); - DEFAULT(batch_descriptor_deinit); - DEFAULT(descriptors_init); - DEFAULT(descriptors_deinit); - DEFAULT(descriptors_update); -#undef DEFAULT +zink_convert_color(const struct zink_screen *screen, enum pipe_format format, + union pipe_color_union *dst, + const union pipe_color_union *src) +{ + const struct util_format_description *desc = util_format_description(format); + union pipe_color_union tmp = *src; + + for (unsigned i = 0; i < 4; i++) + zink_format_clamp_channel_color(desc, &tmp, src, i); + + if (zink_format_is_emulated_alpha(format) && + /* Don't swizzle colors if the driver supports real A8_UNORM */ + (format != PIPE_FORMAT_A8_UNORM || + screen->driver_workarounds.missing_a8_unorm)) { + if (util_format_is_alpha(format)) { + tmp.ui[0] = tmp.ui[3]; + tmp.ui[1] = 0; + tmp.ui[2] = 0; + tmp.ui[3] = 0; + } else if (util_format_is_luminance(format)) { + tmp.ui[1] = 0; + tmp.ui[2] = 0; + tmp.f[3] = 1.0; + } else if (util_format_is_luminance_alpha(format)) { + tmp.ui[1] = tmp.ui[3]; + tmp.ui[2] = 0; + tmp.f[3] = 1.0; + } else /* zink_format_is_red_alpha */ { + tmp.ui[1] = tmp.ui[3]; + tmp.ui[2] = 0; + tmp.ui[3] = 0; + } } + + memcpy(dst, &tmp, sizeof(union pipe_color_union)); } static bool check_have_device_time(struct zink_screen *screen) { 
uint32_t num_domains = 0; - VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, NULL); + VkTimeDomainEXT domains[8]; //current max is 4 + VkResult result = VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, NULL); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkGetPhysicalDeviceCalibrateableTimeDomainsEXT failed (%s)", vk_Result_to_str(result)); + } assert(num_domains > 0); + assert(num_domains < ARRAY_SIZE(domains)); - VkTimeDomainEXT *domains = malloc(sizeof(VkTimeDomainEXT) * num_domains); - VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, domains); + result = VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, domains); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkGetPhysicalDeviceCalibrateableTimeDomainsEXT failed (%s)", vk_Result_to_str(result)); + } /* VK_TIME_DOMAIN_DEVICE_EXT is used for the ctx->get_timestamp hook and is the only one we really need */ for (unsigned i = 0; i < num_domains; i++) { @@ -1333,10 +2011,29 @@ check_have_device_time(struct zink_screen *screen) } } - free(domains); return false; } +static void +zink_error(const char *msg) +{ +} + +static void +zink_warn(const char *msg) +{ +} + +static void +zink_info(const char *msg) +{ +} + +static void +zink_msg(const char *msg) +{ +} + static VKAPI_ATTR VkBool32 VKAPI_CALL zink_debug_util_callback( VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, @@ -1344,19 +2041,17 @@ zink_debug_util_callback( const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) { - const char *severity = "MSG"; - // Pick message prefix and color to use. 
// Only MacOS and Linux have been tested for color support if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { - severity = "ERR"; + zink_error(pCallbackData->pMessage); } else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { - severity = "WRN"; + zink_warn(pCallbackData->pMessage); } else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { - severity = "NFO"; - } + zink_info(pCallbackData->pMessage); + } else + zink_msg(pCallbackData->pMessage); - fprintf(stderr, "zink DEBUG: %s: '%s'\n", severity, pCallbackData->pMessage); return VK_FALSE; } @@ -1380,12 +2075,14 @@ create_debug(struct zink_screen *screen) VkDebugUtilsMessengerEXT vkDebugUtilsCallbackEXT = VK_NULL_HANDLE; - VKSCR(CreateDebugUtilsMessengerEXT)( - screen->instance, - &vkDebugUtilsMessengerCreateInfoEXT, - NULL, - &vkDebugUtilsCallbackEXT - ); + VkResult result = VKSCR(CreateDebugUtilsMessengerEXT)( + screen->instance, + &vkDebugUtilsMessengerCreateInfoEXT, + NULL, + &vkDebugUtilsCallbackEXT); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateDebugUtilsMessengerEXT failed (%s)", vk_Result_to_str(result)); + } screen->debugUtilsCallbackHandle = vkDebugUtilsCallbackEXT; @@ -1399,9 +2096,9 @@ zink_internal_setup_moltenvk(struct zink_screen *screen) if (!screen->instance_info.have_MVK_moltenvk) return true; - GET_PROC_ADDR_INSTANCE_LOCAL(screen->instance, GetMoltenVKConfigurationMVK); - GET_PROC_ADDR_INSTANCE_LOCAL(screen->instance, SetMoltenVKConfigurationMVK); - GET_PROC_ADDR_INSTANCE_LOCAL(screen->instance, GetVersionStringsMVK); + GET_PROC_ADDR_INSTANCE_LOCAL(screen, screen->instance, GetMoltenVKConfigurationMVK); + GET_PROC_ADDR_INSTANCE_LOCAL(screen, screen->instance, SetMoltenVKConfigurationMVK); + GET_PROC_ADDR_INSTANCE_LOCAL(screen, screen->instance, GetVersionStringsMVK); if (vk_GetVersionStringsMVK) { char molten_version[64] = {0}; @@ -1430,26 +2127,97 @@ zink_internal_setup_moltenvk(struct zink_screen *screen) } static void 
-check_device_needs_mesa_wsi(struct zink_screen *screen) +check_vertex_formats(struct zink_screen *screen) { - if ( - /* Raspberry Pi 4 V3DV driver */ - (screen->info.props.vendorID == 0x14E4 && - screen->info.props.deviceID == 42) || - /* RADV */ - screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV_KHR - ) { - screen->needs_mesa_wsi = true; - } else if (screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR) - screen->needs_mesa_flush_wsi = true; - + /* from vbuf */ + enum pipe_format format_list[] = { + /* not supported by vk + PIPE_FORMAT_R32_FIXED, + PIPE_FORMAT_R32G32_FIXED, + PIPE_FORMAT_R32G32B32_FIXED, + PIPE_FORMAT_R32G32B32A32_FIXED, + */ + PIPE_FORMAT_R16_FLOAT, + PIPE_FORMAT_R16G16_FLOAT, + PIPE_FORMAT_R16G16B16_FLOAT, + PIPE_FORMAT_R16G16B16A16_FLOAT, + /* not supported by vk + PIPE_FORMAT_R64_FLOAT, + PIPE_FORMAT_R64G64_FLOAT, + PIPE_FORMAT_R64G64B64_FLOAT, + PIPE_FORMAT_R64G64B64A64_FLOAT, + PIPE_FORMAT_R32_UNORM, + PIPE_FORMAT_R32G32_UNORM, + PIPE_FORMAT_R32G32B32_UNORM, + PIPE_FORMAT_R32G32B32A32_UNORM, + PIPE_FORMAT_R32_SNORM, + PIPE_FORMAT_R32G32_SNORM, + PIPE_FORMAT_R32G32B32_SNORM, + PIPE_FORMAT_R32G32B32A32_SNORM, + PIPE_FORMAT_R32_USCALED, + PIPE_FORMAT_R32G32_USCALED, + PIPE_FORMAT_R32G32B32_USCALED, + PIPE_FORMAT_R32G32B32A32_USCALED, + PIPE_FORMAT_R32_SSCALED, + PIPE_FORMAT_R32G32_SSCALED, + PIPE_FORMAT_R32G32B32_SSCALED, + PIPE_FORMAT_R32G32B32A32_SSCALED, + */ + PIPE_FORMAT_R16_UNORM, + PIPE_FORMAT_R16G16_UNORM, + PIPE_FORMAT_R16G16B16_UNORM, + PIPE_FORMAT_R16G16B16A16_UNORM, + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16B16_SNORM, + PIPE_FORMAT_R16G16B16_SINT, + PIPE_FORMAT_R16G16B16_UINT, + PIPE_FORMAT_R16G16B16A16_SNORM, + PIPE_FORMAT_R16_USCALED, + PIPE_FORMAT_R16G16_USCALED, + PIPE_FORMAT_R16G16B16_USCALED, + PIPE_FORMAT_R16G16B16A16_USCALED, + PIPE_FORMAT_R16_SSCALED, + PIPE_FORMAT_R16G16_SSCALED, + PIPE_FORMAT_R16G16B16_SSCALED, + PIPE_FORMAT_R16G16B16A16_SSCALED, + 
PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_R8G8B8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM, + PIPE_FORMAT_R8_USCALED, + PIPE_FORMAT_R8G8_USCALED, + PIPE_FORMAT_R8G8B8_USCALED, + PIPE_FORMAT_R8G8B8A8_USCALED, + PIPE_FORMAT_R8_SSCALED, + PIPE_FORMAT_R8G8_SSCALED, + PIPE_FORMAT_R8G8B8_SSCALED, + PIPE_FORMAT_R8G8B8A8_SSCALED, + }; + for (unsigned i = 0; i < ARRAY_SIZE(format_list); i++) { + if (zink_is_format_supported(&screen->base, format_list[i], PIPE_BUFFER, 0, 0, PIPE_BIND_VERTEX_BUFFER)) + continue; + if (util_format_get_nr_components(format_list[i]) == 1) + continue; + enum pipe_format decomposed = zink_decompose_vertex_format(format_list[i]); + if (zink_is_format_supported(&screen->base, decomposed, PIPE_BUFFER, 0, 0, PIPE_BIND_VERTEX_BUFFER)) { + screen->need_decompose_attrs = true; + mesa_logw("zink: this application would be much faster if %s supported vertex format %s", screen->info.props.deviceName, util_format_name(format_list[i])); + } + } } static void populate_format_props(struct zink_screen *screen) { for (unsigned i = 0; i < PIPE_FORMAT_COUNT; i++) { - VkFormat format = zink_get_format(screen, i); + VkFormat format; +retry: + format = zink_get_format(screen, i); if (!format) continue; if (VKSCR(GetPhysicalDeviceFormatProperties2)) { @@ -1465,8 +2233,29 @@ populate_format_props(struct zink_screen *screen) mod_props.pDrmFormatModifierProperties = mods; props.pNext = &mod_props; } + VkFormatProperties3 props3 = {0}; + if (screen->info.have_KHR_format_feature_flags2 || screen->info.have_vulkan13) { + props3.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3; + props3.pNext = props.pNext; + props.pNext = &props3; + } + VKSCR(GetPhysicalDeviceFormatProperties2)(screen->pdev, format, &props); - screen->format_props[i] = props.formatProperties; + + if (screen->info.have_KHR_format_feature_flags2 || screen->info.have_vulkan13) { + 
screen->format_props[i].linearTilingFeatures = props3.linearTilingFeatures; + screen->format_props[i].optimalTilingFeatures = props3.optimalTilingFeatures; + screen->format_props[i].bufferFeatures = props3.bufferFeatures; + + if (props3.linearTilingFeatures & VK_FORMAT_FEATURE_2_LINEAR_COLOR_ATTACHMENT_BIT_NV) + screen->format_props[i].linearTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } else { + // MoltenVk is 1.2 API + screen->format_props[i].linearTilingFeatures = props.formatProperties.linearTilingFeatures; + screen->format_props[i].optimalTilingFeatures = props.formatProperties.optimalTilingFeatures; + screen->format_props[i].bufferFeatures = props.formatProperties.bufferFeatures; + } + if (screen->info.have_EXT_image_drm_format_modifier && mod_props.drmFormatModifierCount) { screen->modifier_props[i].drmFormatModifierCount = mod_props.drmFormatModifierCount; screen->modifier_props[i].pDrmFormatModifierProperties = ralloc_array(screen, VkDrmFormatModifierPropertiesEXT, mod_props.drmFormatModifierCount); @@ -1475,9 +2264,80 @@ populate_format_props(struct zink_screen *screen) screen->modifier_props[i].pDrmFormatModifierProperties[j] = mod_props.pDrmFormatModifierProperties[j]; } } - } else - VKSCR(GetPhysicalDeviceFormatProperties)(screen->pdev, format, &screen->format_props[i]); + } else { + VkFormatProperties props = {0}; + VKSCR(GetPhysicalDeviceFormatProperties)(screen->pdev, format, &props); + screen->format_props[i].linearTilingFeatures = props.linearTilingFeatures; + screen->format_props[i].optimalTilingFeatures = props.optimalTilingFeatures; + screen->format_props[i].bufferFeatures = props.bufferFeatures; + } + if (i == PIPE_FORMAT_A8_UNORM && !screen->driver_workarounds.missing_a8_unorm) { + if (!screen->format_props[i].linearTilingFeatures && + !screen->format_props[i].optimalTilingFeatures && + !screen->format_props[i].bufferFeatures) { + screen->driver_workarounds.missing_a8_unorm = true; + goto retry; + } + } + if 
(zink_format_is_emulated_alpha(i)) { + VkFormatFeatureFlags blocked = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + screen->format_props[i].linearTilingFeatures &= ~blocked; + screen->format_props[i].optimalTilingFeatures &= ~blocked; + screen->format_props[i].bufferFeatures = 0; + } + } + check_vertex_formats(screen); + VkImageFormatProperties image_props; + VkResult ret = VKSCR(GetPhysicalDeviceImageFormatProperties)(screen->pdev, VK_FORMAT_D32_SFLOAT, + VK_IMAGE_TYPE_1D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + 0, &image_props); + if (ret != VK_SUCCESS && ret != VK_ERROR_FORMAT_NOT_SUPPORTED) { + mesa_loge("ZINK: vkGetPhysicalDeviceImageFormatProperties failed (%s)", vk_Result_to_str(ret)); + } + screen->need_2D_zs = ret != VK_SUCCESS; + + if (screen->info.feats.features.sparseResidencyImage2D) + screen->need_2D_sparse = !screen->base.get_sparse_texture_virtual_page_size(&screen->base, PIPE_TEXTURE_1D, false, PIPE_FORMAT_R32_FLOAT, 0, 16, NULL, NULL, NULL); +} + +static void +setup_renderdoc(struct zink_screen *screen) +{ +#ifdef HAVE_RENDERDOC_APP_H + const char *capture_id = debug_get_option("ZINK_RENDERDOC", NULL); + if (!capture_id) + return; + void *renderdoc = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD); + /* not loaded */ + if (!renderdoc) + return; + + pRENDERDOC_GetAPI get_api = dlsym(renderdoc, "RENDERDOC_GetAPI"); + if (!get_api) + return; + + /* need synchronous dispatch for renderdoc coherency */ + screen->threaded_submit = false; + get_api(eRENDERDOC_API_Version_1_0_0, (void*)&screen->renderdoc_api); + screen->renderdoc_api->SetActiveWindow(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL); + + int count = sscanf(capture_id, "%u:%u", &screen->renderdoc_capture_start, &screen->renderdoc_capture_end); + if (count != 2) { + count = sscanf(capture_id, "%u", &screen->renderdoc_capture_start); + if (!count) { + if (!strcmp(capture_id, 
"all")) { + screen->renderdoc_capture_all = true; + } else { + printf("`ZINK_RENDERDOC` usage: ZINK_RENDERDOC=all|frame_no[:end_frame_no]\n"); + abort(); + } + } + screen->renderdoc_capture_end = screen->renderdoc_capture_start; } + p_atomic_set(&screen->renderdoc_frame, 1); +#endif } bool @@ -1485,135 +2345,185 @@ zink_screen_init_semaphore(struct zink_screen *screen) { VkSemaphoreCreateInfo sci = {0}; VkSemaphoreTypeCreateInfo tci = {0}; - VkSemaphore sem; sci.pNext = &tci; sci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; tci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO; tci.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; - if (VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem) == VK_SUCCESS) { - /* semaphore signal values can never decrease, - * so we need a new semaphore anytime we overflow - */ - if (screen->prev_sem) - VKSCR(DestroySemaphore)(screen->dev, screen->prev_sem, NULL); - screen->prev_sem = screen->sem; - screen->sem = sem; - return true; - } - screen->info.have_KHR_timeline_semaphore = false; - return false; + return VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &screen->sem) == VK_SUCCESS; } -bool -zink_screen_timeline_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout) +VkSemaphore +zink_create_exportable_semaphore(struct zink_screen *screen) { - VkSemaphoreWaitInfo wi = {0}; + VkExportSemaphoreCreateInfo eci = { + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + NULL, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT + }; + VkSemaphoreCreateInfo sci = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + &eci, + 0 + }; - if (zink_screen_check_last_finished(screen, batch_id)) - return true; + VkSemaphore sem = VK_NULL_HANDLE; + if (util_dynarray_contains(&screen->fd_semaphores, VkSemaphore)) { + simple_mtx_lock(&screen->semaphores_lock); + if (util_dynarray_contains(&screen->fd_semaphores, VkSemaphore)) + sem = util_dynarray_pop(&screen->fd_semaphores, VkSemaphore); + simple_mtx_unlock(&screen->semaphores_lock); + } + if 
(sem) + return sem; + VkResult ret = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem); + return ret == VK_SUCCESS ? sem : VK_NULL_HANDLE; +} - wi.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; - wi.semaphoreCount = 1; - /* handle batch_id overflow */ - wi.pSemaphores = batch_id > screen->curr_batch ? &screen->prev_sem : &screen->sem; - uint64_t batch_id64 = batch_id; - wi.pValues = &batch_id64; - bool success = false; - if (screen->device_lost) - return true; - VkResult ret = VKSCR(WaitSemaphores)(screen->dev, &wi, timeout); - success = zink_screen_handle_vkresult(screen, ret); +VkSemaphore +zink_screen_export_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res) +{ + VkSemaphore sem = VK_NULL_HANDLE; +#if defined(HAVE_LIBDRM) && (DETECT_OS_LINUX || DETECT_OS_BSD) + struct dma_buf_export_sync_file export = { + .flags = DMA_BUF_SYNC_RW, + .fd = -1, + }; - if (success) - zink_screen_update_last_finished(screen, batch_id); + int fd = -1; + if (res->obj->is_aux) { + fd = os_dupfd_cloexec(res->obj->handle); + } else { + VkMemoryGetFdInfoKHR fd_info = {0}; + fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info.memory = zink_bo_get_mem(res->obj->bo); + fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd); + } - return success; -} + if (unlikely(fd < 0)) { + mesa_loge("MESA: Unable to get a valid memory fd"); + return VK_NULL_HANDLE; + } -struct noop_submit_info { - struct zink_screen *screen; - VkFence fence; -}; + int ret = drmIoctl(fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &export); + if (ret) { + if (errno == ENOTTY || errno == EBADF || errno == ENOSYS) { + assert(!"how did this fail?"); + return VK_NULL_HANDLE; + } else { + mesa_loge("MESA: failed to import sync file '%s'", strerror(errno)); + return VK_NULL_HANDLE; + } + } -static void -noop_submit(void *data, void *gdata, int thread_index) -{ - struct noop_submit_info *n = data; - VkSubmitInfo si = {0}; - si.sType = 
VK_STRUCTURE_TYPE_SUBMIT_INFO; - if (n->VKSCR(QueueSubmit)(n->screen->threaded ? n->screen->thread_queue : n->screen->queue, - 1, &si, n->fence) != VK_SUCCESS) { - debug_printf("ZINK: vkQueueSubmit() failed\n"); - n->screen->device_lost = true; + sem = zink_create_exportable_semaphore(screen); + + const VkImportSemaphoreFdInfoKHR sdi = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .semaphore = sem, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + .fd = export.fd, + }; + bool success = VKSCR(ImportSemaphoreFdKHR)(screen->dev, &sdi) == VK_SUCCESS; + close(fd); + if (!success) { + VKSCR(DestroySemaphore)(screen->dev, sem, NULL); + return VK_NULL_HANDLE; } +#endif + return sem; } bool -zink_screen_batch_id_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout) +zink_screen_import_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res, VkSemaphore sem) { - if (zink_screen_check_last_finished(screen, batch_id)) - return true; - - if (screen->info.have_KHR_timeline_semaphore) - return zink_screen_timeline_wait(screen, batch_id, timeout); - - if (!timeout) - return false; - - uint32_t new_id = 0; - while (!new_id) - new_id = p_atomic_inc_return(&screen->curr_batch); - VkResult ret; - struct noop_submit_info n; - uint64_t abs_timeout = os_time_get_absolute_timeout(timeout); - uint64_t remaining = PIPE_TIMEOUT_INFINITE; - VkFenceCreateInfo fci = {0}; - struct util_queue_fence fence; - util_queue_fence_init(&fence); - fci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - - if (VKSCR(CreateFence)(screen->dev, &fci, NULL, &n.fence) != VK_SUCCESS) +#if defined(HAVE_LIBDRM) && (DETECT_OS_LINUX || DETECT_OS_BSD) + const VkSemaphoreGetFdInfoKHR get_fd_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = sem, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + int sync_file_fd = -1; + VkResult result = 
VKSCR(GetSemaphoreFdKHR)(screen->dev, &get_fd_info, &sync_file_fd); + if (result != VK_SUCCESS) { return false; + } - n.screen = screen; - if (screen->threaded) { - /* must use thread dispatch for sanity */ - util_queue_add_job(&screen->flush_queue, &n, &fence, noop_submit, NULL, 0); - util_queue_fence_wait(&fence); + bool ret = false; + int fd; + if (res->obj->is_aux) { + fd = os_dupfd_cloexec(res->obj->handle); } else { - noop_submit(&n, NULL, 0); + VkMemoryGetFdInfoKHR fd_info = {0}; + fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info.memory = zink_bo_get_mem(res->obj->bo); + fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + if (VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd) != VK_SUCCESS) + fd = -1; } - if (timeout != PIPE_TIMEOUT_INFINITE) { - int64_t time_ns = os_time_get_nano(); - remaining = abs_timeout > time_ns ? abs_timeout - time_ns : 0; + if (fd != -1) { + struct dma_buf_import_sync_file import = { + .flags = DMA_BUF_SYNC_RW, + .fd = sync_file_fd, + }; + int ioctl_ret = drmIoctl(fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE, &import); + if (ioctl_ret) { + if (errno == ENOTTY || errno == EBADF || errno == ENOSYS) { + assert(!"how did this fail?"); + } else { + ret = true; + } + } + close(fd); } + close(sync_file_fd); + return ret; +#else + return true; +#endif +} - if (remaining) - ret = VKSCR(WaitForFences)(screen->dev, 1, &n.fence, VK_TRUE, remaining); - else - ret = VKSCR(GetFenceStatus)(screen->dev, n.fence); - VKSCR(DestroyFence)(screen->dev, n.fence, NULL); - bool success = zink_screen_handle_vkresult(screen, ret); +bool +zink_screen_timeline_wait(struct zink_screen *screen, uint64_t batch_id, uint64_t timeout) +{ + VkSemaphoreWaitInfo wi = {0}; + + if (zink_screen_check_last_finished(screen, batch_id)) + return true; + + wi.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; + wi.semaphoreCount = 1; + wi.pSemaphores = &screen->sem; + wi.pValues = &batch_id; + bool success = false; + if (screen->device_lost) + return 
true; + VkResult ret = VKSCR(WaitSemaphores)(screen->dev, &wi, timeout); + success = zink_screen_handle_vkresult(screen, ret); if (success) - zink_screen_update_last_finished(screen, new_id); + zink_screen_update_last_finished(screen, batch_id); return success; } static uint32_t -zink_get_loader_version(void) +zink_get_loader_version(struct zink_screen *screen) { uint32_t loader_version = VK_API_VERSION_1_0; // Get the Loader version - GET_PROC_ADDR_INSTANCE_LOCAL(NULL, EnumerateInstanceVersion); + GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, EnumerateInstanceVersion); if (vk_EnumerateInstanceVersion) { uint32_t loader_version_temp = VK_API_VERSION_1_0; - if (VK_SUCCESS == (*vk_EnumerateInstanceVersion)(&loader_version_temp)) { + VkResult result = (*vk_EnumerateInstanceVersion)(&loader_version_temp); + if (VK_SUCCESS == result) { loader_version = loader_version_temp; + } else { + mesa_loge("ZINK: vkEnumerateInstanceVersion failed (%s)", vk_Result_to_str(result)); } } @@ -1638,11 +2548,11 @@ zink_query_memory_info(struct pipe_screen *pscreen, struct pipe_memory_info *inf if (mem.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { /* VRAM */ info->total_device_memory += mem.memoryProperties.memoryHeaps[i].size / 1024; - info->avail_device_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024; + info->avail_device_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024; } else { /* GART */ info->total_staging_memory += mem.memoryProperties.memoryHeaps[i].size / 1024; - info->avail_staging_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024; + info->avail_staging_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024; } } /* evictions not yet supported in vulkan */ @@ -1668,8 +2578,12 @@ zink_query_dmabuf_modifiers(struct pipe_screen *pscreen, enum pipe_format format { struct zink_screen *screen = zink_screen(pscreen); *count = 
screen->modifier_props[format].drmFormatModifierCount; - for (int i = 0; i < MIN2(max, *count); i++) + for (int i = 0; i < MIN2(max, *count); i++) { + if (external_only) + external_only[i] = 0; + modifiers[i] = screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier; + } } static bool @@ -1689,7 +2603,115 @@ zink_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier, for (unsigned i = 0; i < screen->modifier_props[format].drmFormatModifierCount; i++) if (screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier == modifier) return screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifierPlaneCount; - return 0; + return util_format_get_num_planes(format); +} + +static int +zink_get_sparse_texture_virtual_page_size(struct pipe_screen *pscreen, + enum pipe_texture_target target, + bool multi_sample, + enum pipe_format pformat, + unsigned offset, unsigned size, + int *x, int *y, int *z) +{ + struct zink_screen *screen = zink_screen(pscreen); + static const int page_size_2d[][3] = { + { 256, 256, 1 }, /* 8bpp */ + { 256, 128, 1 }, /* 16bpp */ + { 128, 128, 1 }, /* 32bpp */ + { 128, 64, 1 }, /* 64bpp */ + { 64, 64, 1 }, /* 128bpp */ + }; + static const int page_size_3d[][3] = { + { 64, 32, 32 }, /* 8bpp */ + { 32, 32, 32 }, /* 16bpp */ + { 32, 32, 16 }, /* 32bpp */ + { 32, 16, 16 }, /* 64bpp */ + { 16, 16, 16 }, /* 128bpp */ + }; + /* Only support one type of page size. */ + if (offset != 0) + return 0; + + /* reject multisample if 2x isn't supported; assume none are */ + if (multi_sample && !screen->info.feats.features.sparseResidency2Samples) + return 0; + + VkFormat format = zink_get_format(screen, pformat); + bool is_zs = util_format_is_depth_or_stencil(pformat); + VkImageType type; + switch (target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + type = (screen->need_2D_sparse || (screen->need_2D_zs && is_zs)) ? 
VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D; + break; + + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE_ARRAY: + type = VK_IMAGE_TYPE_2D; + break; + + case PIPE_TEXTURE_3D: + type = VK_IMAGE_TYPE_3D; + break; + + case PIPE_BUFFER: + goto hack_it_up; + + default: + return 0; + } + + VkImageUsageFlags use_flags = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT; + use_flags |= is_zs ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + VkImageUsageFlags flags = screen->format_props[pformat].optimalTilingFeatures & use_flags; + VkSparseImageFormatProperties props[4]; //planar? + unsigned prop_count = ARRAY_SIZE(props); + VKSCR(GetPhysicalDeviceSparseImageFormatProperties)(screen->pdev, format, type, + multi_sample ? VK_SAMPLE_COUNT_2_BIT : VK_SAMPLE_COUNT_1_BIT, + flags, + VK_IMAGE_TILING_OPTIMAL, + &prop_count, props); + if (!prop_count) { + /* format may not support storage; try without */ + flags &= ~VK_IMAGE_USAGE_STORAGE_BIT; + prop_count = ARRAY_SIZE(props); + VKSCR(GetPhysicalDeviceSparseImageFormatProperties)(screen->pdev, format, type, + multi_sample ? VK_SAMPLE_COUNT_2_BIT : VK_SAMPLE_COUNT_1_BIT, + flags, + VK_IMAGE_TILING_OPTIMAL, + &prop_count, props); + if (!prop_count) + return 0; + } + + if (size) { + if (x) + *x = props[0].imageGranularity.width; + if (y) + *y = props[0].imageGranularity.height; + if (z) + *z = props[0].imageGranularity.depth; + } + + return 1; +hack_it_up: + { + const int (*page_sizes)[3] = target == PIPE_TEXTURE_3D ? 
page_size_3d : page_size_2d; + int blk_size = util_format_get_blocksize(pformat); + + if (size) { + unsigned index = util_logbase2(blk_size); + if (x) *x = page_sizes[index][0]; + if (y) *y = page_sizes[index][1]; + if (z) *z = page_sizes[index][2]; + } + } + return 1; } static VkDevice @@ -1697,17 +2719,27 @@ zink_create_logical_device(struct zink_screen *screen) { VkDevice dev = VK_NULL_HANDLE; - VkDeviceQueueCreateInfo qci = {0}; + VkDeviceQueueCreateInfo qci[2] = {0}; + uint32_t queues[3] = { + screen->gfx_queue, + screen->sparse_queue, + }; float dummy = 0.0f; - qci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - qci.queueFamilyIndex = screen->gfx_queue; - qci.queueCount = screen->threaded && screen->max_queues > 1 ? 2 : 1; - qci.pQueuePriorities = &dummy; + for (unsigned i = 0; i < ARRAY_SIZE(qci); i++) { + qci[i].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qci[i].queueFamilyIndex = queues[i]; + qci[i].queueCount = 1; + qci[i].pQueuePriorities = &dummy; + } + + unsigned num_queues = 1; + if (screen->sparse_queue != screen->gfx_queue) + num_queues++; VkDeviceCreateInfo dci = {0}; dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - dci.queueCreateInfoCount = 1; - dci.pQueueCreateInfos = &qci; + dci.queueCreateInfoCount = num_queues; + dci.pQueueCreateInfos = qci; /* extensions don't have bool members in pEnabledFeatures. 
* this requires us to pass the whole VkPhysicalDeviceFeatures2 struct */ @@ -1720,26 +2752,27 @@ zink_create_logical_device(struct zink_screen *screen) dci.ppEnabledExtensionNames = screen->info.extensions; dci.enabledExtensionCount = screen->info.num_extensions; - vkCreateDevice(screen->pdev, &dci, NULL, &dev); + VkResult result = VKSCR(CreateDevice)(screen->pdev, &dci, NULL, &dev); + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkCreateDevice failed (%s)", vk_Result_to_str(result)); + return dev; } static void -pre_hash_descriptor_states(struct zink_screen *screen) -{ - VkImageViewCreateInfo null_info = {.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; - VkBufferViewCreateInfo null_binfo = {.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO}; - screen->null_descriptor_hashes.image_view = _mesa_hash_data(&null_info, sizeof(VkImageViewCreateInfo)); - screen->null_descriptor_hashes.buffer_view = _mesa_hash_data(&null_binfo, sizeof(VkBufferViewCreateInfo)); -} - -static void check_base_requirements(struct zink_screen *screen) { + if (zink_debug & ZINK_DEBUG_QUIET) + return; + if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_V3DV) { + /* v3dv doesn't support straddling i/o, but zink doesn't do that so this is effectively supported: + * don't spam errors in this case + */ + screen->info.feats12.scalarBlockLayout = true; + screen->info.have_EXT_scalar_block_layout = true; + } if (!screen->info.feats.features.logicOp || !screen->info.feats.features.fillModeNonSolid || - !screen->info.feats.features.wideLines || - !screen->info.feats.features.largePoints || !screen->info.feats.features.shaderClipDistance || !(screen->info.feats12.scalarBlockLayout || screen->info.have_EXT_scalar_block_layout) || @@ -1754,16 +2787,18 @@ check_base_requirements(struct zink_screen *screen) fprintf(stderr, "%s ", #X) CHECK_OR_PRINT(feats.features.logicOp); CHECK_OR_PRINT(feats.features.fillModeNonSolid); - CHECK_OR_PRINT(feats.features.wideLines); - 
CHECK_OR_PRINT(feats.features.largePoints); CHECK_OR_PRINT(feats.features.shaderClipDistance); if (!screen->info.feats12.scalarBlockLayout && !screen->info.have_EXT_scalar_block_layout) - printf("scalarBlockLayout OR EXT_scalar_block_layout "); + fprintf(stderr, "scalarBlockLayout OR EXT_scalar_block_layout "); CHECK_OR_PRINT(have_KHR_maintenance1); CHECK_OR_PRINT(have_EXT_custom_border_color); CHECK_OR_PRINT(have_EXT_line_rasterization); fprintf(stderr, "\n"); } + if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_V3DV) { + screen->info.feats12.scalarBlockLayout = false; + screen->info.have_EXT_scalar_block_layout = false; + } } static void @@ -1777,43 +2812,544 @@ zink_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count, *height = screen->maxSampleLocationGridSize[idx].height; } +static void +init_driver_workarounds(struct zink_screen *screen) +{ + /* enable implicit sync for all non-mesa drivers */ + screen->driver_workarounds.implicit_sync = true; + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_RADV: + case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA: + case VK_DRIVER_ID_MESA_LLVMPIPE: + case VK_DRIVER_ID_MESA_TURNIP: + case VK_DRIVER_ID_MESA_V3DV: + case VK_DRIVER_ID_MESA_PANVK: + case VK_DRIVER_ID_MESA_VENUS: + screen->driver_workarounds.implicit_sync = false; + break; + default: + break; + } + /* TODO: maybe compile multiple variants for different set counts for compact mode? 
*/ + if (screen->info.props.limits.maxBoundDescriptorSets < ZINK_DESCRIPTOR_ALL_TYPES || + zink_debug & (ZINK_DEBUG_COMPACT | ZINK_DEBUG_NOSHOBJ)) + screen->info.have_EXT_shader_object = false; + /* EDS2 is only used with EDS1 */ + if (!screen->info.have_EXT_extended_dynamic_state) { + screen->info.have_EXT_extended_dynamic_state2 = false; + /* CWE usage needs EDS1 */ + screen->info.have_EXT_color_write_enable = false; + } + if (screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY) + /* this completely breaks xfb somehow */ + screen->info.have_EXT_extended_dynamic_state2 = false; + /* EDS3 is only used with EDS2 */ + if (!screen->info.have_EXT_extended_dynamic_state2) + screen->info.have_EXT_extended_dynamic_state3 = false; + /* EXT_vertex_input_dynamic_state is only used with EDS2 and above */ + if (!screen->info.have_EXT_extended_dynamic_state2) + screen->info.have_EXT_vertex_input_dynamic_state = false; + if (screen->info.line_rast_feats.stippledRectangularLines && + screen->info.line_rast_feats.stippledBresenhamLines && + screen->info.line_rast_feats.stippledSmoothLines && + !screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable) + screen->info.have_EXT_extended_dynamic_state3 = false; + if (!screen->info.dynamic_state3_feats.extendedDynamicState3PolygonMode || + !screen->info.dynamic_state3_feats.extendedDynamicState3DepthClampEnable || + !screen->info.dynamic_state3_feats.extendedDynamicState3DepthClipNegativeOneToOne || + !screen->info.dynamic_state3_feats.extendedDynamicState3DepthClipEnable || + !screen->info.dynamic_state3_feats.extendedDynamicState3ProvokingVertexMode || + !screen->info.dynamic_state3_feats.extendedDynamicState3LineRasterizationMode) + screen->info.have_EXT_extended_dynamic_state3 = false; + else if (screen->info.dynamic_state3_feats.extendedDynamicState3SampleMask && + screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToCoverageEnable && + (!screen->info.feats.features.alphaToOne || 
screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToOneEnable) && + screen->info.dynamic_state3_feats.extendedDynamicState3ColorBlendEnable && + screen->info.dynamic_state3_feats.extendedDynamicState3RasterizationSamples && + screen->info.dynamic_state3_feats.extendedDynamicState3ColorWriteMask && + screen->info.dynamic_state3_feats.extendedDynamicState3ColorBlendEquation && + screen->info.dynamic_state3_feats.extendedDynamicState3LogicOpEnable && + screen->info.dynamic_state2_feats.extendedDynamicState2LogicOp) + screen->have_full_ds3 = true; + if (screen->info.have_EXT_graphics_pipeline_library) + screen->info.have_EXT_graphics_pipeline_library = screen->info.have_EXT_extended_dynamic_state && + screen->info.have_EXT_extended_dynamic_state2 && + ((zink_debug & ZINK_DEBUG_GPL) || + screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) && + screen->info.have_EXT_extended_dynamic_state3 && + screen->info.have_KHR_dynamic_rendering && + screen->info.have_EXT_non_seamless_cube_map && + (!(zink_debug & ZINK_DEBUG_GPL) || + screen->info.gpl_props.graphicsPipelineLibraryFastLinking || + screen->is_cpu); + screen->driver_workarounds.broken_l4a4 = screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_TURNIP) { + /* performance */ + screen->info.border_color_feats.customBorderColorWithoutFormat = VK_FALSE; + } + if (!screen->info.have_KHR_maintenance5) + screen->driver_workarounds.missing_a8_unorm = true; + + if ((!screen->info.have_EXT_line_rasterization || + !screen->info.line_rast_feats.stippledBresenhamLines) && + screen->info.feats.features.geometryShader && + screen->info.feats.features.sampleRateShading) { + /* we're using stippledBresenhamLines as a proxy for all of these, to + * avoid accidentally changing behavior on VK-drivers where we don't + * want to add emulation. 
+ */ + screen->driver_workarounds.no_linestipple = true; + } + + if (screen->info.driver_props.driverID == + VK_DRIVER_ID_IMAGINATION_PROPRIETARY) { + assert(screen->info.feats.features.geometryShader); + screen->driver_workarounds.no_linesmooth = true; + } + + /* This is a workarround for the lack of + * gl_PointSize + glPolygonMode(..., GL_LINE), in the imagination + * proprietary driver. + */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_IMAGINATION_PROPRIETARY: + screen->driver_workarounds.no_hw_gl_point = true; + break; + default: + screen->driver_workarounds.no_hw_gl_point = false; + break; + } + + if (screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE || + screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY || + screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY || + screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV) + screen->driver_workarounds.z24_unscaled_bias = 1<<23; + else + screen->driver_workarounds.z24_unscaled_bias = 1<<24; + if (screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) + screen->driver_workarounds.z16_unscaled_bias = 1<<15; + else + screen->driver_workarounds.z16_unscaled_bias = 1<<16; + /* these drivers don't use VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, so it can always be set */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_RADV: + case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA: + case VK_DRIVER_ID_MESA_LLVMPIPE: + case VK_DRIVER_ID_MESA_VENUS: + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS: + case VK_DRIVER_ID_IMAGINATION_PROPRIETARY: + screen->driver_workarounds.always_feedback_loop = screen->info.have_EXT_attachment_feedback_loop_layout; + break; + default: + break; + } + /* these drivers don't use VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, so it can always be set */ + switch (screen->info.driver_props.driverID) { + case 
VK_DRIVER_ID_MESA_LLVMPIPE: + case VK_DRIVER_ID_MESA_VENUS: + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + case VK_DRIVER_ID_IMAGINATION_PROPRIETARY: + screen->driver_workarounds.always_feedback_loop_zs = screen->info.have_EXT_attachment_feedback_loop_layout; + break; + default: + break; + } + /* use same mechanics if dynamic state is supported */ + screen->driver_workarounds.always_feedback_loop |= screen->info.have_EXT_attachment_feedback_loop_dynamic_state; + screen->driver_workarounds.always_feedback_loop_zs |= screen->info.have_EXT_attachment_feedback_loop_dynamic_state; + + /* these drivers cannot handle OOB gl_Layer values, and therefore need clamping in shader. + * TODO: Vulkan extension that details whether vulkan driver can handle OOB layer values + */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_IMAGINATION_PROPRIETARY: + screen->driver_workarounds.needs_sanitised_layer = true; + break; + default: + screen->driver_workarounds.needs_sanitised_layer = false; + break; + } + /* these drivers will produce undefined results when using swizzle 1 with combined z/s textures + * TODO: use a future device property when available + */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_IMAGINATION_PROPRIETARY: + case VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA: + screen->driver_workarounds.needs_zs_shader_swizzle = true; + break; + default: + screen->driver_workarounds.needs_zs_shader_swizzle = false; + break; + } + + /* When robust contexts are advertised but robustImageAccess2 is not available */ + screen->driver_workarounds.lower_robustImageAccess2 = + !screen->info.rb2_feats.robustImageAccess2 && + screen->info.feats.features.robustBufferAccess && + screen->info.rb_image_feats.robustImageAccess; + + /* once more testing has been done, use the #if 0 block */ + unsigned illegal = ZINK_DEBUG_RP | ZINK_DEBUG_NORP; + if ((zink_debug & illegal) == illegal) { + mesa_loge("Cannot specify ZINK_DEBUG=rp and ZINK_DEBUG=norp"); + abort(); + } 
+ + /* these drivers benefit from renderpass optimization */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_LLVMPIPE: + case VK_DRIVER_ID_MESA_TURNIP: + case VK_DRIVER_ID_MESA_PANVK: + case VK_DRIVER_ID_MESA_V3DV: + case VK_DRIVER_ID_IMAGINATION_PROPRIETARY: + case VK_DRIVER_ID_QUALCOMM_PROPRIETARY: + case VK_DRIVER_ID_BROADCOM_PROPRIETARY: + case VK_DRIVER_ID_ARM_PROPRIETARY: + screen->driver_workarounds.track_renderpasses = true; //screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard + break; + default: + break; + } + if (zink_debug & ZINK_DEBUG_RP) + screen->driver_workarounds.track_renderpasses = true; + else if (zink_debug & ZINK_DEBUG_NORP) + screen->driver_workarounds.track_renderpasses = false; + + /* these drivers can't optimize non-overlapping copy ops */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_TURNIP: + case VK_DRIVER_ID_QUALCOMM_PROPRIETARY: + screen->driver_workarounds.broken_cache_semantics = true; + break; + default: + break; + } + + /* these drivers can successfully do INVALID <-> LINEAR dri3 modifier swap */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_TURNIP: + case VK_DRIVER_ID_MESA_VENUS: + screen->driver_workarounds.can_do_invalid_linear_modifier = true; + break; + default: + break; + } + + /* these drivers have no difference between unoptimized and optimized shader compilation */ + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_LLVMPIPE: + screen->driver_workarounds.disable_optimized_compile = true; + break; + default: + if (zink_debug & ZINK_DEBUG_NOOPT) + screen->driver_workarounds.disable_optimized_compile = true; + break; + } + + switch (screen->info.driver_props.driverID) { + case VK_DRIVER_ID_MESA_RADV: + case VK_DRIVER_ID_AMD_OPEN_SOURCE: + case VK_DRIVER_ID_AMD_PROPRIETARY: + /* this has bad perf on AMD */ + screen->info.have_KHR_push_descriptor = false; + break; + default: + break; + } + + if 
(!screen->resizable_bar) + screen->info.have_EXT_host_image_copy = false; +} + +static void +fixup_driver_props(struct zink_screen *screen) +{ + VkPhysicalDeviceProperties2 props = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 + }; + if (screen->info.have_EXT_host_image_copy) { + /* fill in layouts */ + screen->info.hic_props.pNext = props.pNext; + props.pNext = &screen->info.hic_props; + screen->info.hic_props.pCopySrcLayouts = ralloc_array(screen, VkImageLayout, screen->info.hic_props.copySrcLayoutCount); + screen->info.hic_props.pCopyDstLayouts = ralloc_array(screen, VkImageLayout, screen->info.hic_props.copyDstLayoutCount); + } + if (props.pNext) + screen->vk.GetPhysicalDeviceProperties2(screen->pdev, &props); + + if (screen->info.have_EXT_host_image_copy) { + for (unsigned i = 0; i < screen->info.hic_props.copyDstLayoutCount; i++) { + if (screen->info.hic_props.pCopyDstLayouts[i] == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + screen->can_hic_shader_read = true; + break; + } + } + } +} + +static void +init_optimal_keys(struct zink_screen *screen) +{ + /* assume that anyone who knows enough to enable optimal_keys on turnip doesn't care about missing line stipple */ + if (zink_debug & ZINK_DEBUG_OPTIMAL_KEYS && screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_TURNIP) + zink_debug |= ZINK_DEBUG_QUIET; + screen->optimal_keys = !screen->need_decompose_attrs && + screen->info.have_EXT_non_seamless_cube_map && + screen->info.have_EXT_provoking_vertex && + !screen->driconf.inline_uniforms && + !screen->driver_workarounds.no_linestipple && + !screen->driver_workarounds.no_linesmooth && + !screen->driver_workarounds.no_hw_gl_point && + !screen->driver_workarounds.lower_robustImageAccess2 && + !screen->driconf.emulate_point_smooth && + !screen->driver_workarounds.needs_zs_shader_swizzle; + if (!screen->optimal_keys && zink_debug & ZINK_DEBUG_OPTIMAL_KEYS && !(zink_debug & ZINK_DEBUG_QUIET)) { + fprintf(stderr, "The following criteria are preventing 
optimal_keys enablement:\n"); + if (screen->need_decompose_attrs) + fprintf(stderr, "missing vertex attribute formats\n"); + if (screen->driconf.inline_uniforms) + fprintf(stderr, "uniform inlining must be disabled (set ZINK_INLINE_UNIFORMS=0 in your env)\n"); + if (screen->driconf.emulate_point_smooth) + fprintf(stderr, "smooth point emulation is enabled\n"); + if (screen->driver_workarounds.needs_zs_shader_swizzle) + fprintf(stderr, "Z/S shader swizzle workaround is enabled\n"); + CHECK_OR_PRINT(have_EXT_line_rasterization); + CHECK_OR_PRINT(line_rast_feats.stippledBresenhamLines); + CHECK_OR_PRINT(feats.features.geometryShader); + CHECK_OR_PRINT(feats.features.sampleRateShading); + CHECK_OR_PRINT(have_EXT_non_seamless_cube_map); + CHECK_OR_PRINT(have_EXT_provoking_vertex); + if (screen->driver_workarounds.no_linesmooth) + fprintf(stderr, "driver does not support smooth lines\n"); + if (screen->driver_workarounds.no_hw_gl_point) + fprintf(stderr, "driver does not support hardware GL_POINT\n"); + CHECK_OR_PRINT(rb2_feats.robustImageAccess2); + CHECK_OR_PRINT(feats.features.robustBufferAccess); + CHECK_OR_PRINT(rb_image_feats.robustImageAccess); + printf("\n"); + mesa_logw("zink: force-enabling optimal_keys despite missing features. 
Good luck!"); + } + if (zink_debug & ZINK_DEBUG_OPTIMAL_KEYS) + screen->optimal_keys = true; + if (!screen->optimal_keys) + screen->info.have_EXT_graphics_pipeline_library = false; + + if (!screen->optimal_keys || + !screen->info.have_KHR_maintenance5 || + /* EXT_shader_object needs either dynamic feedback loop or per-app enablement */ + (!screen->driconf.zink_shader_object_enable && !screen->info.have_EXT_attachment_feedback_loop_dynamic_state)) + screen->info.have_EXT_shader_object = false; + if (screen->info.have_EXT_shader_object) + screen->have_full_ds3 = true; + if (zink_debug & ZINK_DEBUG_DGC) { + if (!screen->optimal_keys) { + mesa_loge("zink: can't DGC without optimal_keys!"); + zink_debug &= ~ZINK_DEBUG_DGC; + } else { + screen->info.have_EXT_multi_draw = false; + screen->info.have_EXT_shader_object = false; + screen->info.have_EXT_graphics_pipeline_library = false; + screen->info.have_EXT_vertex_input_dynamic_state = false; + } + } +} + +static struct disk_cache * +zink_get_disk_shader_cache(struct pipe_screen *_screen) +{ + struct zink_screen *screen = zink_screen(_screen); + + return screen->disk_cache; +} + +VkSemaphore +zink_create_semaphore(struct zink_screen *screen) +{ + VkSemaphoreCreateInfo sci = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + NULL, + 0 + }; + VkSemaphore sem = VK_NULL_HANDLE; + if (util_dynarray_contains(&screen->semaphores, VkSemaphore)) { + simple_mtx_lock(&screen->semaphores_lock); + if (util_dynarray_contains(&screen->semaphores, VkSemaphore)) + sem = util_dynarray_pop(&screen->semaphores, VkSemaphore); + simple_mtx_unlock(&screen->semaphores_lock); + } + if (sem) + return sem; + VkResult ret = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem); + return ret == VK_SUCCESS ? 
sem : VK_NULL_HANDLE; +} + +void +zink_screen_lock_context(struct zink_screen *screen) +{ + simple_mtx_lock(&screen->copy_context_lock); + if (!screen->copy_context) + screen->copy_context = zink_context(screen->base.context_create(&screen->base, NULL, ZINK_CONTEXT_COPY_ONLY)); + if (!screen->copy_context) { + mesa_loge("zink: failed to create copy context"); + /* realistically there's nothing that can be done here */ + } +} + +void +zink_screen_unlock_context(struct zink_screen *screen) +{ + simple_mtx_unlock(&screen->copy_context_lock); +} + +static bool +init_layouts(struct zink_screen *screen) +{ + if (screen->info.have_EXT_descriptor_indexing) { + VkDescriptorSetLayoutBinding bindings[4]; + const unsigned num_bindings = 4; + VkDescriptorSetLayoutCreateInfo dcslci = {0}; + dcslci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + dcslci.pNext = NULL; + VkDescriptorSetLayoutBindingFlagsCreateInfo fci = {0}; + VkDescriptorBindingFlags flags[4]; + dcslci.pNext = &fci; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + else + dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; + fci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; + fci.bindingCount = num_bindings; + fci.pBindingFlags = flags; + for (unsigned i = 0; i < num_bindings; i++) { + flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) + flags[i] |= VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT; + } + /* there is exactly 1 bindless descriptor set per context, and it has 4 bindings, 1 for each descriptor type */ + for (unsigned i = 0; i < num_bindings; i++) { + bindings[i].binding = i; + bindings[i].descriptorType = zink_descriptor_type_from_bindless_index(i); + bindings[i].descriptorCount = ZINK_MAX_BINDLESS_HANDLES; + bindings[i].stageFlags = 
VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT; + bindings[i].pImmutableSamplers = NULL; + } + + dcslci.bindingCount = num_bindings; + dcslci.pBindings = bindings; + VkResult result = VKSCR(CreateDescriptorSetLayout)(screen->dev, &dcslci, 0, &screen->bindless_layout); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateDescriptorSetLayout failed (%s)", vk_Result_to_str(result)); + return false; + } + } + + screen->gfx_push_constant_layout = zink_pipeline_layout_create(screen, NULL, 0, false, 0); + return !!screen->gfx_push_constant_layout; +} + +static int +zink_screen_get_fd(struct pipe_screen *pscreen) +{ + struct zink_screen *screen = zink_screen(pscreen); + + return screen->drm_fd; +} + static struct zink_screen * -zink_internal_create_screen(const struct pipe_screen_config *config) +zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev_major, int64_t dev_minor) { + if (getenv("ZINK_USE_LAVAPIPE")) { + mesa_loge("ZINK_USE_LAVAPIPE is obsolete. 
Use LIBGL_ALWAYS_SOFTWARE\n"); + return NULL; + } + struct zink_screen *screen = rzalloc(NULL, struct zink_screen); - if (!screen) + if (!screen) { + if (!config->implicit_driver_load) + mesa_loge("ZINK: failed to allocate screen"); return NULL; + } - util_cpu_detect(); - screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1); - if (screen->threaded) - util_queue_init(&screen->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL); + screen->implicitly_loaded = config->implicit_driver_load; + screen->drm_fd = -1; + glsl_type_singleton_init_or_ref(); zink_debug = debug_get_option_zink_debug(); - screen->descriptor_mode = debug_get_option_zink_descriptor_mode(); - if (screen->descriptor_mode > ZINK_DESCRIPTOR_MODE_NOTEMPLATES) { - printf("Specify exactly one descriptor mode.\n"); - abort(); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_AUTO) + zink_descriptor_mode = debug_get_option_zink_descriptor_mode(); + + screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1); + if (zink_debug & ZINK_DEBUG_FLUSHSYNC) + screen->threaded_submit = false; + else + screen->threaded_submit = screen->threaded; + screen->abort_on_hang = debug_get_bool_option("ZINK_HANG_ABORT", false); + + + u_trace_state_init(); + + screen->loader_lib = util_dl_open(VK_LIBNAME); + if (!screen->loader_lib) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to load "VK_LIBNAME); + goto fail; } - screen->instance_info.loader_version = zink_get_loader_version(); - screen->instance = zink_create_instance(&screen->instance_info); + screen->vk_GetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)util_dl_get_proc_address(screen->loader_lib, "vkGetInstanceProcAddr"); + screen->vk_GetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)util_dl_get_proc_address(screen->loader_lib, "vkGetDeviceProcAddr"); + if (!screen->vk_GetInstanceProcAddr || + 
!screen->vk_GetDeviceProcAddr) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to get proc address"); + goto fail; + } + + screen->instance_info.loader_version = zink_get_loader_version(screen); + if (config) { + driParseConfigFiles(config->options, config->options_info, 0, "zink", + NULL, NULL, NULL, 0, NULL, 0); + screen->driconf.dual_color_blend_by_location = driQueryOptionb(config->options, "dual_color_blend_by_location"); + //screen->driconf.inline_uniforms = driQueryOptionb(config->options, "radeonsi_inline_uniforms"); + screen->driconf.emulate_point_smooth = driQueryOptionb(config->options, "zink_emulate_point_smooth"); + screen->driconf.zink_shader_object_enable = driQueryOptionb(config->options, "zink_shader_object_enable"); + } - if (!screen->instance) + if (!zink_create_instance(screen, dev_major > 0 && dev_major < 255)) goto fail; - vk_instance_dispatch_table_load(&screen->vk.instance, &vkGetInstanceProcAddr, screen->instance); - vk_physical_device_dispatch_table_load(&screen->vk.physical_device, &vkGetInstanceProcAddr, screen->instance); + if (zink_debug & ZINK_DEBUG_VALIDATION) { + if (!screen->instance_info.have_layer_KHRONOS_validation && + !screen->instance_info.have_layer_LUNARG_standard_validation) { + if (!screen->implicitly_loaded) + mesa_loge("Failed to load validation layer"); + goto fail; + } + } + + vk_instance_uncompacted_dispatch_table_load(&screen->vk.instance, + screen->vk_GetInstanceProcAddr, + screen->instance); + vk_physical_device_uncompacted_dispatch_table_load(&screen->vk.physical_device, + screen->vk_GetInstanceProcAddr, + screen->instance); zink_verify_instance_extensions(screen); if (screen->instance_info.have_EXT_debug_utils && - (zink_debug & ZINK_DEBUG_VALIDATION) && !create_debug(screen)) - debug_printf("ZINK: failed to setup debug utils\n"); + (zink_debug & ZINK_DEBUG_VALIDATION) && !create_debug(screen)) { + if (!screen->implicitly_loaded) + debug_printf("ZINK: failed to setup debug utils\n"); + } - 
choose_pdev(screen); - if (screen->pdev == VK_NULL_HANDLE) + choose_pdev(screen, dev_major, dev_minor); + if (screen->pdev == VK_NULL_HANDLE) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to choose pdev"); goto fail; + } + screen->is_cpu = screen->info.props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; update_queue_props(screen); @@ -1821,65 +3357,171 @@ zink_internal_create_screen(const struct pipe_screen_config *config) VK_FORMAT_X8_D24_UNORM_PACK32); screen->have_D24_UNORM_S8_UINT = zink_is_depth_format_supported(screen, VK_FORMAT_D24_UNORM_S8_UINT); + screen->have_D32_SFLOAT_S8_UINT = zink_is_depth_format_supported(screen, + VK_FORMAT_D32_SFLOAT_S8_UINT); if (!zink_get_physical_device_info(screen)) { - debug_printf("ZINK: failed to detect features\n"); + if (!screen->implicitly_loaded) + debug_printf("ZINK: failed to detect features\n"); goto fail; } - /* Some Vulkan implementations have special requirements for WSI - * allocations. - */ - check_device_needs_mesa_wsi(screen); + memset(&screen->heap_map, UINT8_MAX, sizeof(screen->heap_map)); + for (enum zink_heap i = 0; i < ZINK_HEAP_MAX; i++) { + for (unsigned j = 0; j < screen->info.mem_props.memoryTypeCount; j++) { + VkMemoryPropertyFlags domains = vk_domain_from_heap(i); + if ((screen->info.mem_props.memoryTypes[j].propertyFlags & domains) == domains) { + screen->heap_map[i][screen->heap_count[i]++] = j; + } + } + } + /* iterate again to check for missing heaps */ + for (enum zink_heap i = 0; i < ZINK_HEAP_MAX; i++) { + /* not found: use compatible heap */ + if (screen->heap_map[i][0] == UINT8_MAX) { + /* only cached mem has a failure case for now */ + assert(i == ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED || i == ZINK_HEAP_DEVICE_LOCAL_LAZY || + i == ZINK_HEAP_DEVICE_LOCAL_VISIBLE); + if (i == ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED) { + memcpy(screen->heap_map[i], screen->heap_map[ZINK_HEAP_HOST_VISIBLE_COHERENT], screen->heap_count[ZINK_HEAP_HOST_VISIBLE_COHERENT]); + screen->heap_count[i] = 
screen->heap_count[ZINK_HEAP_HOST_VISIBLE_COHERENT]; + } else { + memcpy(screen->heap_map[i], screen->heap_map[ZINK_HEAP_DEVICE_LOCAL], screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]); + screen->heap_count[i] = screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]; + } + } + } + { + uint64_t biggest_vis_vram = 0; + for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL_VISIBLE]; i++) + biggest_vis_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL_VISIBLE][i]].heapIndex].size); + uint64_t biggest_vram = 0; + for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]; i++) + biggest_vram = MAX2(biggest_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][i]].heapIndex].size); + /* determine if vis vram is roughly equal to total vram */ + if (biggest_vis_vram > biggest_vram * 0.9) + screen->resizable_bar = true; + } + + setup_renderdoc(screen); + if (screen->threaded_submit && !util_queue_init(&screen->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen)) { + if (!screen->implicitly_loaded) + mesa_loge("zink: Failed to create flush queue.\n"); + goto fail; + } zink_internal_setup_moltenvk(screen); + if (!screen->info.have_KHR_timeline_semaphore && !screen->info.feats12.timelineSemaphore) { + if (!screen->implicitly_loaded) + mesa_loge("zink: KHR_timeline_semaphore is required"); + goto fail; + } + if (zink_debug & ZINK_DEBUG_DGC) { + if (!screen->info.have_NV_device_generated_commands) { + if (!screen->implicitly_loaded) + mesa_loge("zink: can't use DGC without NV_device_generated_commands"); + goto fail; + } + } + + if (zink_debug & ZINK_DEBUG_MEM) { + simple_mtx_init(&screen->debug_mem_lock, mtx_plain); + screen->debug_mem_sizes = _mesa_hash_table_create(screen, _mesa_hash_string, _mesa_key_string_equal); + } + + fixup_driver_props(screen); + + init_driver_workarounds(screen); screen->dev = 
zink_create_logical_device(screen); if (!screen->dev) goto fail; - init_queue(screen); - if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV || - screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE || - screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY) - /* this has bad perf on AMD */ - screen->info.have_KHR_push_descriptor = false; + vk_device_uncompacted_dispatch_table_load(&screen->vk.device, + screen->vk_GetDeviceProcAddr, + screen->dev); - vk_device_dispatch_table_load(&screen->vk.device, &vkGetDeviceProcAddr, screen->dev); + init_queue(screen); zink_verify_device_extensions(screen); + /* descriptor set indexing is determined by 'compact' descriptor mode: + * by default, 6 sets are used to provide more granular updating + * in compact mode, a maximum of 4 sets are used, with like-types combined + */ + if ((zink_debug & ZINK_DEBUG_COMPACT) || + screen->info.props.limits.maxBoundDescriptorSets < ZINK_MAX_DESCRIPTOR_SETS) { + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = 0; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UBO] = 1; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SSBO] = 1; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = 2; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_IMAGE] = 2; + screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS] = 3; + screen->compact_descriptors = true; + } else { + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = 0; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UBO] = 1; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = 2; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SSBO] = 3; + screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_IMAGE] = 4; + screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS] = 5; + } + if (screen->info.have_EXT_calibrated_timestamps && !check_have_device_time(screen)) goto fail; screen->have_triangle_fans = true; -#if defined(VK_EXTX_PORTABILITY_SUBSET_EXTENSION_NAME) - if (screen->info.have_EXTX_portability_subset) { - screen->have_triangle_fans = (VK_TRUE == 
screen->info.portability_subset_extx_feats.triangleFans); +#if defined(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME) + if (screen->info.have_KHR_portability_subset) { + screen->have_triangle_fans = (VK_TRUE == screen->info.portability_subset_feats.triangleFans); } -#endif // VK_EXTX_PORTABILITY_SUBSET_EXTENSION_NAME +#endif // VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME check_base_requirements(screen); util_live_shader_cache_init(&screen->shaders, zink_create_gfx_shader_state, zink_delete_shader_state); screen->base.get_name = zink_get_name; + if (screen->instance_info.have_KHR_external_memory_capabilities) { + screen->base.get_device_uuid = zink_get_device_uuid; + screen->base.get_driver_uuid = zink_get_driver_uuid; + } + if (screen->info.have_KHR_external_memory_win32) { + screen->base.get_device_luid = zink_get_device_luid; + screen->base.get_device_node_mask = zink_get_device_node_mask; + } + screen->base.set_max_shader_compiler_threads = zink_set_max_shader_compiler_threads; + screen->base.is_parallel_shader_compilation_finished = zink_is_parallel_shader_compilation_finished; screen->base.get_vendor = zink_get_vendor; screen->base.get_device_vendor = zink_get_device_vendor; screen->base.get_compute_param = zink_get_compute_param; + screen->base.get_timestamp = zink_get_timestamp; screen->base.query_memory_info = zink_query_memory_info; screen->base.get_param = zink_get_param; screen->base.get_paramf = zink_get_paramf; screen->base.get_shader_param = zink_get_shader_param; screen->base.get_compiler_options = zink_get_compiler_options; screen->base.get_sample_pixel_grid = zink_get_sample_pixel_grid; + screen->base.is_compute_copy_faster = zink_is_compute_copy_faster; screen->base.is_format_supported = zink_is_format_supported; - screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers; - screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported; - screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes; + 
screen->base.driver_thread_add_job = zink_driver_thread_add_job; + if (screen->info.have_EXT_image_drm_format_modifier && screen->info.have_EXT_external_memory_dma_buf) { + screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers; + screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported; + screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes; + } +#if defined(_WIN32) + if (screen->info.have_KHR_external_memory_win32) + screen->base.create_fence_win32 = zink_create_fence_win32; +#endif screen->base.context_create = zink_context_create; screen->base.flush_frontbuffer = zink_flush_frontbuffer; screen->base.destroy = zink_destroy_screen; screen->base.finalize_nir = zink_shader_finalize; + screen->base.get_disk_shader_cache = zink_get_disk_shader_cache; + screen->base.get_sparse_texture_virtual_page_size = zink_get_sparse_texture_virtual_page_size; + screen->base.get_driver_query_group_info = zink_get_driver_query_group_info; + screen->base.get_driver_query_info = zink_get_driver_query_info; + screen->base.set_damage_region = zink_set_damage_region; if (screen->info.have_EXT_sample_locations) { VkMultisamplePropertiesEXT prop; @@ -1895,94 +3537,225 @@ zink_internal_create_screen(const struct pipe_screen_config *config) if (!zink_screen_resource_init(&screen->base)) goto fail; - zink_bo_init(screen); + if (!zink_bo_init(screen)) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to initialize suballocator"); + goto fail; + } zink_screen_fence_init(&screen->base); + if (zink_debug & ZINK_DEBUG_IOOPT) + screen->driver_workarounds.io_opt = true; zink_screen_init_compiler(screen); - disk_cache_init(screen); + if (!disk_cache_init(screen)) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to initialize disk cache"); + goto fail; + } + if (!util_queue_init(&screen->cache_get_thread, "zcfq", 8, 4, + UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen)) + goto fail; populate_format_props(screen); - 
pre_hash_descriptor_states(screen); slab_create_parent(&screen->transfer_pool, sizeof(struct zink_transfer), 16); + slab_create(&screen->present_mempool, sizeof(struct zink_kopper_present_info), 16); -#if WITH_XMLCONFIG - if (config) { - driParseConfigFiles(config->options, config->options_info, 0, "zink", - NULL, NULL, NULL, 0, NULL, 0); - screen->driconf.dual_color_blend_by_location = driQueryOptionb(config->options, "dual_color_blend_by_location"); - //screen->driconf.inline_uniforms = driQueryOptionb(config->options, "radeonsi_inline_uniforms"); - } -#endif - screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", false); + screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", screen->is_cpu) && !(zink_debug & ZINK_DEBUG_DGC); screen->total_video_mem = get_video_mem(screen); screen->clamp_video_mem = screen->total_video_mem * 0.8; - if (!os_get_total_physical_memory(&screen->total_mem)) + if (!os_get_total_physical_memory(&screen->total_mem)) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to get total physical memory"); goto fail; + } - if (debug_get_bool_option("ZINK_NO_TIMELINES", false)) - screen->info.have_KHR_timeline_semaphore = false; - if (screen->info.have_KHR_timeline_semaphore) - zink_screen_init_semaphore(screen); + if (!zink_screen_init_semaphore(screen)) { + if (!screen->implicitly_loaded) + mesa_loge("zink: failed to create timeline semaphore"); + goto fail; + } - memset(&screen->heap_map, UINT8_MAX, sizeof(screen->heap_map)); - for (enum zink_heap i = 0; i < ZINK_HEAP_MAX; i++) { - for (unsigned j = 0; j < screen->info.mem_props.memoryTypeCount; j++) { - VkMemoryPropertyFlags domains = vk_domain_from_heap(i); - if ((screen->info.mem_props.memoryTypes[j].propertyFlags & domains) == domains) { - assert(screen->heap_map[i] == UINT8_MAX); - screen->heap_map[i] = j; - break; + bool can_db = true; + { + if (!screen->info.have_EXT_descriptor_buffer) { + if (zink_descriptor_mode == 
ZINK_DESCRIPTOR_MODE_DB) { + if (!screen->implicitly_loaded) + mesa_loge("Cannot use db descriptor mode without EXT_descriptor_buffer"); + goto fail; } + can_db = false; } - - /* not found: use compatible heap */ - if (screen->heap_map[i] == UINT8_MAX) { - /* only cached mem has a failure case for now */ - assert(i == ZINK_HEAP_HOST_VISIBLE_CACHED); - screen->heap_map[i] = screen->heap_map[ZINK_HEAP_HOST_VISIBLE_COHERENT]; + if (!screen->resizable_bar) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (!screen->implicitly_loaded) + mesa_loge("Cannot use db descriptor mode without resizable bar"); + goto fail; + } + can_db = false; + } + if (!screen->info.have_EXT_non_seamless_cube_map) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (!screen->implicitly_loaded) + mesa_loge("Cannot use db descriptor mode without EXT_non_seamless_cube_map"); + goto fail; + } + can_db = false; + } + if (!screen->info.rb2_feats.nullDescriptor) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (!screen->implicitly_loaded) + mesa_loge("Cannot use db descriptor mode without robustness2.nullDescriptor"); + goto fail; + } + can_db = false; + } + if (ZINK_FBFETCH_DESCRIPTOR_SIZE < screen->info.db_props.inputAttachmentDescriptorSize) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + if (!screen->implicitly_loaded) + mesa_loge("Cannot use db descriptor mode with inputAttachmentDescriptorSize(%u) > %u", (unsigned)screen->info.db_props.inputAttachmentDescriptorSize, ZINK_FBFETCH_DESCRIPTOR_SIZE); + goto fail; + } + mesa_logw("zink: bug detected: inputAttachmentDescriptorSize(%u) > %u", (unsigned)screen->info.db_props.inputAttachmentDescriptorSize, ZINK_FBFETCH_DESCRIPTOR_SIZE); + can_db = false; + } + if (screen->info.db_props.maxDescriptorBufferBindings < 2 || screen->info.db_props.maxSamplerDescriptorBufferBindings < 2) { + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + /* allow for testing, but disable bindless */ + 
mesa_logw("Cannot use bindless and db descriptor mode with (maxDescriptorBufferBindings||maxSamplerDescriptorBufferBindings) < 2"); + } else { + can_db = false; + } } } - { - unsigned vis_vram = screen->heap_map[ZINK_HEAP_DEVICE_LOCAL_VISIBLE]; - unsigned vram = screen->heap_map[ZINK_HEAP_DEVICE_LOCAL]; - /* determine if vis vram is roughly equal to total vram */ - if (screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[vis_vram].heapIndex].size > - screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[vram].heapIndex].size * 0.9) - screen->resizable_bar = true; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_AUTO) { + /* descriptor buffer is not performant with virt yet */ + if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_VENUS) + zink_descriptor_mode = ZINK_DESCRIPTOR_MODE_LAZY; + else + zink_descriptor_mode = can_db ? ZINK_DESCRIPTOR_MODE_DB : ZINK_DESCRIPTOR_MODE_LAZY; } - - if (!screen->info.have_KHR_imageless_framebuffer) { - simple_mtx_init(&screen->framebuffer_mtx, mtx_plain); - _mesa_hash_table_init(&screen->framebuffer_cache, screen, hash_framebuffer_state, equals_framebuffer_state); + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + const uint32_t sampler_size = MAX2(screen->info.db_props.combinedImageSamplerDescriptorSize, screen->info.db_props.robustUniformTexelBufferDescriptorSize); + const uint32_t image_size = MAX2(screen->info.db_props.storageImageDescriptorSize, screen->info.db_props.robustStorageTexelBufferDescriptorSize); + if (screen->compact_descriptors) { + screen->db_size[ZINK_DESCRIPTOR_TYPE_UBO] = screen->info.db_props.robustUniformBufferDescriptorSize + + screen->info.db_props.robustStorageBufferDescriptorSize; + screen->db_size[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = sampler_size + image_size; + } else { + screen->db_size[ZINK_DESCRIPTOR_TYPE_UBO] = screen->info.db_props.robustUniformBufferDescriptorSize; + screen->db_size[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = sampler_size; + 
screen->db_size[ZINK_DESCRIPTOR_TYPE_SSBO] = screen->info.db_props.robustStorageBufferDescriptorSize; + screen->db_size[ZINK_DESCRIPTOR_TYPE_IMAGE] = image_size; + } + screen->db_size[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = screen->info.db_props.robustUniformBufferDescriptorSize; + screen->info.have_KHR_push_descriptor = false; + screen->base_descriptor_size = MAX4(screen->db_size[0], screen->db_size[1], screen->db_size[2], screen->db_size[3]); } - zink_screen_init_descriptor_funcs(screen, false); + simple_mtx_init(&screen->free_batch_states_lock, mtx_plain); + simple_mtx_init(&screen->dt_lock, mtx_plain); + util_idalloc_mt_init_tc(&screen->buffer_ids); + simple_mtx_init(&screen->semaphores_lock, mtx_plain); + util_dynarray_init(&screen->semaphores, screen); + util_dynarray_init(&screen->fd_semaphores, screen); + + util_vertex_state_cache_init(&screen->vertex_state_cache, + zink_create_vertex_state, zink_vertex_state_destroy); + screen->base.create_vertex_state = zink_cache_create_vertex_state; + screen->base.vertex_state_destroy = zink_cache_vertex_state_destroy; + + zink_synchronization_init(screen); + + zink_init_screen_pipeline_libs(screen); + + if (!init_layouts(screen)) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to initialize layouts"); + goto fail; + } + + if (!zink_descriptor_layouts_init(screen)) { + if (!screen->implicitly_loaded) + mesa_loge("ZINK: failed to initialize descriptor layouts"); + goto fail; + } + + simple_mtx_init(&screen->copy_context_lock, mtx_plain); + + init_optimal_keys(screen); + + screen->screen_id = p_atomic_inc_return(&num_screens); + zink_tracing = screen->instance_info.have_EXT_debug_utils && + (u_trace_is_enabled(U_TRACE_TYPE_PERFETTO) || u_trace_is_enabled(U_TRACE_TYPE_MARKERS)); + + screen->frame_marker_emitted = zink_screen_debug_marker_begin(screen, "frame"); + return screen; fail: - ralloc_free(screen); + zink_destroy_screen(&screen->base); return NULL; } struct pipe_screen * -zink_create_screen(struct sw_winsys 
*winsys) +zink_create_screen(struct sw_winsys *winsys, const struct pipe_screen_config *config) { - struct zink_screen *ret = zink_internal_create_screen(NULL); + struct zink_screen *ret = zink_internal_create_screen(config, -1, -1); if (ret) { - ret->winsys = winsys; ret->drm_fd = -1; } return &ret->base; } +static inline int +zink_render_rdev(int fd, int64_t *dev_major, int64_t *dev_minor) +{ + int ret = 0; + *dev_major = *dev_minor = -1; +#ifdef HAVE_LIBDRM + struct stat stx; + drmDevicePtr dev; + + if (fd == -1) + return 0; + + if (drmGetDevice2(fd, 0, &dev)) + return -1; + + if(!(dev->available_nodes & (1 << DRM_NODE_RENDER))) { + ret = -1; + goto free_device; + } + + if(stat(dev->nodes[DRM_NODE_RENDER], &stx)) { + ret = -1; + goto free_device; + } + + *dev_major = major(stx.st_rdev); + *dev_minor = minor(stx.st_rdev); + +free_device: + drmFreeDevice(&dev); +#endif //HAVE_LIBDRM + + return ret; +} + struct pipe_screen * zink_drm_create_screen(int fd, const struct pipe_screen_config *config) { - struct zink_screen *ret = zink_internal_create_screen(config); + int64_t dev_major, dev_minor; + struct zink_screen *ret; + + if (zink_render_rdev(fd, &dev_major, &dev_minor)) + return NULL; + + ret = zink_internal_create_screen(config, dev_major, dev_minor); if (ret) ret->drm_fd = os_dupfd_cloexec(fd); @@ -2003,3 +3776,35 @@ void zink_stub_function_not_loaded() mesa_loge("ZINK: a Vulkan function was called without being loaded"); abort(); } + +bool +zink_screen_debug_marker_begin(struct zink_screen *screen, const char *fmt, ...) 
+{ + if (!zink_tracing) + return false; + + char *name; + va_list va; + va_start(va, fmt); + int ret = vasprintf(&name, fmt, va); + va_end(va); + + if (ret == -1) + return false; + + VkDebugUtilsLabelEXT info = { 0 }; + info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + info.pLabelName = name; + + VKSCR(QueueBeginDebugUtilsLabelEXT)(screen->queue, &info); + + free(name); + return true; +} + +void +zink_screen_debug_marker_end(struct zink_screen *screen, bool emitted) +{ + if (emitted) + VKSCR(QueueEndDebugUtilsLabelEXT)(screen->queue); +} diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index 4a30ef5adba..c907bc6e85d 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -24,186 +24,54 @@ #ifndef ZINK_SCREEN_H #define ZINK_SCREEN_H -#include "zink_device_info.h" -#include "zink_instance.h" -#include "vk_dispatch_table.h" +#include "zink_types.h" -#include "util/u_idalloc.h" -#include "pipe/p_screen.h" -#include "util/slab.h" -#include "compiler/nir/nir.h" -#include "util/disk_cache.h" -#include "util/log.h" -#include "util/simple_mtx.h" -#include "util/u_queue.h" -#include "util/u_live_shader_cache.h" -#include "pipebuffer/pb_cache.h" -#include "pipebuffer/pb_slab.h" -#include <vulkan/vulkan.h> -extern uint32_t zink_debug; -struct hash_table; - -struct zink_batch_state; -struct zink_context; -struct zink_descriptor_layout_key; -struct zink_program; -struct zink_shader; -enum zink_descriptor_type; - -/* this is the spec minimum */ -#define ZINK_SPARSE_BUFFER_PAGE_SIZE (64 * 1024) - -#define ZINK_DEBUG_NIR 0x1 -#define ZINK_DEBUG_SPIRV 0x2 -#define ZINK_DEBUG_TGSI 0x4 -#define ZINK_DEBUG_VALIDATION 0x8 - -#define NUM_SLAB_ALLOCATORS 3 - -enum zink_descriptor_mode { - ZINK_DESCRIPTOR_MODE_AUTO, - ZINK_DESCRIPTOR_MODE_LAZY, - ZINK_DESCRIPTOR_MODE_NOFALLBACK, - ZINK_DESCRIPTOR_MODE_NOTEMPLATES, -}; - -struct zink_modifier_prop { - uint32_t drmFormatModifierCount; - 
VkDrmFormatModifierPropertiesEXT* pDrmFormatModifierProperties; -}; - -struct zink_screen { - struct pipe_screen base; - bool threaded; - uint32_t curr_batch; //the current batch id - uint32_t last_finished; //this is racy but ultimately doesn't matter - VkSemaphore sem; - VkSemaphore prev_sem; - struct util_queue flush_queue; - - unsigned buffer_rebind_counter; - - bool device_lost; - struct sw_winsys *winsys; - int drm_fd; - - struct hash_table framebuffer_cache; - simple_mtx_t framebuffer_mtx; - - struct slab_parent_pool transfer_pool; - struct disk_cache *disk_cache; - struct util_queue cache_put_thread; - struct util_queue cache_get_thread; - - struct util_live_shader_cache shaders; - - struct { - struct pb_cache bo_cache; - struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; - unsigned min_alloc_size; - struct hash_table *bo_export_table; - simple_mtx_t bo_export_table_lock; - uint32_t next_bo_unique_id; - } pb; - uint8_t heap_map[VK_MAX_MEMORY_TYPES]; - bool resizable_bar; - - uint64_t total_video_mem; - uint64_t clamp_video_mem; - uint64_t total_mem; - - VkInstance instance; - struct zink_instance_info instance_info; - - VkPhysicalDevice pdev; - uint32_t vk_version, spirv_version; - struct util_idalloc_mt buffer_ids; - - struct zink_device_info info; - struct nir_shader_compiler_options nir_options; - - bool have_X8_D24_UNORM_PACK32; - bool have_D24_UNORM_S8_UINT; - bool have_triangle_fans; - - uint32_t gfx_queue; - uint32_t max_queues; - uint32_t timestamp_valid_bits; - VkDevice dev; - VkQueue queue; //gfx+compute - VkQueue thread_queue; //gfx+compute - VkDebugUtilsMessengerEXT debugUtilsCallbackHandle; - - uint32_t cur_custom_border_color_samplers; - - bool needs_mesa_wsi; - bool needs_mesa_flush_wsi; - - struct vk_dispatch_table vk; - - bool (*descriptor_program_init)(struct zink_context *ctx, struct zink_program *pg); - void (*descriptor_program_deinit)(struct zink_screen *screen, struct zink_program *pg); - void (*descriptors_update)(struct zink_context 
*ctx, bool is_compute); - void (*context_update_descriptor_states)(struct zink_context *ctx, bool is_compute); - void (*context_invalidate_descriptor_state)(struct zink_context *ctx, enum pipe_shader_type shader, - enum zink_descriptor_type type, - unsigned start, unsigned count); - bool (*batch_descriptor_init)(struct zink_screen *screen, struct zink_batch_state *bs); - void (*batch_descriptor_reset)(struct zink_screen *screen, struct zink_batch_state *bs); - void (*batch_descriptor_deinit)(struct zink_screen *screen, struct zink_batch_state *bs); - bool (*descriptors_init)(struct zink_context *ctx); - void (*descriptors_deinit)(struct zink_context *ctx); - enum zink_descriptor_mode descriptor_mode; - - struct { - bool dual_color_blend_by_location; - bool inline_uniforms; - } driconf; +#ifdef __cplusplus +extern "C" { +#endif - VkFormatProperties format_props[PIPE_FORMAT_COUNT]; - struct zink_modifier_prop modifier_props[PIPE_FORMAT_COUNT]; - struct { - uint32_t image_view; - uint32_t buffer_view; - } null_descriptor_hashes; +struct util_dl_library; - VkExtent2D maxSampleLocationGridSize[5]; -}; +void +zink_init_screen_pipeline_libs(struct zink_screen *screen); /* update last_finished to account for batch_id wrapping */ static inline void -zink_screen_update_last_finished(struct zink_screen *screen, uint32_t batch_id) +zink_screen_update_last_finished(struct zink_screen *screen, uint64_t batch_id) { + const uint32_t check_id = (uint32_t)batch_id; /* last_finished may have wrapped */ if (screen->last_finished < UINT_MAX / 2) { /* last_finished has wrapped, batch_id has not */ - if (batch_id > UINT_MAX / 2) + if (check_id > UINT_MAX / 2) return; - } else if (batch_id < UINT_MAX / 2) { + } else if (check_id < UINT_MAX / 2) { /* batch_id has wrapped, last_finished has not */ - screen->last_finished = batch_id; + screen->last_finished = check_id; return; } /* neither have wrapped */ - screen->last_finished = MAX2(batch_id, screen->last_finished); + 
screen->last_finished = MAX2(check_id, screen->last_finished); } /* check a batch_id against last_finished while accounting for wrapping */ static inline bool zink_screen_check_last_finished(struct zink_screen *screen, uint32_t batch_id) { + const uint32_t check_id = (uint32_t)batch_id; + assert(check_id); /* last_finished may have wrapped */ if (screen->last_finished < UINT_MAX / 2) { /* last_finished has wrapped, batch_id has not */ - if (batch_id > UINT_MAX / 2) + if (check_id > UINT_MAX / 2) return true; - } else if (batch_id < UINT_MAX / 2) { + } else if (check_id < UINT_MAX / 2) { /* batch_id has wrapped, last_finished has not */ return false; } - return screen->last_finished >= batch_id; + return screen->last_finished >= check_id; } bool @@ -219,6 +87,10 @@ zink_screen_handle_vkresult(struct zink_screen *screen, VkResult ret) break; case VK_ERROR_DEVICE_LOST: screen->device_lost = true; + mesa_loge("zink: DEVICE LOST!\n"); + /* if nothing can save us, abort */ + if (screen->abort_on_hang && !screen->robust_ctx_count) + abort(); FALLTHROUGH; default: success = false; @@ -227,56 +99,92 @@ zink_screen_handle_vkresult(struct zink_screen *screen, VkResult ret) return success; } -static inline struct zink_screen * -zink_screen(struct pipe_screen *pipe) +typedef const char *(*zink_vkflags_func)(uint64_t); + +static inline unsigned +zink_string_vkflags_unroll(char *buf, size_t bufsize, uint64_t flags, zink_vkflags_func func) { - return (struct zink_screen *)pipe; + bool first = true; + unsigned idx = 0; + u_foreach_bit64(bit, flags) { + if (!first) + buf[idx++] = '|'; + idx += snprintf(&buf[idx], bufsize - idx, "%s", func((BITFIELD64_BIT(bit)))); + first = false; + } + return idx; } +#define VRAM_ALLOC_LOOP(RET, DOIT, ...) 
\ + do { \ + unsigned _us[] = {0, 1000, 10000, 500000, 1000000}; \ + for (unsigned _i = 0; _i < ARRAY_SIZE(_us); _i++) { \ + RET = DOIT; \ + if (RET == VK_SUCCESS || RET != VK_ERROR_OUT_OF_DEVICE_MEMORY) \ + break; \ + os_time_sleep(_us[_i]); \ + } \ + __VA_ARGS__ \ + } while (0) -struct mem_cache_entry { - VkDeviceMemory mem; - void *map; -}; +VkSemaphore +zink_create_semaphore(struct zink_screen *screen); -#define VKCTX(fn) zink_screen(ctx->base.screen)->vk.fn -#define VKSCR(fn) screen->vk.fn +void +zink_screen_lock_context(struct zink_screen *screen); +void +zink_screen_unlock_context(struct zink_screen *screen); + +VkSemaphore +zink_create_exportable_semaphore(struct zink_screen *screen); +VkSemaphore +zink_screen_export_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res); +bool +zink_screen_import_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res, VkSemaphore sem); VkFormat zink_get_format(struct zink_screen *screen, enum pipe_format format); -bool -zink_screen_batch_id_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout); +void +zink_convert_color(const struct zink_screen *screen, enum pipe_format format, + union pipe_color_union *dst, + const union pipe_color_union *src); bool -zink_screen_timeline_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout); +zink_screen_timeline_wait(struct zink_screen *screen, uint64_t batch_id, uint64_t timeout); bool zink_is_depth_format_supported(struct zink_screen *screen, VkFormat format); -#define GET_PROC_ADDR_INSTANCE_LOCAL(instance, x) PFN_vk##x vk_##x = (PFN_vk##x)vkGetInstanceProcAddr(instance, "vk"#x) +#define GET_PROC_ADDR_INSTANCE_LOCAL(screen, instance, x) PFN_vk##x vk_##x = (PFN_vk##x)(screen)->vk_GetInstanceProcAddr(instance, "vk"#x) void -zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg); +zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread); void 
-zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg); +zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread); void -zink_screen_init_descriptor_funcs(struct zink_screen *screen, bool fallback); +zink_stub_function_not_loaded(void); +bool +zink_screen_debug_marker_begin(struct zink_screen *screen, const char *fmt, ...); void -zink_stub_function_not_loaded(void); +zink_screen_debug_marker_end(struct zink_screen *screen, bool emitted); -#define warn_missing_feature(feat) \ +#define warn_missing_feature(warned, feat) \ do { \ - static bool warned = false; \ if (!warned) { \ - fprintf(stderr, "WARNING: Incorrect rendering will happen, " \ - "because the Vulkan device doesn't support " \ - "the %s feature\n", feat); \ + if (!(zink_debug & ZINK_DEBUG_QUIET)) \ + mesa_logw("WARNING: Incorrect rendering will happen " \ + "because the Vulkan device doesn't support " \ + "the '%s' feature\n", feat); \ warned = true; \ } \ } while (0) +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h index 318728e87d8..1dab2447fd8 100644 --- a/src/gallium/drivers/zink/zink_shader_keys.h +++ b/src/gallium/drivers/zink/zink_shader_keys.h @@ -29,9 +29,11 @@ #include "compiler/shader_info.h" struct zink_vs_key_base { - bool clip_halfz; - bool push_drawid; - bool last_vertex_stage; + bool last_vertex_stage : 1; + bool clip_halfz : 1; + bool push_drawid : 1; + bool robust_access : 1; + uint8_t pad : 4; }; struct zink_vs_key { @@ -55,14 +57,64 @@ struct zink_vs_key { unsigned size; }; -struct zink_fs_key { +struct zink_gs_key { + struct zink_vs_key_base base; + uint8_t pad; + bool lower_line_stipple : 1; + bool lower_line_smooth : 1; + bool lower_gl_point : 1; + bool line_rectangular : 1; + unsigned lower_pv_mode : 2; + // not hashed + unsigned size; +}; + +struct zink_zs_swizzle { + uint8_t s[4]; +}; + +struct zink_zs_swizzle_key { 
+ /* Mask of sampler views with zs_view, i.e. have swizzles other than GL_RED for depth */ + uint32_t mask; + struct zink_zs_swizzle swizzle[32]; +}; + +struct zink_fs_key_base { + bool point_coord_yinvert : 1; + bool samples : 1; + bool force_dual_color_blend : 1; + bool force_persample_interp : 1; + bool fbfetch_ms : 1; + bool shadow_needs_shader_swizzle : 1; //append zink_zs_swizzle_key after the key data + uint8_t pad : 2; uint8_t coord_replace_bits; - bool coord_replace_yinvert; - bool samples; - bool force_dual_color_blend; +}; + +struct zink_fs_key { + struct zink_fs_key_base base; + /* non-optimal bits after this point */ + bool lower_line_stipple : 1; + bool lower_line_smooth : 1; + bool lower_point_smooth : 1; + bool robust_access : 1; + uint16_t pad2 : 12; +}; + +struct zink_tcs_key { + uint8_t patch_vertices; +}; + +/* when adding a new field, make sure + * ctx->compute_pipeline_state.key.size is set in zink_context_create. + */ +struct zink_cs_key { + bool robust_access : 1; + uint32_t pad : 31; }; struct zink_shader_key_base { + bool needs_zs_shader_swizzle; + uint32_t nonseamless_cube_mask; uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS]; }; @@ -73,16 +125,54 @@ struct zink_shader_key_base { */ struct zink_shader_key { union { - /* reuse vs key for now with tes/gs since we only use clip_halfz */ + /* reuse vs key for now with tes since we only use clip_halfz */ struct zink_vs_key vs; struct zink_vs_key_base vs_base; + struct zink_tcs_key tcs; + struct zink_gs_key gs; struct zink_fs_key fs; + struct zink_fs_key_base fs_base; + struct zink_cs_key cs; } key; struct zink_shader_key_base base; unsigned inline_uniforms:1; uint32_t size; }; +union zink_shader_key_optimal { + struct { + struct zink_vs_key_base vs_base; + struct zink_tcs_key tcs; + struct zink_fs_key_base fs; + }; + struct { + uint8_t vs_bits; + uint8_t tcs_bits; + uint16_t fs_bits; + }; + uint32_t val; +}; + +/* the default key has only last_vertex_stage set*/ +#define 
ZINK_SHADER_KEY_OPTIMAL_DEFAULT (1<<0) +/* Ignore patch_vertices bits that would only be used if we had to generate the missing TCS */ +static inline uint32_t +zink_shader_key_optimal_no_tcs(uint32_t key) +{ + union zink_shader_key_optimal k; + k.val = key; + k.tcs_bits = 0; + return k.val; +} +#define ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(key) (zink_shader_key_optimal_no_tcs(key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT) + +static inline const struct zink_fs_key_base * +zink_fs_key_base(const struct zink_shader_key *key) +{ + assert(key); + return &key->key.fs.base; +} + static inline const struct zink_fs_key * zink_fs_key(const struct zink_shader_key *key) { @@ -103,6 +193,25 @@ zink_vs_key(const struct zink_shader_key *key) return &key->key.vs; } +static inline const struct zink_gs_key * +zink_gs_key(const struct zink_shader_key *key) +{ + assert(key); + return &key->key.gs; +} + +static inline const struct zink_tcs_key * +zink_tcs_key(const struct zink_shader_key *key) +{ + assert(key); + return &key->key.tcs; +} +static inline const struct zink_cs_key * +zink_cs_key(const struct zink_shader_key *key) +{ + assert(key); + return &key->key.cs; +} #endif diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c index 68e8d413ff9..4298086f6ee 100644 --- a/src/gallium/drivers/zink/zink_state.c +++ b/src/gallium/drivers/zink/zink_state.c @@ -31,6 +31,8 @@ #include "compiler/shader_enums.h" #include "util/u_dual_blend.h" #include "util/u_memory.h" +#include "util/u_helpers.h" +#include "vk_format.h" #include <math.h> @@ -47,20 +49,21 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, ves->hw_state.hash = _mesa_hash_pointer(ves); int buffer_map[PIPE_MAX_ATTRIBS]; - for (int i = 0; i < ARRAY_SIZE(buffer_map); ++i) - buffer_map[i] = -1; + for (int j = 0; j < ARRAY_SIZE(buffer_map); ++j) + buffer_map[j] = -1; int num_bindings = 0; unsigned num_decomposed = 0; uint32_t size8 = 0; uint32_t size16 = 0; uint32_t size32 = 0; + uint16_t 
strides[PIPE_MAX_ATTRIBS]; for (i = 0; i < num_elements; ++i) { const struct pipe_vertex_element *elem = elements + i; int binding = elem->vertex_buffer_index; if (buffer_map[binding] < 0) { - ves->binding_map[num_bindings] = binding; + ves->hw_state.binding_map[num_bindings] = binding; buffer_map[binding] = num_bindings++; } binding = buffer_map[binding]; @@ -102,6 +105,7 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, ves->decomposed_attrs_without_w |= BITFIELD_BIT(i); ves->decomposed_attrs_without_w_size = size; } + ves->has_decomposed_attrs = true; } if (screen->info.have_EXT_vertex_input_dynamic_state) { @@ -109,32 +113,37 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, ves->hw_state.dynattribs[i].binding = binding; ves->hw_state.dynattribs[i].location = i; ves->hw_state.dynattribs[i].format = format; + strides[binding] = elem->src_stride; assert(ves->hw_state.dynattribs[i].format != VK_FORMAT_UNDEFINED); ves->hw_state.dynattribs[i].offset = elem->src_offset; } else { ves->hw_state.attribs[i].binding = binding; ves->hw_state.attribs[i].location = i; ves->hw_state.attribs[i].format = format; + ves->hw_state.b.strides[binding] = elem->src_stride; assert(ves->hw_state.attribs[i].format != VK_FORMAT_UNDEFINED); ves->hw_state.attribs[i].offset = elem->src_offset; + ves->min_stride[binding] = MAX2(ves->min_stride[binding], elem->src_offset + vk_format_get_blocksize(format)); } } assert(num_decomposed + num_elements <= PIPE_MAX_ATTRIBS); - u_foreach_bit(i, ves->decomposed_attrs | ves->decomposed_attrs_without_w) { - const struct pipe_vertex_element *elem = elements + i; + u_foreach_bit(attr_index, ves->decomposed_attrs | ves->decomposed_attrs_without_w) { + const struct pipe_vertex_element *elem = elements + attr_index; const struct util_format_description *desc = util_format_description(elem->src_format); unsigned size = 1; - if (size32 & BITFIELD_BIT(i)) + if (size32 & BITFIELD_BIT(attr_index)) size = 4; - else if (size16 & 
BITFIELD_BIT(i)) + else if (size16 & BITFIELD_BIT(attr_index)) size = 2; + else + assert(size8 & BITFIELD_BIT(attr_index)); for (unsigned j = 1; j < desc->nr_channels; j++) { if (screen->info.have_EXT_vertex_input_dynamic_state) { - memcpy(&ves->hw_state.dynattribs[num_elements], &ves->hw_state.dynattribs[i], sizeof(VkVertexInputAttributeDescription2EXT)); + memcpy(&ves->hw_state.dynattribs[num_elements], &ves->hw_state.dynattribs[attr_index], sizeof(VkVertexInputAttributeDescription2EXT)); ves->hw_state.dynattribs[num_elements].location = num_elements; ves->hw_state.dynattribs[num_elements].offset += j * size; } else { - memcpy(&ves->hw_state.attribs[num_elements], &ves->hw_state.attribs[i], sizeof(VkVertexInputAttributeDescription)); + memcpy(&ves->hw_state.attribs[num_elements], &ves->hw_state.attribs[attr_index], sizeof(VkVertexInputAttributeDescription)); ves->hw_state.attribs[num_elements].location = num_elements; ves->hw_state.attribs[num_elements].offset += j * size; } @@ -144,22 +153,23 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, ves->hw_state.num_bindings = num_bindings; ves->hw_state.num_attribs = num_elements; if (screen->info.have_EXT_vertex_input_dynamic_state) { - for (int i = 0; i < num_bindings; ++i) { - ves->hw_state.dynbindings[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT; - ves->hw_state.dynbindings[i].binding = ves->bindings[i].binding; - ves->hw_state.dynbindings[i].inputRate = ves->bindings[i].inputRate; - if (ves->divisor[i]) - ves->hw_state.dynbindings[i].divisor = ves->divisor[i]; + for (int j = 0; j < num_bindings; ++j) { + ves->hw_state.dynbindings[j].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT; + ves->hw_state.dynbindings[j].binding = ves->bindings[j].binding; + ves->hw_state.dynbindings[j].inputRate = ves->bindings[j].inputRate; + ves->hw_state.dynbindings[j].stride = strides[j]; + if (ves->divisor[j]) + ves->hw_state.dynbindings[j].divisor = ves->divisor[j]; else - 
ves->hw_state.dynbindings[i].divisor = 1; + ves->hw_state.dynbindings[j].divisor = 1; } } else { - for (int i = 0; i < num_bindings; ++i) { - ves->hw_state.b.bindings[i].binding = ves->bindings[i].binding; - ves->hw_state.b.bindings[i].inputRate = ves->bindings[i].inputRate; - if (ves->divisor[i]) { - ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].divisor = ves->divisor[i]; - ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].binding = ves->bindings[i].binding; + for (int j = 0; j < num_bindings; ++j) { + ves->hw_state.b.bindings[j].binding = ves->bindings[j].binding; + ves->hw_state.b.bindings[j].inputRate = ves->bindings[j].inputRate; + if (ves->divisor[j]) { + ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].divisor = ves->divisor[j]; + ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].binding = ves->bindings[j].binding; ves->hw_state.b.divisors_present++; } } @@ -173,12 +183,16 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); struct zink_gfx_pipeline_state *state = &ctx->gfx_pipeline_state; + zink_flush_dgc_if_enabled(ctx); ctx->element_state = cso; if (cso) { if (state->element_state != &ctx->element_state->hw_state) { ctx->vertex_state_changed = !zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state; ctx->vertex_buffers_dirty = ctx->element_state->hw_state.num_bindings > 0; } + state->element_state = &ctx->element_state->hw_state; + if (zink_screen(pctx->screen)->optimal_keys) + return; const struct zink_vs_key *vs = zink_get_vs_key(ctx); uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0; switch (vs->size) { @@ -218,7 +232,6 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx, key->key.vs.size = size; key->size += 2 * size; } - state->element_state = &ctx->element_state->hw_state; } else { state->element_state = NULL; ctx->vertex_buffers_dirty = false; @@ -272,21 +285,6 @@ blend_factor(enum pipe_blendfactor factor) } -static 
bool -need_blend_constants(enum pipe_blendfactor factor) -{ - switch (factor) { - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return true; - - default: - return false; - } -} - static VkBlendOp blend_op(enum pipe_blend_func func) { @@ -362,8 +360,7 @@ zink_create_blend_state(struct pipe_context *pctx, */ cso->alpha_to_coverage = blend_state->alpha_to_coverage; cso->alpha_to_one = blend_state->alpha_to_one; - - cso->need_blend_constants = false; + cso->num_rts = blend_state->max_rt + 1; for (int i = 0; i < blend_state->max_rt + 1; ++i) { const struct pipe_rt_blend_state *rt = blend_state->rt; @@ -380,12 +377,6 @@ zink_create_blend_state(struct pipe_context *pctx, att.srcAlphaBlendFactor = blend_factor(fix_blendfactor(rt->alpha_src_factor, cso->alpha_to_one)); att.dstAlphaBlendFactor = blend_factor(fix_blendfactor(rt->alpha_dst_factor, cso->alpha_to_one)); att.alphaBlendOp = blend_op(rt->alpha_func); - - if (need_blend_constants(rt->rgb_src_factor) || - need_blend_constants(rt->rgb_dst_factor) || - need_blend_constants(rt->alpha_src_factor) || - need_blend_constants(rt->alpha_dst_factor)) - cso->need_blend_constants = true; } if (rt->colormask & PIPE_MASK_R) @@ -397,7 +388,20 @@ zink_create_blend_state(struct pipe_context *pctx, if (rt->colormask & PIPE_MASK_A) att.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT; + cso->wrmask |= (rt->colormask << i); + if (rt->blend_enable) + cso->enables |= BITFIELD_BIT(i); + cso->attachments[i] = att; + + cso->ds3.enables[i] = att.blendEnable; + cso->ds3.eq[i].alphaBlendOp = att.alphaBlendOp; + cso->ds3.eq[i].dstAlphaBlendFactor = att.dstAlphaBlendFactor; + cso->ds3.eq[i].srcAlphaBlendFactor = att.srcAlphaBlendFactor; + cso->ds3.eq[i].colorBlendOp = att.colorBlendOp; + cso->ds3.eq[i].dstColorBlendFactor = att.dstColorBlendFactor; + cso->ds3.eq[i].srcColorBlendFactor = att.srcColorBlendFactor; + cso->ds3.wrmask[i] = 
att.colorWriteMask; } cso->dual_src_blend = util_blend_state_is_dual(blend_state, 0); @@ -408,18 +412,47 @@ static void zink_bind_blend_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); struct zink_gfx_pipeline_state* state = &zink_context(pctx)->gfx_pipeline_state; + zink_flush_dgc_if_enabled(ctx); struct zink_blend_state *blend = cso; + struct zink_blend_state *old_blend = state->blend_state; if (state->blend_state != cso) { state->blend_state = cso; - state->blend_id = blend ? blend->hash : 0; - state->dirty = true; - bool force_dual_color_blend = zink_screen(pctx->screen)->driconf.dual_color_blend_by_location && - blend && blend->dual_src_blend && state->blend_state->attachments[1].blendEnable; - if (force_dual_color_blend != zink_get_fs_key(ctx)->force_dual_color_blend) - zink_set_fs_key(ctx)->force_dual_color_blend = force_dual_color_blend; + if (!screen->have_full_ds3) { + state->blend_id = blend ? 
blend->hash : 0; + state->dirty = true; + } + bool force_dual_color_blend = screen->driconf.dual_color_blend_by_location && + blend && blend->dual_src_blend && state->blend_state->attachments[0].blendEnable; + if (force_dual_color_blend != zink_get_fs_base_key(ctx)->force_dual_color_blend) + zink_set_fs_base_key(ctx)->force_dual_color_blend = force_dual_color_blend; ctx->blend_state_changed = true; + + if (cso && screen->have_full_ds3) { +#define STATE_CHECK(NAME, FLAG) \ + if ((!old_blend || old_blend->NAME != blend->NAME)) \ + ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_##FLAG) + + STATE_CHECK(alpha_to_coverage, A2C); + if (screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToOneEnable) { + STATE_CHECK(alpha_to_one, A21); + } + STATE_CHECK(enables, ON); + STATE_CHECK(wrmask, WRITE); + if (old_blend && blend->num_rts == old_blend->num_rts) { + if (memcmp(blend->ds3.eq, old_blend->ds3.eq, blend->num_rts * sizeof(blend->ds3.eq[0]))) + ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_EQ); + } else { + ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_EQ); + } + STATE_CHECK(logicop_enable, LOGIC_ON); + STATE_CHECK(logicop_func, LOGIC); + +#undef STATE_CHECK + } + } } @@ -516,7 +549,7 @@ zink_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); - bool prev_zwrite = ctx->dsa_state ? ctx->dsa_state->hw_state.depth_write : false; + zink_flush_dgc_if_enabled(ctx); ctx->dsa_state = cso; if (cso) { @@ -527,10 +560,8 @@ zink_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *cso) ctx->dsa_state_changed = true; } } - if (prev_zwrite != (ctx->dsa_state ? 
ctx->dsa_state->hw_state.depth_write : false)) { - ctx->rp_changed = true; - zink_batch_no_rp(ctx); - } + if (!ctx->track_renderpasses && !ctx->blitting) + ctx->rp_tc_info_updated = true; } static void @@ -558,18 +589,6 @@ line_width(float width, float granularity, const float range[2]) return CLAMP(width, range[0], range[1]); } -#define warn_line_feature(feat) \ - do { \ - static bool warned = false; \ - if (!warned) { \ - fprintf(stderr, "WARNING: Incorrect rendering will happen, " \ - "because the Vulkan device doesn't support " \ - "the %s feature of " \ - "VK_EXT_line_rasterization\n", feat); \ - warned = true; \ - } \ - } while (0) - static void * zink_create_rasterizer_state(struct pipe_context *pctx, const struct pipe_rasterizer_state *rs_state) @@ -582,83 +601,70 @@ zink_create_rasterizer_state(struct pipe_context *pctx, state->base = *rs_state; state->base.line_stipple_factor++; - state->hw_state.line_stipple_enable = rs_state->line_stipple_enable; + + state->hw_state.line_stipple_enable = + rs_state->line_stipple_enable && + !screen->driver_workarounds.no_linestipple; assert(rs_state->depth_clip_far == rs_state->depth_clip_near); - state->hw_state.depth_clamp = rs_state->depth_clip_near == 0; - state->hw_state.rasterizer_discard = rs_state->rasterizer_discard; - state->hw_state.force_persample_interp = rs_state->force_persample_interp; + state->hw_state.depth_clip = rs_state->depth_clip_near; + state->hw_state.depth_clamp = rs_state->depth_clamp; state->hw_state.pv_last = !rs_state->flatshade_first; state->hw_state.clip_halfz = rs_state->clip_halfz; assert(rs_state->fill_front <= PIPE_POLYGON_MODE_POINT); if (rs_state->fill_back != rs_state->fill_front) debug_printf("BUG: vulkan doesn't support different front and back fill modes\n"); - state->hw_state.polygon_mode = rs_state->fill_front; // same values - state->hw_state.cull_mode = rs_state->cull_face; // same bits + + if (rs_state->fill_front == PIPE_POLYGON_MODE_POINT && + 
screen->driver_workarounds.no_hw_gl_point) { + state->hw_state.polygon_mode = VK_POLYGON_MODE_FILL; + state->cull_mode = VK_CULL_MODE_NONE; + } else { + state->hw_state.polygon_mode = rs_state->fill_front; // same values + state->cull_mode = rs_state->cull_face; // same bits + } state->front_face = rs_state->front_ccw ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; - VkPhysicalDeviceLineRasterizationFeaturesEXT *line_feats = - &screen->info.line_rast_feats; - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; - - if (rs_state->line_stipple_enable) { - if (screen->info.have_EXT_line_rasterization) { - if (rs_state->line_rectangular) { - if (rs_state->line_smooth) { - if (line_feats->stippledSmoothLines) - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; - else - warn_line_feature("stippledSmoothLines"); - } else if (line_feats->stippledRectangularLines) - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; - else - warn_line_feature("stippledRectangularLines"); - } else if (line_feats->stippledBresenhamLines) - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; - else { - warn_line_feature("stippledBresenhamLines"); - - /* no suitable mode that supports line stippling */ - state->base.line_stipple_factor = 0; - state->base.line_stipple_pattern = UINT16_MAX; - } - } + state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; + if (rs_state->line_rectangular) { + if (rs_state->line_smooth && + !screen->driver_workarounds.no_linesmooth) + state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; + else + state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; } else { - if (screen->info.have_EXT_line_rasterization) { - if (rs_state->line_rectangular) { - if (rs_state->line_smooth) { - if (line_feats->smoothLines) - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; - else - 
warn_line_feature("smoothLines"); - } else if (line_feats->rectangularLines) - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; - else - warn_line_feature("rectangularLines"); - } else if (line_feats->bresenhamLines) - state->hw_state.line_mode = - VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; - else - warn_line_feature("bresenhamLines"); - } - state->base.line_stipple_factor = 0; + state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; + } + state->dynamic_line_mode = state->hw_state.line_mode; + switch (state->hw_state.line_mode) { + case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT: + if (!screen->info.line_rast_feats.rectangularLines) + state->dynamic_line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; + break; + case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT: + if (!screen->info.line_rast_feats.smoothLines) + state->dynamic_line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; + break; + case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT: + if (!screen->info.line_rast_feats.bresenhamLines) + state->dynamic_line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; + break; + default: break; + } + + if (!rs_state->line_stipple_enable) { + state->base.line_stipple_factor = 1; state->base.line_stipple_pattern = UINT16_MAX; } - state->offset_point = rs_state->offset_point; - state->offset_line = rs_state->offset_line; - state->offset_tri = rs_state->offset_tri; + state->offset_fill = util_get_offset(rs_state, rs_state->fill_front); state->offset_units = rs_state->offset_units; + if (!rs_state->offset_units_unscaled) + state->offset_units *= 2; state->offset_clamp = rs_state->offset_clamp; state->offset_scale = rs_state->offset_scale; @@ -674,9 +680,16 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); struct zink_screen *screen = zink_screen(pctx->screen); + struct zink_rasterizer_state *prev_state = ctx->rast_state; bool point_quad_rasterization = ctx->rast_state ? 
ctx->rast_state->base.point_quad_rasterization : false; bool scissor = ctx->rast_state ? ctx->rast_state->base.scissor : false; bool pv_last = ctx->rast_state ? ctx->rast_state->hw_state.pv_last : false; + bool force_persample_interp = ctx->gfx_pipeline_state.force_persample_interp; + bool clip_halfz = ctx->rast_state ? ctx->rast_state->hw_state.clip_halfz : false; + bool rasterizer_discard = ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false; + bool half_pixel_center = ctx->rast_state ? ctx->rast_state->base.half_pixel_center : true; + float line_width = ctx->rast_state ? ctx->rast_state->base.line_width : 1.0; + zink_flush_dgc_if_enabled(ctx); ctx->rast_state = cso; if (ctx->rast_state) { @@ -685,26 +698,83 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso) /* without this prop, change in pv mode requires new rp */ !screen->info.pv_props.provokingVertexModePerPipeline) zink_batch_no_rp(ctx); - uint32_t rast_bits = 0; - memcpy(&rast_bits, &ctx->rast_state->hw_state, sizeof(struct zink_rasterizer_hw_state)); - ctx->gfx_pipeline_state.rast_state = rast_bits & BITFIELD_MASK(ZINK_RAST_HW_STATE_SIZE); + memcpy(&ctx->gfx_pipeline_state.dyn_state3, &ctx->rast_state->hw_state, sizeof(struct zink_rasterizer_hw_state)); - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty |= !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3; ctx->rast_state_changed = true; - if (zink_get_last_vertex_key(ctx)->clip_halfz != ctx->rast_state->base.clip_halfz) { - zink_set_last_vertex_key(ctx)->clip_halfz = ctx->rast_state->base.clip_halfz; + if (clip_halfz != ctx->rast_state->base.clip_halfz) { + if (screen->info.have_EXT_depth_clip_control) + ctx->gfx_pipeline_state.dirty = true; + else + zink_set_last_vertex_key(ctx)->clip_halfz = ctx->rast_state->base.clip_halfz; ctx->vp_state_changed = true; } + if (screen->info.have_EXT_extended_dynamic_state3) { +#define STATE_CHECK(NAME, FLAG) \ + if (cso && (!prev_state || 
prev_state->NAME != ctx->rast_state->NAME)) \ + ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_RAST_##FLAG) + + if (!screen->driver_workarounds.no_linestipple) { + if (ctx->rast_state->base.line_stipple_enable) { + STATE_CHECK(base.line_stipple_factor, STIPPLE); + STATE_CHECK(base.line_stipple_pattern, STIPPLE); + } else { + ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE); + } + if (screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable) { + STATE_CHECK(hw_state.line_stipple_enable, STIPPLE_ON); + } + } + STATE_CHECK(hw_state.depth_clip, CLIP); + STATE_CHECK(hw_state.depth_clamp, CLAMP); + STATE_CHECK(hw_state.polygon_mode, POLYGON); + STATE_CHECK(hw_state.clip_halfz, HALFZ); + STATE_CHECK(hw_state.pv_last, PV); + STATE_CHECK(dynamic_line_mode, LINE); + +#undef STATE_CHECK + } + + if (fabs(ctx->rast_state->base.line_width - line_width) > FLT_EPSILON) + ctx->line_width_changed = true; + + bool lower_gl_point = screen->driver_workarounds.no_hw_gl_point; + lower_gl_point &= ctx->rast_state->base.fill_front == PIPE_POLYGON_MODE_POINT; + if (zink_get_gs_key(ctx)->lower_gl_point != lower_gl_point) + zink_set_gs_key(ctx)->lower_gl_point = lower_gl_point; + if (ctx->gfx_pipeline_state.dyn_state1.front_face != ctx->rast_state->front_face) { ctx->gfx_pipeline_state.dyn_state1.front_face = ctx->rast_state->front_face; ctx->gfx_pipeline_state.dirty |= !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state; } - if (ctx->rast_state->base.point_quad_rasterization != point_quad_rasterization) + if (ctx->gfx_pipeline_state.dyn_state1.cull_mode != ctx->rast_state->cull_mode) { + ctx->gfx_pipeline_state.dyn_state1.cull_mode = ctx->rast_state->cull_mode; + ctx->gfx_pipeline_state.dirty |= !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state; + } + if (!ctx->primitives_generated_active) + zink_set_rasterizer_discard(ctx, false); + else if (rasterizer_discard != ctx->rast_state->base.rasterizer_discard) + zink_set_null_fs(ctx); + + if 
(ctx->rast_state->base.point_quad_rasterization || + ctx->rast_state->base.point_quad_rasterization != point_quad_rasterization) zink_set_fs_point_coord_key(ctx); if (ctx->rast_state->base.scissor != scissor) ctx->scissor_changed = true; + + if (ctx->rast_state->base.force_persample_interp != force_persample_interp) { + zink_set_fs_base_key(ctx)->force_persample_interp = ctx->rast_state->base.force_persample_interp; + ctx->gfx_pipeline_state.dirty = true; + } + ctx->gfx_pipeline_state.force_persample_interp = ctx->rast_state->base.force_persample_interp; + + if (ctx->rast_state->base.half_pixel_center != half_pixel_center) + ctx->vp_state_changed = true; + + if (!screen->optimal_keys) + zink_update_gs_key_rectangular_line(ctx); } } @@ -714,6 +784,65 @@ zink_delete_rasterizer_state(struct pipe_context *pctx, void *rs_state) FREE(rs_state); } +struct pipe_vertex_state * +zink_create_vertex_state(struct pipe_screen *pscreen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask) +{ + struct zink_vertex_state *zstate = CALLOC_STRUCT(zink_vertex_state); + if (!zstate) { + mesa_loge("ZINK: failed to allocate zstate!"); + return NULL; + } + + util_init_pipe_vertex_state(pscreen, buffer, elements, num_elements, indexbuf, full_velem_mask, + &zstate->b); + + /* Initialize the vertex element state in state->element. + * Do it by creating a vertex element state object and copying it there. 
+ */ + struct zink_context ctx; + ctx.base.screen = pscreen; + struct zink_vertex_elements_state *elems = zink_create_vertex_elements_state(&ctx.base, num_elements, elements); + zstate->velems = *elems; + zink_delete_vertex_elements_state(&ctx.base, elems); + + return &zstate->b; +} + +void +zink_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate) +{ + pipe_vertex_buffer_unreference(&vstate->input.vbuffer); + pipe_resource_reference(&vstate->input.indexbuf, NULL); + FREE(vstate); +} + +struct pipe_vertex_state * +zink_cache_create_vertex_state(struct pipe_screen *pscreen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask) +{ + struct zink_screen *screen = zink_screen(pscreen); + + return util_vertex_state_cache_get(pscreen, buffer, elements, num_elements, indexbuf, + full_velem_mask, &screen->vertex_state_cache); +} + +void +zink_cache_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate) +{ + struct zink_screen *screen = zink_screen(pscreen); + + util_vertex_state_destroy(pscreen, &screen->vertex_state_cache, vstate); +} + void zink_context_state_init(struct pipe_context *pctx) { diff --git a/src/gallium/drivers/zink/zink_state.h b/src/gallium/drivers/zink/zink_state.h index 1254498377c..71dc6457170 100644 --- a/src/gallium/drivers/zink/zink_state.h +++ b/src/gallium/drivers/zink/zink_state.h @@ -24,98 +24,38 @@ #ifndef ZINK_STATE_H #define ZINK_STATE_H -#include <vulkan/vulkan.h> +#include "zink_types.h" -#include "pipe/p_state.h" - -struct zink_vertex_elements_hw_state { - uint32_t hash; - union { - VkVertexInputAttributeDescription attribs[PIPE_MAX_ATTRIBS]; - VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS]; - }; - union { - struct { - VkVertexInputBindingDivisorDescriptionEXT divisors[PIPE_MAX_ATTRIBS]; - VkVertexInputBindingDescription bindings[PIPE_MAX_ATTRIBS]; 
// combination of element_state and stride - uint8_t divisors_present; - } b; - VkVertexInputBindingDescription2EXT dynbindings[PIPE_MAX_ATTRIBS]; - }; - uint32_t num_bindings, num_attribs; -}; - -struct zink_vertex_elements_state { - struct { - uint32_t binding; - VkVertexInputRate inputRate; - } bindings[PIPE_MAX_ATTRIBS]; - uint32_t divisor[PIPE_MAX_ATTRIBS]; - uint8_t binding_map[PIPE_MAX_ATTRIBS]; - uint32_t decomposed_attrs; - unsigned decomposed_attrs_size; - uint32_t decomposed_attrs_without_w; - unsigned decomposed_attrs_without_w_size; - struct zink_vertex_elements_hw_state hw_state; -}; - -struct zink_rasterizer_hw_state { - unsigned polygon_mode : 2; //VkPolygonMode - unsigned cull_mode : 2; //VkCullModeFlags - unsigned line_mode : 2; //VkLineRasterizationModeEXT - bool depth_clamp:1; - bool rasterizer_discard:1; - bool pv_last:1; - bool line_stipple_enable:1; - bool force_persample_interp:1; - bool clip_halfz:1; -}; -#define ZINK_RAST_HW_STATE_SIZE 12 - - -struct zink_rasterizer_state { - struct pipe_rasterizer_state base; - bool offset_point, offset_line, offset_tri; - float offset_units, offset_clamp, offset_scale; - float line_width; - VkFrontFace front_face; - struct zink_rasterizer_hw_state hw_state; -}; - -struct zink_blend_state { - uint32_t hash; - VkPipelineColorBlendAttachmentState attachments[PIPE_MAX_COLOR_BUFS]; - - VkBool32 logicop_enable; - VkLogicOp logicop_func; - - VkBool32 alpha_to_coverage; - VkBool32 alpha_to_one; - - bool need_blend_constants; - bool dual_src_blend; -}; - -struct zink_depth_stencil_alpha_hw_state { - VkBool32 depth_test; - VkCompareOp depth_compare_op; +#ifdef __cplusplus +extern "C" { +#endif - VkBool32 depth_bounds_test; - float min_depth_bounds, max_depth_bounds; +void +zink_context_state_init(struct pipe_context *pctx); - VkBool32 stencil_test; - VkStencilOpState stencil_front; - VkStencilOpState stencil_back; - VkBool32 depth_write; -}; +struct pipe_vertex_state * +zink_create_vertex_state(struct pipe_screen 
*pscreen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask); +void +zink_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate); +struct pipe_vertex_state * +zink_cache_create_vertex_state(struct pipe_screen *pscreen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask); +void +zink_cache_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate); -struct zink_depth_stencil_alpha_state { - struct pipe_depth_stencil_alpha_state base; - struct zink_depth_stencil_alpha_hw_state hw_state; -}; -void -zink_context_state_init(struct pipe_context *pctx); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/gallium/drivers/zink/zink_surface.c b/src/gallium/drivers/zink/zink_surface.c index f3cfacb1d14..a24ca83ac86 100644 --- a/src/gallium/drivers/zink/zink_surface.c +++ b/src/gallium/drivers/zink/zink_surface.c @@ -23,9 +23,11 @@ #include "zink_context.h" #include "zink_framebuffer.h" +#include "zink_format.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_surface.h" +#include "zink_kopper.h" #include "util/format/u_format.h" #include "util/u_inlines.h" @@ -37,17 +39,19 @@ create_ivci(struct zink_screen *screen, const struct pipe_surface *templ, enum pipe_texture_target target) { - VkImageViewCreateInfo ivci = {0}; + VkImageViewCreateInfo ivci; + /* zero holes since this is hashed */ + memset(&ivci, 0, sizeof(VkImageViewCreateInfo)); ivci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; ivci.image = res->obj->image; switch (target) { case PIPE_TEXTURE_1D: - ivci.viewType = VK_IMAGE_VIEW_TYPE_1D; + ivci.viewType = res->need_2D ? 
VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D; break; case PIPE_TEXTURE_1D_ARRAY: - ivci.viewType = VK_IMAGE_VIEW_TYPE_1D_ARRAY; + ivci.viewType = res->need_2D ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY; break; case PIPE_TEXTURE_2D: @@ -75,7 +79,7 @@ create_ivci(struct zink_screen *screen, unreachable("unsupported target"); } - ivci.format = zink_get_format(screen, templ->format); + ivci.format = res->base.b.format == PIPE_FORMAT_A8_UNORM ? res->format : zink_get_format(screen, templ->format); assert(ivci.format != VK_FORMAT_UNDEFINED); /* TODO: it's currently illegal to use non-identity swizzles for framebuffer attachments, @@ -96,51 +100,112 @@ create_ivci(struct zink_screen *screen, ivci.subresourceRange.levelCount = 1; ivci.subresourceRange.baseArrayLayer = templ->u.tex.first_layer; ivci.subresourceRange.layerCount = 1 + templ->u.tex.last_layer - templ->u.tex.first_layer; + assert(ivci.viewType != VK_IMAGE_VIEW_TYPE_3D || ivci.subresourceRange.baseArrayLayer == 0); + assert(ivci.viewType != VK_IMAGE_VIEW_TYPE_3D || ivci.subresourceRange.layerCount == 1); + /* ensure cube image types get clamped to 2D/2D_ARRAY as expected for partial views */ ivci.viewType = zink_surface_clamp_viewtype(ivci.viewType, templ->u.tex.first_layer, templ->u.tex.last_layer, res->base.b.array_size); return ivci; } +/* this is used for framebuffer attachments to set up imageless framebuffers */ +static void +init_surface_info(struct zink_screen *screen, struct zink_surface *surface, struct zink_resource *res, VkImageViewCreateInfo *ivci) +{ + VkImageViewUsageCreateInfo *usage_info = (VkImageViewUsageCreateInfo *)ivci->pNext; + surface->info.flags = res->obj->vkflags; + surface->info.usage = usage_info ? 
usage_info->usage : res->obj->vkusage; + surface->info.width = surface->base.width; + surface->info.height = surface->base.height; + surface->info.layerCount = ivci->subresourceRange.layerCount; + surface->info.format[0] = ivci->format; + if (res->obj->dt) { + struct kopper_displaytarget *cdt = res->obj->dt; + if (zink_kopper_has_srgb(cdt)) + surface->info.format[1] = ivci->format == cdt->formats[0] ? cdt->formats[1] : cdt->formats[0]; + } else { + enum pipe_format srgb = util_format_is_srgb(surface->base.format) ? util_format_linear(surface->base.format) : util_format_srgb(surface->base.format); + if (srgb == surface->base.format) + srgb = PIPE_FORMAT_NONE; + if (srgb) { + VkFormat format = zink_get_format(screen, srgb); + if (format) + surface->info.format[1] = format; + } + } +} + +static void +init_pipe_surface_info(struct pipe_context *pctx, struct pipe_surface *psurf, const struct pipe_surface *templ, const struct pipe_resource *pres) +{ + unsigned int level = templ->u.tex.level; + psurf->context = pctx; + psurf->format = templ->format; + psurf->width = u_minify(pres->width0, level); + assert(psurf->width); + psurf->height = u_minify(pres->height0, level); + assert(psurf->height); + psurf->nr_samples = templ->nr_samples; + psurf->u.tex.level = level; + psurf->u.tex.first_layer = templ->u.tex.first_layer; + psurf->u.tex.last_layer = templ->u.tex.last_layer; +} + +static void +apply_view_usage_for_format(struct zink_screen *screen, struct zink_resource *res, struct zink_surface *surface, enum pipe_format format, VkImageViewCreateInfo *ivci) +{ + VkFormatFeatureFlags feats = res->linear ? 
+ screen->format_props[format].linearTilingFeatures : + screen->format_props[format].optimalTilingFeatures; + VkImageUsageFlags attachment = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); + surface->usage_info.usage = res->obj->vkusage & ~attachment; + if (res->obj->modifier_aspect) { + feats = res->obj->vkfeats; + /* intersect format features for current modifier */ + for (unsigned i = 0; i < screen->modifier_props[format].drmFormatModifierCount; i++) { + if (res->obj->modifier == screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier) + feats &= screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifierTilingFeatures; + } + } + /* if the format features don't support framebuffer attachment, use VkImageViewUsageCreateInfo to remove it */ + if ((res->obj->vkusage & attachment) && + !(feats & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) { + ivci->pNext = &surface->usage_info; + } +} + static struct zink_surface * create_surface(struct pipe_context *pctx, struct pipe_resource *pres, const struct pipe_surface *templ, - VkImageViewCreateInfo *ivci) + VkImageViewCreateInfo *ivci, + bool actually) { struct zink_screen *screen = zink_screen(pctx->screen); struct zink_resource *res = zink_resource(pres); - unsigned int level = templ->u.tex.level; struct zink_surface *surface = CALLOC_STRUCT(zink_surface); if (!surface) return NULL; + surface->usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; + surface->usage_info.pNext = NULL; + apply_view_usage_for_format(screen, res, surface, templ->format, ivci); + pipe_resource_reference(&surface->base.texture, pres); pipe_reference_init(&surface->base.reference, 1); - surface->base.context = pctx; - surface->base.format = templ->format; - surface->base.width = u_minify(pres->width0, level); - assert(surface->base.width); - surface->base.height = 
u_minify(pres->height0, level); - assert(surface->base.height); - surface->base.nr_samples = templ->nr_samples; - surface->base.u.tex.level = level; - surface->base.u.tex.first_layer = templ->u.tex.first_layer; - surface->base.u.tex.last_layer = templ->u.tex.last_layer; + init_pipe_surface_info(pctx, &surface->base, templ, pres); surface->obj = zink_resource(pres)->obj; - util_dynarray_init(&surface->framebuffer_refs, NULL); - util_dynarray_init(&surface->desc_set_refs.refs, NULL); - surface->info.flags = res->obj->vkflags; - surface->info.usage = res->obj->vkusage; - surface->info.width = surface->base.width; - surface->info.height = surface->base.height; - surface->info.layerCount = ivci->subresourceRange.layerCount; - surface->info.format = ivci->format; - surface->info_hash = _mesa_hash_data(&surface->info, sizeof(surface->info)); + init_surface_info(screen, surface, res, ivci); - if (VKSCR(CreateImageView)(screen->dev, ivci, NULL, - &surface->image_view) != VK_SUCCESS) { + if (!actually) + return surface; + assert(ivci->image); + VkResult result = VKSCR(CreateImageView)(screen->dev, ivci, NULL, + &surface->image_view); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateImageView failed (%s)", vk_Result_to_str(result)); FREE(surface); return NULL; } @@ -154,7 +219,20 @@ hash_ivci(const void *key) return _mesa_hash_data((char*)key + offsetof(VkImageViewCreateInfo, flags), sizeof(VkImageViewCreateInfo) - offsetof(VkImageViewCreateInfo, flags)); } -struct pipe_surface * +static struct zink_surface * +do_create_surface(struct pipe_context *pctx, struct pipe_resource *pres, const struct pipe_surface *templ, VkImageViewCreateInfo *ivci, uint32_t hash, bool actually) +{ + /* create a new surface */ + struct zink_surface *surface = create_surface(pctx, pres, templ, ivci, actually); + /* only transient surfaces have nr_samples set */ + surface->base.nr_samples = zink_screen(pctx->screen)->info.have_EXT_multisampled_render_to_single_sampled ? 
templ->nr_samples : 0; + surface->hash = hash; + surface->ivci = *ivci; + return surface; +} + +/* get a cached surface for a shader descriptor */ +struct zink_surface * zink_get_surface(struct zink_context *ctx, struct pipe_resource *pres, const struct pipe_surface *templ, @@ -168,10 +246,11 @@ zink_get_surface(struct zink_context *ctx, struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(&res->surface_cache, hash, ivci); if (!entry) { - /* create a new surface */ - surface = create_surface(&ctx->base, pres, templ, ivci); - surface->hash = hash; - surface->ivci = *ivci; + /* create a new surface, but don't actually create the imageview if mutable isn't set and the format is different; + * mutable will be set later and the imageview will be filled in + */ + bool actually = !zink_format_needs_mutable(pres->format, templ->format) || (pres->bind & ZINK_BIND_MUTABLE); + surface = do_create_surface(&ctx->base, pres, templ, ivci, hash, actually); entry = _mesa_hash_table_insert_pre_hashed(&res->surface_cache, hash, &surface->ivci, surface); if (!entry) { simple_mtx_unlock(&res->surface_mtx); @@ -184,60 +263,144 @@ zink_get_surface(struct zink_context *ctx, p_atomic_inc(&surface->base.reference.count); } simple_mtx_unlock(&res->surface_mtx); - return &surface->base; + + return surface; } -static struct pipe_surface * -wrap_surface(struct pipe_context *pctx, struct pipe_surface *psurf) +/* wrap a surface for use as a framebuffer attachment + * Takes ownership of surface */ +static struct zink_ctx_surface * +wrap_surface(struct pipe_context *pctx, + struct zink_surface *surface, + const struct pipe_surface *templ) { struct zink_ctx_surface *csurf = CALLOC_STRUCT(zink_ctx_surface); - csurf->base = *psurf; + if (!csurf) { + zink_surface_reference (zink_screen(pctx->screen), &surface, NULL); + return NULL; + } + + csurf->base = *templ; pipe_reference_init(&csurf->base.reference, 1); - csurf->surf = (struct zink_surface*)psurf; + csurf->surf = surface; 
csurf->base.context = pctx; - return &csurf->base; + return csurf; } +/* this is the context hook, so only zink_ctx_surfaces will reach it */ +static void +zink_surface_destroy(struct pipe_context *pctx, + struct pipe_surface *psurface) +{ + struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurface; + if (csurf->needs_mutable) + /* this has an extra resource ref */ + pipe_resource_reference(&csurf->base.texture, NULL); + zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, NULL); + pipe_surface_release(pctx, (struct pipe_surface**)&csurf->transient); + FREE(csurf); +} + +/* this the context hook that returns a zink_ctx_surface */ static struct pipe_surface * zink_create_surface(struct pipe_context *pctx, struct pipe_resource *pres, const struct pipe_surface *templ) { + struct zink_resource *res = zink_resource(pres); + struct zink_screen *screen = zink_screen(pctx->screen); + bool is_array = templ->u.tex.last_layer != templ->u.tex.first_layer; + bool needs_mutable = false; + enum pipe_texture_target target_2d[] = {PIPE_TEXTURE_2D, PIPE_TEXTURE_2D_ARRAY}; + if (!res->obj->dt && zink_format_needs_mutable(pres->format, templ->format)) { + /* mutable not set by default */ + needs_mutable = !(res->base.b.bind & ZINK_BIND_MUTABLE); + /* + VUID-VkImageViewCreateInfo-image-07072 + If image was created with the VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT flag and + format is a non-compressed format, the levelCount and layerCount members of + subresourceRange must both be 1 + + ...but this is allowed with a maintenance6 property + */ + if (util_format_is_compressed(pres->format) && templ->u.tex.first_layer != templ->u.tex.last_layer && + (!screen->info.have_KHR_maintenance6 || !screen->info.maint6_props.blockTexelViewCompatibleMultipleLayers)) + return NULL; + } - VkImageViewCreateInfo ivci = create_ivci(zink_screen(pctx->screen), - zink_resource(pres), templ, pres->target); - if (pres->target == PIPE_TEXTURE_3D) - ivci.viewType = 
VK_IMAGE_VIEW_TYPE_2D; + if (!screen->threaded && needs_mutable) { + /* this is fine without tc */ + needs_mutable = false; + zink_resource_object_init_mutable(zink_context(pctx), res); + } - return wrap_surface(pctx, zink_get_surface(zink_context(pctx), pres, templ, &ivci)); -} + if (!zink_get_format(screen, templ->format)) + return NULL; -/* framebuffers are owned by their surfaces, so each time a surface that's part of a cached fb - * is destroyed, it has to unref all the framebuffers it's attached to in order to avoid leaking - * all the framebuffers - * - * surfaces are always batch-tracked, so it is impossible for a framebuffer to be destroyed - * while it is in use - */ -static void -surface_clear_fb_refs(struct zink_screen *screen, struct pipe_surface *psurface) -{ - struct zink_surface *surface = zink_surface(psurface); - util_dynarray_foreach(&surface->framebuffer_refs, struct zink_framebuffer*, fb_ref) { - struct zink_framebuffer *fb = *fb_ref; - for (unsigned i = 0; i < fb->state.num_attachments; i++) { - if (fb->surfaces[i] == psurface) { - simple_mtx_lock(&screen->framebuffer_mtx); - fb->surfaces[i] = NULL; - _mesa_hash_table_remove_key(&screen->framebuffer_cache, &fb->state); - zink_framebuffer_reference(screen, &fb, NULL); - simple_mtx_unlock(&screen->framebuffer_mtx); - break; - } + VkImageViewCreateInfo ivci = create_ivci(screen, res, templ, + pres->target == PIPE_TEXTURE_3D ? target_2d[is_array] : pres->target); + + struct zink_surface *surface = NULL; + if (res->obj->dt) { + /* don't cache swapchain surfaces. that's weird. 
*/ + surface = do_create_surface(pctx, pres, templ, &ivci, 0, false); + if (unlikely(!surface)) { + mesa_loge("ZINK: failed do_create_surface!"); + return NULL; } + + surface->is_swapchain = true; + } else if (!needs_mutable) { + surface = zink_get_surface(zink_context(pctx), pres, templ, &ivci); + if (unlikely(!surface)) { + mesa_loge("ZINK: failed to get non-mutable surface!"); + return NULL; + } + } + + struct zink_ctx_surface *csurf = wrap_surface(pctx, surface, needs_mutable ? templ : &surface->base); /* move ownership of surface */ + if (!unlikely (csurf)) { + mesa_loge("ZINK: failed to allocate csurf!"); + return NULL; + } + + csurf->needs_mutable = needs_mutable; + if (needs_mutable) { + pipe_resource_reference(&csurf->base.texture, pres); + init_pipe_surface_info(pctx, &csurf->base, templ, pres); } - util_dynarray_fini(&surface->framebuffer_refs); + + if (templ->nr_samples && !screen->info.have_EXT_multisampled_render_to_single_sampled) { + /* transient fb attachment: not cached */ + struct pipe_resource rtempl = *pres; + rtempl.nr_samples = templ->nr_samples; + rtempl.bind |= ZINK_BIND_TRANSIENT; + struct zink_resource *transient = zink_resource(pctx->screen->resource_create(pctx->screen, &rtempl)); + if (unlikely(!transient)) { + mesa_loge("ZINK: failed to create transient resource!"); + goto fail; + } + + ivci.image = transient->obj->image; + struct zink_surface *tsurf = create_surface(pctx, &transient->base.b, templ, &ivci, true); + pipe_resource_reference((struct pipe_resource**)&transient, NULL); + if (unlikely(!tsurf)) { + mesa_loge("ZINK: failed to create transient surface!"); + goto fail; + } + + csurf->transient = wrap_surface(pctx, tsurf, &tsurf->base); /* move ownership of tsurf */ + if (unlikely(!csurf->transient)) { + mesa_loge("ZINK: failed to wrap transient surface!"); + goto fail; + } + } + + return &csurf->base; +fail: + zink_surface_destroy(pctx, &csurf->base); + return NULL; } void @@ -245,55 +408,53 @@ zink_destroy_surface(struct 
zink_screen *screen, struct pipe_surface *psurface) { struct zink_surface *surface = zink_surface(psurface); struct zink_resource *res = zink_resource(psurface->texture); - simple_mtx_lock(&res->surface_mtx); - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci); - assert(he); - assert(he->data == surface); - _mesa_hash_table_remove(&res->surface_cache, he); - simple_mtx_unlock(&res->surface_mtx); - if (!screen->info.have_KHR_imageless_framebuffer) - surface_clear_fb_refs(screen, psurface); - zink_descriptor_set_refs_clear(&surface->desc_set_refs, surface); - util_dynarray_fini(&surface->framebuffer_refs); + if ((!psurface->nr_samples || screen->info.have_EXT_multisampled_render_to_single_sampled) && !surface->is_swapchain) { + simple_mtx_lock(&res->surface_mtx); + if (psurface->reference.count) { + /* a different context got a cache hit during deletion: this surface is alive again */ + simple_mtx_unlock(&res->surface_mtx); + return; + } + struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci); + assert(he); + assert(he->data == surface); + _mesa_hash_table_remove(&res->surface_cache, he); + simple_mtx_unlock(&res->surface_mtx); + } + /* this surface is dead now */ + simple_mtx_lock(&res->obj->view_lock); + /* imageviews are never destroyed directly to ensure lifetimes for in-use surfaces */ + if (surface->is_swapchain) { + for (unsigned i = 0; i < surface->swapchain_size; i++) + util_dynarray_append(&res->obj->views, VkImageView, surface->swapchain[i]); + free(surface->swapchain); + } else + util_dynarray_append(&res->obj->views, VkImageView, surface->image_view); + simple_mtx_unlock(&res->obj->view_lock); pipe_resource_reference(&psurface->texture, NULL); - if (surface->simage_view) - VKSCR(DestroyImageView)(screen->dev, surface->simage_view, NULL); - VKSCR(DestroyImageView)(screen->dev, surface->image_view, NULL); FREE(surface); } -static void 
-zink_surface_destroy(struct pipe_context *pctx, - struct pipe_surface *psurface) -{ - struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurface; - zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, NULL); - FREE(csurf); -} - +/* this is called when a surface is rebound for mutable/storage use */ bool zink_rebind_surface(struct zink_context *ctx, struct pipe_surface **psurface) { struct zink_surface *surface = zink_surface(*psurface); struct zink_resource *res = zink_resource((*psurface)->texture); struct zink_screen *screen = zink_screen(ctx->base.screen); - if (surface->simage_view) + if (surface->obj == res->obj) return false; - VkImageViewCreateInfo ivci = create_ivci(screen, - zink_resource((*psurface)->texture), (*psurface), surface->base.texture->target); + assert(!res->obj->dt); + VkImageViewCreateInfo ivci = surface->ivci; + ivci.image = res->obj->image; uint32_t hash = hash_ivci(&ivci); simple_mtx_lock(&res->surface_mtx); struct hash_entry *new_entry = _mesa_hash_table_search_pre_hashed(&res->surface_cache, hash, &ivci); - if (zink_batch_usage_exists(surface->batch_uses)) - zink_batch_reference_surface(&ctx->batch, surface); - surface_clear_fb_refs(screen, *psurface); - zink_descriptor_set_refs_clear(&surface->desc_set_refs, surface); if (new_entry) { /* reuse existing surface; old one will be cleaned up naturally */ struct zink_surface *new_surface = new_entry->data; simple_mtx_unlock(&res->surface_mtx); - zink_batch_usage_set(&new_surface->batch_uses, ctx->batch.state); zink_surface_reference(screen, (struct zink_surface**)psurface, new_surface); return true; } @@ -301,8 +462,10 @@ zink_rebind_surface(struct zink_context *ctx, struct pipe_surface **psurface) assert(entry); _mesa_hash_table_remove(&res->surface_cache, entry); VkImageView image_view; - if (VKSCR(CreateImageView)(screen->dev, &ivci, NULL, &image_view) != VK_SUCCESS) { - debug_printf("zink: failed to create new imageview"); + apply_view_usage_for_format(screen, res, 
surface, surface->base.format, &ivci); + VkResult result = VKSCR(CreateImageView)(screen->dev, &ivci, NULL, &image_view); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: failed to create new imageview (%s)", vk_Result_to_str(result)); simple_mtx_unlock(&res->surface_mtx); return false; } @@ -310,18 +473,19 @@ zink_rebind_surface(struct zink_context *ctx, struct pipe_surface **psurface) surface->ivci = ivci; entry = _mesa_hash_table_insert_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci, surface); assert(entry); - surface->simage_view = surface->image_view; + simple_mtx_lock(&res->obj->view_lock); + util_dynarray_append(&res->obj->views, VkImageView, surface->image_view); + simple_mtx_unlock(&res->obj->view_lock); surface->image_view = image_view; surface->obj = zink_resource(surface->base.texture)->obj; /* update for imageless fb */ surface->info.flags = res->obj->vkflags; surface->info.usage = res->obj->vkusage; - surface->info_hash = _mesa_hash_data(&surface->info, sizeof(surface->info)); - zink_batch_usage_set(&surface->batch_uses, ctx->batch.state); simple_mtx_unlock(&res->surface_mtx); return true; } +/* dummy surfaces are used for null framebuffer/descriptors */ struct pipe_surface * zink_surface_create_null(struct zink_context *ctx, enum pipe_texture_target target, unsigned width, unsigned height, unsigned samples) { @@ -332,16 +496,18 @@ zink_surface_create_null(struct zink_context *ctx, enum pipe_texture_target targ templ.width0 = width; templ.height0 = height; templ.depth0 = 1; - templ.format = PIPE_FORMAT_R8_UINT; + templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; templ.target = target; - templ.bind = PIPE_BIND_RENDER_TARGET; + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + if (samples < 2) + templ.bind |= PIPE_BIND_SHADER_IMAGE; templ.nr_samples = samples; pres = ctx->base.screen->resource_create(ctx->base.screen, &templ); if (!pres) return NULL; - surf_templ.format = PIPE_FORMAT_R8_UINT; + surf_templ.format = 
PIPE_FORMAT_R8G8B8A8_UNORM; surf_templ.nr_samples = 0; struct pipe_surface *psurf = ctx->base.create_surface(&ctx->base, pres, &surf_templ); pipe_resource_reference(&pres, NULL); @@ -354,3 +520,43 @@ zink_context_surface_init(struct pipe_context *context) context->create_surface = zink_create_surface; context->surface_destroy = zink_surface_destroy; } + +/* must be called before a swapchain image is used to ensure correct imageview is used */ +void +zink_surface_swapchain_update(struct zink_context *ctx, struct zink_surface *surface) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_resource *res = zink_resource(surface->base.texture); + struct kopper_displaytarget *cdt = res->obj->dt; + if (!cdt) + return; //dead swapchain + if (cdt->swapchain != surface->dt_swapchain) { + /* new swapchain: clear out previous swapchain imageviews/array and setup a new one; + * old views will be pruned normally in zink_batch or on object destruction + */ + simple_mtx_lock(&res->obj->view_lock); + for (unsigned i = 0; i < surface->swapchain_size; i++) + util_dynarray_append(&res->obj->views, VkImageView, surface->swapchain[i]); + simple_mtx_unlock(&res->obj->view_lock); + free(surface->swapchain); + surface->swapchain_size = cdt->swapchain->num_images; + surface->swapchain = calloc(surface->swapchain_size, sizeof(VkImageView)); + if (!surface->swapchain) { + mesa_loge("ZINK: failed to allocate surface->swapchain!"); + return; + } + surface->base.width = res->base.b.width0; + surface->base.height = res->base.b.height0; + init_surface_info(screen, surface, res, &surface->ivci); + surface->dt_swapchain = cdt->swapchain; + } + if (!surface->swapchain[res->obj->dt_idx]) { + /* no current swapchain imageview exists: create it */ + assert(res->obj->image && cdt->swapchain->images[res->obj->dt_idx].image == res->obj->image); + surface->ivci.image = res->obj->image; + assert(surface->ivci.image); + VKSCR(CreateImageView)(screen->dev, &surface->ivci, NULL, 
&surface->swapchain[res->obj->dt_idx]); + } + /* the current swapchain imageview is now the view for the current swapchain image */ + surface->image_view = surface->swapchain[res->obj->dt_idx]; +} diff --git a/src/gallium/drivers/zink/zink_surface.h b/src/gallium/drivers/zink/zink_surface.h index 617084d39e8..9207b2e8b89 100644 --- a/src/gallium/drivers/zink/zink_surface.h +++ b/src/gallium/drivers/zink/zink_surface.h @@ -24,56 +24,7 @@ #ifndef ZINK_SURFACE_H #define ZINK_SURFACE_H -#include "pipe/p_state.h" -#include "zink_batch.h" -#include <vulkan/vulkan.h> - -struct pipe_context; - -struct zink_surface_info { - VkImageCreateFlags flags; - VkImageUsageFlags usage; - uint32_t width; - uint32_t height; - uint32_t layerCount; - VkFormat format; -}; - -struct zink_surface { - struct pipe_surface base; - VkImageViewCreateInfo ivci; - struct zink_surface_info info; //TODO: union with fb refs - uint32_t info_hash; - VkImageView image_view; - VkImageView simage_view;//old iview after storage replacement/rebind - void *obj; //backing resource object - uint32_t hash; - struct zink_batch_usage *batch_uses; - struct util_dynarray framebuffer_refs; - struct zink_descriptor_refs desc_set_refs; -}; - -/* wrapper object that preserves the gallium expectation of having - * pipe_surface::context match the context used to create the surface - */ -struct zink_ctx_surface { - struct pipe_surface base; - struct zink_surface *surf; -}; - -/* use this cast for framebuffer surfaces */ -static inline struct zink_surface * -zink_csurface(struct pipe_surface *psurface) -{ - return psurface ? 
((struct zink_ctx_surface *)psurface)->surf : NULL; -} - -/* use this cast for internal surfaces */ -static inline struct zink_surface * -zink_surface(struct pipe_surface *psurface) -{ - return (struct zink_surface *)psurface; -} +#include "zink_types.h" void zink_destroy_surface(struct zink_screen *screen, struct pipe_surface *psurface); @@ -100,12 +51,13 @@ create_ivci(struct zink_screen *screen, const struct pipe_surface *templ, enum pipe_texture_target target); -struct pipe_surface * +struct zink_surface * zink_get_surface(struct zink_context *ctx, struct pipe_resource *pres, const struct pipe_surface *templ, VkImageViewCreateInfo *ivci); +/* cube image types are clamped by gallium rules to 2D or 2D_ARRAY viewtypes if not using all layers */ static inline VkImageViewType zink_surface_clamp_viewtype(VkImageViewType viewType, unsigned first_layer, unsigned last_layer, unsigned array_size) { @@ -113,14 +65,8 @@ zink_surface_clamp_viewtype(VkImageViewType viewType, unsigned first_layer, unsi if (viewType == VK_IMAGE_VIEW_TYPE_CUBE || viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) { if (first_layer == last_layer) return VK_IMAGE_VIEW_TYPE_2D; - if (layerCount % 6 == 0) { - if (viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && layerCount == 6) - return VK_IMAGE_VIEW_TYPE_CUBE; - } else if (first_layer || layerCount != array_size) + if (layerCount % 6 != 0 && (first_layer || layerCount != array_size)) return VK_IMAGE_VIEW_TYPE_2D_ARRAY; - } else if (viewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) { - if (first_layer == last_layer) - return VK_IMAGE_VIEW_TYPE_2D; } return viewType; } @@ -137,4 +83,7 @@ zink_rebind_ctx_surface(struct zink_context *ctx, struct pipe_surface **psurface struct pipe_surface * zink_surface_create_null(struct zink_context *ctx, enum pipe_texture_target target, unsigned width, unsigned height, unsigned samples); + +void +zink_surface_swapchain_update(struct zink_context *ctx, struct zink_surface *surface); #endif diff --git 
a/src/gallium/drivers/zink/zink_synchronization.cpp b/src/gallium/drivers/zink/zink_synchronization.cpp new file mode 100644 index 00000000000..78fce1e29e9 --- /dev/null +++ b/src/gallium/drivers/zink/zink_synchronization.cpp @@ -0,0 +1,794 @@ +/* + * Copyright © 2023 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> + */ + +#include "zink_batch.h" +#include "zink_context.h" +#include "zink_descriptors.h" +#include "zink_resource.h" +#include "zink_screen.h" + + +static VkAccessFlags +access_src_flags(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_UNDEFINED: + return VK_ACCESS_NONE; + + case VK_IMAGE_LAYOUT_GENERAL: + return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT: + return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return VK_ACCESS_SHADER_READ_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return VK_ACCESS_TRANSFER_READ_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return VK_ACCESS_TRANSFER_WRITE_BIT; + + case VK_IMAGE_LAYOUT_PREINITIALIZED: + return VK_ACCESS_HOST_WRITE_BIT; + + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + return VK_ACCESS_NONE; + + default: + unreachable("unexpected layout"); + } +} + +static VkAccessFlags +access_dst_flags(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_UNDEFINED: + return VK_ACCESS_NONE; + + case VK_IMAGE_LAYOUT_GENERAL: + return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT: + return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return VK_ACCESS_SHADER_READ_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return VK_ACCESS_TRANSFER_READ_BIT; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: 
+ return VK_ACCESS_SHADER_READ_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return VK_ACCESS_TRANSFER_WRITE_BIT; + + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + return VK_ACCESS_NONE; + + default: + unreachable("unexpected layout"); + } +} + +static VkPipelineStageFlags +pipeline_dst_stage(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return VK_PIPELINE_STAGE_TRANSFER_BIT; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return VK_PIPELINE_STAGE_TRANSFER_BIT; + + case VK_IMAGE_LAYOUT_GENERAL: + return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + default: + return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } +} + +#define ALL_READ_ACCESS_FLAGS \ + (VK_ACCESS_INDIRECT_COMMAND_READ_BIT | \ + VK_ACCESS_INDEX_READ_BIT | \ + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | \ + VK_ACCESS_UNIFORM_READ_BIT | \ + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | \ + VK_ACCESS_SHADER_READ_BIT | \ + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | \ + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | \ + VK_ACCESS_TRANSFER_READ_BIT |\ + VK_ACCESS_HOST_READ_BIT |\ + VK_ACCESS_MEMORY_READ_BIT |\ + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |\ + VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT |\ + VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT |\ + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR |\ + VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR |\ + VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT |\ + VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV |\ + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR |\ + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR) + + +bool +zink_resource_access_is_write(VkAccessFlags 
flags) +{ + return (flags & ~ALL_READ_ACCESS_FLAGS) > 0; +} + +static bool +zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) +{ + return res->layout != new_layout || (res->obj->access_stage & pipeline) != pipeline || + (res->obj->access & flags) != flags || + zink_resource_access_is_write(res->obj->access) || + zink_resource_access_is_write(flags); +} + +void +zink_resource_image_barrier_init(VkImageMemoryBarrier *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) +{ + if (!pipeline) + pipeline = pipeline_dst_stage(new_layout); + if (!flags) + flags = access_dst_flags(new_layout); + + VkImageSubresourceRange isr = { + res->aspect, + 0, VK_REMAINING_MIP_LEVELS, + 0, VK_REMAINING_ARRAY_LAYERS + }; + *imb = VkImageMemoryBarrier { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + NULL, + res->obj->access ? res->obj->access : access_src_flags(res->layout), + flags, + res->layout, + new_layout, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + res->obj->image, + isr + }; +} + +void +zink_resource_image_barrier2_init(VkImageMemoryBarrier2 *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) +{ + if (!pipeline) + pipeline = pipeline_dst_stage(new_layout); + if (!flags) + flags = access_dst_flags(new_layout); + + VkImageSubresourceRange isr = { + res->aspect, + 0, VK_REMAINING_MIP_LEVELS, + 0, VK_REMAINING_ARRAY_LAYERS + }; + *imb = VkImageMemoryBarrier2 { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + NULL, + res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + res->obj->access ? 
res->obj->access : access_src_flags(res->layout), + pipeline, + flags, + res->layout, + new_layout, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + res->obj->image, + isr + }; +} + +static inline bool +is_shader_pipline_stage(VkPipelineStageFlags pipeline) +{ + return pipeline & GFX_SHADER_BITS; +} + +static void +resource_check_defer_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkPipelineStageFlags pipeline) +{ + assert(res->obj->is_buffer); + if (res->bind_count[0] - res->so_bind_count > 0) { + if ((res->vbo_bind_mask && !(pipeline & VK_PIPELINE_STAGE_VERTEX_INPUT_BIT)) || + (util_bitcount(res->vbo_bind_mask) != res->bind_count[0] && !is_shader_pipline_stage(pipeline))) + /* gfx rebind */ + _mesa_set_add(ctx->need_barriers[0], res); + } + if (res->bind_count[1] && !(pipeline & VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) + /* compute rebind */ + _mesa_set_add(ctx->need_barriers[1], res); +} + +static inline bool +unordered_res_exec(const struct zink_context *ctx, const struct zink_resource *res, bool is_write) +{ + /* if all usage is unordered, keep unordered */ + if (res->obj->unordered_read && res->obj->unordered_write) + return true; + /* if testing write access but have any ordered read access, cannot promote */ + if (is_write && zink_batch_usage_matches(res->obj->bo->reads.u, ctx->batch.state) && !res->obj->unordered_read) + return false; + /* if write access is unordered or nonexistent, always promote */ + return res->obj->unordered_write || !zink_batch_usage_matches(res->obj->bo->writes.u, ctx->batch.state); +} + +static ALWAYS_INLINE bool +check_unordered_exec(struct zink_context *ctx, struct zink_resource *res, bool is_write) +{ + if (res) { + if (!res->obj->is_buffer) { + /* TODO: figure out how to link up unordered layout -> ordered layout and delete this conditionals */ + if (zink_resource_usage_is_unflushed(res) && !res->obj->unordered_read && !res->obj->unordered_write) + return false; + } + return unordered_res_exec(ctx, 
res, is_write); + } + return true; +} + +VkCommandBuffer +zink_get_cmdbuf(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst) +{ + bool unordered_exec = !ctx->no_reorder; + + unordered_exec &= check_unordered_exec(ctx, src, false) && + check_unordered_exec(ctx, dst, true); + + if (src) + src->obj->unordered_read = unordered_exec; + if (dst) + dst->obj->unordered_write = unordered_exec; + + if (!unordered_exec || ctx->unordered_blitting) + zink_batch_no_rp(ctx); + + if (unordered_exec) { + ctx->batch.state->has_barriers = true; + ctx->batch.has_work = true; + return ctx->batch.state->reordered_cmdbuf; + } + return ctx->batch.state->cmdbuf; +} + +static void +resource_check_defer_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout layout, VkPipelineStageFlags pipeline) +{ + assert(!res->obj->is_buffer); + assert(!ctx->blitting); + + bool is_compute = pipeline == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + /* if this is a non-shader barrier and there are binds, always queue a shader barrier */ + bool is_shader = is_shader_pipline_stage(pipeline); + if ((is_shader || !res->bind_count[is_compute]) && + /* if no layout change is needed between gfx and compute, do nothing */ + !res->bind_count[!is_compute] && (!is_compute || !res->fb_bind_count)) + return; + + if (res->bind_count[!is_compute] && is_shader) { + /* if the layout is the same between gfx and compute, do nothing */ + if (layout == zink_descriptor_util_image_layout_eval(ctx, res, !is_compute)) + return; + } + /* queue a layout change if a layout change will be needed */ + if (res->bind_count[!is_compute]) + _mesa_set_add(ctx->need_barriers[!is_compute], res); + /* also queue a layout change if this is a non-shader layout */ + if (res->bind_count[is_compute] && !is_shader) + _mesa_set_add(ctx->need_barriers[is_compute], res); +} + +enum barrier_type { + barrier_default, + barrier_KHR_synchronzation2 +}; + +template <barrier_type BARRIER_API> +struct 
emit_memory_barrier { + static void for_image(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, + VkAccessFlags flags, VkPipelineStageFlags pipeline, bool completed, VkCommandBuffer cmdbuf, + bool *queue_import) + { + VkImageMemoryBarrier imb; + zink_resource_image_barrier_init(&imb, res, new_layout, flags, pipeline); + if (!res->obj->access_stage || completed) + imb.srcAccessMask = 0; + if (res->obj->needs_zs_evaluate) + imb.pNext = &res->obj->zs_evaluate; + res->obj->needs_zs_evaluate = false; + if (res->queue != zink_screen(ctx->base.screen)->gfx_queue && res->queue != VK_QUEUE_FAMILY_IGNORED) { + imb.srcQueueFamilyIndex = res->queue; + imb.dstQueueFamilyIndex = zink_screen(ctx->base.screen)->gfx_queue; + res->queue = VK_QUEUE_FAMILY_IGNORED; + *queue_import = true; + } + VKCTX(CmdPipelineBarrier)( + cmdbuf, + res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + pipeline, + 0, + 0, NULL, + 0, NULL, + 1, &imb + ); + } + + static void for_buffer(struct zink_context *ctx, struct zink_resource *res, + VkPipelineStageFlags pipeline, + VkAccessFlags flags, + bool unordered, + bool usage_matches, + VkPipelineStageFlags stages, + VkCommandBuffer cmdbuf) + { + VkMemoryBarrier bmb; + bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + bmb.pNext = NULL; + if (unordered) { + stages = usage_matches ? res->obj->unordered_access_stage : stages; + bmb.srcAccessMask = usage_matches ? 
res->obj->unordered_access : res->obj->access; + } else { + bmb.srcAccessMask = res->obj->access; + } + bmb.dstAccessMask = flags; + VKCTX(CmdPipelineBarrier)( + cmdbuf, + stages, + pipeline, + 0, + 1, &bmb, + 0, NULL, + 0, NULL); + } +}; + + +template <> +struct emit_memory_barrier<barrier_KHR_synchronzation2> { + static void for_image(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, + VkAccessFlags flags, VkPipelineStageFlags pipeline, bool completed, VkCommandBuffer cmdbuf, + bool *queue_import) + { + VkImageMemoryBarrier2 imb; + zink_resource_image_barrier2_init(&imb, res, new_layout, flags, pipeline); + if (!res->obj->access_stage || completed) + imb.srcAccessMask = 0; + if (res->obj->needs_zs_evaluate) + imb.pNext = &res->obj->zs_evaluate; + res->obj->needs_zs_evaluate = false; + if (res->queue != zink_screen(ctx->base.screen)->gfx_queue && res->queue != VK_QUEUE_FAMILY_IGNORED) { + imb.srcQueueFamilyIndex = res->queue; + imb.dstQueueFamilyIndex = zink_screen(ctx->base.screen)->gfx_queue; + res->queue = VK_QUEUE_FAMILY_IGNORED; + *queue_import = true; + } + VkDependencyInfo dep = { + VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + NULL, + 0, + 0, + NULL, + 0, + NULL, + 1, + &imb + }; + VKCTX(CmdPipelineBarrier2)(cmdbuf, &dep); + } + + static void for_buffer(struct zink_context *ctx, struct zink_resource *res, + VkPipelineStageFlags pipeline, + VkAccessFlags flags, + bool unordered, + bool usage_matches, + VkPipelineStageFlags stages, + VkCommandBuffer cmdbuf) + { + VkMemoryBarrier2 bmb; + bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; + bmb.pNext = NULL; + if (unordered) { + bmb.srcStageMask = usage_matches ? res->obj->unordered_access_stage : stages; + bmb.srcAccessMask = usage_matches ? 
res->obj->unordered_access : res->obj->access; + } else { + bmb.srcStageMask = stages; + bmb.srcAccessMask = res->obj->access; + } + bmb.dstStageMask = pipeline; + bmb.dstAccessMask = flags; + VkDependencyInfo dep = { + VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + NULL, + 0, + 1, + &bmb, + 0, + NULL, + 0, + NULL + }; + VKCTX(CmdPipelineBarrier2)(cmdbuf, &dep); + } +}; + +template <bool UNSYNCHRONIZED> +struct update_unordered_access_and_get_cmdbuf { + /* use base template to make the cases for true and false more explicite below */ +}; + +template <> +struct update_unordered_access_and_get_cmdbuf<true> { + static VkCommandBuffer apply(struct zink_context *ctx, struct zink_resource *res, bool usage_matches, bool is_write) + { + assert(!usage_matches); + res->obj->unordered_write = true; + res->obj->unordered_read = true; + ctx->batch.state->has_unsync = true; + return ctx->batch.state->unsynchronized_cmdbuf; + } +}; + +template <> +struct update_unordered_access_and_get_cmdbuf<false> { + static VkCommandBuffer apply(struct zink_context *ctx, struct zink_resource *res, bool usage_matches, bool is_write) + { + VkCommandBuffer cmdbuf; + if (!usage_matches) { + res->obj->unordered_write = true; + if (is_write || zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, ZINK_RESOURCE_ACCESS_RW)) + res->obj->unordered_read = true; + } + if (zink_resource_usage_matches(res, ctx->batch.state) && !ctx->unordered_blitting && + /* if current batch usage exists with ordered non-transfer access, never promote + * this avoids layout dsync + */ + (!res->obj->unordered_read || !res->obj->unordered_write)) { + cmdbuf = ctx->batch.state->cmdbuf; + res->obj->unordered_write = false; + res->obj->unordered_read = false; + /* it's impossible to detect this from the caller + * there should be no valid case where this barrier can occur inside a renderpass + */ + zink_batch_no_rp(ctx); + } else { + cmdbuf = is_write ? 
zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL); + /* force subsequent barriers to be ordered to avoid layout desync */ + if (cmdbuf != ctx->batch.state->reordered_cmdbuf) { + res->obj->unordered_write = false; + res->obj->unordered_read = false; + } + } + return cmdbuf; + } +}; + +template <barrier_type BARRIER_API, bool UNSYNCHRONIZED> +void +zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) +{ + if (!pipeline) + pipeline = pipeline_dst_stage(new_layout); + if (!flags) + flags = access_dst_flags(new_layout); + + bool is_write = zink_resource_access_is_write(flags); + if (is_write && zink_is_swapchain(res)) + zink_kopper_set_readback_needs_update(res); + if (!res->obj->needs_zs_evaluate && !zink_resource_image_needs_barrier(res, new_layout, flags, pipeline) && + (res->queue == zink_screen(ctx->base.screen)->gfx_queue || res->queue == VK_QUEUE_FAMILY_IGNORED)) + return; + enum zink_resource_access rw = is_write ? 
ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE; + bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw); + bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state); + VkCommandBuffer cmdbuf = update_unordered_access_and_get_cmdbuf<UNSYNCHRONIZED>::apply(ctx, res, usage_matches, is_write); + + assert(new_layout); + bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "image_barrier(%s->%s)", vk_ImageLayout_to_str(res->layout), vk_ImageLayout_to_str(new_layout)); + bool queue_import = false; + emit_memory_barrier<BARRIER_API>::for_image(ctx, res, new_layout, flags, pipeline, completed, cmdbuf, &queue_import); + zink_cmd_debug_marker_end(ctx, cmdbuf, marker); + + if (!UNSYNCHRONIZED) + resource_check_defer_image_barrier(ctx, res, new_layout, pipeline); + + if (is_write) + res->obj->last_write = flags; + + res->obj->access = flags; + res->obj->access_stage = pipeline; + res->layout = new_layout; + + if (new_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + zink_resource_copies_reset(res); + + if (res->obj->exportable) + simple_mtx_lock(&ctx->batch.state->exportable_lock); + if (res->obj->dt) { + struct kopper_displaytarget *cdt = res->obj->dt; + if (cdt->swapchain->num_acquires && res->obj->dt_idx != UINT32_MAX) { + cdt->swapchain->images[res->obj->dt_idx].layout = res->layout; + } + } else if (res->obj->exportable) { + struct pipe_resource *pres = NULL; + bool found = false; + _mesa_set_search_or_add(&ctx->batch.state->dmabuf_exports, res, &found); + if (!found) { + pipe_resource_reference(&pres, &res->base.b); + } + } + if (res->obj->exportable && queue_import) { + for (struct zink_resource *r = res; r; r = zink_resource(r->base.b.next)) { + VkSemaphore sem = zink_screen_export_dmabuf_semaphore(zink_screen(ctx->base.screen), r); + if (sem) + util_dynarray_append(&ctx->batch.state->fd_wait_semaphores, VkSemaphore, sem); + } + } + if (res->obj->exportable) + 
simple_mtx_unlock(&ctx->batch.state->exportable_lock); +} + +bool +zink_check_unordered_transfer_access(struct zink_resource *res, unsigned level, const struct pipe_box *box) +{ + /* always barrier against previous non-transfer writes */ + bool non_transfer_write = res->obj->last_write && res->obj->last_write != VK_ACCESS_TRANSFER_WRITE_BIT; + /* must barrier if clobbering a previous write */ + bool transfer_clobber = res->obj->last_write == VK_ACCESS_TRANSFER_WRITE_BIT && zink_resource_copy_box_intersects(res, level, box); + return non_transfer_write || transfer_clobber; +} + +bool +zink_check_valid_buffer_src_access(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size) +{ + return res->obj->access && util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !unordered_res_exec(ctx, res, false); +} + +void +zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box, bool unsync) +{ + if (res->obj->copies_need_reset) + zink_resource_copies_reset(res); + /* skip TRANSFER_DST barrier if no intersection from previous copies */ + if (res->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || + zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics || + zink_check_unordered_transfer_access(res, level, box)) { + if (unsync) + zink_screen(ctx->base.screen)->image_barrier_unsync(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + else + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + } else { + res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT; + res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT; + res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; + } + zink_resource_copy_box_add(ctx, res, level, box); +} + +bool +zink_resource_buffer_transfer_dst_barrier(struct 
zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size) +{ + if (res->obj->copies_need_reset) + zink_resource_copies_reset(res); + bool unordered = true; + struct pipe_box box; + u_box_3d((int)offset, 0, 0, (int)size, 0, 0, &box); + bool can_unordered_write = unordered_res_exec(ctx, res, true); + /* must barrier if something read the valid buffer range */ + bool valid_read = (res->obj->access || res->obj->unordered_access) && + util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !can_unordered_write; + if (valid_read || zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics || zink_check_unordered_transfer_access(res, 0, &box)) { + zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + unordered = res->obj->unordered_write; + } else { + res->obj->unordered_access = VK_ACCESS_TRANSFER_WRITE_BIT; + res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT; + res->obj->unordered_access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + ctx->batch.state->unordered_write_access |= VK_ACCESS_TRANSFER_WRITE_BIT; + ctx->batch.state->unordered_write_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; + if (!zink_resource_usage_matches(res, ctx->batch.state)) { + res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT; + res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; + res->obj->ordered_access_is_copied = true; + } + } + zink_resource_copy_box_add(ctx, res, 0, &box); + /* this return value implies that the caller could do an unordered op on this resource */ + return unordered; +} + +VkPipelineStageFlags +zink_pipeline_flags_from_stage(VkShaderStageFlagBits stage) +{ + switch (stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + case VK_SHADER_STAGE_FRAGMENT_BIT: + return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + case VK_SHADER_STAGE_GEOMETRY_BIT: + return VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; + case 
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+      return VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT;
+   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+      return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+   case VK_SHADER_STAGE_COMPUTE_BIT:
+      return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+   default:
+      unreachable("unknown shader stage bit");
+   }
+}
+
+/* fallback used when a caller passes no explicit pipeline stages: any shader-type
+ * access (uniform/shader read, shader write) conservatively maps to ALL shader
+ * stages; every other access is assumed to come from a transfer op
+ */
+ALWAYS_INLINE static VkPipelineStageFlags
+pipeline_access_stage(VkAccessFlags flags)
+{
+   if (flags & (VK_ACCESS_UNIFORM_READ_BIT |
+                VK_ACCESS_SHADER_READ_BIT |
+                VK_ACCESS_SHADER_WRITE_BIT))
+      return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+             VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+             VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+             VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+   return VK_PIPELINE_STAGE_TRANSFER_BIT;
+}
+
+/* a buffer barrier is required if:
+ * - the tracked access (ordered or unordered, per 'unordered') is a write, or
+ * - the requested access is a write, or
+ * - the requested stages/access flags are not already a subset of what was tracked
+ */
+ALWAYS_INLINE static bool
+buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline, bool unordered)
+{
+   return zink_resource_access_is_write(unordered ? res->obj->unordered_access : res->obj->access) ||
+          zink_resource_access_is_write(flags) ||
+          ((unordered ? res->obj->unordered_access_stage : res->obj->access_stage) & pipeline) != pipeline ||
+          ((unordered ? res->obj->unordered_access : res->obj->access) & flags) != flags;
+}
+
+
+
+/* emit (or elide) a memory barrier for a buffer resource and update the
+ * ordered/unordered access tracking on res->obj.  BARRIER_API selects the
+ * synchronization2 vs. legacy barrier emission path (see zink_synchronization_init).
+ * Barriers may be recorded on the reordered ("unordered") cmdbuf when
+ * unordered_res_exec() allows it, which lets independent work be hoisted
+ * ahead of the main cmdbuf.
+ */
+template <barrier_type BARRIER_API>
+void
+zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+{
+   if (!pipeline)
+      pipeline = pipeline_access_stage(flags);
+
+   bool is_write = zink_resource_access_is_write(flags);
+   /* a new write must wait for all prior access (RW); a read only needs prior
+    * writes to have completed
+    */
+   enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
+   bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+   bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+   if (!usage_matches) {
+      /* no usage on the current batch: accesses may be reordered freely */
+      res->obj->unordered_write = true;
+      if (is_write || zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, ZINK_RESOURCE_ACCESS_RW))
+         res->obj->unordered_read = true;
+   }
+   bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
+   bool unordered = unordered_res_exec(ctx, res, is_write);
+   if (!buffer_needs_barrier(res, flags, pipeline, unordered))
+      return;
+   if (completed) {
+      /* reset access on complete */
+      res->obj->access = VK_ACCESS_NONE;
+      res->obj->access_stage = VK_PIPELINE_STAGE_NONE;
+      res->obj->last_write = VK_ACCESS_NONE;
+   } else if (unordered && unordered_usage_matches && res->obj->ordered_access_is_copied) {
+      /* always reset propagated access to avoid weirdness */
+      res->obj->access = VK_ACCESS_NONE;
+      res->obj->access_stage = VK_PIPELINE_STAGE_NONE;
+   } else if (!unordered && !unordered_usage_matches) {
+      /* reset unordered access on first ordered barrier */
+      res->obj->unordered_access = VK_ACCESS_NONE;
+      res->obj->unordered_access_stage = VK_PIPELINE_STAGE_NONE;
+   }
+   if (!usage_matches) {
+      /* reset unordered on first new cmdbuf barrier */
+      res->obj->unordered_access = VK_ACCESS_NONE;
+      res->obj->unordered_access_stage = VK_PIPELINE_STAGE_NONE;
+      res->obj->ordered_access_is_copied = false;
+   }
+   /* unordered barriers can be skipped when:
+    * - there is no current-batch unordered access AND previous batch usage is not write access
+    * - there is current-batch unordered access AND the unordered access is not write access
+    */
+   bool can_skip_unordered = !unordered ? false : !zink_resource_access_is_write(!unordered_usage_matches ? res->obj->access : res->obj->unordered_access);
+   /* ordered barriers can be skipped if both:
+    * - there is no current access
+    * - there is no current-batch unordered access
+    */
+   bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+   if (ctx->no_reorder)
+      can_skip_unordered = can_skip_ordered = false;
+
+   if (!can_skip_unordered && !can_skip_ordered) {
+      /* writes get the reordered cmdbuf as dst, reads as src */
+      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
+      bool marker = false;
+      if (unlikely(zink_tracing)) {
+         char buf[4096];
+         zink_string_vkflags_unroll(buf, sizeof(buf), flags, (zink_vkflags_func)vk_AccessFlagBits_to_str);
+         marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "buffer_barrier(%s)", buf);
+      }
+
+      VkPipelineStageFlags stages = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);;
+      emit_memory_barrier<BARRIER_API>::for_buffer(ctx, res, pipeline, flags, unordered,usage_matches, stages, cmdbuf);
+
+      zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+   }
+
+   resource_check_defer_buffer_barrier(ctx, res, pipeline);
+
+   /* update tracked access state for the next barrier decision */
+   if (is_write)
+      res->obj->last_write = flags;
+   if (unordered) {
+      /* these should get automatically emitted during submission */
+      res->obj->unordered_access = flags;
+      res->obj->unordered_access_stage = pipeline;
+      if (is_write) {
+         ctx->batch.state->unordered_write_access |= flags;
+         ctx->batch.state->unordered_write_stages |= pipeline;
+      }
+   }
+   if (!unordered || !usage_matches || res->obj->ordered_access_is_copied) {
+      res->obj->access = flags;
+      res->obj->access_stage = pipeline;
+      res->obj->ordered_access_is_copied = unordered;
+   }
+   /* a non-transfer write invalidates any staged copy-batching state */
+   if (pipeline != VK_PIPELINE_STAGE_TRANSFER_BIT && is_write)
+      zink_resource_copies_reset(res);
+}
+
+/* select barrier implementations once at screen creation: the synchronization2
+ * entrypoints are used when Vulkan 1.3 or VK_KHR_synchronization2 is available,
+ * otherwise the default (Vulkan 1.0 barrier) paths are used
+ */
+void
+zink_synchronization_init(struct zink_screen *screen)
+{
+   if (screen->info.have_vulkan13 || screen->info.have_KHR_synchronization2) {
+      screen->buffer_barrier = zink_resource_buffer_barrier<barrier_KHR_synchronzation2>;
+      screen->image_barrier = zink_resource_image_barrier<barrier_KHR_synchronzation2, false>;
+      screen->image_barrier_unsync = zink_resource_image_barrier<barrier_KHR_synchronzation2, true>;
+   } else {
+      screen->buffer_barrier = zink_resource_buffer_barrier<barrier_default>;
+      screen->image_barrier = zink_resource_image_barrier<barrier_default, false>;
+      screen->image_barrier_unsync = zink_resource_image_barrier<barrier_default, true>;
+   }
+}
diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h
new file mode 100644
index 00000000000..403f600c793
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_types.h
@@ -0,0 +1,2068 @@
+/*
+ * Copyright © 2022 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * + * Authors: + * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> + */ + +#ifndef ZINK_TYPES_H +#define ZINK_TYPES_H + +#include <vulkan/vulkan_core.h> + +#include "compiler/nir/nir.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipebuffer/pb_cache.h" +#include "pipebuffer/pb_slab.h" + +#include "util/disk_cache.h" +#include "util/hash_table.h" +#include "util/list.h" +#include "util/log.h" +#include "util/rwlock.h" +#include "util/set.h" +#include "util/simple_mtx.h" +#include "util/slab.h" +#include "util/u_dynarray.h" +#include "util/u_idalloc.h" +#include "util/u_live_shader_cache.h" +#include "util/u_queue.h" +#include "util/u_range.h" +#include "util/u_threaded_context.h" +#include "util/u_transfer.h" +#include "util/u_vertex_state_cache.h" + +#include "vk_util.h" + +#include "zink_device_info.h" +#include "zink_instance.h" +#include "zink_shader_keys.h" +#include "vk_dispatch_table.h" + +#ifdef HAVE_RENDERDOC_APP_H +#include "renderdoc_app.h" +#endif + +/* the descriptor binding id for fbfetch/input attachment */ +#define ZINK_FBFETCH_BINDING 5 +#define ZINK_GFX_SHADER_COUNT 5 + +/* number of descriptors to allocate in a pool */ +#define MAX_LAZY_DESCRIPTORS 500 +/* explicit clamping because descriptor caching used to exist */ +#define ZINK_MAX_SHADER_IMAGES 32 +/* total number of bindless ids that can be allocated */ +#define ZINK_MAX_BINDLESS_HANDLES 1024 + +/* enum zink_descriptor_type */ +#define ZINK_MAX_DESCRIPTOR_SETS 6 +#define ZINK_MAX_DESCRIPTORS_PER_TYPE (32 * ZINK_GFX_SHADER_COUNT) +/* Descriptor size reported by lavapipe. 
*/ +#define ZINK_FBFETCH_DESCRIPTOR_SIZE 280 + +/* suballocator defines */ +#define NUM_SLAB_ALLOCATORS 3 +#define MIN_SLAB_ORDER 8 + + +/* this is the spec minimum */ +#define ZINK_SPARSE_BUFFER_PAGE_SIZE (64 * 1024) + +/* flag to create screen->copy_context */ +#define ZINK_CONTEXT_COPY_ONLY (1<<30) + +/* convenience macros for accessing dispatch table functions */ +#define VKCTX(fn) zink_screen(ctx->base.screen)->vk.fn +#define VKSCR(fn) screen->vk.fn + +#ifdef __cplusplus +extern "C" { +#endif + +extern uint32_t zink_debug; +extern bool zink_tracing; + +#ifdef __cplusplus +} +#endif + + +/** enums */ + +/* features for draw/program templates */ +typedef enum { + ZINK_NO_MULTIDRAW, + ZINK_MULTIDRAW, +} zink_multidraw; + +typedef enum { + ZINK_NO_DYNAMIC_STATE, + ZINK_DYNAMIC_STATE, + ZINK_DYNAMIC_STATE2, + ZINK_DYNAMIC_VERTEX_INPUT2, + ZINK_DYNAMIC_STATE3, + ZINK_DYNAMIC_VERTEX_INPUT, +} zink_dynamic_state; + +typedef enum { + ZINK_PIPELINE_NO_DYNAMIC_STATE, + ZINK_PIPELINE_DYNAMIC_STATE, + ZINK_PIPELINE_DYNAMIC_STATE2, + ZINK_PIPELINE_DYNAMIC_STATE2_PCP, + ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2, + ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2_PCP, + ZINK_PIPELINE_DYNAMIC_STATE3, + ZINK_PIPELINE_DYNAMIC_STATE3_PCP, + ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT, + ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT_PCP, +} zink_pipeline_dynamic_state; + +enum zink_blit_flags { + ZINK_BLIT_NORMAL = 1 << 0, + ZINK_BLIT_SAVE_FS = 1 << 1, + ZINK_BLIT_SAVE_FB = 1 << 2, + ZINK_BLIT_SAVE_TEXTURES = 1 << 3, + ZINK_BLIT_NO_COND_RENDER = 1 << 4, + ZINK_BLIT_SAVE_FS_CONST_BUF = 1 << 5, +}; + +/* descriptor types; also the ordering of the sets + * ...except that ZINK_DESCRIPTOR_BASE_TYPES is actually ZINK_DESCRIPTOR_TYPE_UNIFORMS, + * and all base type values are thus +1 to get the set id (using screen->desc_set_id[idx]) + */ +enum zink_descriptor_type { + ZINK_DESCRIPTOR_TYPE_UBO, + ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, + ZINK_DESCRIPTOR_TYPE_SSBO, + ZINK_DESCRIPTOR_TYPE_IMAGE, + ZINK_DESCRIPTOR_BASE_TYPES, /**< 
the count/iterator for basic descriptor types */
+   ZINK_DESCRIPTOR_BINDLESS,
+   ZINK_DESCRIPTOR_ALL_TYPES,
+   ZINK_DESCRIPTOR_TYPE_UNIFORMS = ZINK_DESCRIPTOR_BASE_TYPES, /**< this is aliased for convenience */
+   ZINK_DESCRIPTOR_NON_BINDLESS_TYPES = ZINK_DESCRIPTOR_BASE_TYPES + 1, /**< for struct sizing */
+};
+
+/* how descriptor updates are managed; NOTE(review): value semantics are assumed
+ * from the names (LAZY = templated set updates, DB = descriptor buffer) — confirm
+ * against zink_descriptors.c
+ */
+enum zink_descriptor_mode {
+   ZINK_DESCRIPTOR_MODE_AUTO,
+   ZINK_DESCRIPTOR_MODE_LAZY,
+   ZINK_DESCRIPTOR_MODE_DB,
+};
+
+/* the current mode */
+extern enum zink_descriptor_mode zink_descriptor_mode;
+
+/* indexing for descriptor template management */
+enum zink_descriptor_size_index {
+   ZDS_INDEX_UBO,
+   ZDS_INDEX_COMBINED_SAMPLER,
+   ZDS_INDEX_UNIFORM_TEXELS,
+   ZDS_INDEX_SAMPLER,
+   ZDS_INDEX_STORAGE_BUFFER,
+   ZDS_INDEX_STORAGE_IMAGE,
+   ZDS_INDEX_STORAGE_TEXELS,
+   ZDS_INDEX_MAX,
+};
+
+/* indexing for descriptor template management in COMPACT mode */
+enum zink_descriptor_size_index_compact {
+   ZDS_INDEX_COMP_UBO,
+   ZDS_INDEX_COMP_STORAGE_BUFFER,
+   ZDS_INDEX_COMP_COMBINED_SAMPLER,
+   ZDS_INDEX_COMP_UNIFORM_TEXELS,
+   ZDS_INDEX_COMP_SAMPLER,
+   ZDS_INDEX_COMP_STORAGE_IMAGE,
+   ZDS_INDEX_COMP_STORAGE_TEXELS,
+};
+
+/* read/write usage tracking; used as a bitmask (see ZINK_RESOURCE_ACCESS_RW) */
+enum zink_resource_access {
+   ZINK_RESOURCE_ACCESS_READ = 1,
+   ZINK_RESOURCE_ACCESS_WRITE = 32,
+   ZINK_RESOURCE_ACCESS_RW = ZINK_RESOURCE_ACCESS_READ | ZINK_RESOURCE_ACCESS_WRITE,
+};
+
+
+/* zink heaps are based off of vulkan memory types, but are not a 1-to-1 mapping to vulkan memory type indices and have no direct relation to vulkan memory heaps*/
+enum zink_heap {
+   ZINK_HEAP_DEVICE_LOCAL,
+   ZINK_HEAP_DEVICE_LOCAL_SPARSE,
+   ZINK_HEAP_DEVICE_LOCAL_LAZY,
+   ZINK_HEAP_DEVICE_LOCAL_VISIBLE,
+   ZINK_HEAP_HOST_VISIBLE_COHERENT,
+   ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED,
+   ZINK_HEAP_MAX,
+};
+
+/* modifiers for BO allocation */
+enum zink_alloc_flag {
+   ZINK_ALLOC_SPARSE = 1<<0,
+   ZINK_ALLOC_NO_SUBALLOC = 1<<1,
+};
+
+/* bitmask for the ZINK_DEBUG environment variable (stored in zink_debug) */
+enum zink_debug {
+   ZINK_DEBUG_NIR = (1<<0),
+   ZINK_DEBUG_SPIRV = (1<<1),
+   ZINK_DEBUG_TGSI = (1<<2),
+   ZINK_DEBUG_VALIDATION = (1<<3),
+   ZINK_DEBUG_SYNC = (1<<4),
+
ZINK_DEBUG_COMPACT = (1<<5), + ZINK_DEBUG_NOREORDER = (1<<6), + ZINK_DEBUG_GPL = (1<<7), + ZINK_DEBUG_SHADERDB = (1<<8), + ZINK_DEBUG_RP = (1<<9), + ZINK_DEBUG_NORP = (1<<10), + ZINK_DEBUG_MAP = (1<<11), + ZINK_DEBUG_FLUSHSYNC = (1<<12), + ZINK_DEBUG_NOSHOBJ = (1<<13), + ZINK_DEBUG_OPTIMAL_KEYS = (1<<14), + ZINK_DEBUG_NOOPT = (1<<15), + ZINK_DEBUG_NOBGC = (1<<16), + ZINK_DEBUG_DGC = (1<<17), + ZINK_DEBUG_MEM = (1<<18), + ZINK_DEBUG_QUIET = (1<<19), + ZINK_DEBUG_IOOPT = (1<<20), + ZINK_DEBUG_NOPC = (1<<21), +}; + +enum zink_pv_emulation_primitive { + ZINK_PVE_PRIMITIVE_NONE = 0, + ZINK_PVE_PRIMITIVE_SIMPLE = 1, + /* when triangle or quad strips are used and the gs outputs triangles */ + ZINK_PVE_PRIMITIVE_TRISTRIP = 2, + ZINK_PVE_PRIMITIVE_FAN = 3, +}; + +enum zink_dgc_buffer { + ZINK_DGC_VBO, + ZINK_DGC_IB, + ZINK_DGC_PSO, + ZINK_DGC_PUSH, + ZINK_DGC_DRAW, + ZINK_DGC_MAX, +}; + +/** fence types */ +struct tc_unflushed_batch_token; + +/* an async fence created for tc */ +struct zink_tc_fence { + struct pipe_reference reference; + /* enables distinction between tc fence submission and vk queue submission */ + uint32_t submit_count; + /* when the tc fence is signaled for use */ + struct util_queue_fence ready; + struct tc_unflushed_batch_token *tc_token; + /* for deferred flushes */ + struct pipe_context *deferred_ctx; + /* multiple tc fences may point to a real fence */ + struct zink_fence *fence; + /* for use with semaphore/imported fences */ + VkSemaphore sem; +}; + +/* a fence is actually a zink_batch_state, but these are split out for logical consistency */ +struct zink_fence { + uint64_t batch_id; + bool submitted; + bool completed; + struct util_dynarray mfences; +}; + + +/** state types */ + +struct zink_vertex_elements_hw_state { + uint32_t hash; + uint32_t num_bindings, num_attribs; + /* VK_EXT_vertex_input_dynamic_state uses different types */ + union { + VkVertexInputAttributeDescription attribs[PIPE_MAX_ATTRIBS]; + VkVertexInputAttributeDescription2EXT 
dynattribs[PIPE_MAX_ATTRIBS]; + }; + union { + struct { + VkVertexInputBindingDivisorDescriptionEXT divisors[PIPE_MAX_ATTRIBS]; + VkVertexInputBindingDescription bindings[PIPE_MAX_ATTRIBS]; // combination of element_state and stride + VkDeviceSize strides[PIPE_MAX_ATTRIBS]; + uint8_t divisors_present; + } b; + VkVertexInputBindingDescription2EXT dynbindings[PIPE_MAX_ATTRIBS]; + }; + uint8_t binding_map[PIPE_MAX_ATTRIBS]; +}; + +struct zink_vertex_elements_state { + /* decomposed attributes read only a single component for format compatibility */ + bool has_decomposed_attrs; + struct { + uint32_t binding; + VkVertexInputRate inputRate; + } bindings[PIPE_MAX_ATTRIBS]; + uint32_t divisor[PIPE_MAX_ATTRIBS]; + uint32_t min_stride[PIPE_MAX_ATTRIBS]; //for dynamic_state1 + uint32_t decomposed_attrs; + unsigned decomposed_attrs_size; + uint32_t decomposed_attrs_without_w; + unsigned decomposed_attrs_without_w_size; + struct zink_vertex_elements_hw_state hw_state; +}; + +/* for vertex state draws */ +struct zink_vertex_state { + struct pipe_vertex_state b; + struct zink_vertex_elements_state velems; +}; + +struct zink_rasterizer_hw_state { + unsigned polygon_mode : 2; //VkPolygonMode + unsigned line_mode : 2; //VkLineRasterizationModeEXT + unsigned depth_clip:1; + unsigned depth_clamp:1; + unsigned pv_last:1; + unsigned line_stipple_enable:1; + unsigned clip_halfz:1; +}; + +struct zink_rasterizer_state { + struct pipe_rasterizer_state base; + bool offset_fill; + float offset_units, offset_clamp, offset_scale; + float line_width; + VkFrontFace front_face; + VkCullModeFlags cull_mode; + VkLineRasterizationModeEXT dynamic_line_mode; + struct zink_rasterizer_hw_state hw_state; +}; + +struct zink_blend_state { + uint32_t hash; + unsigned num_rts; + VkPipelineColorBlendAttachmentState attachments[PIPE_MAX_COLOR_BUFS]; + + struct { + VkBool32 enables[PIPE_MAX_COLOR_BUFS]; + VkColorBlendEquationEXT eq[PIPE_MAX_COLOR_BUFS]; + VkColorComponentFlags wrmask[PIPE_MAX_COLOR_BUFS]; + } 
ds3; + + VkBool32 logicop_enable; + VkLogicOp logicop_func; + + VkBool32 alpha_to_coverage; + VkBool32 alpha_to_one; + + uint32_t wrmask; + uint8_t enables; + + bool dual_src_blend; +}; + +struct zink_depth_stencil_alpha_hw_state { + VkBool32 depth_test; + VkCompareOp depth_compare_op; + + VkBool32 depth_bounds_test; + float min_depth_bounds, max_depth_bounds; + + VkBool32 stencil_test; + VkStencilOpState stencil_front; + VkStencilOpState stencil_back; + + VkBool32 depth_write; +}; + +struct zink_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + struct zink_depth_stencil_alpha_hw_state hw_state; +}; + + +/** descriptor types */ + +/* zink_descriptor_layout objects are cached: this is the key for one */ +struct zink_descriptor_layout_key { + unsigned num_bindings; + VkDescriptorSetLayoutBinding *bindings; +}; + +struct zink_descriptor_layout { + VkDescriptorSetLayout layout; +}; + +/* descriptor pools are cached: zink_descriptor_pool_key::id is the id for a type of pool */ +struct zink_descriptor_pool_key { + unsigned use_count; + unsigned num_type_sizes; + unsigned id; + VkDescriptorPoolSize sizes[4]; + struct zink_descriptor_layout_key *layout; +}; + +/* a template used for updating descriptor buffers */ +struct zink_descriptor_template { + uint16_t stride; //the stride between mem pointers + uint16_t db_size; //the size of the entry in the buffer + unsigned count; //the number of descriptors + size_t offset; //the offset of the base host pointer to update from +}; + +/* ctx->dd; created at context creation */ +struct zink_descriptor_data { + bool bindless_bound; + bool bindless_init; + bool has_fbfetch; + bool push_state_changed[2]; //gfx, compute + uint8_t state_changed[2]; //gfx, compute + struct zink_descriptor_layout_key *push_layout_keys[2]; //gfx, compute + struct zink_descriptor_layout *push_dsl[2]; //gfx, compute + VkDescriptorUpdateTemplate push_template[2]; //gfx, compute + + struct zink_descriptor_layout *dummy_dsl; + + union 
{ + struct { + VkDescriptorPool bindless_pool; + VkDescriptorSet bindless_set; + } t; + struct { + struct zink_resource *bindless_db; + uint8_t *bindless_db_map; + struct pipe_transfer *bindless_db_xfer; + uint32_t bindless_db_offsets[4]; + unsigned max_db_size; + unsigned size_enlarge_scale; + } db; + }; + + struct zink_program *pg[2]; //gfx, compute + + VkDescriptorUpdateTemplateEntry push_entries[MESA_SHADER_STAGES]; //gfx+fbfetch + VkDescriptorUpdateTemplateEntry compute_push_entry; + + /* push descriptor layout size and binding offsets */ + uint32_t db_size[2]; //gfx, compute + uint32_t db_offset[ZINK_GFX_SHADER_COUNT + 1]; //gfx + fbfetch + /* compute offset is always 0 */ +}; + +/* pg->dd; created at program creation */ +struct zink_program_descriptor_data { + bool bindless; + bool fbfetch; + /* bitmask of ubo0 usage for stages */ + uint8_t push_usage; + /* bitmask of which sets are used by the program */ + uint8_t binding_usage; + /* all the pool keys for the program */ + struct zink_descriptor_pool_key *pool_key[ZINK_DESCRIPTOR_BASE_TYPES]; //push set doesn't need one + /* all the layouts for the program */ + struct zink_descriptor_layout *layouts[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; + /* all the templates for the program */ + union { + VkDescriptorUpdateTemplate templates[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; + struct zink_descriptor_template *db_template[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; + }; + uint32_t db_size[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //the total size of the layout + uint32_t *db_offset[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //the offset of each binding in the layout +}; + +struct zink_descriptor_pool { + /* the current index of 'sets' */ + unsigned set_idx; + /* number of sets allocated */ + unsigned sets_alloc; + VkDescriptorPool pool; + /* sets are lazily allocated */ + VkDescriptorSet sets[MAX_LAZY_DESCRIPTORS]; +}; + +/* a zink_descriptor_pool_key matches up to this struct */ +struct zink_descriptor_pool_multi { + /* for flagging when 
overflowed pools must be destroyed instead of reused */ + bool reinit_overflow; + /* this flips to split usable overflow from in-use overflow */ + unsigned overflow_idx; + /* zink_descriptor_pool objects that have exceeded MAX_LAZY_DESCRIPTORS sets */ + struct util_dynarray overflowed_pools[2]; + /* the current pool; may be null */ + struct zink_descriptor_pool *pool; + /* pool key for convenience */ + const struct zink_descriptor_pool_key *pool_key; +}; + +/* bs->dd; created on batch state creation */ +struct zink_batch_descriptor_data { + /* pools have fbfetch initialized */ + bool has_fbfetch; + /* are descriptor buffers bound */ + bool db_bound; + /* real size of 'pools' */ + unsigned pool_size[ZINK_DESCRIPTOR_BASE_TYPES]; + /* this array is sized based on the max zink_descriptor_pool_key::id used by the batch; members may be NULL */ + struct util_dynarray pools[ZINK_DESCRIPTOR_BASE_TYPES]; + struct zink_descriptor_pool_multi push_pool[2]; //gfx, compute + /* the current program (for descriptor updating) */ + struct zink_program *pg[2]; //gfx, compute + /* the current pipeline compatibility id (for pipeline compatibility rules) */ + uint32_t compat_id[2]; //gfx, compute + /* the current set layout */ + VkDescriptorSetLayout dsl[2][ZINK_DESCRIPTOR_BASE_TYPES]; //gfx, compute + union { + /* the current set for a given type; used for rebinding if pipeline compat id changes and current set must be rebound */ + VkDescriptorSet sets[2][ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //gfx, compute + uint64_t cur_db_offset[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //gfx, compute; the current offset of a descriptor buffer for rebinds + }; + /* mask of push descriptor usage */ + unsigned push_usage[2]; //gfx, compute + + struct zink_resource *db; //the descriptor buffer for a given type + uint8_t *db_map; //the host map for the buffer + struct pipe_transfer *db_xfer; //the transfer map for the buffer + uint64_t db_offset; //the "next" offset that will be used when the buffer is updated 
+}; + +/** batch types */ +/* zink_batch_usage concepts: + * - batch "usage" is an indicator of when and how a BO was accessed + * - batch "tracking" is the batch state(s) containing an extra ref for a BO + * + * - usage prevents a BO from being mapped while it has pending+conflicting access + * - usage affects pipeline barrier generation for synchronizing reads and writes + * - usage MUST be removed before context destruction to avoid crashing during BO + * reclaiming in suballocator + * + * - tracking prevents a BO from being destroyed early + * - tracking enables usage to be pruned + * + * + * tracking is added: + * - any time a BO is used in a "one-off" operation (e.g., blit, index buffer, indirect buffer) + * - any time a descriptor is unbound + * - when a buffer is replaced (IFF: resource is bound as a descriptor or usage previously existed) + * + * tracking is removed: + * - in zink_reset_batch_state() + * + * usage is added: + * - any time a BO is used in a "one-off" operation (e.g., blit, index buffer, indirect buffer) + * - any time a descriptor is bound + * - any time a descriptor is unbound (IFF: usage previously existed) + * - for all bound descriptors on the first draw/dispatch after a flush (zink_update_descriptor_refs) + * + * usage is removed: + * - when tracking is removed (IFF: BO usage == tracking, i.e., this is the last batch that a BO was active on) + */ +struct zink_batch_usage { + uint32_t usage; + /* this is a monotonic int used to disambiguate internal fences from their tc fence references */ + uint32_t submit_count; + cnd_t flush; + mtx_t mtx; + bool unflushed; +}; + +struct zink_bo_usage { + uint32_t submit_count; + struct zink_batch_usage *u; +}; + +struct zink_batch_obj_list { + unsigned max_buffers; + unsigned num_buffers; + struct zink_resource_object **objs; +}; + +struct zink_batch_state { + struct zink_fence fence; + struct zink_batch_state *next; + + struct zink_batch_usage usage; + struct zink_context *ctx; + VkCommandPool 
cmdpool; + VkCommandBuffer cmdbuf; + VkCommandBuffer reordered_cmdbuf; + VkCommandPool unsynchronized_cmdpool; + VkCommandBuffer unsynchronized_cmdbuf; + VkSemaphore signal_semaphore; //external signal semaphore + struct util_dynarray signal_semaphores; //external signal semaphores + struct util_dynarray wait_semaphores; //external wait semaphores + struct util_dynarray wait_semaphore_stages; //external wait semaphores + struct util_dynarray fd_wait_semaphores; //dmabuf wait semaphores + struct util_dynarray fd_wait_semaphore_stages; //dmabuf wait semaphores + struct util_dynarray fences; //zink_tc_fence refs + + VkSemaphore present; + struct zink_resource *swapchain; + struct util_dynarray acquires; + struct util_dynarray acquire_flags; + + struct { + struct util_dynarray pipelines; + struct util_dynarray layouts; + } dgc; + + VkAccessFlags unordered_write_access; + VkPipelineStageFlags unordered_write_stages; + + simple_mtx_t exportable_lock; + + struct util_queue_fence flush_completed; + + struct set programs; + struct set dmabuf_exports; + +#define BUFFER_HASHLIST_SIZE 32768 + /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo + * isn't part of any buffer lists or the index where the bo could be found. + * Since 1) hash collisions of 2 different bo can happen and 2) we use a + * single hashlist for the 3 buffer list, this is only a hint. + * batch_find_resource uses this hint to speed up buffers look up. 
+ */ + int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE]; + struct zink_batch_obj_list real_objs; + struct zink_batch_obj_list slab_objs; + struct zink_batch_obj_list sparse_objs; + struct zink_resource_object *last_added_obj; + struct util_dynarray swapchain_obj; //this doesn't have a zink_bo and must be handled differently + + struct util_dynarray unref_resources; + struct util_dynarray bindless_releases[2]; + + struct util_dynarray zombie_samplers; + + struct set active_queries; /* zink_query objects which were active at some point in this batch */ + struct util_dynarray dead_querypools; + + struct util_dynarray freed_sparse_backing_bos; + + struct zink_batch_descriptor_data dd; + + VkDeviceSize resource_size; + + bool is_device_lost; + bool has_barriers; + bool has_unsync; +}; + +static inline struct zink_batch_state * +zink_batch_state(struct zink_fence *fence) +{ + return (struct zink_batch_state *)fence; +} + +struct zink_batch { + struct zink_batch_state *state; + + struct zink_batch_usage *last_batch_usage; + struct zink_resource *swapchain; + + unsigned work_count; + + simple_mtx_t ref_lock; + + bool has_work; + bool last_was_compute; + bool in_rp; //renderpass is currently active +}; + + +/** bo types */ +struct bo_export { + /** File descriptor associated with a handle export. */ + int drm_fd; + + /** GEM handle in drm_fd */ + uint32_t gem_handle; + + struct list_head link; +}; + +struct zink_bo { + struct pb_buffer base; + + union { + struct { + void *cpu_ptr; /* for user_ptr and permanent maps */ + int map_count; + struct list_head exports; + simple_mtx_t export_lock; + + bool is_user_ptr; + bool use_reusable_pool; + + /* Whether buffer_get_handle or buffer_from_handle has been called, + * it can only transition from false to true. Protected by lock. 
+ */ + bool is_shared; + } real; + struct { + struct pb_slab_entry entry; + struct zink_bo *real; + } slab; + struct { + uint32_t num_va_pages; + uint32_t num_backing_pages; + + struct list_head backing; + + /* Commitment information for each page of the virtual memory area. */ + struct zink_sparse_commitment *commitments; + } sparse; + } u; + + VkDeviceMemory mem; + uint64_t offset; + + uint32_t unique_id; + const char *name; + + simple_mtx_t lock; + + struct zink_bo_usage reads; + struct zink_bo_usage writes; + + struct pb_cache_entry cache_entry[]; +}; + +static inline struct zink_bo * +zink_bo(struct pb_buffer *pbuf) +{ + return (struct zink_bo*)pbuf; +} + +/** clear types */ +struct zink_framebuffer_clear_data { + union { + union pipe_color_union color; + struct { + float depth; + unsigned stencil; + uint8_t bits : 2; // PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL + } zs; + }; + struct pipe_scissor_state scissor; + bool has_scissor; + bool conditional; +}; + +struct zink_framebuffer_clear { + struct util_dynarray clears; +}; + + +/** compiler types */ +struct zink_shader_info { + uint16_t stride[PIPE_MAX_SO_BUFFERS]; + uint32_t sampler_mask; + bool have_sparse; + bool have_vulkan_memory_model; + bool have_workgroup_memory_explicit_layout; + struct { + uint8_t flush_denorms:3; // 16, 32, 64 + uint8_t preserve_denorms:3; // 16, 32, 64 + bool denorms_32_bit_independence:1; + bool denorms_all_independence:1; + } float_controls; + unsigned bindless_set_idx; +}; + +enum zink_rast_prim { + ZINK_PRIM_POINTS, + ZINK_PRIM_LINES, + ZINK_PRIM_TRIANGLES, + ZINK_PRIM_MAX, +}; + +struct zink_shader_object { + union { + VkShaderEXT obj; + VkShaderModule mod; + }; + struct spirv_shader *spirv; +}; + +struct zink_shader { + struct util_live_shader base; + uint32_t hash; + struct blob blob; + struct shader_info info; + + struct zink_shader_info sinfo; + + struct { + int index; + int binding; + VkDescriptorType type; + unsigned char size; + } 
bindings[ZINK_DESCRIPTOR_BASE_TYPES][ZINK_MAX_DESCRIPTORS_PER_TYPE]; + size_t num_bindings[ZINK_DESCRIPTOR_BASE_TYPES]; + uint32_t ubos_used; // bitfield of which ubo indices are used + uint32_t ssbos_used; // bitfield of which ssbo indices are used + uint64_t arrayed_inputs; //mask of locations using arrayed io + uint64_t arrayed_outputs; //mask of locations using arrayed io + uint64_t flat_flags; + bool bindless; + bool can_inline; + bool has_uniforms; + bool has_edgeflags; + bool needs_inlining; + bool uses_sample; + struct spirv_shader *spirv; + + struct { + struct util_queue_fence fence; + struct zink_shader_object obj; + VkDescriptorSetLayout dsl; + VkPipelineLayout layout; + VkPipeline gpl; + VkDescriptorSetLayoutBinding *bindings; + unsigned num_bindings; + struct zink_descriptor_template *db_template; + unsigned db_size; + unsigned *db_offset; + } precompile; + + simple_mtx_t lock; + struct set *programs; + struct util_dynarray pipeline_libs; + + union { + struct { + struct zink_shader *generated_tcs; // a generated shader that this shader "owns"; only valid in the tes stage + struct zink_shader *generated_gs[MESA_PRIM_COUNT][ZINK_PRIM_MAX]; // generated shaders that this shader "owns" + struct zink_shader *parent; // for a generated gs this points to the shader that "owns" it + + bool is_generated; // if this is a driver-created shader (e.g., tcs) + } non_fs; + + struct { + /* Bitmask of textures that have shadow sampling result components + * other than RED accessed. This is a subset of !is_new_style_shadow + * (GLSL <1.30, ARB_fp) shadow sampling usage. 
+ */ + uint32_t legacy_shadow_mask; + nir_variable *fbfetch; //for fs output + } fs; + }; +}; + + +/** pipeline types */ +struct zink_pipeline_dynamic_state1 { + uint8_t front_face; //VkFrontFace:1 + uint8_t cull_mode; //VkCullModeFlags:2 + uint16_t num_viewports; + struct zink_depth_stencil_alpha_hw_state *depth_stencil_alpha_state; //must be last +}; + +struct zink_pipeline_dynamic_state2 { + bool primitive_restart; + bool rasterizer_discard; + uint16_t vertices_per_patch; //5 bits +}; + +#define zink_pipeline_dynamic_state3 zink_rasterizer_hw_state + +struct zink_gfx_pipeline_state { + /* order matches zink_gfx_output_key */ + unsigned force_persample_interp:1; + uint32_t rast_samples:6; + uint32_t min_samples:6; + uint32_t feedback_loop : 1; + uint32_t feedback_loop_zs : 1; + uint32_t rast_attachment_order : 1; + uint32_t rp_state : 16; + VkSampleMask sample_mask; + uint32_t blend_id; + + /* Pre-hashed value for table lookup, invalid when zero. + * Members after this point are not included in pipeline state hash key */ + uint32_t hash; + bool dirty; + + struct zink_pipeline_dynamic_state1 dyn_state1; + + struct zink_pipeline_dynamic_state2 dyn_state2; + struct zink_pipeline_dynamic_state3 dyn_state3; + + union { + VkShaderModule modules[MESA_SHADER_STAGES - 1]; + uint32_t optimal_key; + }; + bool modules_changed; + + uint32_t vertex_hash; + + uint32_t final_hash; + + uint32_t _pad2; + /* order matches zink_gfx_input_key */ + union { + struct { + unsigned idx:8; + bool uses_dynamic_stride; + }; + uint32_t input; + }; + uint32_t vertex_buffers_enabled_mask; + uint32_t vertex_strides[PIPE_MAX_ATTRIBS]; + struct zink_vertex_elements_hw_state *element_state; + struct zink_zs_swizzle_key *shadow; + bool sample_locations_enabled; + enum mesa_prim shader_rast_prim, rast_prim; /* reduced type or max for unknown */ + union { + struct { + struct zink_shader_key key[5]; + struct zink_shader_key last_vertex; + } shader_keys; + struct { + union zink_shader_key_optimal key; + 
} shader_keys_optimal; + }; + struct zink_blend_state *blend_state; + struct zink_render_pass *render_pass; + struct zink_render_pass *next_render_pass; //will be used next time rp is begun + VkFormat rendering_formats[PIPE_MAX_COLOR_BUFS]; + VkPipelineRenderingCreateInfo rendering_info; + VkPipeline pipeline; + enum mesa_prim gfx_prim_mode; //pending mode +}; + +struct zink_compute_pipeline_state { + /* Pre-hashed value for table lookup, invalid when zero. + * Members after this point are not included in pipeline state hash key */ + uint32_t hash; + uint32_t final_hash; + bool dirty; + uint32_t local_size[3]; + uint32_t variable_shared_mem; + + uint32_t module_hash; + VkShaderModule module; + bool module_changed; + + struct zink_shader_key key; + + VkPipeline pipeline; +}; + + +/** program types */ + +/* create_gfx_pushconst must be kept in sync with this struct */ +struct zink_gfx_push_constant { + unsigned draw_mode_is_indexed; + unsigned draw_id; + unsigned framebuffer_is_layered; + float default_inner_level[2]; + float default_outer_level[4]; + uint32_t line_stipple_pattern; + float viewport_scale[2]; + float line_width; +}; + +/* The order of the enums MUST match the order of the zink_gfx_push_constant + * members. 
+ */ +enum zink_gfx_push_constant_member { + ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, + ZINK_GFX_PUSHCONST_DRAW_ID, + ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, + ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, + ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, + ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, + ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, + ZINK_GFX_PUSHCONST_LINE_WIDTH, + ZINK_GFX_PUSHCONST_MAX +}; + +/* a shader module is used for directly reusing a shader module between programs, + * e.g., in the case where we're swapping out only one shader, + * allowing us to skip going through shader keys + */ +struct zink_shader_module { + struct zink_shader_object obj; + uint32_t hash; + bool shobj; + bool default_variant; + bool has_nonseamless; + bool needs_zs_shader_swizzle; + uint8_t num_uniforms; + uint8_t key_size; + uint8_t key[0]; /* | key | uniforms | zs shader swizzle | */ +}; + +struct zink_program { + struct pipe_reference reference; + struct zink_context *ctx; + unsigned char sha1[20]; + struct util_queue_fence cache_fence; + struct u_rwlock pipeline_cache_lock; + VkPipelineCache pipeline_cache; + size_t pipeline_cache_size; + struct zink_batch_usage *batch_uses; + bool is_compute; + bool can_precompile; + bool uses_shobj; //whether shader objects are used; programs CANNOT mix shader objects and shader modules + + struct zink_program_descriptor_data dd; + + uint32_t compat_id; + VkPipelineLayout layout; + VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_ALL_TYPES]; // one for each type + push + bindless + unsigned num_dsl; + + bool removed; +}; + +#define STAGE_MASK_OPTIMAL (1<<16) +#define STAGE_MASK_OPTIMAL_SHADOW (1<<17) +typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b); + +struct zink_gfx_library_key { + uint32_t optimal_key; //equals_pipeline_lib_optimal + VkShaderModule modules[ZINK_GFX_SHADER_COUNT]; + VkPipeline pipeline; +}; + +struct zink_gfx_input_key { + union { + struct { + unsigned idx:8; + bool uses_dynamic_stride; + }; + uint32_t input; + 
}; + uint32_t vertex_buffers_enabled_mask; + uint32_t vertex_strides[PIPE_MAX_ATTRIBS]; + struct zink_vertex_elements_hw_state *element_state; + VkPipeline pipeline; +}; + +struct zink_gfx_output_key { + /* order matches zink_gfx_output_key */ + union { + struct { + unsigned force_persample_interp:1; + uint32_t rast_samples:6; + uint32_t min_samples:6; + uint32_t feedback_loop : 1; + uint32_t feedback_loop_zs : 1; + uint32_t rast_attachment_order : 1; + uint32_t rp_state : 16; + }; + uint32_t key; + }; + + /* TODO: compress these */ + VkSampleMask sample_mask; + uint32_t blend_id; + VkPipeline pipeline; +}; + +struct zink_gfx_pipeline_cache_entry { + struct zink_gfx_pipeline_state state; + VkPipeline pipeline; + struct zink_gfx_program *prog; + /* GPL only */ + struct util_queue_fence fence; + union { + struct { + struct zink_gfx_input_key *ikey; + struct zink_gfx_library_key *gkey; + struct zink_gfx_output_key *okey; + VkPipeline unoptimized_pipeline; + } gpl; + struct zink_shader_object shobjs[ZINK_GFX_SHADER_COUNT]; + }; +}; + +struct zink_gfx_lib_cache { + /* for hashing */ + struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT]; + unsigned refcount; + bool removed; //once removed from cache + uint8_t stages_present; + + simple_mtx_t lock; + struct set libs; //zink_gfx_library_key -> VkPipeline +}; + +struct zink_gfx_program { + struct zink_program base; + + bool is_separable; //not a full program + + uint32_t stages_present; //mask of stages present in this program + uint32_t stages_remaining; //mask of zink_shader remaining in this program + uint32_t gfx_hash; //from ctx->gfx_hash + + struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT]; + struct zink_shader *last_vertex_stage; + struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT]; + + /* full */ + VkShaderEXT objects[ZINK_GFX_SHADER_COUNT]; + uint32_t module_hash[ZINK_GFX_SHADER_COUNT]; + struct blob blobs[ZINK_GFX_SHADER_COUNT]; + struct util_dynarray shader_cache[ZINK_GFX_SHADER_COUNT][2][2]; //normal, 
nonseamless cubes, inline uniforms + unsigned inlined_variant_count[ZINK_GFX_SHADER_COUNT]; + uint32_t default_variant_hash; + uint8_t inline_variants; //which stages are using inlined uniforms + bool needs_inlining; // whether this program requires some uniforms to be inlined + bool has_edgeflags; + bool optimal_keys; + + /* separable */ + struct zink_gfx_program *full_prog; + + struct hash_table pipelines[2][11]; // [dynamic, renderpass][number of draw modes we support] + uint32_t last_variant_hash; + + uint32_t last_finalized_hash[2][4]; //[dynamic, renderpass][primtype idx] + struct zink_gfx_pipeline_cache_entry *last_pipeline[2][4]; //[dynamic, renderpass][primtype idx] + + struct zink_gfx_lib_cache *libs; +}; + +struct zink_compute_program { + struct zink_program base; + + bool use_local_size; + bool has_variable_shared_mem; + + unsigned scratch_size; + + unsigned num_inlinable_uniforms; + nir_shader *nir; //only until precompile finishes + + struct zink_shader_module *curr; + + struct zink_shader_module *module; //base + struct util_dynarray shader_cache[2]; //nonseamless cubes, inline uniforms + unsigned inlined_variant_count; + + struct zink_shader *shader; + struct hash_table pipelines; + + simple_mtx_t cache_lock; //extra lock because threads are insane and sand was not meant to think + + VkPipeline base_pipeline; +}; + + +/** renderpass types */ + +struct zink_rt_attrib { + VkFormat format; + VkSampleCountFlagBits samples; + bool clear_color; + union { + bool clear_stencil; + bool fbfetch; + }; + bool invalid; + bool needs_write; + bool resolve; + bool feedback_loop; +}; + +struct zink_render_pass_state { + union { + struct { + uint8_t num_cbufs : 5; /* PIPE_MAX_COLOR_BUFS = 8 */ + uint8_t have_zsbuf : 1; + uint8_t samples:1; //for fs samplemask + uint32_t num_zsresolves : 1; + uint32_t num_cresolves : 24; /* PIPE_MAX_COLOR_BUFS, but this is a struct hole */ + }; + uint32_t val; //for comparison + }; + struct zink_rt_attrib rts[PIPE_MAX_COLOR_BUFS + 1]; 
+ unsigned num_rts; + uint32_t clears; //for extra verification and update flagging + uint16_t msaa_expand_mask; + uint16_t msaa_samples; //used with VK_EXT_multisampled_render_to_single_sampled +}; + +struct zink_pipeline_rt { + VkFormat format; + VkSampleCountFlagBits samples; +}; + +struct zink_render_pass_pipeline_state { + uint32_t num_attachments:14; + uint32_t msaa_samples : 8; + uint32_t fbfetch:1; + uint32_t color_read:1; + uint32_t depth_read:1; + uint32_t depth_write:1; + uint32_t num_cresolves:4; + uint32_t num_zsresolves:1; + bool samples:1; //for fs samplemask + struct zink_pipeline_rt attachments[PIPE_MAX_COLOR_BUFS + 1]; + unsigned id; +}; + +struct zink_render_pass { + VkRenderPass render_pass; + struct zink_render_pass_state state; + unsigned pipeline_state; +}; + + +/** resource types */ +struct zink_resource_object { + struct pipe_reference reference; + + VkPipelineStageFlags access_stage; + VkAccessFlags access; + VkPipelineStageFlags unordered_access_stage; + VkAccessFlags unordered_access; + VkAccessFlags last_write; + + /* 'access' is propagated from unordered_access to handle ops occurring + * in the ordered cmdbuf which can promote barriers to unordered + */ + bool ordered_access_is_copied; + bool unordered_read; + bool unordered_write; + bool unsync_access; + bool copies_valid; + bool copies_need_reset; //for use with batch state resets + + struct u_rwlock copy_lock; + struct util_dynarray copies[16]; //regions being copied to; for barrier omission + + VkBuffer storage_buffer; + simple_mtx_t view_lock; + uint32_t view_prune_count; //how many views to prune + uint32_t view_prune_timeline; //when to prune + struct util_dynarray views; + + union { + VkBuffer buffer; + VkImage image; + }; + VkDeviceAddress bda; + + VkSampleLocationsInfoEXT zs_evaluate; + bool needs_zs_evaluate; + + bool storage_init; //layout was set for image + bool transfer_dst; + bool render_target; + bool is_buffer; + bool exportable; + + /* TODO: this should be a union 
*/ + int handle; + struct zink_bo *bo; + // struct { + struct kopper_displaytarget *dt; + uint32_t dt_idx; + uint32_t last_dt_idx; + VkSemaphore present; + bool new_dt; + bool indefinite_acquire; + // } + + + VkDeviceSize offset, size, alignment; + uint64_t vkflags; + uint64_t vkusage; + VkFormatFeatureFlags vkfeats; + uint64_t modifier; + VkImageAspectFlags modifier_aspect; + VkSamplerYcbcrConversion sampler_conversion; + unsigned plane_offsets[3]; + unsigned plane_strides[3]; + unsigned plane_count; + + bool host_visible; + bool coherent; + bool is_aux; +}; + +struct zink_resource { + struct threaded_resource base; + + enum pipe_format internal_format:16; + + struct zink_resource_object *obj; + uint32_t queue; + union { + struct { + struct util_range valid_buffer_range; + uint32_t vbo_bind_mask : PIPE_MAX_ATTRIBS; + uint8_t ubo_bind_count[2]; + uint8_t ssbo_bind_count[2]; + uint8_t vbo_bind_count; + uint8_t so_bind_count; //not counted in all_binds + bool so_valid; + uint32_t ubo_bind_mask[MESA_SHADER_STAGES]; + uint32_t ssbo_bind_mask[MESA_SHADER_STAGES]; + }; + struct { + bool linear; + bool need_2D; + bool valid; + uint8_t fb_bind_count; //not counted in all_binds + uint16_t fb_binds; /* mask of attachment idx; zs is PIPE_MAX_COLOR_BUFS */ + VkSparseImageMemoryRequirements sparse; + VkFormat format; + VkImageLayout layout; + VkImageAspectFlags aspect; + }; + }; + uint32_t sampler_binds[MESA_SHADER_STAGES]; + uint32_t image_binds[MESA_SHADER_STAGES]; + uint16_t sampler_bind_count[2]; //gfx, compute + uint16_t image_bind_count[2]; //gfx, compute + uint16_t write_bind_count[2]; //gfx, compute + union { + uint16_t bindless[2]; //tex, img + uint32_t all_bindless; + }; + union { + uint16_t bind_count[2]; //gfx, compute + uint32_t all_binds; + }; + + VkPipelineStageFlagBits gfx_barrier; + VkAccessFlagBits barrier_access[2]; //gfx, compute + + union { + struct { + struct hash_table bufferview_cache; + simple_mtx_t bufferview_mtx; + }; + struct { + struct hash_table 
surface_cache; + simple_mtx_t surface_mtx; + }; + }; + + VkRect2D damage; + bool use_damage; + + bool copies_warned; + bool swapchain; + bool dmabuf; + unsigned dt_stride; + + uint8_t modifiers_count; + uint64_t *modifiers; +}; + +static inline struct zink_resource * +zink_resource(struct pipe_resource *r) +{ + return (struct zink_resource *)r; +} + + +struct zink_transfer { + struct threaded_transfer base; + struct pipe_resource *staging_res; + unsigned offset; + unsigned depthPitch; +}; + + +/** screen types */ +struct zink_modifier_prop { + uint32_t drmFormatModifierCount; + VkDrmFormatModifierPropertiesEXT* pDrmFormatModifierProperties; +}; + +struct zink_format_props { + VkFormatFeatureFlags2 linearTilingFeatures; + VkFormatFeatureFlags2 optimalTilingFeatures; + VkFormatFeatureFlags2 bufferFeatures; +}; + +struct zink_screen { + struct pipe_screen base; + + struct util_dl_library *loader_lib; + PFN_vkGetInstanceProcAddr vk_GetInstanceProcAddr; + PFN_vkGetDeviceProcAddr vk_GetDeviceProcAddr; + + bool threaded; + bool threaded_submit; + bool is_cpu; + bool abort_on_hang; + bool frame_marker_emitted; + bool implicitly_loaded; + uint64_t curr_batch; //the current batch id + uint32_t last_finished; + VkSemaphore sem; + VkFence fence; + struct util_queue flush_queue; + simple_mtx_t copy_context_lock; + struct zink_context *copy_context; + + struct zink_batch_state *free_batch_states; //unused batch states + struct zink_batch_state *last_free_batch_state; //for appending + simple_mtx_t free_batch_states_lock; + + simple_mtx_t semaphores_lock; + struct util_dynarray semaphores; + struct util_dynarray fd_semaphores; + + unsigned buffer_rebind_counter; + unsigned image_rebind_counter; + unsigned robust_ctx_count; + + struct hash_table dts; + simple_mtx_t dt_lock; + + bool device_lost; + int drm_fd; + + struct slab_mempool present_mempool; + struct slab_parent_pool transfer_pool; + struct disk_cache *disk_cache; + struct util_queue cache_put_thread; + struct util_queue 
cache_get_thread; + + /* there are 5 gfx stages, but VS and FS are assumed to be always present, + * thus only 3 stages need to be considered, giving 2^3 = 8 program caches. + */ + struct set pipeline_libs[8]; + simple_mtx_t pipeline_libs_lock[8]; + + simple_mtx_t desc_set_layouts_lock; + struct hash_table desc_set_layouts[ZINK_DESCRIPTOR_BASE_TYPES]; + simple_mtx_t desc_pool_keys_lock; + struct set desc_pool_keys[ZINK_DESCRIPTOR_BASE_TYPES]; + struct util_live_shader_cache shaders; + + uint64_t db_size[ZINK_DESCRIPTOR_ALL_TYPES]; + unsigned base_descriptor_size; + VkDescriptorSetLayout bindless_layout; + + struct { + struct pb_cache bo_cache; + struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; + unsigned min_alloc_size; + uint32_t next_bo_unique_id; + } pb; + uint8_t heap_map[ZINK_HEAP_MAX][VK_MAX_MEMORY_TYPES]; // mapping from zink heaps to memory type indices + uint8_t heap_count[ZINK_HEAP_MAX]; // number of memory types per zink heap + bool resizable_bar; + + uint64_t total_video_mem; + uint64_t clamp_video_mem; + uint64_t total_mem; + uint64_t mapped_vram; + + VkInstance instance; + struct zink_instance_info instance_info; + + struct hash_table *debug_mem_sizes; + simple_mtx_t debug_mem_lock; + + VkPhysicalDevice pdev; + uint32_t vk_version, spirv_version; + struct util_idalloc_mt buffer_ids; + struct util_vertex_state_cache vertex_state_cache; + + struct zink_device_info info; + struct nir_shader_compiler_options nir_options; + + bool optimal_keys; + bool have_full_ds3; + bool have_X8_D24_UNORM_PACK32; + bool have_D24_UNORM_S8_UINT; + bool have_D32_SFLOAT_S8_UINT; + bool have_triangle_fans; + bool need_decompose_attrs; + bool need_2D_zs; + bool need_2D_sparse; + bool can_hic_shader_read; + + uint32_t gfx_queue; + uint32_t sparse_queue; + uint32_t max_queues; + uint32_t timestamp_valid_bits; + VkDevice dev; + VkQueue queue; //gfx+compute + VkQueue queue_sparse; + simple_mtx_t queue_lock; + VkDebugUtilsMessengerEXT debugUtilsCallbackHandle; + + uint32_t 
cur_custom_border_color_samplers; + + unsigned screen_id; + +#ifdef HAVE_RENDERDOC_APP_H + RENDERDOC_API_1_0_0 *renderdoc_api; + unsigned renderdoc_capture_start; + unsigned renderdoc_capture_end; + unsigned renderdoc_frame; + bool renderdoc_capturing; + bool renderdoc_capture_all; +#endif + + struct vk_uncompacted_dispatch_table vk; + + void (*buffer_barrier)(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); + void (*image_barrier)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); + void (*image_barrier_unsync)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); + + bool compact_descriptors; /**< toggled if descriptor set ids are compacted */ + uint8_t desc_set_id[ZINK_MAX_DESCRIPTOR_SETS]; /**< converts enum zink_descriptor_type -> the actual set id */ + + struct { + bool dual_color_blend_by_location; + bool inline_uniforms; + bool emulate_point_smooth; + bool zink_shader_object_enable; + } driconf; + + struct zink_format_props format_props[PIPE_FORMAT_COUNT]; + struct zink_modifier_prop modifier_props[PIPE_FORMAT_COUNT]; + + VkExtent2D maxSampleLocationGridSize[5]; + VkPipelineLayout gfx_push_constant_layout; + + struct { + bool broken_l4a4; + /* https://gitlab.khronos.org/vulkan/vulkan/-/issues/3306 + * HI TURNIP + */ + bool broken_cache_semantics; + bool missing_a8_unorm; + bool implicit_sync; + bool disable_optimized_compile; + bool always_feedback_loop; + bool always_feedback_loop_zs; + bool needs_sanitised_layer; + bool track_renderpasses; + bool no_linestipple; + bool no_linesmooth; + bool no_hw_gl_point; + bool lower_robustImageAccess2; + bool needs_zs_shader_swizzle; + bool can_do_invalid_linear_modifier; + bool io_opt; + unsigned z16_unscaled_bias; + unsigned z24_unscaled_bias; + } driver_workarounds; +}; + +static inline struct 
zink_screen * +zink_screen(struct pipe_screen *pipe) +{ + return (struct zink_screen *)pipe; +} + +/** surface types */ + +/* info for validating/creating imageless framebuffers */ +struct zink_surface_info { + VkImageCreateFlags flags; + VkImageUsageFlags usage; + uint32_t width; + uint32_t height; + uint32_t layerCount; + VkFormat format[2]; //base format, srgb format (for srgb framebuffer) +}; + +/* an imageview for a zink_resource: + - may be a fb attachment, samplerview, or shader image + - cached on the parent zink_resource_object + - also handles swapchains + */ +struct zink_surface { + struct pipe_surface base; + /* all the info for creating a new imageview */ + VkImageViewCreateInfo ivci; + VkImageViewUsageCreateInfo usage_info; + /* for framebuffer use */ + struct zink_surface_info info; + bool is_swapchain; + /* the current imageview */ + VkImageView image_view; + /* array of imageviews for swapchains, one for each image */ + VkImageView *swapchain; + unsigned swapchain_size; + void *obj; //backing resource object; used to determine rebinds + void *dt_swapchain; //current swapchain object; used to determine swapchain rebinds + uint32_t hash; //for surface caching +}; + +/* wrapper object that preserves the gallium expectation of having + * pipe_surface::context match the context used to create the surface + */ +struct zink_ctx_surface { + struct pipe_surface base; + struct zink_surface *surf; //the actual surface + struct zink_ctx_surface *transient; //for use with EXT_multisample_render_to_texture + bool transient_init; //whether the transient surface has data + bool needs_mutable; +}; + +/* use this cast for framebuffer surfaces */ +static inline struct zink_surface * +zink_csurface(struct pipe_surface *psurface) +{ + return psurface ? 
((struct zink_ctx_surface *)psurface)->surf : NULL; +} + +/* use this cast for checking transient framebuffer surfaces */ +static inline struct zink_surface * +zink_transient_surface(struct pipe_surface *psurface) +{ + return psurface ? ((struct zink_ctx_surface *)psurface)->transient ? ((struct zink_ctx_surface *)psurface)->transient->surf : NULL : NULL; +} + +/* use this cast for internal surfaces */ +static inline struct zink_surface * +zink_surface(struct pipe_surface *psurface) +{ + return (struct zink_surface *)psurface; +} + + +/** framebuffer types */ +struct zink_framebuffer_state { + uint32_t width; + uint16_t height; + uint32_t layers:6; + uint32_t samples:6; + uint32_t num_attachments:4; + struct zink_surface_info infos[PIPE_MAX_COLOR_BUFS + 1]; +}; + +struct zink_framebuffer { + struct pipe_reference reference; + + /* current objects */ + VkFramebuffer fb; + struct zink_render_pass *rp; + + struct zink_framebuffer_state state; + VkFramebufferAttachmentImageInfo infos[PIPE_MAX_COLOR_BUFS + 1]; + struct hash_table objects; +}; + + +/** context types */ +struct zink_sampler_state { + VkSampler sampler; + VkSampler sampler_clamped; + bool custom_border_color; + bool emulate_nonseamless; +}; + +struct zink_buffer_view { + struct pipe_reference reference; + struct pipe_resource *pres; + VkBufferViewCreateInfo bvci; + VkBufferView buffer_view; + uint32_t hash; +}; + +struct zink_sampler_view { + struct pipe_sampler_view base; + union { + struct zink_surface *image_view; + struct zink_buffer_view *buffer_view; + unsigned tbo_size; + }; + struct zink_surface *cube_array; + /* Optional sampler view returning red (depth) in all channels, for shader rewrites. 
*/ + struct zink_surface *zs_view; + struct zink_zs_swizzle swizzle; +}; + +struct zink_image_view { + struct pipe_image_view base; + union { + struct zink_surface *surface; + struct zink_buffer_view *buffer_view; + }; +}; + +static inline struct zink_sampler_view * +zink_sampler_view(struct pipe_sampler_view *pview) +{ + return (struct zink_sampler_view *)pview; +} + +struct zink_so_target { + struct pipe_stream_output_target base; + struct pipe_resource *counter_buffer; + VkDeviceSize counter_buffer_offset; + uint32_t stride; + bool counter_buffer_valid; +}; + +static inline struct zink_so_target * +zink_so_target(struct pipe_stream_output_target *so_target) +{ + return (struct zink_so_target *)so_target; +} + +struct zink_viewport_state { + struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS]; + struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS]; + uint8_t num_viewports; +}; + +struct zink_descriptor_db_info { + unsigned offset; + unsigned size; + enum pipe_format format; + struct pipe_resource *pres; +}; + +struct zink_descriptor_surface { + union { + struct zink_surface *surface; + struct zink_buffer_view *bufferview; + struct zink_descriptor_db_info db; + }; + bool is_buffer; +}; + +struct zink_bindless_descriptor { + struct zink_descriptor_surface ds; + struct zink_sampler_state *sampler; + uint32_t handle; + uint32_t access; //PIPE_ACCESS_... 
+}; + +struct zink_rendering_info { + VkPipelineRenderingCreateInfo info; + unsigned id; +}; + + +typedef void (*pipe_draw_vertex_state_func)(struct pipe_context *ctx, + struct pipe_vertex_state *vstate, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws); +typedef void (*pipe_launch_grid_func)(struct pipe_context *pipe, const struct pipe_grid_info *info); + + +enum zink_ds3_state { + ZINK_DS3_RAST_STIPPLE, + ZINK_DS3_RAST_CLIP, + ZINK_DS3_RAST_CLAMP, + ZINK_DS3_RAST_POLYGON, + ZINK_DS3_RAST_HALFZ, + ZINK_DS3_RAST_PV, + ZINK_DS3_RAST_LINE, + ZINK_DS3_RAST_STIPPLE_ON, + ZINK_DS3_BLEND_A2C, + ZINK_DS3_BLEND_A21, + ZINK_DS3_BLEND_ON, + ZINK_DS3_BLEND_WRITE, + ZINK_DS3_BLEND_EQ, + ZINK_DS3_BLEND_LOGIC_ON, + ZINK_DS3_BLEND_LOGIC, +}; + +struct zink_context { + struct pipe_context base; + struct threaded_context *tc; + struct slab_child_pool transfer_pool; + struct slab_child_pool transfer_pool_unsync; + struct blitter_context *blitter; + struct util_debug_callback dbg; + + unsigned flags; + + pipe_draw_func draw_vbo[2]; //batch changed + pipe_draw_vertex_state_func draw_state[2]; //batch changed + pipe_launch_grid_func launch_grid[2]; //batch changed + + struct pipe_device_reset_callback reset; + + struct util_queue_fence unsync_fence; //unsigned during unsync recording (blocks flush ops) + struct util_queue_fence flush_fence; //unsigned during flush (blocks unsync ops) + + struct zink_fence *deferred_fence; + struct zink_batch_state *last_batch_state; //the last command buffer submitted + struct zink_batch_state *batch_states; //list of submitted batch states: ordered by increasing timeline id + unsigned batch_states_count; //number of states in `batch_states` + struct zink_batch_state *free_batch_states; //unused batch states + struct zink_batch_state *last_free_batch_state; //for appending + bool oom_flush; + bool oom_stall; + bool track_renderpasses; + bool no_reorder; + 
struct zink_batch batch; + + unsigned shader_has_inlinable_uniforms_mask; + unsigned inlinable_uniforms_valid_mask; + + struct pipe_constant_buffer ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; + struct pipe_shader_buffer ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; + uint32_t writable_ssbos[MESA_SHADER_STAGES]; + struct zink_image_view image_views[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + + uint32_t transient_attachments; + struct pipe_framebuffer_state fb_state; + struct hash_table framebuffer_cache; + + struct zink_vertex_elements_state *element_state; + struct zink_rasterizer_state *rast_state; + struct zink_depth_stencil_alpha_state *dsa_state; + + bool pipeline_changed[2]; //gfx, compute + + struct zink_shader *gfx_stages[ZINK_GFX_SHADER_COUNT]; + struct zink_shader *last_vertex_stage; + bool shader_reads_drawid; + bool shader_reads_basevertex; + struct zink_gfx_pipeline_state gfx_pipeline_state; + /* there are 5 gfx stages, but VS and FS are assumed to be always present, + * thus only 3 stages need to be considered, giving 2^3 = 8 program caches. 
+ */ + struct hash_table program_cache[8]; + simple_mtx_t program_lock[8]; + uint32_t gfx_hash; + struct zink_gfx_program *curr_program; + struct set gfx_inputs; + struct set gfx_outputs; + + struct zink_descriptor_data dd; + + struct zink_compute_pipeline_state compute_pipeline_state; + struct zink_compute_program *curr_compute; + + unsigned shader_stages : ZINK_GFX_SHADER_COUNT; /* mask of bound gfx shader stages */ + uint8_t dirty_gfx_stages; /* mask of changed gfx shader stages */ + bool last_vertex_stage_dirty; + bool compute_dirty; + bool is_generated_gs_bound; + + struct { + VkRenderingAttachmentInfo attachments[PIPE_MAX_COLOR_BUFS + 2]; //+depth, +stencil + VkRenderingInfo info; + struct tc_renderpass_info tc_info; + } dynamic_fb; + uint32_t fb_layer_mismatch; //bitmask + unsigned depth_bias_scale_factor; + struct set rendering_state_cache[6]; //[util_logbase2_ceil(msrtss samplecount)] + struct set render_pass_state_cache; + struct hash_table *render_pass_cache; + VkExtent2D swapchain_size; + bool fb_changed; + bool rp_changed; //force renderpass restart + bool rp_layout_changed; //renderpass changed, maybe restart + bool rp_loadop_changed; //renderpass changed, don't restart + bool zsbuf_unused; + bool zsbuf_readonly; + + struct zink_framebuffer *framebuffer; + struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1]; + uint16_t clears_enabled; + uint16_t rp_clears_enabled; + uint16_t void_clears; + uint16_t fbfetch_outputs; + uint16_t feedback_loops; + struct zink_resource *needs_present; + + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + bool vertex_buffers_dirty; + + struct zink_sampler_state *sampler_states[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + + struct zink_viewport_state vp_state; + bool vp_state_changed; + bool scissor_changed; + + float blend_constants[4]; + + bool sample_locations_changed; + VkSampleLocationEXT 
vk_sample_locations[PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE * PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE]; + uint8_t sample_locations[2 * 4 * 8 * 16]; + + struct pipe_stencil_ref stencil_ref; + + union { + struct { + float default_inner_level[2]; + float default_outer_level[4]; + }; + float tess_levels[6]; + }; + + struct zink_vk_query *curr_xfb_queries[PIPE_MAX_VERTEX_STREAMS]; + struct zink_shader *null_fs; + struct zink_shader *saved_fs; + + struct list_head query_pools; + struct list_head suspended_queries; + struct list_head primitives_generated_queries; + struct zink_query *vertices_query; + bool disable_fs; + bool disable_color_writes; + bool was_line_loop; + bool fs_query_active; + bool occlusion_query_active; + bool primitives_generated_active; + bool primitives_generated_suspended; + bool queries_disabled, render_condition_active; + bool queries_in_rp; + struct { + struct zink_query *query; + bool inverted; + bool active; //this is the internal vk state + } render_condition; + struct { + uint64_t render_passes; + } hud; + + struct { + bool valid; + struct u_upload_mgr *upload[ZINK_DGC_MAX]; + struct zink_resource *buffers[ZINK_DGC_MAX]; + struct zink_gfx_program *last_prog; + uint8_t *maps[ZINK_DGC_MAX]; + size_t bind_offsets[ZINK_DGC_MAX]; + size_t cur_offsets[ZINK_DGC_MAX]; + size_t max_size[ZINK_DGC_MAX]; + struct util_dynarray pipelines; + struct util_dynarray tokens; + } dgc; + + struct pipe_resource *dummy_vertex_buffer; + struct pipe_resource *dummy_xfb_buffer; + struct pipe_surface *dummy_surface[7]; + struct zink_buffer_view *dummy_bufferview; + + unsigned buffer_rebind_counter; + unsigned image_rebind_counter; + + struct { + /* descriptor info */ + uint8_t num_ubos[MESA_SHADER_STAGES]; + + uint8_t num_ssbos[MESA_SHADER_STAGES]; + struct util_dynarray global_bindings; + + VkDescriptorImageInfo textures[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + uint32_t emulate_nonseamless[MESA_SHADER_STAGES]; + uint32_t cubes[MESA_SHADER_STAGES]; + uint8_t 
num_samplers[MESA_SHADER_STAGES]; + uint8_t num_sampler_views[MESA_SHADER_STAGES]; + + VkDescriptorImageInfo images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + uint8_t num_images[MESA_SHADER_STAGES]; + + union { + struct { + VkDescriptorBufferInfo ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; + VkDescriptorBufferInfo ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; + VkBufferView tbos[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + VkBufferView texel_images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + } t; + struct { + VkDescriptorAddressInfoEXT ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; + VkDescriptorAddressInfoEXT ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; + VkDescriptorAddressInfoEXT tbos[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + VkDescriptorAddressInfoEXT texel_images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + } db; + }; + + VkDescriptorImageInfo fbfetch; + uint8_t fbfetch_db[ZINK_FBFETCH_DESCRIPTOR_SIZE]; + + /* the current state of the zs swizzle data */ + struct zink_zs_swizzle_key zs_swizzle[MESA_SHADER_STAGES]; + + struct zink_resource *descriptor_res[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + + struct { + struct util_idalloc tex_slots; //img, buffer + struct util_idalloc img_slots; //img, buffer + struct hash_table tex_handles; //img, buffer + struct hash_table img_handles; //img, buffer + union { + struct { + VkBufferView *buffer_infos; //tex, img + } t; + struct { + VkDescriptorAddressInfoEXT *buffer_infos; + } db; + }; + VkDescriptorImageInfo *img_infos; //tex, img + struct util_dynarray updates; //texture, img + struct util_dynarray resident; //texture, img + } bindless[2]; + union { + bool bindless_dirty[2]; //tex, img + uint16_t any_bindless_dirty; + }; + bool bindless_refs_dirty; + bool null_fbfetch_init; + } di; + void (*invalidate_descriptor_state)(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned, unsigned); + struct set *need_barriers[2]; //gfx, 
compute + struct set update_barriers[2][2]; //[gfx, compute][current, next] + uint8_t barrier_set_idx[2]; + unsigned memory_barrier; + + uint32_t ds3_states; + + uint32_t num_so_targets; + struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS]; + bool dirty_so_targets; + + bool gfx_dirty; + + bool shobj_draw : 1; //using shader objects for draw + bool is_device_lost; + bool primitive_restart; + bool blitting : 1; + bool unordered_blitting : 1; + bool vertex_state_changed : 1; + bool blend_state_changed : 1; + bool blend_color_changed : 1; + bool sample_mask_changed : 1; + bool rast_state_changed : 1; + bool line_width_changed : 1; + bool dsa_state_changed : 1; + bool stencil_ref_changed : 1; + bool rasterizer_discard_changed : 1; + bool rp_tc_info_updated : 1; +}; + +static inline struct zink_context * +zink_context(struct pipe_context *context) +{ + return (struct zink_context *)context; +} + +#endif |