Diffstat (limited to 'src/gallium/drivers/zink')
-rw-r--r--  src/gallium/drivers/zink/VP_ZINK_requirements.json | 947
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml | 135
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml | 103
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml | 87
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt | 49
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt | 1
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt | 1
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-lvp.toml | 34
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-radv.toml | 41
-rw-r--r--  src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml | 15
-rw-r--r--  src/gallium/drivers/zink/ci/gitlab-ci-inc.yml | 172
-rw-r--r--  src/gallium/drivers/zink/ci/gitlab-ci.yml | 195
-rw-r--r--  src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt | 826
-rw-r--r--  src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt | 1
-rw-r--r--  src/gallium/drivers/zink/ci/traces-zink-restricted.yml | 111
-rw-r--r--  src/gallium/drivers/zink/ci/traces-zink.yml | 144
-rw-r--r--  src/gallium/drivers/zink/ci/zink-amdpro-fails.txt | 159
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt | 64
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt | 1
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt | 593
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt | 139
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt | 21
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt | 48
-rw-r--r--  src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt | 148
-rw-r--r--  src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt | 7
-rw-r--r--  src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt | 63
-rw-r--r--  src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt | 4
-rw-r--r--  src/gallium/drivers/zink/ci/zink-lvp-fails.txt | 147
-rw-r--r--  src/gallium/drivers/zink/ci/zink-lvp-flakes.txt | 40
-rw-r--r--  src/gallium/drivers/zink/ci/zink-lvp-skips.txt (renamed from src/gallium/drivers/zink/ci/piglit-zink-lvp-skips.txt) | 26
-rw-r--r--  src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt | 62
-rw-r--r--  src/gallium/drivers/zink/ci/zink-nv-fails.txt | 2
-rw-r--r--  src/gallium/drivers/zink/ci/zink-nv-flakes.txt | 2
-rw-r--r--  src/gallium/drivers/zink/ci/zink-nv-skips.txt | 4
-rw-r--r--  src/gallium/drivers/zink/ci/zink-nvk-fails.txt | 902
-rw-r--r--  src/gallium/drivers/zink/ci/zink-nvk-flakes.txt | 10
-rw-r--r--  src/gallium/drivers/zink/ci/zink-nvk-skips.txt | 0
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt | 183
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt | 30
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt | 64
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt | 217
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt | 44
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt | 34
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt | 610
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt | 33
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt | 49
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt | 188
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt | 41
-rw-r--r--  src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt | 61
-rw-r--r--  src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt | 20
-rw-r--r--  src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt | 16
-rw-r--r--  src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt | 2
-rw-r--r--  src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt | 524
-rw-r--r--  src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt | 4
-rw-r--r--  src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt | 23
-rw-r--r--  src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt | 164
-rw-r--r--  src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt | 40
-rw-r--r--  src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt | 47
-rw-r--r--  src/gallium/drivers/zink/driinfo_zink.h | 5
-rw-r--r--  src/gallium/drivers/zink/meson.build | 51
-rw-r--r--  src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c | 151
-rw-r--r--  src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c | 3502
-rw-r--r--  src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h | 5
-rw-r--r--  src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c | 455
-rw-r--r--  src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h | 87
-rw-r--r--  src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py | 2
-rw-r--r--  src/gallium/drivers/zink/zink_batch.c | 1215
-rw-r--r--  src/gallium/drivers/zink/zink_batch.h | 119
-rw-r--r--  src/gallium/drivers/zink/zink_blit.c | 437
-rw-r--r--  src/gallium/drivers/zink/zink_bo.c | 682
-rw-r--r--  src/gallium/drivers/zink/zink_bo.h | 199
-rw-r--r--  src/gallium/drivers/zink/zink_clear.c | 555
-rw-r--r--  src/gallium/drivers/zink/zink_clear.h | 44
-rw-r--r--  src/gallium/drivers/zink/zink_compiler.c | 6482
-rw-r--r--  src/gallium/drivers/zink/zink_compiler.h | 107
-rw-r--r--  src/gallium/drivers/zink/zink_context.c | 5617
-rw-r--r--  src/gallium/drivers/zink/zink_context.h | 457
-rw-r--r--  src/gallium/drivers/zink/zink_descriptors.c | 2836
-rw-r--r--  src/gallium/drivers/zink/zink_descriptors.h | 251
-rw-r--r--  src/gallium/drivers/zink/zink_descriptors_lazy.c | 689
-rw-r--r--  src/gallium/drivers/zink/zink_device_info.py | 454
-rw-r--r--  src/gallium/drivers/zink/zink_draw.cpp | 1387
-rw-r--r--  src/gallium/drivers/zink/zink_extensions.py | 128
-rw-r--r--  src/gallium/drivers/zink/zink_fence.c | 201
-rw-r--r--  src/gallium/drivers/zink/zink_fence.h | 42
-rw-r--r--  src/gallium/drivers/zink/zink_format.c | 334
-rw-r--r--  src/gallium/drivers/zink/zink_format.h | 32
-rw-r--r--  src/gallium/drivers/zink/zink_format_test.c | 7
-rw-r--r--  src/gallium/drivers/zink/zink_framebuffer.c | 207
-rw-r--r--  src/gallium/drivers/zink/zink_framebuffer.h | 47
-rw-r--r--  src/gallium/drivers/zink/zink_inlines.h | 2
-rw-r--r--  src/gallium/drivers/zink/zink_instance.py | 112
-rw-r--r--  src/gallium/drivers/zink/zink_kopper.c | 1162
-rw-r--r--  src/gallium/drivers/zink/zink_kopper.h | 186
-rw-r--r--  src/gallium/drivers/zink/zink_lower_cubemap_to_array.c | 533
-rw-r--r--  src/gallium/drivers/zink/zink_pipeline.c | 836
-rw-r--r--  src/gallium/drivers/zink/zink_pipeline.h | 106
-rw-r--r--  src/gallium/drivers/zink/zink_program.c | 2582
-rw-r--r--  src/gallium/drivers/zink/zink_program.h | 408
-rw-r--r--  src/gallium/drivers/zink/zink_program_state.hpp | 423
-rw-r--r--  src/gallium/drivers/zink/zink_public.h | 2
-rw-r--r--  src/gallium/drivers/zink/zink_query.c | 1211
-rw-r--r--  src/gallium/drivers/zink/zink_query.h | 33
-rw-r--r--  src/gallium/drivers/zink/zink_render_pass.c | 799
-rw-r--r--  src/gallium/drivers/zink/zink_render_pass.h | 71
-rw-r--r--  src/gallium/drivers/zink/zink_resource.c | 2612
-rw-r--r--  src/gallium/drivers/zink/zink_resource.h | 208
-rw-r--r--  src/gallium/drivers/zink/zink_screen.c | 2863
-rw-r--r--  src/gallium/drivers/zink/zink_screen.h | 252
-rw-r--r--  src/gallium/drivers/zink/zink_shader_keys.h | 125
-rw-r--r--  src/gallium/drivers/zink/zink_state.c | 399
-rw-r--r--  src/gallium/drivers/zink/zink_state.h | 114
-rw-r--r--  src/gallium/drivers/zink/zink_surface.c | 416
-rw-r--r--  src/gallium/drivers/zink/zink_surface.h | 65
-rw-r--r--  src/gallium/drivers/zink/zink_synchronization.cpp | 794
-rw-r--r--  src/gallium/drivers/zink/zink_types.h | 2068
116 files changed, 40080 insertions, 12787 deletions
diff --git a/src/gallium/drivers/zink/VP_ZINK_requirements.json b/src/gallium/drivers/zink/VP_ZINK_requirements.json
new file mode 100644
index 00000000000..6f6860048e2
--- /dev/null
+++ b/src/gallium/drivers/zink/VP_ZINK_requirements.json
@@ -0,0 +1,947 @@
+{
+ "$schema": "https://schema.khronos.org/vulkan/profiles-0.8.2-271.json",
+ "capabilities": {
+ "vulkan10requirements": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "robustBufferAccess": true
+ }
+ }
+ },
+ "gl21_baseline": {
+ "extensions": {
+ "VK_KHR_maintenance1": 1,
+ "VK_KHR_create_renderpass2": 1,
+ "VK_KHR_imageless_framebuffer": 1,
+ "VK_KHR_timeline_semaphore": 1,
+ "VK_EXT_custom_border_color": 1,
+ "VK_EXT_line_rasterization": 1,
+ "VK_KHR_swapchain_mutable_format": 1,
+ "VK_KHR_incremental_present": 1,
+ "VK_EXT_border_color_swizzle": 1,
+ "VK_KHR_descriptor_update_template": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "logicOp": true,
+ "fillModeNonSolid": true,
+ "alphaToOne": true,
+ "shaderClipDistance": true
+ },
+ "VkPhysicalDeviceCustomBorderColorFeaturesEXT": {
+ "customBorderColorWithoutFormat": true
+ },
+ "VkPhysicalDeviceBorderColorSwizzleFeaturesEXT": {
+ "borderColorSwizzleFromImage": true
+ },
+ "VkPhysicalDeviceLineRasterizationFeaturesEXT": {
+ "rectangularLines": true
+ }
+ }
+ },
+ "gl21_baseline_vk10": {
+ "extensions": {
+ "VK_EXT_scalar_block_layout": 1
+ },
+ "features": {
+ "VkPhysicalDeviceScalarBlockLayoutFeatures": {
+ "scalarBlockLayout": true
+ },
+ "VkPhysicalDeviceTimelineSemaphoreFeatures": {
+ "timelineSemaphore": true
+ },
+ "VkPhysicalDeviceImagelessFramebufferFeatures": {
+ "imagelessFramebuffer": true
+ }
+ }
+ },
+ "gl21_baseline_vk12": {
+ "features": {
+ "VkPhysicalDeviceVulkan12Features": {
+ "scalarBlockLayout": true,
+ "drawIndirectCount": true,
+ "imagelessFramebuffer": true,
+ "timelineSemaphore": true
+ }
+ }
+ },
+ "gl21_baseline_line_bresenham": {
+ "features": {
+ "VkPhysicalDeviceLineRasterizationFeaturesEXT": {
+ "bresenhamLines": true
+ }
+ }
+ },
+ "gl21_baseline_line_non_strict": {
+ "properties": {
+ "VkPhysicalDeviceProperties": {
+ "limits": {
+ "strictLines": false
+ }
+ }
+ }
+ },
+ "gl21_optional": {
+ "extensions": {
+ "VK_KHR_external_memory": 1
+ }
+ },
+ "gl30_baseline": {
+ "extensions": {
+ "VK_EXT_transform_feedback": 1,
+ "VK_EXT_conditional_rendering": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "independentBlend": true
+ },
+ "VkPhysicalDeviceTransformFeedbackFeaturesEXT": {
+ "transformFeedback": true
+ },
+ "VkPhysicalDeviceConditionalRenderingFeaturesEXT": {
+ "conditionalRendering": true
+ }
+ },
+ "formats": {
+ "VK_FORMAT_D32_SFLOAT_S8_UINT": {
+ "VkFormatProperties": {
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT"
+ ]
+ }
+ }
+ }
+ },
+ "gl31_baseline": {
+ "properties": {
+ "VkPhysicalDeviceProperties": {
+ "limits": {
+ "maxPerStageDescriptorSamplers": 16
+ }
+ }
+ }
+ },
+ "gl32_baseline": {
+ "extensions": {
+ "VK_EXT_depth_clip_enable": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "depthClamp": true,
+ "geometryShader": true,
+ "shaderTessellationAndGeometryPointSize": true
+ },
+ "VkPhysicalDeviceDepthClipEnableFeaturesEXT": {
+ "depthClipEnable": true
+ }
+ }
+ },
+ "gl33_baseline": {
+ "extensions": {
+ "VK_EXT_vertex_attribute_divisor": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "dualSrcBlend": true
+ },
+ "VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT": {
+ "vertexAttributeInstanceRateDivisor": true
+ }
+ }
+ },
+ "gl40_baseline": {
+ "extensions": {
+ "VK_KHR_maintenance2": 1,
+ "VK_KHR_maintenance3": 1,
+ "VK_KHR_maintenance4": 1,
+ "VK_KHR_maintenance5": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sampleRateShading": true,
+ "tessellationShader": true,
+ "imageCubeArray": true
+ },
+ "VkPhysicalDeviceMaintenance4Features": {
+ "maintenance4": true
+ },
+ "VkPhysicalDeviceMaintenance5FeaturesKHR": {
+ "maintenance5": true
+ }
+ },
+ "formats": {
+ "VK_FORMAT_R32G32B32_SFLOAT": {
+ "VkFormatProperties": {
+ "bufferFeatures": [
+ "VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R32G32B32_SINT": {
+ "VkFormatProperties": {
+ "bufferFeatures": [
+ "VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R32G32B32_UINT": {
+ "VkFormatProperties": {
+ "bufferFeatures": [
+ "VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT"
+ ]
+ }
+ }
+ }
+ },
+ "gl41_baseline": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "multiViewport": true
+ }
+ },
+ "properties": {
+ "VkPhysicalDeviceProperties": {
+ "limits": {
+ "maxImageDimension1D": 16384,
+ "maxImageDimension2D": 16384,
+ "maxImageDimension3D": 2048,
+ "maxImageDimensionCube": 16384,
+ "maxImageArrayLayers": 2048,
+ "maxViewports": 16
+ }
+ }
+ }
+ },
+ "gl42_baseline": {
+ "extensions": {
+ "VK_EXT_image_2d_view_of_3d": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "shaderStorageImageExtendedFormats": true,
+ "shaderStorageImageWriteWithoutFormat": true,
+ "vertexPipelineStoresAndAtomics": true,
+ "fragmentStoresAndAtomics": true
+ },
+ "VkPhysicalDeviceImage2DViewOf3DFeaturesEXT": {
+ "image2DViewOf3D": true
+ }
+ }
+ },
+ "gl42_baseline_vk10": {
+ "extensions": {
+ "VK_KHR_shader_draw_parameters": 1
+ },
+ "features": {
+ "VkPhysicalDeviceShaderDrawParametersFeatures": {
+ "shaderDrawParameters": true
+ }
+ }
+ },
+ "gl42_baseline_vk12": {
+ "features": {
+ "VkPhysicalDeviceVulkan11Features": {
+ "shaderDrawParameters": true
+ }
+ }
+ },
+ "gl43_baseline_rb2": {
+ "extensions": {
+ "VK_EXT_robustness2": 1
+ },
+ "features": {
+ "VkPhysicalDeviceRobustness2FeaturesEXT": {
+ "robustImageAccess2": true
+ }
+ }
+ },
+ "gl43_baseline_rb_image_vk13": {
+ "features": {
+ "VkPhysicalDeviceVulkan13Features": {
+ "robustImageAccess": true
+ }
+ }
+ },
+ "gl43_baseline_rb_image_ext": {
+ "extensions": {
+ "VK_EXT_image_robustness": 1
+ },
+ "features": {
+ "VkPhysicalDeviceImageRobustnessFeatures": {
+ "robustImageAccess": true
+ }
+ }
+ },
+ "gl43_baseline": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "robustBufferAccess": true,
+ "multiDrawIndirect": true
+ }
+ },
+ "formats": {
+ "VK_FORMAT_R8G8B8A8_UNORM": {
+ "VkFormatProperties": {
+ "linearTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ],
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R8G8B8A8_SRGB": {
+ "VkFormatProperties": {
+ "linearTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ],
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R16_UNORM": {
+ "VkFormatProperties": {
+ "linearTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ],
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R16G16_UNORM": {
+ "VkFormatProperties": {
+ "linearTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ],
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R16_SNORM": {
+ "VkFormatProperties": {
+ "linearTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ],
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_R16G16_SNORM": {
+ "VkFormatProperties": {
+ "linearTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ],
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ },
+ "VK_FORMAT_D32_SFLOAT_S8_UINT": {
+ "VkFormatProperties": {
+ "optimalTilingFeatures": [
+ "VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT"
+ ]
+ }
+ }
+ }
+ },
+ "gl44_baseline": {
+ "formats": {
+ "VK_FORMAT_B10G11R11_UFLOAT_PACK32": {
+ "VkFormatProperties": {
+ "bufferFeatures": [
+ "VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT"
+ ]
+ }
+ }
+ }
+ },
+ "gl44_baseline_ext": {
+ "extensions": {
+ "VK_KHR_sampler_mirror_clamp_to_edge": 1
+ }
+ },
+ "gl44_baseline_vk12": {
+ "features": {
+ "VkPhysicalDeviceVulkan12Features": {
+ "samplerMirrorClampToEdge": true
+ }
+ }
+ },
+ "gl45_baseline": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "shaderCullDistance": true
+ }
+ }
+ },
+ "gl46_baseline": {
+ "extensions": {
+ "VK_KHR_draw_indirect_count": 1
+ },
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "samplerAnisotropy": true,
+ "depthBiasClamp": true
+ }
+ }
+ },
+ "gl46_optimal": {
+ "extensions": {
+ "VK_EXT_extended_dynamic_state": 1,
+ "VK_EXT_extended_dynamic_state2": 1,
+ "VK_EXT_extended_dynamic_state3": 1,
+ "VK_EXT_graphics_pipeline_library": 1,
+ "VK_EXT_non_seamless_cube_map": 1,
+ "VK_KHR_pipeline_library": 1,
+ "VK_EXT_attachment_feedback_loop_layout": 1,
+ "VK_EXT_attachment_feedback_loop_dynamic_state": 1
+ },
+ "features": {
+ "VkPhysicalDeviceExtendedDynamicStateFeaturesEXT": {
+ "extendedDynamicState": true
+ },
+ "VkPhysicalDeviceExtendedDynamicState2FeaturesEXT": {
+ "extendedDynamicState2": true,
+ "extendedDynamicState2LogicOp": true,
+ "extendedDynamicState2PatchControlPoints": true
+ },
+ "VkPhysicalDeviceExtendedDynamicState3FeaturesEXT": {
+ "extendedDynamicState3PolygonMode": true,
+ "extendedDynamicState3DepthClampEnable": true,
+ "extendedDynamicState3DepthClipEnable": true,
+ "extendedDynamicState3ProvokingVertexMode": true,
+ "extendedDynamicState3LineRasterizationMode": true,
+ "extendedDynamicState3DepthClipNegativeOneToOne": true
+ },
+ "VkPhysicalDeviceFeatures": {
+ "textureCompressionBC": true
+ },
+ "VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT": {
+ "graphicsPipelineLibrary": true
+ },
+ "VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT": {
+ "nonSeamlessCubeMap": true
+ },
+ "VkPhysicalDeviceProvokingVertexFeaturesEXT": {
+ "provokingVertexLast": true
+ },
+ "VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT": {
+ "attachmentFeedbackLoopLayout": true
+ },
+ "VkPhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT": {
+ "attachmentFeedbackLoopDynamicState": true
+ }
+ },
+ "properties": {
+ "VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT": {
+
+ }
+ }
+ },
+ "gl46_optimal_ext": {
+ "extensions": {
+ "VK_EXT_provoking_vertex": 1,
+ "VK_KHR_dynamic_rendering": 1,
+ "VK_EXT_dynamic_rendering_unused_attachments": 1
+ },
+ "features": {
+ "VkPhysicalDeviceDynamicRenderingFeatures": {
+ "dynamicRendering": true
+ },
+ "VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT": {
+ "dynamicRenderingUnusedAttachments": true
+ }
+ }
+ },
+ "gl46_optimal_vk13": {
+ "features": {
+ "VkPhysicalDeviceVulkan13Features": {
+ "dynamicRendering": true
+ }
+ }
+ },
+ "gl46_optional": {
+ "extensions": {
+ "VK_EXT_primitives_generated_query": 1,
+ "VK_EXT_color_write_enable": 1,
+ "VK_EXT_extended_dynamic_state3": 1,
+ "VK_EXT_descriptor_buffer": 1
+ },
+ "features": {
+ "VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT": {
+ "primitivesGeneratedQueryWithRasterizerDiscard": true
+ },
+ "VkPhysicalDeviceColorWriteEnableFeaturesEXT": {
+ "colorWriteEnable": true
+ },
+ "VkPhysicalDeviceExtendedDynamicState3FeaturesEXT": {
+ "extendedDynamicState3AlphaToOneEnable": true,
+ "extendedDynamicState3SampleMask": true,
+ "extendedDynamicState3AlphaToCoverageEnable": true,
+ "extendedDynamicState3ColorBlendEnable": true,
+ "extendedDynamicState3RasterizationSamples": true,
+ "extendedDynamicState3ColorWriteMask": true,
+ "extendedDynamicState3LogicOpEnable": true
+ },
+ "VkPhysicalDeviceDescriptorBufferFeaturesEXT": {
+ "descriptorBuffer": true
+ }
+ }
+ },
+ "gl46_optional_ext": {
+ "extensions": {
+ "VK_EXT_pipeline_creation_cache_control": 1
+ },
+ "features": {
+ "VkPhysicalDevicePipelineCreationCacheControlFeatures": {
+ "pipelineCreationCacheControl": true
+ }
+ }
+ },
+ "gl46_optional_vk13": {
+ "features": {
+ "VkPhysicalDeviceVulkan13Features": {
+ "pipelineCreationCacheControl": true
+ }
+ }
+ },
+ "GL_ARB_bindless_texture": {
+ "extensions": {
+ "VK_EXT_descriptor_indexing": 1
+ },
+ "properties": {
+ "VkPhysicalDeviceDescriptorIndexingProperties": {
+ "robustBufferAccessUpdateAfterBind": true
+ }
+ }
+ },
+ "GL_ARB_sparse_texture": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseResidencyImage2D": true,
+ "sparseResidencyImage3D": true
+ }
+ }
+ },
+ "GL_ARB_sparse_texture2": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseResidency2Samples": true
+ }
+ }
+ },
+ "GL_ARB_sparse_texture_clamp_2s": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseResidency2Samples": true
+ }
+ }
+ },
+ "GL_ARB_sparse_texture_clamp_4s": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseResidency4Samples": true
+ }
+ }
+ },
+ "GL_ARB_sparse_texture_clamp_8s": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseResidency8Samples": true
+ }
+ }
+ },
+ "GL_ARB_sparse_texture_clamp_16s": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseResidency16Samples": true
+ }
+ }
+ },
+ "GL_ARB_sparse_buffer": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "sparseBinding": true,
+ "sparseResidencyBuffer": true
+ }
+ }
+ },
+ "GL_ARB_shader_viewport_layer_array_ext": {
+ "extensions": {
+ "VK_EXT_shader_viewport_index_layer": 1
+ }
+ },
+ "GL_ARB_shader_viewport_layer_array_vk12": {
+ "features": {
+ "VkPhysicalDeviceVulkan12Features": {
+ "shaderOutputViewportIndex": true,
+ "shaderOutputLayer": true
+ }
+ }
+ },
+ "GL_ARB_fragment_shader_interlock": {
+ "extensions": {
+ "VK_EXT_fragment_shader_interlock": 1
+ }
+ },
+ "GL_ARB_shader_clock": {
+ "extensions": {
+ "VK_KHR_shader_clock": 1
+ }
+ },
+ "GL_ARB_shader_ballot_ext": {
+ "extensions": {
+ "VK_EXT_shader_subgroup_ballot": 1
+ }
+ },
+ "GL_ARB_shader_ballot_vk11": {
+ "properties": {
+ "VkPhysicalDeviceSubgroupProperties": {
+ "subgroupSize": 64,
+ "supportedOperations": [ "VK_SUBGROUP_FEATURE_BALLOT_BIT" ]
+ }
+ }
+ },
+ "GL_ARB_sample_locations": {
+ "extensions": {
+ "VK_EXT_extended_dynamic_state": 1,
+ "VK_EXT_sample_locations": 1
+ },
+ "features": {
+ "VkPhysicalDeviceExtendedDynamicStateFeaturesEXT": {
+ "extendedDynamicState": true
+ }
+ }
+ },
+ "GL_ARB_shader_stencil_export": {
+ "extensions": {
+ "VK_EXT_shader_stencil_export": 1
+ }
+ },
+ "GL_EXT_depth_bounds_test": {
+ "features": {
+ "VkPhysicalDeviceFeatures": {
+ "depthBounds": true
+ }
+ }
+ },
+ "GL_EXT_texture_filter_minmax_ext": {
+ "extensions": {
+ "VK_EXT_sampler_filter_minmax": 1
+ },
+ "properties": {
+ "VkPhysicalDeviceSamplerFilterMinmaxProperties": {
+ "filterMinmaxSingleComponentFormats": true,
+ "filterMinmaxImageComponentMapping": true
+ }
+ }
+ },
+ "GL_EXT_texture_filter_minmax_vk12": {
+ "properties": {
+ "VkPhysicalDeviceVulkan12Properties": {
+ "filterMinmaxSingleComponentFormats": true,
+ "filterMinmaxImageComponentMapping": true
+ }
+ }
+ }
+ },
+ "profiles": {
+ "VP_ZINK_gl21_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 2.1 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 2.1 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ]
+ ]
+ },
+ "VP_ZINK_gl30_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 3.0 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 3.0 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline"
+ ]
+ },
+ "VP_ZINK_gl31_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 3.1 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 3.1 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline"
+ ]
+ },
+ "VP_ZINK_gl32_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 3.2 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 3.2 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline"
+ ]
+ },
+ "VP_ZINK_gl33_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 3.3 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 3.3 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline"
+ ]
+ },
+ "VP_ZINK_gl40_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 4.0 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.0 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline"
+ ]
+ },
+ "VP_ZINK_gl41_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 4.1 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.1 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline"
+ ]
+ },
+ "VP_ZINK_gl42_baseline": {
+ "version": 1,
+ "api-version": "1.2.0",
+ "label": "Zink OpenGL 4.2 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.2 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline",
+ "gl42_baseline",
+ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ]
+ ]
+ },
+ "VP_ZINK_gl43_baseline": {
+ "version": 1,
+ "api-version": "1.3.0",
+ "label": "Zink OpenGL 4.3 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.3 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline",
+ "gl42_baseline",
+ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
+ "gl43_baseline",
+ [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ]
+ ]
+ },
+ "VP_ZINK_gl44_baseline": {
+ "version": 1,
+ "api-version": "1.3.0",
+ "label": "Zink OpenGL 4.4 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.4 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline",
+ "gl42_baseline",
+ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
+ "gl43_baseline",
+ [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ],
+ "gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ]
+ ]
+ },
+ "VP_ZINK_gl45_baseline": {
+ "version": 1,
+ "api-version": "1.3.0",
+ "label": "Zink OpenGL 4.5 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.5 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline",
+ "gl42_baseline",
+ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
+ "gl43_baseline",
+ [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ],
+ "gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ],
+ "gl45_baseline"
+ ]
+ },
+ "VP_ZINK_gl46_baseline": {
+ "version": 1,
+ "api-version": "1.3.0",
+ "label": "Zink OpenGL 4.6 Baseline profile",
+ "description": "Minimum requirements for Zink OpenGL 4.6 support.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline",
+ "gl42_baseline",
+ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
+ "gl43_baseline",
+ [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ],
+ "gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ],
+ "gl45_baseline",
+ "gl46_baseline"
+ ]
+ },
+ "VP_ZINK_gl46_optimal": {
+ "version": 1,
+ "api-version": "1.3.0",
+ "label": "Zink OpenGL 4.6 Optimal profile",
+ "description": "Requirements for Zink OpenGL 4.6 support with best performances.",
+ "capabilities": [
+ "vulkan10requirements",
+ "gl21_baseline",
+ [ "gl21_baseline_vk10", "gl21_baseline_vk12" ],
+ [ "gl21_baseline_line_bresenham", "gl21_baseline_line_non_strict" ],
+ "gl30_baseline",
+ "gl31_baseline",
+ "gl32_baseline",
+ "gl33_baseline",
+ "gl40_baseline",
+ "gl41_baseline",
+ "gl42_baseline",
+ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
+ "gl43_baseline",
+ [ "gl43_baseline_rb2", "gl43_baseline_rb_image_vk13", "gl43_baseline_rb_image_ext" ],
+ "gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ],
+ "gl45_baseline",
+ "gl46_baseline",
+ "gl46_optimal",
+ [ "gl46_optimal_ext", "gl46_optimal_vk13" ]
+ ],
+ "optionals": [
+ "gl46_optional",
+ [ "gl46_optional_ext", "gl46_optional_vk13" ],
+ "GL_ARB_bindless_texture",
+ "GL_ARB_sparse_texture",
+ "GL_ARB_sparse_texture2",
+ [ "GL_ARB_sparse_texture_clamp_2s", "GL_ARB_sparse_texture_clamp_4s", "GL_ARB_sparse_texture_clamp_8s", "GL_ARB_sparse_texture_clamp_16s" ],
+ "GL_ARB_sparse_buffer",
+ [ "GL_ARB_shader_viewport_layer_array_ext", "GL_ARB_shader_viewport_layer_array_vk12" ],
+ "GL_ARB_fragment_shader_interlock",
+ "GL_ARB_shader_clock",
+ [ "GL_ARB_shader_ballot_ext", "GL_ARB_shader_ballot_vk11" ],
+ "GL_ARB_sample_locations",
+ "GL_ARB_shader_stencil_export",
+ "GL_EXT_depth_bounds_test",
+ [ "GL_EXT_texture_filter_minmax_ext", "GL_EXT_texture_filter_minmax_vk12" ]
+ ]
+ }
+ },
+ "contributors": {
+ "Mike Blumenkrantz": {
+ "company": "Valve"
+ },
+ "Christophe Riccio": {
+ "company": "LunarG"
+ },
+ "Erik Faye-Lund": {
+ "company": "Collabora"
+ },
+ "Soroush Faghihi": {
+ "company": "Imagination Technologies"
+ },
+ "Connor Abbott": {
+ "company": "Valve"
+ }
+ },
+ "history": [
+ {
+ "revision": 1,
+ "date": "2022-10-18",
+ "author": "Christophe Riccio",
+ "comment": "Initial revision"
+ }
+ ]
+}
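
The profiles file above is organized in two layers: each named block under "capabilities" groups the Vulkan extensions, features, properties and formats needed for one slice of GL functionality, and each entry under "profiles" composes those blocks. Per the Vulkan Profiles schema, a nested array inside a profile's "capabilities" list (for example [ "gl21_baseline_vk10", "gl21_baseline_vk12" ]) means any one of the listed alternatives satisfies the requirement. As a rough illustration (not part of this commit), a small Python helper can flatten that structure and list which extensions a given profile may pull in:

#!/usr/bin/env python3
# Sketch only: list the Vulkan extensions referenced by one VP_ZINK profile,
# treating nested arrays in "capabilities" as "any one of these" alternatives.
import json
import sys

def referenced_extensions(data, profile_name):
    caps = data["capabilities"]
    exts = set()
    for entry in data["profiles"][profile_name]["capabilities"]:
        # A plain string is a hard requirement; a list offers alternative
        # paths (e.g. an EXT extension vs. the same feature in core Vulkan),
        # so report the extensions of every alternative.
        names = [entry] if isinstance(entry, str) else entry
        for name in names:
            exts.update(caps[name].get("extensions", {}))
    return sorted(exts)

if __name__ == "__main__":
    with open(sys.argv[1]) as f:          # e.g. VP_ZINK_requirements.json
        data = json.load(f)
    for ext in referenced_extensions(data, sys.argv[2]):  # e.g. VP_ZINK_gl46_baseline
        print(ext)
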
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml
new file mode 100644
index 00000000000..fae5431fde2
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl-full.toml
@@ -0,0 +1,135 @@
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = ["/deqp/mustpass/gles2-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+version_check = "GL ES 3.2.*git"
+renderer_check = "zink.*Intel.*"
+
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = ["/deqp/mustpass/gles3-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gles2-khr-main.txt",
+ "/deqp/mustpass/gles3-khr-main.txt",
+ "/deqp/mustpass/gles31-khr-main.txt",
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gl46-main.txt",
+ "/deqp/mustpass/gl46-khr-single.txt",
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+
+# 565-nozs
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = ["/deqp/mustpass/gles3-565-no-depth-no-stencil.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgb565d0s0ms0",
+]
+prefix = "565-nozs-"
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-565-no-depth-no-stencil.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgb565d0s0ms0",
+]
+prefix = "565-nozs-"
+
+# multisample
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = ["/deqp/mustpass/gles3-multisample.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms4",
+]
+prefix = "multisample-"
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-multisample.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms4",
+]
+prefix = "multisample-"
+
+[[deqp]]
+deqp = "/deqp/modules/egl/deqp-egl-x11"
+caselists = ["/deqp/mustpass/egl-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+
+[[deqp]]
+deqp = "/deqp/modules/egl/deqp-egl-wayland"
+caselists = ["/deqp/mustpass/egl-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+prefix = "wayland-"
+
+[[piglit]]
+piglit_folder = "/piglit"
+profile = "gpu"
+process_isolation = true
+timeout = 180.0
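
The suite file above is a plain deqp-runner configuration: each [[deqp]] table names a dEQP binary, its caselists and common arguments, plus optional knobs such as prefix, timeout, version_check and renderer_check, and the [[piglit]] table does the same for a piglit profile. A quick way to sanity-check such a file locally is to load it and print the groups it declares; the sketch below is not part of this commit and assumes Python 3.11+ for tomllib:

#!/usr/bin/env python3
# Sketch only: summarize the [[deqp]] and [[piglit]] groups in a deqp-runner
# suite TOML such as deqp-zink-anv-tgl-full.toml.
import sys
import tomllib

with open(sys.argv[1], "rb") as f:
    suite = tomllib.load(f)

for group in suite.get("deqp", []):
    prefix = group.get("prefix", "(no prefix)")
    print(f"deqp {prefix}: {group['deqp']}")
    for caselist in group["caselists"]:
        print(f"    {caselist}")

for group in suite.get("piglit", []):
    print(f"piglit: profile={group['profile']}, "
          f"process_isolation={group.get('process_isolation', False)}")
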
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml
new file mode 100644
index 00000000000..0554b105692
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/deqp-zink-anv-tgl.toml
@@ -0,0 +1,103 @@
+#[[deqp]]
+#deqp = "/deqp/modules/gles2/deqp-gles2"
+#caselists = ["/deqp/mustpass/gles2-main.txt"]
+#deqp_args = [
+# "--deqp-surface-width=256",
+# "--deqp-surface-height=256",
+# "--deqp-surface-type=pbuffer",
+# "--deqp-gl-config-name=rgba8888d24s8ms0",
+# "--deqp-visibility=hidden"
+#]
+#timeout = 180.0
+#version_check = "GL ES 3.2.*git"
+#renderer_check = "zink.*Intel.*"
+#
+#[[deqp]]
+#deqp = "/deqp/modules/gles3/deqp-gles3"
+#caselists = ["/deqp/mustpass/gles3-main.txt"]
+#deqp_args = [
+# "--deqp-surface-width=256",
+# "--deqp-surface-height=256",
+# "--deqp-surface-type=pbuffer",
+# "--deqp-gl-config-name=rgba8888d24s8ms0",
+# "--deqp-visibility=hidden"
+#]
+#timeout = 180.0
+#
+#[[deqp]]
+#deqp = "/deqp/modules/gles31/deqp-gles31"
+#caselists = ["/deqp/mustpass/gles31-main.txt"]
+#deqp_args = [
+# "--deqp-surface-width=256",
+# "--deqp-surface-height=256",
+# "--deqp-surface-type=pbuffer",
+# "--deqp-gl-config-name=rgba8888d24s8ms0",
+# "--deqp-visibility=hidden"
+#]
+#timeout = 180.0
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gl46-main.txt",
+ "/deqp/mustpass/gl46-khr-single.txt",
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+skips = ["/install/zink-anv-tgl-premerge-skips.txt"]
+timeout = 180.0
+[deqp.env]
+ ZINK_DEBUG = "validation"
+
+# Regression testing for graphics pipelines where fragment shaders
+# don't know about multisampling etc... at compile time
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gl46-main.txt",
+
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+skips = ["/install/zink-anv-tgl-premerge-skips.txt"]
+timeout = 180.0
+include = ["KHR-GL46.sample_variables.mask.rgba8.*.samples.*.mask.*"]
+prefix = "noopt-"
+[deqp.env]
+ ZINK_DEBUG = "nobgc,noopt,validation"
+
+# Regression testing for graphics pipelines where fragment shaders
+# don't know about multisampling etc... at compile time
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+include = ["dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_sample.*"]
+prefix = "noopt-"
+[deqp.env]
+ ZINK_DEBUG = "nobgc,noopt,validation"
+
+[[piglit]]
+piglit_folder = "/piglit"
+profile = "quick_gl"
+process_isolation = true
+skips = ["/install/zink-anv-tgl-premerge-skips.txt"]
+timeout = 180.0
+fraction = 2
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml b/src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml
new file mode 100644
index 00000000000..742c0155360
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/deqp-zink-freedreno-a618.toml
@@ -0,0 +1,87 @@
+# Basic test set
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = ["/deqp/mustpass/gles2-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+version_check = "GL ES 3.2.*git"
+renderer_check = "zink.*Adreno.*618"
+
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = ["/deqp/mustpass/gles3-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gles2-khr-main.txt",
+ "/deqp/mustpass/gles3-khr-main.txt",
+ "/deqp/mustpass/gles31-khr-main.txt",
+]
+# We want to test desktop GL eventually, but fp64 is slow and we've got enough work
+# to do just getting GLES sorted out.
+# "/deqp/mustpass/gl46-main.txt",
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+
+# 565-nozs
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = ["/deqp/mustpass/gles3-565-no-depth-no-stencil.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgb565d0s0ms0",
+]
+prefix = "565-nozs-"
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-565-no-depth-no-stencil.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgb565d0s0ms0",
+]
+prefix = "565-nozs-"
+
+# multisample
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = ["/deqp/mustpass/gles3-multisample.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms4",
+]
+prefix = "multisample-"
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-multisample.txt"]
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms4",
+]
+prefix = "multisample-"
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt b/src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt
deleted file mode 100644
index 60c8b845e2e..00000000000
--- a/src/gallium/drivers/zink/ci/deqp-zink-lvp-fails.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail
-dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail
-dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail
-dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail
-dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail
-dEQP-GLES3.functional.clipping.point.wide_point_clip,Fail
-dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_center,Fail
-dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_corner,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_x,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_x,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_y,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_x,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail
-dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail
-dEQP-GLES3.functional.multisample.fbo_4_samples.proportionality_sample_coverage,Fail
-dEQP-GLES3.functional.multisample.fbo_4_samples.sample_coverage_invert,Fail
-dEQP-GLES3.functional.multisample.fbo_max_samples.proportionality_sample_coverage,Fail
-dEQP-GLES3.functional.multisample.fbo_max_samples.sample_coverage_invert,Fail
-KHR-GL32.transform_feedback.capture_geometry_separate_test,Fail
-KHR-GL32.transform_feedback.capture_vertex_interleaved_test,Fail
-KHR-GL32.transform_feedback.capture_vertex_separate_test,Fail
-KHR-GL32.transform_feedback.discard_vertex_test,Fail
-KHR-GL32.transform_feedback.draw_xfb_instanced_test,Crash
-KHR-GL32.transform_feedback.draw_xfb_stream_instanced_test,Crash
-KHR-GL32.transform_feedback.query_geometry_separate_test,Fail
-KHR-GL32.transform_feedback.query_vertex_interleaved_test,Fail
-KHR-GL32.transform_feedback.query_vertex_separate_test,Fail
-dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
-dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.r16i,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.r16ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.r32i,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.r32ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.r8ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rg16i,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rg16ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rg32i,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rg32ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rg8ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rgb10_a2ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rgba16i,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rgba16ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rgba32i,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rgba32ui,Fail
-KHR-GL32.packed_pixels.pbo_rectangle.rgba8ui,Fail
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt b/src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt
deleted file mode 100644
index 693fee240cd..00000000000
--- a/src/gallium/drivers/zink/ci/deqp-zink-lvp-flakes.txt
+++ /dev/null
@@ -1 +0,0 @@
-dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_l8_pot
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt b/src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt
deleted file mode 100644
index 39aa35934b6..00000000000
--- a/src/gallium/drivers/zink/ci/deqp-zink-lvp-skips.txt
+++ /dev/null
@@ -1 +0,0 @@
-KHR-GL32.texture_size_promotion.functional
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml b/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml
index 8c902ef0738..549b9f026a4 100644
--- a/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml
+++ b/src/gallium/drivers/zink/ci/deqp-zink-lvp.toml
@@ -1,6 +1,6 @@
[[deqp]]
deqp = "/deqp/modules/gles2/deqp-gles2"
-caselists = ["/deqp/mustpass/gles2-master.txt"]
+caselists = ["/deqp/mustpass/gles2-main.txt"]
deqp_args = [
"--deqp-surface-width=256",
"--deqp-surface-height=256",
@@ -9,10 +9,24 @@ deqp_args = [
"--deqp-visibility=hidden"
]
timeout = 180.0
+version_check = "GL ES 3.2.*git"
+renderer_check = "zink.*llvmpipe"
[[deqp]]
deqp = "/deqp/modules/gles3/deqp-gles3"
-caselists = ["/deqp/mustpass/gles3-master.txt"]
+caselists = ["/deqp/mustpass/gles3-main.txt"]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = ["/deqp/mustpass/gles31-main.txt"]
deqp_args = [
"--deqp-surface-width=256",
"--deqp-surface-height=256",
@@ -25,7 +39,8 @@ timeout = 180.0
[[deqp]]
deqp = "/deqp/external/openglcts/modules/glcts"
caselists = [
- "/deqp/mustpass/gl32-master.txt",
+ "/deqp/mustpass/gl46-main.txt",
+ "/deqp/mustpass/gl46-khr-single.txt",
]
deqp_args = [
"--deqp-surface-width=256",
@@ -35,3 +50,16 @@ deqp_args = [
"--deqp-visibility=hidden"
]
timeout = 180.0
+
+[[piglit]]
+piglit_folder = "/piglit"
+profile = "gpu"
+process_isolation = true
+timeout = 180.0
+ [piglit.env]
+ # Disable validation on piglit. We end up with use-after-frees from
+ # piglit_report() -> exit() having freed validation layer state, with a
+ # st_glFlush() -> tc_batch_execute() -> zink_set_vertex_buffers ->
+ # vulkan_layer_chassis::CmdPipelineBarrier2() (etc.) happening after that
+ # somehow.
+ ZINK_DEBUG = ""
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-radv.toml b/src/gallium/drivers/zink/ci/deqp-zink-radv.toml
new file mode 100644
index 00000000000..b05b6c4f599
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/deqp-zink-radv.toml
@@ -0,0 +1,41 @@
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gles2-main.txt",
+ "/deqp/mustpass/gles3-main.txt",
+ "/deqp/mustpass/gles31-main.txt",
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+renderer_check = "zink.*RADV"
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gl46-main.txt",
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+renderer_check = "zink.*RADV"
+
+[[piglit]]
+piglit_folder = "/piglit"
+profile = "gpu"
+process_isolation = true
+timeout = 180.0
+ [piglit.env]
+ PIGLIT_NO_WINDOW = "1"
+ PIGLIT_PLATFORM = "gbm"
+ WAFFLE_PLATFORM = "gbm"
diff --git a/src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml b/src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml
new file mode 100644
index 00000000000..6b6a029b5bd
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/deqp-zink-venus-lvp.toml
@@ -0,0 +1,15 @@
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gl46-main.txt",
+ "/deqp/mustpass/gl46-khr-single.txt",
+]
+deqp_args = [
+ "--deqp-surface-width=256",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-visibility=hidden"
+]
+timeout = 180.0
+renderer_check = "zink.*llvmpipe"
diff --git a/src/gallium/drivers/zink/ci/gitlab-ci-inc.yml b/src/gallium/drivers/zink/ci/gitlab-ci-inc.yml
new file mode 100644
index 00000000000..dd4fc5166ca
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/gitlab-ci-inc.yml
@@ -0,0 +1,172 @@
+.zink-common-rules:
+ rules:
+ - !reference [.test, rules]
+ - !reference [.gl-rules, rules]
+ - changes: &zink_files_list
+ - src/gallium/drivers/zink/*
+ - src/gallium/drivers/zink/nir_to_spirv/*
+ - src/gallium/drivers/zink/ci/gitlab-ci.yml
+ - src/gallium/drivers/zink/ci/gitlab-ci-inc.yml
+ - src/gallium/drivers/zink/ci/deqp-$DEQP_SUITE.toml
+ - src/gallium/drivers/zink/ci/$GPU_VERSION-fails.txt
+ - src/gallium/drivers/zink/ci/$GPU_VERSION-flakes.txt
+ - src/gallium/drivers/zink/ci/$GPU_VERSION-skips.txt
+ - src/gallium/drivers/zink/ci/$GPU_VERSION-validation-settings.txt
+ - src/gallium/drivers/zink/ci/$PIGLIT_TRACES_FILE
+ when: on_success
+
+.zink-common-manual-rules:
+ retry: !reference [.scheduled_pipeline-rules, retry]
+ rules:
+ - !reference [.test, rules]
+ - !reference [.gl-manual-rules, rules]
+ - changes:
+ *zink_files_list
+ when: manual
+
+.zink-lvp-rules:
+ stage: layered-backends
+ rules:
+ - !reference [.test, rules]
+ - !reference [.lavapipe-rules, rules]
+ - !reference [.zink-common-rules, rules]
+
+.zink-lvp-venus-rules:
+ stage: layered-backends
+ rules:
+ - !reference [.test, rules]
+ - !reference [.lavapipe-rules, rules]
+ - !reference [.venus-rules, rules]
+ - !reference [.zink-common-rules, rules]
+
+.zink-anv-rules:
+ stage: layered-backends
+ rules:
+ - !reference [.test, rules]
+ - !reference [.anv-rules, rules]
+ - !reference [.zink-common-rules, rules]
+
+.zink-anv-manual-rules:
+ stage: layered-backends
+ retry: !reference [.scheduled_pipeline-rules, retry]
+ rules:
+ - !reference [.test, rules]
+ - !reference [.anv-manual-rules, rules]
+ - !reference [.zink-common-manual-rules, rules]
+
+.zink-anv-rules-restricted:
+ stage: layered-backends
+ rules:
+ - !reference [.test, rules]
+ - !reference [.restricted-rules, rules]
+ - !reference [.anv-rules, rules]
+ - !reference [.zink-common-rules, rules]
+
+.zink-turnip-rules:
+ stage: layered-backends
+ rules:
+ - !reference [.test, rules]
+ - !reference [.collabora-turnip-rules, rules]
+ - !reference [.zink-common-rules, rules]
+ variables:
+ ZINK_DEBUG: optimal_keys
+
+.zink-turnip-manual-rules:
+ stage: layered-backends
+ retry: !reference [.scheduled_pipeline-rules, retry]
+ rules:
+ - !reference [.test, rules]
+ - !reference [.collabora-turnip-manual-rules, rules]
+ - !reference [.zink-common-manual-rules, rules]
+
+.zink-radv-rules:
+ stage: layered-backends
+ rules:
+ - !reference [.test, rules]
+ - !reference [.radv-valve-rules, rules]
+ - !reference [.zink-common-rules, rules]
+
+.zink-radv-manual-rules:
+ stage: layered-backends
+ retry: !reference [.scheduled_pipeline-rules, retry]
+ rules:
+ - !reference [.test, rules]
+ - !reference [.radv-valve-manual-rules, rules]
+ - !reference [.zink-common-manual-rules, rules]
+ - changes:
+ - .gitlab-ci/container/build-piglit.sh
+ when: manual
+
+.zink-test:
+ timeout: 30m
+ variables:
+ MESA_LOADER_DRIVER_OVERRIDE: "zink"
+ FLAKES_CHANNEL: "#zink-ci"
+ MESA_VK_ABORT_ON_DEVICE_LOSS: 0
+
+.zink-trace-test:
+ extends:
+ - .zink-test
+ variables:
+ # The libX11 in the debian we use doesn't XInitThreads() by default (need
+ # 1.8.1 for that), and eglretrace's waffle GLX path doesn't call it either,
+ # which ends up causing trouble with kopper's X usage. Use gbm for our
+ # trace replay, instead.
+ # https://gitlab.freedesktop.org/mesa/mesa/-/issues/6753
+ HWCI_START_XORG: ""
+ WAFFLE_PLATFORM: gbm
+ PIGLIT_PLATFORM: gbm
+
+.zink-lvp-test:
+ extends:
+ - .zink-lvp-rules
+ - .zink-test
+ variables:
+ LIBGL_ALWAYS_SOFTWARE: "true"
+ LVP_POISON_MEMORY: "1"
+ GPU_VERSION: zink-lvp
+ # Fix non-conformant llvmpipe filter defaults
+ GALLIVM_PERF: "no_quad_lod"
+
+.zink-venus-lvp-test:
+ extends:
+ - .zink-lvp-venus-rules
+ - .zink-test
+ variables:
+ LIBGL_ALWAYS_SOFTWARE: "true"
+ LVP_POISON_MEMORY: "1"
+ GPU_VERSION: zink-venus-lvp
+ # Fix non-conformant llvmpipe filter defaults
+ GALLIVM_PERF: "no_quad_lod"
+ VK_DRIVER: virtio
+ GALLIUM_DRIVER: "zink"
+ CROSVM_GALLIUM_DRIVER: "llvmpipe"
+ CROSVM_VK_DRIVER: "lvp"
+ CROSVM_GPU_ARGS: "vulkan=true,gles=false,backend=virglrenderer,egl=true,surfaceless=true,fixed-blob-mapping=false"
+
+.zink-anv-test:
+ extends:
+ - .lava-acer-cp514-2h-1160g7-volteer:x86_64
+ - .anv-test
+ - .zink-anv-rules
+ - .zink-test
+ variables:
+ VK_DRIVER: intel
+ GPU_VERSION: zink-anv-tgl
+
+.radv-zink-test-valve:
+ timeout: 30m
+ extends:
+ - .zink-test
+ - .test-radv
+ - .b2c-x86_64-test-gl
+ variables:
+ DEQP_SUITE: zink-radv
+ HWCI_TEST_SCRIPT: ./install/deqp-runner.sh
+ B2C_KERNEL_URL: https://fs.mupuf.org/linux-6.6-b2c-radv-ci # 6.6
+ B2C_JOB_SUCCESS_REGEX: 'Execution is over, pipeline status: 0'
+ B2C_TIMEOUT_OVERALL_MINUTES: 20
+
+ # Disable reporting, since DUTs don't have internet access
+ FLAKES_CHANNEL: ""
+
diff --git a/src/gallium/drivers/zink/ci/gitlab-ci.yml b/src/gallium/drivers/zink/ci/gitlab-ci.yml
index 9608a048b24..e775e71b758 100644
--- a/src/gallium/drivers/zink/ci/gitlab-ci.yml
+++ b/src/gallium/drivers/zink/ci/gitlab-ci.yml
@@ -1,50 +1,177 @@
-.zink-lvp-test:
- extends:
- - .zink-rules
- variables:
- ZINK_USE_LAVAPIPE: "true"
- LIBGL_ALWAYS_SOFTWARE: "1"
- GPU_VERSION: zink-lvp
- # Fix non-conformant llvmpipe filter defaults
- GALLIVM_PERF: "no_quad_lod"
- LP_NUM_THREADS: 0
+include:
+ - local: 'src/gallium/drivers/zink/ci/gitlab-ci-inc.yml'
-.zink-piglit-quick_gl:
+zink-lvp:
extends:
- .test-gl
+ - .deqp-test
- .zink-lvp-test
variables:
- PIGLIT_PROFILES: quick_gl
+ DEQP_SUITE: zink-lvp
+ DEQP_FRACTION: 8
PIGLIT_NO_WINDOW: 1
- PIGLIT_RUNNER_OPTIONS: "--timeout 180"
+ # Enable validation (except for on piglit, see deqp-zink-lvp.toml), logging
+ # to stdout and aborting on unknown failures.
+ ZINK_DEBUG: validation
+ XVFB_SCRIPT: "VK_DRIVER=lvp install/deqp-runner.sh"
+ script: |
+ xvfb-run --server-args='-noreset' bash -c ". $SCRIPTS_DIR/setup-test-env.sh && ${XVFB_SCRIPT}"
-zink-piglit-timelines:
+zink-venus-lvp:
extends:
- - .zink-piglit-quick_gl
+ - .test-gl
+ - .deqp-test
+ - .zink-venus-lvp-test
+ variables:
+ DEQP_SUITE: zink-venus-lvp
+ DEQP_FRACTION: 8
+ PIGLIT_NO_WINDOW: 1
+ # Enable validation (except for on piglit, see deqp-zink-venus-lvp.toml), logging
+ # to stdout and aborting on unknown failures.
+ ZINK_DEBUG: validation
+ LP_NUM_THREADS: 2
+ CROSVM_MEMORY: 12288
+ CROSVM_CPU: $FDO_CI_CONCURRENT
script:
- - xvfb-run --server-args='-noreset' sh -c "GALLIUM_DRIVER=zink VK_DRIVER=lvp install/piglit/piglit-runner.sh"
+ - xvfb-run --server-args='-noreset' bash -c "./install/crosvm-runner.sh ./install/deqp-runner.sh"
-zink-piglit-no_timelines:
+zink-anv-tgl:
extends:
- - .zink-piglit-quick_gl
- script:
- - xvfb-run --server-args='-noreset' sh -c "ZINK_NO_TIMELINES=1 GALLIUM_DRIVER=zink VK_DRIVER=lvp install/piglit/piglit-runner.sh"
+ - .zink-anv-test
+ timeout: 1h
+ variables:
+ DEQP_SUITE: zink-anv-tgl
+ PIGLIT_NO_WINDOW: 1
+ HWCI_START_WESTON: 1
+ # We use gbm because X can die:
+ # MESA: error: ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST)
+ # Xorg: ../src/gallium/drivers/zink/zink_batch.c:599: zink_end_batch: Assertion `!ctx->batch_states' failed.
+ PIGLIT_PLATFORM: gbm
-zink-piglit-lazy:
+# Manual full run when you want to double-check the full status.
+zink-anv-tgl-full:
extends:
- - .zink-piglit-quick_gl
- script:
- - xvfb-run --server-args='-noreset' sh -c "ZINK_DESCRIPTORS=lazy GALLIUM_DRIVER=zink VK_DRIVER=lvp install/piglit/piglit-runner.sh"
+ - zink-anv-tgl
+ - .zink-anv-manual-rules
+ variables:
+ DEQP_SUITE: zink-anv-tgl-full
+ JOB_TIMEOUT: 105
+ HWCI_START_WESTON: 1
+ timeout: 1h 45m
+ parallel: 3
-zink-lvp-deqp:
+zink-anv-tgl-traces:
extends:
- - .test-gl
- - .deqp-test
- - .zink-lvp-test
+ - .lava-piglit-traces:x86_64
+ - .zink-anv-test
+ - .zink-trace-test
+ # Add .lava-traces-base again to override .zink-anv-test setting the
+ # HWCI_TEST_SCRIPT, but .lava-piglit-traces having to come first to get
+ # dependencies right.
+ - .lava-traces-base
variables:
- GALLIUM_DRIVER: "zink" # move here due to bad xvfb-run interactions
- VK_DRIVER: lvp # Don't move to the top level, piglit runs do funny stuff with VK_DRIVER set
- DEQP_EXPECTED_RENDERER: "zink.*llvmpipe"
- DEQP_VER: gles2
- DEQP_SUITE: zink-lvp
- parallel: 2
+ PIGLIT_TRACES_FILE: traces-zink.yml
+
+zink-anv-tgl-traces-restricted:
+ extends:
+ - zink-anv-tgl-traces
+ - .zink-anv-rules-restricted
+ variables:
+ PIGLIT_TRACES_FILE: traces-zink-restricted.yml
+ PIGLIT_REPLAY_EXTRA_ARGS: --db-path ${CI_PROJECT_DIR}/replayer-db/ --minio_bucket=mesa-tracie-private --jwt-file=${S3_JWT_FILE}
+ allow_failure: true
+
+zink-tu-a618:
+ extends:
+ - .lava-test-deqp:arm64
+ - .zink-turnip-rules
+ - .zink-test
+ - .lava-sc7180-trogdor-lazor-limozeen:arm64
+ variables:
+ DEQP_FRACTION: 2
+ DEQP_SUITE: zink-freedreno-a618
+ FLAKES_CHANNEL: "#freedreno-ci"
+ HWCI_START_WESTON: 1
+ GPU_VERSION: zink-freedreno-a618
+
+zink-tu-a618-full:
+ extends:
+ - zink-tu-a618
+ - .collabora-turnip-manual-rules
+ variables:
+ DEQP_FRACTION: 1
+
+zink-tu-a618-traces:
+ extends:
+ - a618-traces
+ - .zink-turnip-rules
+ - .zink-trace-test
+ parallel: null
+ variables:
+ PIGLIT_REPLAY_DEVICE_NAME: "zink-a618"
+
+zink-tu-a618-traces-performance:
+ extends:
+ - zink-tu-a618-traces
+ - .zink-turnip-manual-rules
+ - .piglit-performance:arm64
+ rules:
+ - !reference [.piglit-performance:arm64, rules]
+ - !reference [.zink-turnip-manual-rules, rules]
+ variables:
+ # Always use the same device
+ # a618 tag starts with cbg-1 (not cbg-0) for some reason
+ LAVA_TAGS: "cbg-1"
+ needs:
+ - !reference [zink-tu-a618-traces, needs]
+ - !reference [.piglit-performance:arm64, needs]
+
+############### Combined testing (GL, GLES, Piglit) on RADV
+zink-radv-polaris10-valve:
+ extends:
+ - .radv-zink-test-valve
+ - .polaris10-test-valve-kws
+ - .zink-radv-manual-rules
+ variables:
+ GPU_VERSION: zink-radv-polaris10
+ ZINK_DEBUG: quiet
+
+zink-radv-navi10-valve:
+ extends:
+ - .radv-zink-test-valve
+ - .navi10-test-valve-mupuf
+ - .zink-radv-manual-rules
+ timeout: 40m
+ variables:
+ B2C_TIMEOUT_OVERALL_MINUTES: 30
+ GPU_VERSION: zink-radv-navi10
+
+zink-radv-vangogh-valve:
+ timeout: 35m
+ parallel: 3
+ extends:
+ - .radv-zink-test-valve
+ - .vangogh-test-valve
+ - .zink-radv-rules
+ variables:
+ GPU_VERSION: zink-radv-vangogh
+ B2C_SESSION_REBOOT_REGEX: 'BUG: kernel NULL pointer dereference, address'
+ B2C_TIMEOUT_BOOT_RETRIES: 1
+ B2C_TIMEOUT_BOOT_MINUTES: 30
+ B2C_TIMEOUT_OVERALL_MINUTES: 30
+ FDO_CI_CONCURRENT: 6
+ # Override the list of tags to drop `priority:low`
+ tags:
+ - farm:$RUNNER_FARM_LOCATION
+ - amdgpu:codename:VANGOGH
+
+zink-radv-navi31-valve:
+ extends:
+ - .radv-zink-test-valve
+ - .navi31-test-valve
+ - .zink-radv-manual-rules
+ timeout: 1h 20m
+ variables:
+ GPU_VERSION: zink-radv-navi31
+ B2C_TIMEOUT_BOOT_MINUTES: 75
+ B2C_TIMEOUT_OVERALL_MINUTES: 75
diff --git a/src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt b/src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt
deleted file mode 100644
index 95844f80a56..00000000000
--- a/src/gallium/drivers/zink/ci/piglit-zink-lvp-fails.txt
+++ /dev/null
@@ -1,826 +0,0 @@
-glx@glx-copy-sub-buffer,Fail
-glx@glx-copy-sub-buffer samples=2,Fail
-glx@glx-copy-sub-buffer samples=4,Fail
-glx@glx-multi-window-single-context,Fail
-glx@glx-multithread-texture,Fail
-glx@glx-swap-copy,Fail
-glx@glx-swap-pixmap-bad,Fail
-glx@glx-tfp,Crash
-glx@glx-visuals-depth,Crash
-glx@glx-visuals-depth -pixmap,Crash
-glx@glx-visuals-stencil,Crash
-glx@glx-visuals-stencil -pixmap,Crash
-glx@glx-query-drawable-glx_fbconfig_id-window,Fail
-glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
-glx@glx_arb_create_context_no_error@no error,Fail
-glx@glx_ext_import_context@free context,Fail
-glx@glx_ext_import_context@get context id,Fail
-glx@glx_ext_import_context@get current display,Fail
-glx@glx_ext_import_context@import context- multi process,Fail
-glx@glx_ext_import_context@import context- single process,Fail
-glx@glx_ext_import_context@imported context has same context id,Fail
-glx@glx_ext_import_context@make current- multi process,Fail
-glx@glx_ext_import_context@make current- single process,Fail
-glx@glx_ext_import_context@query context info,Fail
-shaders@glsl-fs-pointcoord,Fail
-shaders@point-vertex-id divisor,Fail
-shaders@point-vertex-id gl_instanceid,Fail
-shaders@point-vertex-id gl_instanceid divisor,Fail
-shaders@point-vertex-id gl_vertexid,Fail
-shaders@point-vertex-id gl_vertexid divisor,Fail
-shaders@point-vertex-id gl_vertexid gl_instanceid,Fail
-shaders@point-vertex-id gl_vertexid gl_instanceid divisor,Fail
-spec@!opengl 1.0@gl-1.0-edgeflag,Fail
-spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail
-spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
-spec@!opengl 1.0@gl-1.0-swapbuffers-behavior,Fail
-spec@!opengl 1.1@linestipple,Fail
-spec@!opengl 1.1@linestipple@Factor 2x,Fail
-spec@!opengl 1.1@linestipple@Factor 3x,Fail
-spec@!opengl 1.1@linestipple@Line loop,Fail
-spec@!opengl 1.1@linestipple@Line strip,Fail
-spec@!opengl 1.1@polygon-offset,Fail
-spec@!opengl 1.1@polygon-mode,Fail
-spec@!opengl 1.1@polygon-mode-facing,Fail
-spec@!opengl 1.1@polygon-mode-offset,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
-spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
-spec@!opengl 1.1@read-front,Fail
-spec@!opengl 1.1@read-front clear-front-first,Fail
-spec@!opengl 1.1@read-front clear-front-first samples=2,Fail
-spec@!opengl 1.1@read-front clear-front-first samples=4,Fail
-spec@!opengl 1.1@read-front samples=2,Fail
-spec@!opengl 1.1@read-front samples=4,Fail
-spec@!opengl 1.2@copyteximage 3d,Fail
-spec@!opengl 2.0@depth-tex-modes-glsl,Fail
-spec@!opengl 2.0@gl-2.0-edgeflag,Fail
-spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail
-spec@!opengl 2.1@pbo,Fail
-spec@!opengl 2.1@pbo@test_polygon_stip,Fail
-spec@!opengl 2.1@polygon-stipple-fs,Fail
-spec@!opengl 3.0@sampler-cube-shadow,Fail
-spec@!opengl 3.2@gl-3.2-adj-prims cull-back pv-first,Fail
-spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-first,Fail
-spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-first,Fail
-spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-first,Fail
-spec@!opengl 3.2@gl-3.2-adj-prims pv-first,Fail
-spec@!opengl es 2.0@glsl-fs-pointcoord,Fail
-spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail
-spec@arb_depth_texture@depth-tex-modes,Fail
-spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
-spec@arb_framebuffer_object@fbo-gl_pointcoord,Fail
-spec@arb_get_program_binary@restore-sso-program,Fail
-spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
-spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail
-spec@arb_point_parameters@arb_point_parameters-point-attenuation,Fail
-spec@arb_point_parameters@arb_point_parameters-point-attenuation@Aliased combinations,Fail
-spec@arb_point_parameters@arb_point_parameters-point-attenuation@Antialiased combinations,Fail
-spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail
-spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
-spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
-spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
-spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
-spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
-spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
-spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
-spec@arb_sample_shading@samplemask 2,Fail
-spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2@noms partition,Fail
-spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2 all,Fail
-spec@arb_sample_shading@samplemask 2 all@0.250000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
-spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
-spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4,Fail
-spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4@noms partition,Fail
-spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4 all,Fail
-spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
-spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
-spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
-spec@arb_seamless_cube_map@arb_seamless_cubemap,Fail
-spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
-spec@arb_texture_cube_map_array@arb_texture_cube_map_array-sampler-cube-array-shadow,Fail
-spec@arb_texture_float@fbo-blending-formats,Fail
-spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY16F_ARB,Fail
-spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY32F_ARB,Fail
-spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE16F_ARB,Fail
-spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE32F_ARB,Fail
-spec@arb_texture_float@fbo-blending-formats@GL_RGB16F,Fail
-spec@arb_texture_float@fbo-blending-formats@GL_RGB32F,Fail
-spec@arb_texture_rg@multisample-fast-clear gl_arb_texture_rg-int,Fail
-spec@arb_texture_view@rendering-formats,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_RG8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16 as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_R32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGB10_A2UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_RG8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8 as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8I as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGB10_A2UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8 as GL_RGB8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16F as GL_RG32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGB10_A2UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16F,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16_SNORM,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2UI,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8I,Fail
-spec@arb_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8UI,Fail
-spec@egl 1.4@egl-copy-buffers,Fail
-spec@egl 1.4@eglterminate then unbind context,Fail
-spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
-spec@egl_khr_surfaceless_context@viewport,Fail
-spec@egl_mesa_configless_context@basic,Fail
-spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
-spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
-spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail
-spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail
-spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail
-spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail
-spec@ext_framebuffer_multisample@enable-flag,Fail
-spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
-spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
-spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
-spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
-spec@ext_framebuffer_multisample@sample-coverage 2 non-inverted,Fail
-spec@ext_framebuffer_multisample@sample-coverage 4 non-inverted,Fail
-spec@ext_framebuffer_object@fbo-blending-formats,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY12,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY16,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY4,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_INTENSITY8,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_LUMINANCE12,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_LUMINANCE16,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB10,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB12,Fail
-spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB16,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export-tex,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-intel_external_sampler_only,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_attributes,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_hints,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-missing_attributes,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-reimport-bug,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-unsupported_format,Fail
-spec@ext_packed_float@query-rgba-signed-components,Fail
-spec@ext_texture_integer@multisample-fast-clear gl_ext_texture_integer,Fail
-spec@ext_texture_snorm@fbo-blending-formats,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY16_SNORM,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY8_SNORM,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_INTENSITY_SNORM,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_LUMINANCE16_SNORM,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_LUMINANCE8_SNORM,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_LUMINANCE_SNORM,Fail
-spec@ext_texture_snorm@fbo-blending-formats@GL_RGB16_SNORM,Fail
-spec@ext_texture_swizzle@depth_texture_mode_and_swizzle,Fail
-spec@ext_transform_feedback2@counting with pause,Fail
-spec@ext_transform_feedback@generatemipmap prims_generated,Fail
-spec@intel_performance_query@intel_performance_query-issue_2235,Fail
-spec@khr_texture_compression_astc@array-gl,Fail
-spec@khr_texture_compression_astc@array-gl@12x12 Block Dim,Fail
-spec@khr_texture_compression_astc@array-gl@5x5 Block Dim,Fail
-spec@khr_texture_compression_astc@miptree-gl ldr,Fail
-spec@khr_texture_compression_astc@miptree-gl ldr@LDR Profile,Fail
-spec@khr_texture_compression_astc@miptree-gl srgb,Fail
-spec@khr_texture_compression_astc@miptree-gl srgb@sRGB decode,Fail
-spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
-spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
-spec@khr_texture_compression_astc@miptree-gl srgb-sd,Fail
-spec@khr_texture_compression_astc@miptree-gl srgb-sd@sRGB skip decode,Fail
-spec@khr_texture_compression_astc@miptree-gles ldr,Fail
-spec@khr_texture_compression_astc@miptree-gles ldr@LDR Profile,Fail
-spec@khr_texture_compression_astc@miptree-gles srgb,Fail
-spec@khr_texture_compression_astc@miptree-gles srgb@sRGB decode,Fail
-spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
-spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
-spec@khr_texture_compression_astc@miptree-gles srgb-sd,Fail
-spec@khr_texture_compression_astc@miptree-gles srgb-sd@sRGB skip decode,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gl ldr,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gl ldr@LDR Profile,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb@sRGB decode,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gles ldr,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gles ldr@LDR Profile,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb@sRGB decode,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
-spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
-spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
-spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail
-spec@arb_shader_image_load_store@early-z,Fail
-spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail
-spec@arb_shader_image_load_store@indexing,Fail
-spec@arb_shader_image_load_store@indexing@Compute shader/dynamically uniform indexing test,Fail
-spec@arb_shader_image_load_store@indexing@Fragment shader/dynamically uniform indexing test,Fail
-spec@arb_shader_image_load_store@indexing@Geometry shader/dynamically uniform indexing test,Fail
-spec@arb_shader_image_load_store@indexing@Tessellation control shader/dynamically uniform indexing test,Fail
-spec@arb_shader_image_load_store@indexing@Tessellation evaluation shader/dynamically uniform indexing test,Fail
-spec@arb_shader_image_load_store@indexing@Vertex shader/dynamically uniform indexing test,Fail
-spec@arb_shader_image_load_store@invalid,Fail
-spec@arb_shader_image_load_store@invalid@imageLoad/incompatible format test/imageBuffer,Fail
-spec@khr_texture_compression_astc@array-gles,Fail
-spec@khr_texture_compression_astc@array-gles@12x12 Block Dim,Fail
-spec@khr_texture_compression_astc@array-gles@5x5 Block Dim,Fail
-spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3,Fail
-spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3@oes_egl_image_external_essl3_imageLoad,Fail
-spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3@oes_egl_image_external_essl3_imageStore,Fail
-spec@oes_texture_view@rendering-formats,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_RG8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16 as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16F as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16I as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16UI as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_R16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R16_SNORM as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32F as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32I as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R32UI as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8 as GL_R8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8I as GL_R8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8UI as GL_R8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_R8_SNORM as GL_R8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_R32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGB10_A2UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16 as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16F as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16I as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16UI as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_R32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RG16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGB10_A2UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG16_SNORM as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32F as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32I as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG32UI as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_RG8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8 as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8I as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8UI as GL_RG8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_R16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RG8_SNORM as GL_RG8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_R32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGB10_A2UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2UI as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16 as GL_RGB16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16F as GL_RGB16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16I as GL_RGB16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16UI as GL_RGB16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB16_SNORM as GL_RGB16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB8I as GL_RGB8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB8UI as GL_RGB8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGB8_SNORM as GL_RGB8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RG32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16 as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16F as GL_RG32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16F as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RG32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16I as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RG32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16UI as GL_RGBA16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RG32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA16_SNORM as GL_RGBA16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_R32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGB10_A2UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8 as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8I as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8UI as GL_RGBA8_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_R32UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16F,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RG16_SNORM,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGB10_A2UI,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8I,Fail
-spec@oes_texture_view@rendering-formats@clear GL_RGBA8_SNORM as GL_RGBA8UI,Fail
-
-
-#literally no driver can pass these
-spec@!opengl 1.0@rasterpos,Fail
-spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
-spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
-spec@arb_direct_state_access@gettextureimage-formats,Crash
-spec@ext_texture_integer@fbo-integer,Fail
-
-#these need format conversions that gallium doesn't implement yet
-spec@arb_texture_buffer_object@formats (fs- arb),Crash
-spec@arb_texture_buffer_object@formats (vs- arb),Crash
diff --git a/src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt b/src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt
deleted file mode 100644
index e2b5fddf8fd..00000000000
--- a/src/gallium/drivers/zink/ci/piglit-zink-lvp-flakes.txt
+++ /dev/null
@@ -1 +0,0 @@
-spec@khr_debug@push-pop-group_gl.*
diff --git a/src/gallium/drivers/zink/ci/traces-zink-restricted.yml b/src/gallium/drivers/zink/ci/traces-zink-restricted.yml
new file mode 100644
index 00000000000..7ee2062e624
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/traces-zink-restricted.yml
@@ -0,0 +1,111 @@
+%YAML 1.2
+---
+traces-db:
+ download-url: "http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-private/"
+
+traces:
+ AmnesiaTDD/Amnesia-f700-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: c0a3a735ce4dcc394af8bef0289ba8b1
+ Anna/Anna-f692-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip]
+ text: "trace contains no swapbuffers, so no frames recorded"
+ Antichamber/antichamber-f240-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 55e105b22656add7d16efac2bcad82f5
+ text: "line widths look wrong compared to freedreno"
+ Cradle/cradle-f3000-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, slow]
+ ICEDarkness/ICEDarkness-f230-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: 64ac14b2a04d510e470fb2e06b039b42
+ text: "note that this trace is stable on freedreno"
+ LifelessPlanet/LifelessPlanet-f420-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: defec084a01f13e5cf01147bcfd235e6
+ MetroLLRedux/metro-ll-redux-kf480-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, slow]
+ OilRush/OilRush-f14000-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip]
+ text: "requires allow_glsl_extension_directive_midshader"
+ Osmos/Osmos-f2660-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 17daf7dc20dd74186d04eb54497a1690
+ PenumbraOverture/penumbra-ov-s0-2-864-f1500-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: fff50b2eb306641d90b4249542d437d1
+ ShadowWarrior/ShadowWarrior-f3952-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: 826f966a52bc956644bf41562aa9c686
+ text: |-
+ This trace is flaky on freedreno too.
+ SirYouAreBeingHunted/sir-f750-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 68611fd9f51e5ef5d2eb4417031a379f
+ SpecOps/specops-s0-1088-f1300-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, broken, flakes]
+ checksum: e4037172a61efe23a67b5cc9ea9960bb
+ text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986
+ Superhot/superhot-f8100-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, broken, flakes]
+ checksum: 3f4ce060d0306b639565f8705abdea26
+ text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986
+ TheRavenRemastered/Raven-f10900-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 64243ccd048cbbfc0649e8f3c5b5cdb3
+ TombRaider2013/TombRaider-f1430-v2.trace:
+ gl-zink-anv-tgl:
+ label: [crash]
+ text: |-
+ ../src/intel/isl/isl_storage_image.c:196: isl_lower_storage_image_format: Assertion `!Unknown image format' failed.
+ where the format is ISL_FORMAT_B8G8R8A8_UNORM.
+ Witcher2/witcher2-s0-1970-f2038-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip]
+ text: "many errors like 'GL_INVALID_VALUE in glBindBufferRange(offset misaligned 144/32)'"
+ alien-isolation/AlienIsolation.bin.1-trim--k-f2000-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, slow]
+ text: "Slow, just seems to display a bit of text and no gameplay, anyway (radeonsi or iris)"
+ civilization-v/CivilizationV-trim--s705-761-f762-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, broken, flakes]
+ checksum: 82e6d93321ab146d758f2d60a48f265d
+ text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986
+ golf-with-your-friends/GolfWithYourFriends-trim--f1070-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: cd01820de77063f0397f26d6cd747d1c
+ text: "has caused ../src/gallium/drivers/zink/zink_context.c:3773: zink_wait_on_batch: Assertion `batch_id' failed."
+ hollow-knight/HollowKnight-trim--f2020-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: 78f9122c5dcd89826efe421fa626971c
+ text: "note that this trace is stable on freedreno"
+ ksp/KSP-trim--f4800-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 28b08ee598387fdc58b6e0e92261e1d3
+ overcooked2/Overcooked2-trim--f3301-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 2d342febd76eb2b33e0496e5ed57e124
+ plague-inc-evolved/PlagueIncEvolved-trim--f1200-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip]
+ text: "renders black in CI, freedreno CI has unspecified trouble with it too"
+ slime-rancher/SlimeRancher-trim--f970-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip]
+ text: "renders black in CI, freedreno CI has unspecified trouble with it too"
+ stellaris/Stellaris-trim--f722-v20201203-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: 1c263695e5bdfcd622f26292a3b2a10e
+ text: "looks good but checksum keeps changing"
diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml
new file mode 100644
index 00000000000..0bd060ef98a
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/traces-zink.yml
@@ -0,0 +1,144 @@
+%YAML 1.2
+---
+traces-db:
+ download-url: "http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public/"
+
+traces:
+ 0ad/0ad-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, broken, flakes]
+ checksum: 1da0ecf4034a81aa16e7984b75368aec
+ text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986
+ behdad-glyphy/glyphy-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: b743608724c13bc4105e95385fc2b810
+ blender/blender-demo-cube_diorama.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes, broken]
+ checksum: 7b3ebdb5a4a8282ff564a4f14e7791a4
+ text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986
+ blender/blender-demo-ellie_pose.trace:
+ gl-zink-anv-tgl:
+ label: [skip, broken, flakes]
+ checksum: 9b5090a236350f04cb2a61c5f0c0fe0f
+ text: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8986
+ glxgears/glxgears-2-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: f53ac20e17da91c0359c31f2fa3f401e
+ gputest/furmark-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 7f513bceca18b6f44049bc5a690df235
+ gputest/triangle-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 5f694874b15bcd7a3689b387c143590b
+ godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc:
+ gl-zink-anv-tgl:
+ label: [skip]
+ checksum: dbe1de4e2e812413f173ea6c423117ff
+ text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
+ gputest/pixmark-julia-fp32-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: fbf5e44a6f46684b84e5bb5ad6d36c67
+ gputest/pixmark-julia-fp64-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 1760aea00af985b8cd902128235b08f6
+ gputest/pixmark-volplosion-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 7e49248ad9dc4c052c04b11246c4bd33
+ text: Different rendering from iris, but still looks correct (common result with this trace)
+ gputest/plot3d-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 56f11d133f72712a6df13855ec00cdb0
+ gputest/tessmark-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 46e05521eca33c2720ba14c0ea6c9066
+ humus/AmbientAperture-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 0f3b7351a84e1e6f15430f8766af4b4c
+ humus/Portals-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: a37699d09e61a842fc909f0c4fb72cf1
+ humus/CelShading-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 68f20f660b9d812083066342398fe1b0
+ humus/DynamicBranching3-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 0eb6c37cb88b48513e217012edf1ad32
+ humus/HDR-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: b09d83a5887b19ceaaaf0ac69c6a0af4
+ humus/RaytracedShadows-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 8c053a06021678e28bfffd68705c6293
+ humus/VolumetricFogging2-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 354a0046d81981a5227691fd8401d8ef
+ neverball/neverball-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 476a3e154a2564c9b136705cfdcf36de
+ paraview/pv-manyspheres-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 190153b6437f7063a6853ca94e5914f2
+ paraview/pv-waveletcontour-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: be4aba8a24e2bd2beb068d0c9c89dfcb
+ paraview/pv-waveletvolume-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: b36c25e52624cbf8dab73b6acecb8e84
+ pathfinder/demo-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 558c3d2b1b7acc782b3908c579ce0ce8
+ pathfinder/canvas_moire-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 1706229fca06c1d7946ebc94e3b0a66d
+ pathfinder/canvas_text_v2-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: c824fcadd028eba50d9911ebe1a3f823
+ ror/ror-default.trace:
+ gl-zink-anv-tgl:
+ checksum: d7b07cb1f6fdc6949bdaf84d2173e24b
+ supertuxkart/supertuxkart-antediluvian-abyss.rdc:
+ gl-zink-anv-tgl:
+ label: [skip]
+ checksum: 0af2faa0d9183c1bc4dc7612befe1f0a
+ text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
+ supertuxkart/supertuxkart-menu.rdc:
+ gl-zink-anv-tgl:
+ label: [skip]
+ checksum: 0a4095dc7b441643a3336975b61c9e6a
+ text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
+ supertuxkart/supertuxkart-ravenbridge-mansion.rdc:
+ gl-zink-anv-tgl:
+ label: [skip]
+ checksum: ca0b64f1a62e01765146be8391eae636
+ text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
+ unvanquished/unvanquished-lowest.trace:
+ gl-zink-anv-tgl:
+ checksum: 7789205e8b4d160dc81e3684f0627a38
+ unvanquished/unvanquished-ultra.trace:
+ gl-zink-anv-tgl:
+ checksum: 026dde18e934e7ce3e36eb13ea8e975c
+ valve/counterstrike-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: 148ec1105f5f14c90cb756a178cef264
+ text: "note that this trace is stable on freedreno"
+ valve/counterstrike-source-v2.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: d5eb7d064ca31cb316e853a082a3950d
+ text: occasional segfaults
+ valve/half-life-2-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 8deac48993e4515589a7165e8bd14f25
+ valve/portal-2-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 3683fd5bb2224d6f4a1c47c1eab277d9
+ warzone2100/warzone2100-default.trace:
+ gl-zink-anv-tgl:
+ label: [skip, flakes]
+ checksum: 56f1f06bdca3d5352b5e6c0c3d572f43
+ text: occasional checksum change
+ xonotic/xonotic-keybench-high-v2.trace:
+ gl-zink-anv-tgl:
+ checksum: 659ef8c91d9eeccd0dc603b196c2577c
diff --git a/src/gallium/drivers/zink/ci/zink-amdpro-fails.txt b/src/gallium/drivers/zink/ci/zink-amdpro-fails.txt
new file mode 100644
index 00000000000..6a3165ccfac
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-amdpro-fails.txt
@@ -0,0 +1,159 @@
+# broken vertex buffer robustness
+#GTF-GL46.gtf21.GL2FixedTests.vertex_order.vertex_order,Fail
+#GTF-GL46.gtf31.GL3Tests.draw_instanced.draw_instanced_vertex_attrib_stride,Fail
+#GTF-GL46.gtf33.GL3Tests.instanced_arrays.instanced_arrays_stride,Fail
+#KHR-GL46.tessellation_shader.tessellation_shader_triangles_tessellation.inner_tessellation_level_rounding
+#KHR-GL46.vertex_attrib_64bit.limits_test,Fail
+#KHR-GL46.vertex_attrib_64bit.vao,Fail
+#KHR-GL46.vertex_attrib_binding.advanced-bindingUpdate,Fail
+#KHR-GL46.vertex_attrib_binding.basic-inputI-case2,Fail
+#KHR-Single-GL46.enhanced_layouts.varying_array_locations,Fail
+#KHR-Single-GL46.enhanced_layouts.varying_locations,Fail
+#KHR-Single-GL46.enhanced_layouts.varying_structure_locations,Fail
+
+# uncategorized
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_query_objects,Fail
+GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_stride_pointer,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_pause_resume,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_states,Fail
+KHR-GL46.direct_state_access.queries_functional,Fail
+KHR-GL46.direct_state_access.vertex_arrays_attribute_binding,Fail
+KHR-GL46.direct_state_access.vertex_arrays_enable_disable_attributes,Fail
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_tessLevel,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_point_mode.points_verification,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_quads_tessellation.inner_tessellation_level_rounding,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_triangles_tessellation.identical_triangles,Fail
+KHR-GL46.tessellation_shader.vertex.vertex_spacing,Fail
+KHR-GL46.texture_cube_map_array.texture_size_tesselation_con_sh,Fail
+KHR-GL46.texture_cube_map_array.texture_size_tesselation_ev_sh,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api,Fail
+
+# These tests mysteriously fail to allocate descriptor sets with input attachments
+KHR-GL46.blend_equation_advanced.blend_all.GL_COLORBURN_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_COLORDODGE_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_DARKEN_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_DIFFERENCE_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_EXCLUSION_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_HARDLIGHT_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_COLOR_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_HUE_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_LUMINOSITY_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_HSL_SATURATION_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_LIGHTEN_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_MULTIPLY_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_OVERLAY_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_SCREEN_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_all.GL_SOFTLIGHT_KHR_all_qualifier,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_COLORBURN_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_COLORDODGE_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_DARKEN_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_DIFFERENCE_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_EXCLUSION_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_HARDLIGHT_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_COLOR_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_HUE_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_LUMINOSITY_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_HSL_SATURATION_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_LIGHTEN_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_MULTIPLY_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_OVERLAY_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_SCREEN_KHR,Fail
+KHR-GL46.blend_equation_advanced.blend_specific.GL_SOFTLIGHT_KHR,Fail
+KHR-GL46.blend_equation_advanced.test_coherency.multiplySequence,Fail
+
+# This test fails after mysteriously failing to create a pipeline
+KHR-Single-GL46.enhanced_layouts.xfb_global_buffer,Crash
+
+# These pass on AMDVLK
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_tricky_dataflow_2_vertex,Fail
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_sequence_fragment,Fail
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_fragment,Fail
+KHR-GL46.geometry_shader.primitive_queries.primitive_queries_lines,Fail
+KHR-GL46.geometry_shader.primitive_queries.primitive_queries_points,Fail
+KHR-GL46.geometry_shader.primitive_queries.primitive_queries_triangles,Fail
+KHR-GL46.geometry_shader.qualifiers.flat_interpolation,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_triangles_tessellation.degenerate_triangle,Fail
+KHR-GL46.compute_shader.pipeline-post-xfb,Fail
+KHR-GL46.transform_feedback.draw_xfb_stream_test,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_all_stages,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_block_member_stride,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_block_member,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_variable,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_capture_struct,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_explicit_location,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_stride,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_stride_of_empty_list,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_stride_of_empty_list_and_api,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location,Fail
+KHR-Single-GL46.enhanced_layouts.xfb_vertex_streams,Fail
+KHR-GL46.transform_feedback.capture_geometry_interleaved_test,Fail
+KHR-GL46.transform_feedback.capture_geometry_separate_test,Fail
+KHR-GL46.transform_feedback.capture_vertex_interleaved_test,Fail
+KHR-GL46.transform_feedback.capture_vertex_separate_test,Fail
+KHR-GL46.transform_feedback.query_geometry_interleaved_test,Fail
+KHR-GL46.transform_feedback.query_geometry_separate_test,Fail
+KHR-GL46.transform_feedback.query_vertex_interleaved_test,Fail
+KHR-GL46.transform_feedback.query_vertex_separate_test,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_basic_outline,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_double_precision,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_geometry,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_geometry_primitive_types,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_interleaved,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_max_interleaved,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_max_separate,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_misc,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_overflow,Fail
+GTF-GL46.gtf30.GL3Tests.transform_feedback.transform_feedback_vertex_id,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_basic,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_framebuffer,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_two_buffers,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_basic_outline,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_geometry_instanced,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_multiple_streams,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_overflow,Fail
+GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_queried,Fail
+GTF-GL46.gtf42.GL3Tests.transform_feedback_instanced.transform_feedback_instanced_basic,Fail
+GTF-GL46.gtf42.GL3Tests.transform_feedback_instanced.transform_feedback_instanced_conditional_render,Fail
+GTF-GL46.gtf42.GL3Tests.transform_feedback_instanced.transform_feedback_instanced_streams,Fail
+KHR-GL46.texture_buffer.texture_buffer_operations_transform_feedback,Fail
+GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_attrib,Fail
+GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_bgra,Fail
+GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_conversion,Fail
+GTF-GL46.gtf33.GL3Tests.vertex_type_2_10_10_10_rev.vertex_type_2_10_10_10_rev_divisor,Fail
+KHR-GL46.direct_state_access.buffers_functional,Fail
+KHR-GL46.direct_state_access.vertex_arrays_attribute_format,Fail
+KHR-GL46.direct_state_access.vertex_arrays_vertex_buffers,Fail
+KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components,Fail
+KHR-GL46.gpu_shader_fp64.fp64.named_uniform_blocks,Fail
+KHR-GL46.shader_atomic_counters.advanced-usage-multi-stage,Fail
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_sequence_vertex,Fail
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_tricky_dataflow_2_fragment,Fail
+KHR-GL46.shaders.loops.do_while_constant_iterations.nested_vertex,Fail
+KHR-GL46.texture_buffer.texture_buffer_texture_buffer_range,Fail
+KHR-GL46.texture_cube_map_array.texture_size_geometry_sh,Fail
+KHR-GL46.texture_cube_map_array.texture_size_vertex_sh,Fail
+
+# VKCTS coverage gap: fails on all non-mesa drivers
+
+# Passes on AMDVLK, fails on PRO if not in a specific caselist order
+KHR-GL46.direct_state_access.vertex_arrays_attribute_binding_divisor,Fail
+
+# No VKCTS coverage, no pass
+KHR-GL46.cull_distance.coverage,Fail
+KHR-GL46.cull_distance.functional,Fail
+
+# VK_EXT_image_2d_view_of_3d
+KHR-GL46.shader_image_load_store.non-layered_binding,Fail
+
+# Stencil sampling is apparently broken
+KHR-GL46.texture_view.view_sampling,Fail
+
+GTF-GL46.gtf31.GL3Tests.uniform_buffer_object.uniform_buffer_object_max_uniform_block_size,Fail
+
+KHR-GL46.direct_state_access.vertex_arrays_element_buffer,Fail
+
+# since 22.40-1577631
+GTF-GL46.gtf30.GL3Tests.half_float.half_float_rendering,Fail
+GTF-GL46.gtf30.GL3Tests.half_float.half_float_varying_data,Fail
+GTF-GL46.gtf43.GL3Tests.eac_compression_signed_r11.gl_compressed_signed_r11_eac,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt b/src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt
new file mode 100644
index 00000000000..40af4cb9bbc
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-icl-fails.txt
@@ -0,0 +1,64 @@
+GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels_pbo,Fail
+KHR-GL46.tessellation_shader.single.isolines_tessellation,Fail
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.data_pass_through,Fail
+KHR-GL46.tessellation_shader.tessellation_invariance.invariance_rule3,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_point_mode.points_verification,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_quads_tessellation.degenerate_case,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_quads_tessellation.inner_tessellation_level_rounding,Fail
+KHR-GL46.tessellation_shader.tessellation_shader_tessellation.gl_InvocationID_PatchVerticesIn_PrimitiveID,Fail
+KHR-GL46.tessellation_shader.vertex.vertex_spacing,Fail
+KHR-GL46.texture_view.view_classes,Fail
+KHR-GL46.texture_view.view_sampling,Fail
+
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32i_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.r32ui_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16ui_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgb10_a2ui_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8i_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rgba8ui_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rg16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rg16ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rgb10_a2ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rgba8i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.srgb8_alpha8_rgba8ui.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_64_bits.rg32i_rgba16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_64_bits.rg32ui_rgba16i.renderbuffer_to_renderbuffer,Fail
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_64_bits.rgba16ui_rgba16i.renderbuffer_to_renderbuffer,Fail
+
+# Piglit xfb tests
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt b/src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-icl-skips.txt
@@ -0,0 +1 @@
+
diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt
new file mode 100644
index 00000000000..c5b580c4f18
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-fails.txt
@@ -0,0 +1,593 @@
+dEQP-EGL.functional.query_context.get_current_context.rgba8888_window,Crash
+
+wayland-dEQP-EGL.functional.resize.surface_size.grow,Fail
+wayland-dEQP-EGL.functional.resize.surface_size.shrink,Fail
+wayland-dEQP-EGL.functional.resize.surface_size.stretch_width,Fail
+wayland-dEQP-EGL.functional.resize.surface_size.stretch_height,Fail
+
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_chromium_sync_control@conformance,Fail
+
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail
+
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_arb_create_context_robustness@invalid reset notification strategy,Fail
+
+glx@glx-swap-pixmap-bad,Fail
+
+# ../src/gallium/drivers/zink/zink_kopper.c:859: zink_kopper_update: Assertion `pres->bind & PIPE_BIND_DISPLAY_TARGET' failed.
+glx@glx-visuals-depth -pixmap,Crash
+glx@glx-visuals-stencil -pixmap,Crash
+
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+
+spec@!opengl 1.0@gl-1.0-swapbuffers-behavior,Fail
+
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=16,Fail
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=2,Fail
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=6,Fail
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=8,Fail
+
+spec@!opengl 1.1@line-smooth-stipple,Fail
+
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+
+# Frontend issue across multiple drivers.
+spec@!opengl 1.0@rasterpos,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+
+spec@!opengl 1.1@linestipple@Line strip,Fail
+spec@!opengl 1.1@linestipple@Line loop,Fail
+spec@!opengl 1.1@linestipple@Factor 2x,Fail
+spec@!opengl 1.1@linestipple@Factor 3x,Fail
+spec@!opengl 1.1@linestipple,Fail
+
+# polygon-mode: glPolygonMode(front=GL_LINE, back=GL_FILL), glCullMode(GL_NONE/GL_FALSE/GL_NO_ERROR) failed
+# At position 0, found prim GL_FILL instead of GL_LINE
+# polygon-mode: glPolygonMode(front=GL_POINT, back=GL_FILL), glCullMode(GL_NONE/GL_FALSE/GL_NO_ERROR) failed
+# At position 1, found prim GL_POINT instead of GL_FILL
+# (and more)
+spec@!opengl 1.1@polygon-mode,Fail
+
+spec@!opengl 2.0@vs-point_size-zero,Fail
+
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64,Crash
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.3,Fail
+
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+
+spec@arb_sample_locations@test,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 16- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_shading@samplemask 16 all,Fail
+spec@arb_sample_shading@samplemask 16 all@0.062500 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 16 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16,Fail
+spec@arb_sample_shading@samplemask 16@0.062500 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 16@noms partition,Fail
+spec@arb_sample_shading@samplemask 16@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms partition,Fail
+spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms partition,Fail
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail
+
+spec@arb_shader_image_load_store@early-z,Fail
+spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail
+
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+
+# "../src/gallium/drivers/zink/zink_compiler.c:2071: assign_producer_var_io: Assertion `*reserved < MAX_VARYING' failed."
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+# "arb_texture_buffer_object-formats: ../src/gallium/drivers/zink/zink_context.c:807: create_bvci: Assertion `bvci.format' failed."
+spec@arb_texture_buffer_object@formats (vs- arb),Crash
+
+spec@arb_texture_buffer_object@formats (fs- arb),Crash
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8_ALPHA8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8_ALPHA8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8UI_EXT,Fail
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+
+spec@egl_khr_surfaceless_context@viewport,Fail
+
+spec@egl_mesa_configless_context@basic,Fail
+
+spec@ext_external_objects@vk-image-overwrite@RGB 10 A2 UINT optimal: Failed to initialize OpenGL FBO/RBO,Fail
+
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 16 msaa,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 16 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 16 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 16 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 16 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
+
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@ext_transform_feedback@tessellation triangle_fan flat_first,Fail
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail
+
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+
+# since Debian 12 (bookworm) uprev
+spec@arb_viewport_array@display-list,Fail
+glx@glx-multi-window-single-context,Fail
+spec@arb_timer_query@timestamp-get,Fail
+
+# SIGKILL
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-fs-getuniformdv,Crash
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-gs-getuniformdv,Crash
+
+spec@arb_fragment_layer_viewport@layer-no-gs,Fail
+
+# sparse_backing_alloc: Assertion `bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE)' failed
+KHR-GL46.sparse_texture_tests.SparseTextureCommitment,Crash
+
+# uprev Piglit in Mesa
+spec@arb_shader_storage_buffer_object@max-ssbo-size@fs,Crash
+spec@arb_shader_storage_buffer_object@max-ssbo-size@vs,Crash
+
diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt
new file mode 100644
index 00000000000..9067c2b505a
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-flakes.txt
@@ -0,0 +1,139 @@
+glx@glx-tfp
+# ???
+spec@arb_query_buffer_object@qbo
+spec@arb_query_buffer_object@qbo@query-GL_PRIMITIVES_SUBMITTED-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT64_ARB
+
+dEQP-GLES31.functional.image_load_store.buffer.atomic.comp_swap_r32i_return_value
+
+# "../src/gallium/drivers/zink/zink_fence.c:130: fence_wait: Assertion `fence->batch_id' failed."
+.*dEQP-EGL.functional.sharing.gles2.multithread.random_egl_sync.textures.copytexsubimage2d.4
+.*dEQP-EGL.functional.sharing.gles2.multithread.random_egl_sync.textures.texsubimage2d.4
+
+# deqp-egl: ../src/intel/vulkan/anv_batch_chain.c:520: cmd_buffer_chain_to_batch_bo: Assertion `batch->end == current_bbo->bo->map + current_bbo->bo->size' failed.
+dEQP-EGL.functional.swap_buffers_with_damage.resize.*
+
+# around the time of kernel warnings about dma_resv
+# "ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST)
+# ../src/vulkan/runtime/vk_object.h:101: vk_object_base_assert_valid: Assertion `base == NULL || base->type == obj_type' failed."
+dEQP-EGL.functional.query_context.get_current_display.*
+dEQP-EGL.functional.query_context.get_current_.*_window
+dEQP-EGL.functional.query_context.query_context.*_window
+
+# Probably more of the above.
+dEQP-EGL.functional.color_clears.*_window
+
+# MESA: error: ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST)
+dEQP-EGL.functional.render.*_window
+
+# ../../src/xcb_in.c:746: xcb_request_check: Assertion `!reply' failed.
+glx@glx-make-current
+
+# Expected 15 15 15, observed 0 0 0
+glx@glx-multithread-texture
+
+# Timed out after piglit success result?
+glx@glx-visuals-depth
+
+# XIO: fatal IO error 11 (Resource temporarily unavailable) on X server ":0"
+glx@glx-visuals-stencil
+
+# "../src/vulkan/wsi/wsi_common_x11.c:1387: Swapchain status changed to VK_ERROR_SURFACE_LOST_KHR
+# XIO: fatal IO error 11 (Resource temporarily unavailable) on X server ":0"
+# after 130 requests (128 known processed) with 0 events remaining."
+glx@glx-multi-context-single-window
+
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=.*
+
+# PIGLIT: {"subtest": {"Tessellation control-Fragment shader/'coherent' qualifier coherency test/256x256" : "pass"}}
+# Probe value at (166, 0, 0, 0)
+# Expected: 33.000000 33.000000 33.000000 33.000000
+# Observed: 77.000000 77.000000 77.000000 77.000000
+# PIGLIT: {"subtest": {"Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/256x256" : "fail"}}
+# PIGLIT: {"subtest": {"Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/256x256" : "pass"}}
+# PIGLIT: {"subtest": {"Geometry-Fragment shader/'coherent' qualifier coherency test/256x256" : "pass"}}
+# Probe value at (56, 15, 0, 0)
+# Expected: 33.000000 33.000000 33.000000 33.000000
+# Observed: 77.000000 77.000000 77.000000 77.000000
+# PIGLIT: {"subtest": {"Vertex-Tessellation control shader/'volatile' qualifier coherency test/256x256" : "fail"}}
+spec@arb_shader_image_load_store@coherency
+
+spec@glsl-4.00@execution@built-in-functions@fs-inverse-dmat4
+
+# Around the time of running these tests there are some warnings from the kernel in dma_resv.c, and at least
+# some failures look like not waiting for rendering to complete.
+# Because those tests sometimes pass, keep them here
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2_gles3.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2_gles3.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2_gles3.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1_gles2.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles1.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles2.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles3.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles3.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.multi_thread.gles3.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles1.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles1.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles1.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles2.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles2.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles2.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles3.rgb565_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles3.rgb888_pbuffer
+wayland-dEQP-EGL.functional.color_clears.single_context.gles3.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.negative_api.create_pixmap_surface
+wayland-dEQP-EGL.functional.render.multi_thread.gles2_gles3.rgb565_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles2_gles3.rgb888_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles2_gles3.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgb565_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgb888_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles3.rgb565_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles3.rgb888_pbuffer
+wayland-dEQP-EGL.functional.render.multi_thread.gles3.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.render.single_context.gles2.rgb565_pbuffer
+wayland-dEQP-EGL.functional.render.single_context.gles2.rgb888_pbuffer
+wayland-dEQP-EGL.functional.render.single_context.gles2.rgba8888_pbuffer
+wayland-dEQP-EGL.functional.render.single_context.gles3.rgb565_pbuffer
+wayland-dEQP-EGL.functional.render.single_context.gles3.rgb888_pbuffer
+wayland-dEQP-EGL.functional.render.single_context.gles3.rgba8888_pbuffer
+
+# Everything in wayland EGL is flaking with crashes since at least early 2023-09
+# Possibly https://gitlab.freedesktop.org/mesa/mesa/-/issues/9577
+wayland-dEQP-EGL.functional.*
+
+spec@arb_tessellation_shader@execution@variable-indexing@tcs-patch-vec4-index-wr
+
+spec@arb_timer_query@timestamp-get
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49554086
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49564619
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49529964
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49530854
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49445862
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49588884
+glx@glx_ext_no_config_context@no fbconfig
+
+# See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25861#note_2140498
+spec@ext_timer_query@time-elapsed
+
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/53978300
+spec@!opengl 2.0@occlusion-query-discard
+spec@arb_shader_storage_buffer_object@max-ssbo-size@fs
+spec@arb_shader_storage_buffer_object@max-ssbo-size@fsexceed
+spec@arb_shader_storage_buffer_object@max-ssbo-size@vs
+spec@arb_shader_storage_buffer_object@max-ssbo-size@vsexceed
+
+# uprev Piglit in Mesa
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=16
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=6
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=8
+spec@arb_query_buffer_object@.*CPU_READ_BEFORE.*
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/256x256
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-modifiers
+spec@ext_timer_query@time-elapsed
+
diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt
new file mode 100644
index 00000000000..1d42c0bef49
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-premerge-skips.txt
@@ -0,0 +1,21 @@
+# Skip some >1min tests in pre-merge. These will be covered in the nightly jobs.
+KHR-GL46.packed_pixels.varied_rectangle.*
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize
+KHR-Single-GL46.arrays_of_arrays_gl.SizedDeclarationsPrimitive
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineArgumentAliasing2
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineArgumentAliasing3
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineArgumentAliasing4
+KHR-Single-GL46.enhanced_layouts.ssb_member_invalid_offset_alignment
+KHR-Single-GL46.enhanced_layouts.uniform_block_member_invalid_offset_alignment
+KHR-Single-GL46.enhanced_layouts.varying_array_components
+KHR-Single-GL46.enhanced_layouts.varying_structure_locations
+KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api
+KHR-Single-GL46.enhanced_layouts.xfb_stride
+spec@!opengl 1.1@copypixels-sync
+spec@!opengl 1.1@draw-sync
+spec@arb_compute_shader@local-id-explosion
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-fs-getuniformdv
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-gs-getuniformdv
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-vs-getuniformdv
+spec@ext_texture_array@fbo-depth-array
+spec@ext_texture_lod_bias@lodbias
diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt
new file mode 100644
index 00000000000..241cdee87b3
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-skips.txt
@@ -0,0 +1,48 @@
+# These take too long to finish in CI
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.copy_image.functional
+KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat4
+KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat3
+KHR-GL46.gpu_shader_fp64.fp64.varyings
+KHR-GL46.texture_swizzle.functional
+KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage
+KHR-Single-GL46.arrays_of_arrays_gl.InteractionFunctionCalls2
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2
+KHR-Single-GL46.enhanced_layouts.ssb_member_align_non_power_of_2
+KHR-Single-GL46.enhanced_layouts.uniform_block_member_align_non_power_of_2
+KHR-Single-GL46.enhanced_layouts.xfb_global_buffer
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls1
+KHR-GL46.direct_state_access.framebuffers_texture_layer_attachment
+KHR-GL46.sparse_buffer_tests.BufferStorageTest
+
+# Definitely shouldn't take this long, but times out at 3 minutes.
+glx@glx-visuals-depth$
+glx@glx-visuals-stencil$
+
+.*built-in-functions@.*dmat[34].*
+.*built-in-functions@.*-op-div-.*dmat.*
+.*built-in-functions@fs-mod-dvec4-dvec4
+.*fs-isnan-dvec
+.*gs-isnan-dvec
+.*vs-isnan-dvec
+.*conversion-implicit.*dmat.*
+
+spec@egl_nok_texture_from_pixmap@basic
+
+# implicit modifier selection is not currently supported
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv
diff --git a/src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt b/src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt
new file mode 100644
index 00000000000..6615ba5181e
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-anv-tgl-validation-settings.txt
@@ -0,0 +1,148 @@
+# Please include a comment with the log message and a testcase triggering each
+# VUID at the bottom of the file.
+khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,UNASSIGNED-CoreValidation-Shader-InconsistentSpirv,VUID-vkDestroyDevice-device-00378,VUID-VkShaderModuleCreateInfo-pCode-01377,VUID-RuntimeSpirv-Location-06272,VUID-VkGraphicsPipelineCreateInfo-renderPass-06590,VUID-VkGraphicsPipelineCreateInfo-Geometry-07725,VUID-vkCmdDrawMultiIndexedEXT-format-07753,UNASSIGNED-CoreValidation-Shader-InterfaceTypeMismatch,VUID-RuntimeSpirv-OpEntryPoint-07754,VUID-VkShaderModuleCreateInfo-pCode-01379,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-VkGraphicsPipelineCreateInfo-topology-00737,VUID-VkGraphicsPipelineCreateInfo-pStages-00736,VUID-vkCmdCopyImage-srcImage-07743,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-vkCmdDrawMultiEXT-pDepthAttachment-06181,VUID-vkCmdDrawMultiEXT-pStencilAttachment-06182,VUID-vkCmdDrawMultiIndexedEXT-pDepthAttachment-06181,VUID-vkCmdDrawMultiIndexedEXT-pStencilAttachment-06182,VUID-vkDestroyDevice-device-05137,VUID-vkCmdDrawMultiEXT-Input-08734
+khronos_validation.report_flags = error
+khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
+VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
+khronos_validation.printf_buffer_size = 40960
+khronos_validation.printf_to_stdout = true
+khronos_validation.log_filename = stdout
+
+# VUID-VkPhysicalDeviceProperties2-pNext-pNext
+# VUID-VkDeviceCreateInfo-pNext-pNext
+# never error due to unrecognized extensions
+
+# KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location:
+# [ UNASSIGNED-CoreValidation-Shader-InconsistentSpirv ] Object 0: handle =
+# 0x556fd2b6e190, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0x6bbb14 | SPIR-V
+# module not valid: The Component Type of Vector 1 must be the same as ResultType.
+
+# Intermittent, probably the end of a caselist so not tied to a specific known test:
+# [ VUID-vkDestroyDevice-device-00378 ] Object 0: handle = 0x55c458362820, type =
+# VK_OBJECT_TYPE_DEVICE; Object 1: handle = 0x2a7f70000000053, type =
+# VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT; | MessageID = 0x71500fba | OBJ ERROR : For
+# VkDevice 0x55c458362820[], VkDescriptorSetLayout 0x2a7f70000000053[] has not
+# been destroyed. The Vulkan spec states: All child objects created on device must
+# have been destroyed prior to destroying device
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkDestroyDevice-device-00378)
+
+# KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location:
+# [ VUID-VkShaderModuleCreateInfo-pCode-01377 ] Object 0: handle =
+# 0x563a677573b0, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0x5821254b | SPIR-V
+# module not valid: The Component Type of Vector 1 must be the same as ResultType.
+# %142 = OpVectorShuffle %v2uint %141 %141 0 1
+# The Vulkan spec states: pCode must point to either valid SPIR-V code, formatted and packed as described by the Khronos SPIR-V Specification or valid GLSL code which must be written to the GL_KHR_vulkan_glsl extension specification (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkShaderModuleCreateInfo-pCode-01377)
+
+# KHR-GL46.geometry_shader.limits.max_output_components
+# [ VUID-RuntimeSpirv-Location-06272 ] Object 0: handle = 0x3a6cbb0000000025,
+# type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID = 0xa3614f8b | Invalid Pipeline
+# CreateInfo State: Fragment shader exceeds
+# VkPhysicalDeviceLimits::maxFragmentInputComponents of 116 components by 4
+# components The Vulkan spec states: The sum of Location and the number of
+# locations the variable it decorates consumes must be less than or equal to the
+# value for the matching {ExecutionModel} defined in Shader Input and Output
+# Locations
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-Location-06272)
+
+# KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec4:
+# [ VUID-VkGraphicsPipelineCreateInfo-renderPass-06590 ] Object 0: handle =
+# 0x5647ce5e6ac0, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0xd835f001 |
+# vkCreateGraphicsPipelines(): pCreateInfos[0] does contains fragment shader state
+# and no fragment output state, pDepthStencilState does not point to a valid
+# VkPipelineDepthStencilStateCreateInfo struct. The Vulkan spec states: If
+# renderPass is VK_NULL_HANDLE and the pipeline is being created with fragment
+# shader state but not fragment output interface state, pDepthStencilState must be
+# a valid pointer to a valid VkPipelineDepthStencilStateCreateInfo structure
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06590)
+
+# KHR-GL46.shader_image_size.basic-nonMS-gs-float:
+# [ VUID-VkGraphicsPipelineCreateInfo-Geometry-07725 ] Object 0: handle =
+# 0xa4ad110000039145, type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID =
+# 0x64e29d24 | vkCreateGraphicsPipelines(): shaderTessellationAndGeometryPointSize
+# is enabled, but PointSize is not written in the Geometry shader. The Vulkan spec
+# states: If the pipeline is being created with a Geometry {ExecutionModel}, uses
+# the OutputPoints {ExecutionMode}, and shaderTessellationAndGeometryPointSize is
+# enabled, a PointSize decorated variable must be written to
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkGraphicsPipelineCreateInfo-Geometry-07725)
+
+# KHR-GL46.sample_variables.mask.rgba8i.samples_8.mask_3:
+# [ VUID-vkCmdDrawMultiIndexedEXT-format-07753 ] Object 0: handle =
+# 0x535b660000000043, type = VK_OBJECT_TYPE_DESCRIPTOR_SET; | MessageID =
+# 0xd35852d3 | vkCmdDrawMultiIndexedEXT: Descriptor set VkDescriptorSet
+# 0x535b660000000043[] in binding #128 index 0 requires SINT component type, but
+# bound descriptor format is VK_FORMAT_R8G8B8A8_UNORM (VkImageView
+# 0x9638f80000000036[]). The Vulkan spec states: If a VkImageView is accessed as a
+# result of this command, then the image view's format must match the numeric
+# format from the Sampled Type operand of the OpTypeImage as described in the
+# SPIR-V Sampled Type column of the Interpretation of Numeric Format table
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdDrawMultiIndexedEXT-format-07753)
+
+# KHR-Single-GL46.enhanced_layouts.varying_locations:
+# [ UNASSIGNED-CoreValidation-Shader-InterfaceTypeMismatch ] Object 0: handle =
+# 0x5eb05e000000003b, type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID =
+# 0xb6cf33fe | Type mismatch on location 0.0, between geometry shader and fragment
+# shader: 'ptr to Output vec2 of uint64' vs 'ptr to Input struct of (vec4 of
+# float32, vec4 of float32)'
+
+# KHR-Single-GL46.enhanced_layouts.varying_locations
+# [ VUID-RuntimeSpirv-OpEntryPoint-07754 ]
+# vkCreateGraphicsPipelines(): pCreateInfos[0] Type mismatch on Location 0 Component 0, between
+# VK_SHADER_STAGE_GEOMETRY_BIT stage:
+# pointer to Output ->
+# vec2 of uint64
+# VK_SHADER_STAGE_FRAGMENT_BIT stage:
+# pointer to Input ->
+# struct of {
+# vec4 of float32
+# vec4 of float32
+# }
+# The Vulkan spec states: Any user-defined variables between the OpEntryPoint
+# of two shader stages must have the same type and width for each Component
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-OpEntryPoint-07754)
+
+# KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location
+# [ VUID-VkShaderModuleCreateInfo-pCode-01379 ]
+# SPIR-V module not valid: The Component Type of Vector 1 must be the same as ResultType.
+# %142 = OpVectorShuffle %v2uint %141 %141 0 1
+
+# KHR-Single-GL46.enhanced_layouts.varying_locations
+#
+# VUID-RuntimeSpirv-OpEntryPoint-08743(ERROR / SPEC): msgNum: -1986897773 -
+# Validation Error: [ VUID-RuntimeSpirv-OpEntryPoint-08743 ]
+# vkCreateGraphicsPipelines(): pCreateInfos[0] VK_SHADER_STAGE_FRAGMENT_BIT declared input at
+# Location 1 Comonent 2 but it is not an Output declared in VK_SHADER_STAGE_VERTEX_BIT
+# The Vulkan spec states: Any user-defined variables shared between the OpEntryPoint of two shader stages,
+# and declared with Input as its {StorageClass} for the subsequent shader stage,
+# must have all Location slots and Component words declared in the preceding shader stage's
+# OpEntryPoint with Output as the {StorageClass}
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-OpEntryPoint-08743)
+#
+# VVL bug https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5735
+
+
+# VUID-VkGraphicsPipelineCreateInfo-pStages-00736
+# VUID-VkGraphicsPipelineCreateInfo-topology-00737
+# spec bug https://gitlab.khronos.org/vulkan/vulkan/-/merge_requests/5916
+
+
+# VUID-vkCmdCopyImage-srcImage-07743
+# spec bug
+
+# VUID-vkCmdDrawMultiIndexedEXT-format-07753
+# KHR-GL46.shader_ballot_tests.ShaderBallotFunctionBallot
+# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4488
+
+
+# VUID-vkCmdDrawMultiEXT-pDepthAttachment-06181
+# VUID-vkCmdDrawMultiEXT-pStencilAttachment-06182
+# VUID-vkCmdDrawMultiIndexedEXT-pDepthAttachment-06181
+# VUID-vkCmdDrawMultiIndexedEXT-pStencilAttachment-06182
+# spec issue
+
+
+# VUID-vkDestroyDevice-device-05137
+# some kind of bug
+
+# VUID-vkCmdDrawMultiEXT-Input-08734
+# not sure if cts bug...
+# KHR-GL46.direct_state_access.vertex_arrays_attribute_format
diff --git a/src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt b/src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt
new file mode 100644
index 00000000000..18c4d4cc59e
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-freedreno-a618-fails.txt
@@ -0,0 +1,7 @@
+# piglit xfb tests
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.lowp_mat4_float_vertex,Fail
+dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.mediump_mat4_float_vertex,Fail
+dEQP-GLES31.functional.texture.border_clamp.range_clamp.linear_float_color,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt b/src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt
new file mode 100644
index 00000000000..11898356cb4
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-freedreno-a618-flakes.txt
@@ -0,0 +1,63 @@
+# No GPU hang, looked like cache flushing fail. Happens on basically all format
+# families.
+dEQP-GLES3.functional.fbo.blit.conversion..*
+
+# No GPU hang, looks like one 2x2 quad was colored slightly off?
+dEQP-GLES3.functional.fbo.msaa.4_samples.depth_component16
+
+dEQP-GLES3.functional.transform_feedback.random_full_array_capture.separate.lines.4
+
+# Lines were the wrong colors on iteration 4/10.
+dEQP-GLES3.functional.transform_feedback.random.separate.lines.4
+
+# Looks like maybe the end of level 0 face 5 got overwritten, others were all
+# OK. Source is 7 levels 65x65x9, dst is 7 levels 65x65x1 cubemap
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16i_r16i.texture3d_to_cubemap
+
+# level 0 face 5 again, corruption in the middle this time though. destination is 64x63, 1 level.
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8.cubemap_to_texture2d_array
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8i.cubemap_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_r16ui.cubemap_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16ui_r16ui.cubemap_to_renderbuffer
+
+# "MESA: error: ZINK: vkQueueSubmit failed (VK_ERROR_DEVICE_LOST)"
+# Not preceded by a GPU hang report? One case had these faults, though:
+# *** gpu fault: ttbr0=000000017505d000 iova=0000000114c89800 dir=WRITE type=TRANSLATION source=CCU (0,0,0,1)
+# *** gpu fault: ttbr0=000000017505d000 iova=0000000114c5c8f0 dir=WRITE type=TRANSLATION source=UNKNOWN (0,0,0,1)
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_12x12_khr_rgba32f.cubemap_to_cubemap
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_6x5_khr_rgba32f.cubemap_to_cubemap
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba32f_srgb8_alpha8_astc_12x12_khr.texture3d_to_texture2d
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_10x5_khr_rgba32i.texture2d_array_to_cubemap
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_8x6_khr_rgba32ui.cubemap_to_cubemap
+
+# Found when expanding coverage on 2022/11/17, or randomly flaking
+dEQP-GLES31.functional.separate_shader.random.79
+dEQP-GLES3.functional.transform_feedback.random_full_array_capture.separate.lines.4
+dEQP-GLES3.functional.texture.specification.texsubimage3d_pbo.rgba4_2d_array
+dEQP-GLES3.functional.texture.format.sized.cube.rgba4_npot
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.*cubemap.*
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.*texture2d_array.*
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.*astc.*cubemap.*
+dEQP-GLES3.functional.fbo.msaa.2_samples.depth_component16
+
+# ?
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.isolines_fractional_odd_spacing_ccw
+KHR-GLES31.core.texture_cube_map_array.color_depth_attachments
+dEQP-GLES31.functional.texture.specification.texstorage3d.format.rgb5_a1_cube_array
+dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_4x4_rgba.rgba_astc_4x4_khr_srgb8_alpha8_astc_4x4_khr.texture2d_array_to_texture3d
+dEQP-GLES31.functional.image_load_store.2d.format_reinterpret.r32ui_r32f
+
+dEQP-GLES3.functional.fbo.color.tex2d.rgb565
+
+# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9707
+dEQP-GLES3.functional.texture.specification.teximage2d_align.cube_rgba4_51_2
+# ci-collate: Issue found in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/49638414
+dEQP-GLES31.functional.fbo.color.texcubearray.rgb565
+
+# uprev Piglit in Mesa
+dEQP-GLES3.functional.fbo.blit.conversion.rgba4_to_rgb10_a2
+dEQP-GLES3.functional.fbo.blit.conversion.rgba8_to_r16f
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8.texture2d_array_to_texture3d
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16f_rg8ui.texture2d_array_to_texture3d
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.r16ui_r16i.texture2d_array_to_texture2d_array
+
diff --git a/src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt b/src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt
new file mode 100644
index 00000000000..5ca63552440
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-freedreno-a618-skips.txt
@@ -0,0 +1,4 @@
+# Can run over 60 seconds
+KHR-GLES31.core.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize
+KHR-GLES31.core.tessellation_shader.vertex.vertex_ordering
+KHR-GLES31.core.tessellation_shader.vertex.vertex_spacing
diff --git a/src/gallium/drivers/zink/ci/zink-lvp-fails.txt b/src/gallium/drivers/zink/ci/zink-lvp-fails.txt
new file mode 100644
index 00000000000..ba7682dc29e
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-lvp-fails.txt
@@ -0,0 +1,147 @@
+# #6115
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+# #6322
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+#kopper regressions/changes
+spec@egl_chromium_sync_control@conformance,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+
+# this test tries to be error-compatible with nvidia. spoiler: mesa isn't, and no driver can pass it
+glx@glx_arb_create_context@invalid flag,Fail
+
+glx@glx-swap-pixmap-bad,Fail
+glx@glx-visuals-depth,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.2@copyteximage 3d,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail
+spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+spec@egl_mesa_configless_context@basic,Fail
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail
+spec@arb_shader_image_load_store@early-z,Fail
+spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail
+
+spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-load,Crash
+spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-store,Crash
+
+# Literally no driver can pass these
+spec@!opengl 1.0@rasterpos,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+
+spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail
+
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+# Debian 12 CI update, see https://gitlab.freedesktop.org/mesa/mesa/-/issues/9072
+spec@ext_packed_float@multisample-formats 4 gl_ext_packed_float,Crash
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Crash
+spec@!opengl 1.0@gl-1.0-dlist-beginend,Crash
+spec@nv_texture_barrier@blending-in-shader,Crash
+
+spec@arb_viewport_array@display-list,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-lvp-flakes.txt b/src/gallium/drivers/zink/ci/zink-lvp-flakes.txt
new file mode 100644
index 00000000000..a883379893e
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-lvp-flakes.txt
@@ -0,0 +1,40 @@
+dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_l8_pot
+spec@khr_debug@push-pop-group_gl.*
+glx@glx-multi-window-single-context
+
+# "free(): invalid next size (fast)"
+# since it's heap corruption, it may or may not appear in a particular run
+spec@arb_compute_variable_group_size@local-size
+
+# https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20908454
+# "X connection to :99 broken (explicit kill or server shutdown)."
+glx@glx-multi-context-ib-1
+
+# depth visuals
+glx@glx-visuals-depth
+glx@glx-visuals-stencil
+
+# mysterious
+glx@glx-shader-sharing
+
+spec@arb_fragment_program@no-newline
+# glx-destroycontext-1: ../../src/xcb_conn.c:215: write_vec: Assertion `!c->out.queue_len' failed.
+glx@glx-destroycontext-1
+
+glx@glx-multithread-texture
+
+# does not happen very often, but occasionally does
+KHR-GL46.limits.max_fragment_interpolation_offset
+
+# no output timeout, probably stuck in some X11 connection thing
+spec@ext_framebuffer_multisample@accuracy all_samples depth_resolve depthstencil
+
+# segfault in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48719777 and others
+spec@ext_texture_array@texsubimage array
+
+# crash in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48476882 and others
+KHR-GL46.layout_location.sampler_2d_shadow
+
+# uprev Piglit in Mesa
+spec@ext_framebuffer_multisample@accuracy all_samples depth_draw small depthstencil
+
diff --git a/src/gallium/drivers/zink/ci/piglit-zink-lvp-skips.txt b/src/gallium/drivers/zink/ci/zink-lvp-skips.txt
index 623645c36db..7226486adfb 100644
--- a/src/gallium/drivers/zink/ci/piglit-zink-lvp-skips.txt
+++ b/src/gallium/drivers/zink/ci/zink-lvp-skips.txt
@@ -2,16 +2,18 @@
# non-zero-length and not starting with '#', will regex match to
# delete lines from the test list. Be careful.
+KHR-GL32.texture_size_promotion.functional
+
+# this is just broken.
+KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks
+KHR-GL46.shader_ballot_tests.ShaderBallotFunctionRead
+
# ignores copied from the old runner script
spec@arb_map_buffer_alignment@arb_map_buffer_alignment-map-invalidate-range
-glx@glx-make-current
spec@arb_timer_query.*
spec@arb_sample_shading@builtin-gl-sample-mask
spec@glsl-1.30@execution@tex-miplevel-selection.*
-# only supported if Piglit is using GLUT
-spec@!opengl 1.1@windowoverlap
-
# This test doesn't even seem to exist, but piglit adds it to a group...?
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match
@@ -28,4 +30,18 @@ spec@arb_compute_shader@local-id-explosion
# I can't reproduce these crashes locally
# even after running them in loops for 4+ hours, so disable for now
-spec@arb_shader_texture_lod@execution@tex-miplevel-selection.*
+.*tex-miplevel-selection.*
+
+# these are insanely long
+KHR-GL46.copy_image.functional
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.texture_swizzle.functional
+
+# Kopper regression
+glx@glx-tfp
+
+# These tests started hitting timeouts when we upgraded LLVM from v11 to v13
+spec@arb_texture_rg@fbo-blending-formats
+
+# These need format conversions that gallium doesn't implement yet
+spec@arb_texture_buffer_object@formats.*arb.*
diff --git a/src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt b/src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt
new file mode 100644
index 00000000000..e3e3c5546df
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-lvp-validation-settings.txt
@@ -0,0 +1,62 @@
+# Please include a comment with the log message and a testcase triggering each
+# VUID at the bottom of the file.
+khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiEXT-None-02699,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718,VUID-VkGraphicsPipelineCreateInfo-flags-06482,VUID-vkCmdPipelineBarrier2-None-08719
+khronos_validation.report_flags = error
+khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
+VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
+khronos_validation.printf_buffer_size = 40960
+khronos_validation.printf_to_stdout = true
+khronos_validation.log_filename = stdout
+
+# VUID-VkPhysicalDeviceProperties2-pNext-pNext
+# VUID-VkDeviceCreateInfo-pNext-pNext
+# never error due to unrecognized extensions
+
+
+# KHR-GL46.shader_image_load_store.basic-allTargets-atomic:
+# [ VUID-vkCmdDrawMultiEXT-viewType-07752 ] Object 0: handle =
+# 0x5581c500000000d5, type = VK_OBJECT_TYPE_DESCRIPTOR_SET; | MessageID =
+# 0xacde5967 | vkCmdDrawMultiEXT: Descriptor set VkDescriptorSet
+# 0x5581c500000000d5[] in binding #129 index 0 requires an image view of type
+# VK_IMAGE_VIEW_TYPE_CUBE but got VkImageView 0x359e9300000000cb[] which is of
+# type VK_IMAGE_VIEW_TYPE_CUBE_ARRAY. The Vulkan spec states: If a VkImageView is
+# accessed as a result of this command, then the image view's viewType must match
+# the Dim operand of the OpTypeImage as described in Instruction/Sampler/Image
+# View Validation
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdDrawMultiEXT-viewType-07752)
+
+# KHR-Single-GL46.enhanced_layouts.varying_array_locations:
+#
+# [ VUID-RuntimeSpirv-Location-06272 ] Object 0: handle =
+# 0x8f5f070000000095, type = VK_OBJECT_TYPE_SHADER_MODULE; | MessageID =
+# 0xa3614f8b | Invalid Pipeline CreateInfo State: Geometry shader exceeds
+# VkPhysicalDeviceLimits::maxGeometryInputComponents of 64 components by
+# 1 components The Vulkan spec states: The sum of Location and the number
+# of locations the variable it decorates consumes must be less than or
+# equal to the value for the matching {ExecutionModel} defined in Shader
+# Input and Output Locations
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-Location-06272)
+
+# dEQP-GLES31.functional.separate_shader.random.69
+#
+# UNASSIGNED-CoreValidation-Shader-MissingOutput(ERROR / SPEC): msgNum: 1086655814 -
+# Validation Error: [ UNASSIGNED-CoreValidation-Shader-MissingOutput ]
+# vkCreateGraphicsPipelines(): pCreateInfos[0] VK_SHADER_STAGE_FRAGMENT_BIT declared input at Location 0
+# Comonent 2 but it is not an Output declared in VK_SHADER_STAGE_VERTEX_BIT
+#
+# VUID-RuntimeSpirv-OpEntryPoint-08743(ERROR / SPEC): msgNum: -1986897773 -
+# Validation Error: [ VUID-RuntimeSpirv-OpEntryPoint-08743 ]
+# vkCreateGraphicsPipelines(): pCreateInfos[0] VK_SHADER_STAGE_FRAGMENT_BIT declared input at
+# Location 1 Comonent 2 but it is not an Output declared in VK_SHADER_STAGE_VERTEX_BIT
+# The Vulkan spec states: Any user-defined variables shared between the OpEntryPoint of two shader stages,
+# and declared with Input as its {StorageClass} for the subsequent shader stage,
+# must have all Location slots and Component words declared in the preceding shader stage's
+# OpEntryPoint with Output as the {StorageClass}
+# (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-RuntimeSpirv-OpEntryPoint-08743)
+#
+# VVL bug https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5735
+
+# VVL missing support for KHR_dynamic_rendering_local_read
+# VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718
+# VUID-VkGraphicsPipelineCreateInfo-flags-06482
+# VUID-vkCmdPipelineBarrier2-None-08719
diff --git a/src/gallium/drivers/zink/ci/zink-nv-fails.txt b/src/gallium/drivers/zink/ci/zink-nv-fails.txt
new file mode 100644
index 00000000000..ca151922eac
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-nv-fails.txt
@@ -0,0 +1,2 @@
+KHR-GL46.sparse_texture2_tests.SparseTexture2Commitment,Fail
+KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-nv-flakes.txt b/src/gallium/drivers/zink/ci/zink-nv-flakes.txt
new file mode 100644
index 00000000000..98e94fdb9df
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-nv-flakes.txt
@@ -0,0 +1,2 @@
+# OOMs (test runs out of memory)
+dEQP-GLES31.functional.draw_indirect.compute_interop.large.drawelements_separate_grid_1200x1200_drawcount_1
diff --git a/src/gallium/drivers/zink/ci/zink-nv-skips.txt b/src/gallium/drivers/zink/ci/zink-nv-skips.txt
new file mode 100644
index 00000000000..3ec8c63df51
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-nv-skips.txt
@@ -0,0 +1,4 @@
+# these take forever
+KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.copy_image.functional
diff --git a/src/gallium/drivers/zink/ci/zink-nvk-fails.txt b/src/gallium/drivers/zink/ci/zink-nvk-fails.txt
new file mode 100644
index 00000000000..6961c59b20f
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-nvk-fails.txt
@@ -0,0 +1,902 @@
+# nvk sparse needs more testing
+KHR-GL46.sparse_texture2_tests.SparseTexture2Commitment,Fail
+KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup,Fail
+KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail
+KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail
+KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail
+
+# regressions?
+spec@arb_bindless_texture@execution@images@ubo-named-block,Fail
+spec@arb_get_texture_sub_image@arb_get_texture_sub_image-get,Fail
+spec@egl_ext_device_query@conformance,Fail
+
+dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_rbo_4,Fail
+dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_rbo_8,Fail
+dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_texture_4,Fail
+dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bit_count_per_two_samples.multisample_texture_8,Fail
+dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bits_unique_per_two_samples.multisample_texture_4,Fail
+dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.bits_unique_per_two_samples.multisample_texture_8,Fail
+
+glx@glx-multi-window-single-context,Fail
+glx@glx-visuals-depth,Crash
+glx@glx-visuals-stencil,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_arb_create_context_robustness@invalid reset notification strategy,Fail
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+glx@glx_ext_no_config_context@no fbconfig,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@rasterpos,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+spec@!opengl 1.1@line-smooth-coverage,Fail
+spec@!opengl 1.1@line-smooth-stipple,Fail
+spec@!opengl 1.1@max-texture-size,Crash
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.2@copyteximage 3d,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail
+spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index,Fail
+spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index2,Fail
+spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-non-const-uniform-index,Fail
+spec@arb_arrays_of_arrays@execution@ubo@fs-const,Fail
+spec@arb_arrays_of_arrays@execution@ubo@fs-const-explicit-binding,Fail
+spec@arb_arrays_of_arrays@execution@ubo@fs-mixed-const-nonconst,Fail
+spec@arb_arrays_of_arrays@execution@ubo@fs-nonconst,Fail
+spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
+spec@arb_bindless_texture@execution@samplers@ubo-binding-samplers-conflict,Fail
+spec@arb_buffer_storage@bufferstorage-persistent draw,Fail
+spec@arb_buffer_storage@bufferstorage-persistent read,Fail
+spec@arb_buffer_storage@bufferstorage-persistent_gles3 draw,Fail
+spec@arb_buffer_storage@bufferstorage-persistent_gles3 read,Fail
+spec@arb_fragment_layer_viewport@layer-gs-writes-out-of-range,Fail
+spec@arb_fragment_layer_viewport@viewport-gs-writes-out-of-range,Fail
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+spec@arb_gl_spirv@execution@ubo@aoa-2,Fail
+spec@arb_gl_spirv@execution@ubo@array-complex,Fail
+spec@arb_gl_spirv@execution@ubo@array-different-array-stride-ubo,Fail
+spec@arb_gl_spirv@execution@ubo@array-inside-ubo,Fail
+spec@arb_gl_spirv@execution@ubo@array-inside-ubo-copy,Fail
+spec@arb_gl_spirv@execution@ubo@array-of-arrays-inside-ubo,Fail
+spec@arb_gl_spirv@execution@ubo@matrix@complex,Fail
+spec@arb_gl_spirv@execution@ubo@simple,Fail
+spec@arb_gl_spirv@execution@ubo@two-stages,Fail
+spec@arb_gl_spirv@linker@uniform@multisampler,Crash
+spec@arb_gl_spirv@linker@uniform@multisampler-array,Crash
+spec@arb_gpu_shader5@execution@ubo_array_indexing@fs-array-nonconst,Fail
+spec@arb_gpu_shader5@execution@ubo_array_indexing@gs-array-nonconst,Fail
+spec@arb_gpu_shader5@execution@ubo_array_indexing@vs-array-nonconst,Fail
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-array-copy,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-double-array-const-index,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-double-array-variable-index,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-doubles,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-doubles-float-mixed,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-nested-struct,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-direct-1,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.1,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.2,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.3,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@gs-array-copy,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@gs-double-array-const-index,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@gs-double-array-variable-index,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@gs-doubles-float-mixed,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@gs-nested-struct,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@vs-array-copy,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@vs-double-array-const-index,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@vs-double-array-variable-index,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@vs-doubles,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@vs-doubles-float-mixed,Fail
+spec@arb_gpu_shader_fp64@uniform_buffers@vs-nested-struct,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_locations@test,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_shading@arb_sample_shading-builtin-gl-sample-mask-mrt-alpha-to-coverage,Fail
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 partition,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 partition,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6,Fail
+spec@arb_sample_shading@samplemask 6 all,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 partition,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 partition,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 partition,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 partition,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms partition,Fail
+spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8,Fail
+spec@arb_sample_shading@samplemask 8 all,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 partition,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 partition,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 partition,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 partition,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 partition,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 partition,Fail
+spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms partition,Fail
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail
+spec@arb_shader_image_load_store@atomicity,Crash
+spec@arb_shader_image_load_store@max-size,Fail
+spec@arb_shader_image_load_store@max-size@image3D max size test/8x8x16384x1,Fail
+spec@arb_shader_image_load_store@semantics,Crash
+spec@arb_shader_image_size@builtin,Crash
+spec@arb_shader_image_size@builtin@rgba32f/Compute/image3D max size test/8x8x16384x1,Fail
+spec@arb_shader_image_size@builtin@rgba32f/Fragment/image3D max size test/8x8x16384x1,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@arb_sync@clientwaitsync-timeout,Fail
+spec@arb_sync@clientwaitsync-timeout@read,Fail
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec2-index-invalid-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-patch-input-array-vec4-index-rd,Crash
+spec@arb_texture_buffer_object@formats (fs- arb),Crash
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8_ALPHA8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb),Crash
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8_ALPHA8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail
+spec@arb_texture_cube_map_array@texsubimage cube_map_array,Fail
+spec@arb_texture_float@multisample-formats 2 gl_arb_texture_float,Fail
+spec@arb_texture_float@multisample-formats 4 gl_arb_texture_float,Fail
+spec@arb_texture_float@multisample-formats 6 gl_arb_texture_float,Fail
+spec@arb_texture_float@multisample-formats 8 gl_arb_texture_float,Fail
+spec@arb_texture_rg@multisample-formats 2 gl_arb_texture_rg,Fail
+spec@arb_texture_rg@multisample-formats 2 gl_arb_texture_rg-float,Fail
+spec@arb_texture_rg@multisample-formats 4 gl_arb_texture_rg,Fail
+spec@arb_texture_rg@multisample-formats 4 gl_arb_texture_rg-float,Fail
+spec@arb_texture_rg@multisample-formats 6 gl_arb_texture_rg,Fail
+spec@arb_texture_rg@multisample-formats 6 gl_arb_texture_rg-float,Fail
+spec@arb_texture_rg@multisample-formats 8 gl_arb_texture_rg,Fail
+spec@arb_texture_rg@multisample-formats 8 gl_arb_texture_rg-float,Fail
+spec@arb_timer_query@query gl_timestamp,Fail
+spec@arb_uniform_buffer_object@2-buffers-bug,Fail
+spec@arb_uniform_buffer_object@execution@fs-array-of-structs-std140-indirect,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-array-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-#column_major-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-array-array-struct-array,Fail
+spec@arb_uniform_buffer_object@execution@shared-array-struct-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@shared-array-struct-struct,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-array-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-column_major-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat2x2-and-column_major-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat2x3-and-column_major-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat2x4-and-column_major-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat3x2-and-column_major-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat3x3-and-column_major-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat3x4-and-column_major-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat4x2-and-column_major-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat4x3-and-column_major-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-mat4x4-and-column_major-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-array-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat2x2-and-column_major-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat2x3-and-column_major-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat2x4-and-column_major-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat3x2-and-column_major-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat3x3-and-column_major-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat3x4-and-column_major-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat4x2-and-column_major-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat4x3-and-column_major-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-mat4x4-and-column_major-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@shared-row_major-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@shared-struct-array-array-array,Fail
+spec@arb_uniform_buffer_object@execution@shared-struct-array-array-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@shared-struct-array-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@shared-struct-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@shared-struct-struct,Fail
+spec@arb_uniform_buffer_object@execution@shared-struct-struct-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-array-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-#column_major-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-array-array-struct-array,Fail
+spec@arb_uniform_buffer_object@execution@std140-array-struct-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-array-struct-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-array-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-column_major-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat2x2-and-column_major-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat2x3-and-column_major-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat2x4-and-column_major-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat3x2-and-column_major-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat3x3-and-column_major-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat3x4-and-column_major-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat4x2-and-column_major-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat4x3-and-column_major-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-mat4x4-and-column_major-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-array-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat2x2-and-column_major-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat2x3-and-column_major-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat2x4-and-column_major-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat3x2-and-column_major-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat3x3-and-column_major-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat3x4-and-column_major-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat4x2-and-column_major-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat4x3-and-column_major-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-mat4x4-and-column_major-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-array-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat2x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat2x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat2x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat3x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat3x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat3x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat4x2,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat4x3,Fail
+spec@arb_uniform_buffer_object@execution@std140-row_major-struct-mat4x4,Fail
+spec@arb_uniform_buffer_object@execution@std140-struct-array-array-array,Fail
+spec@arb_uniform_buffer_object@execution@std140-struct-array-array-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-struct-array-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-struct-array-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-struct-struct,Fail
+spec@arb_uniform_buffer_object@execution@std140-struct-struct-struct,Fail
+spec@egl 1.4@egl-copy-buffers,Crash
+spec@egl_chromium_sync_control@conformance,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+spec@egl_khr_gl_colorspace@linear,Crash
+spec@egl_khr_gl_colorspace@srgb,Crash
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_nok_texture_from_pixmap@basic,Crash
+spec@ext_direct_state_access@named-buffers 30,Fail
+spec@ext_direct_state_access@named-buffers 30@FlushMappedNamedBufferRangeEXT,Fail
+spec@ext_external_objects@vk-image-overwrite,Crash
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@accuracy 2 srgb small depthstencil,Fail
+spec@ext_framebuffer_multisample@accuracy 2 srgb small depthstencil linear,Fail
+spec@ext_framebuffer_multisample@accuracy 4 srgb small depthstencil,Fail
+spec@ext_framebuffer_multisample@accuracy 4 srgb small depthstencil linear,Fail
+spec@ext_framebuffer_multisample@accuracy 6 srgb small depthstencil,Fail
+spec@ext_framebuffer_multisample@accuracy 6 srgb small depthstencil linear,Fail
+spec@ext_framebuffer_multisample@accuracy 8 srgb small depthstencil,Fail
+spec@ext_framebuffer_multisample@accuracy 8 srgb small depthstencil linear,Fail
+spec@ext_framebuffer_multisample@accuracy all_samples srgb small depthstencil,Fail
+spec@ext_framebuffer_multisample@accuracy all_samples srgb small depthstencil linear,Fail
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@formats 2,Fail
+spec@ext_framebuffer_multisample@formats 4,Fail
+spec@ext_framebuffer_multisample@formats 6,Fail
+spec@ext_framebuffer_multisample@formats 8,Fail
+spec@ext_framebuffer_multisample@formats all_samples,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+spec@ext_texture_array@texsubimage array,Fail
+spec@ext_texture_env_combine@texture-env-combine,Timeout
+spec@ext_texture_snorm@multisample-formats 2 gl_ext_texture_snorm,Fail
+spec@ext_texture_snorm@multisample-formats 4 gl_ext_texture_snorm,Fail
+spec@ext_texture_snorm@multisample-formats 6 gl_ext_texture_snorm,Fail
+spec@ext_texture_snorm@multisample-formats 8 gl_ext_texture_snorm,Fail
+spec@ext_texture_srgb@multisample-formats 2 gl_ext_texture_srgb,Fail
+spec@ext_texture_srgb@multisample-formats 4 gl_ext_texture_srgb,Fail
+spec@ext_texture_srgb@multisample-formats 6 gl_ext_texture_srgb,Fail
+spec@ext_texture_srgb@multisample-formats 8 gl_ext_texture_srgb,Fail
+spec@ext_timer_query@time-elapsed,Fail
+spec@ext_transform_feedback@immediate-reuse-uniform-buffer,Fail
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash
+spec@glsl-1.10@execution@samplers@glsl-fs-lots-of-tex,Fail
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+spec@glsl-1.40@uniform_buffer@fs-array-copy,Fail
+spec@glsl-1.40@uniform_buffer@fs-bools,Fail
+spec@glsl-1.40@uniform_buffer@fs-float-array-const-index,Fail
+spec@glsl-1.40@uniform_buffer@fs-float-array-variable-index,Fail
+spec@glsl-1.40@uniform_buffer@fs-floats,Fail
+spec@glsl-1.40@uniform_buffer@fs-struct,Fail
+spec@glsl-1.40@uniform_buffer@fs-struct-copy,Fail
+spec@glsl-1.40@uniform_buffer@fs-struct-copy-complicated,Fail
+spec@glsl-1.40@uniform_buffer@fs-struct-pad,Fail
+spec@glsl-1.40@uniform_buffer@fs-two-members,Fail
+spec@glsl-1.40@uniform_buffer@vs-array-copy,Fail
+spec@glsl-1.40@uniform_buffer@vs-bools,Fail
+spec@glsl-1.40@uniform_buffer@vs-float-array-const-index,Fail
+spec@glsl-1.40@uniform_buffer@vs-float-array-variable-index,Fail
+spec@glsl-1.40@uniform_buffer@vs-floats,Fail
+spec@glsl-1.40@uniform_buffer@vs-struct,Fail
+spec@glsl-1.40@uniform_buffer@vs-struct-copy,Fail
+spec@glsl-1.40@uniform_buffer@vs-struct-copy-complicated,Fail
+spec@glsl-1.40@uniform_buffer@vs-struct-pad,Fail
+spec@glsl-1.40@uniform_buffer@vs-two-members,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail
+spec@glsl-1.50@uniform_buffer@gs-array-copy,Fail
+spec@glsl-1.50@uniform_buffer@gs-bools,Fail
+spec@glsl-1.50@uniform_buffer@gs-float-array-const-index,Fail
+spec@glsl-1.50@uniform_buffer@gs-float-array-variable-index,Fail
+spec@glsl-1.50@uniform_buffer@gs-floats,Fail
+spec@glsl-1.50@uniform_buffer@gs-struct,Fail
+spec@glsl-1.50@uniform_buffer@gs-struct-copy,Fail
+spec@glsl-1.50@uniform_buffer@gs-struct-copy-complicated,Fail
+spec@glsl-1.50@uniform_buffer@gs-struct-pad,Fail
+spec@glsl-1.50@uniform_buffer@gs-two-members,Fail
+spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
+spec@oes_viewport_array@viewport-gs-writes-out-of-range,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-nvk-flakes.txt b/src/gallium/drivers/zink/ci/zink-nvk-flakes.txt
new file mode 100644
index 00000000000..346817ceda7
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-nvk-flakes.txt
@@ -0,0 +1,10 @@
+spec@arb_pixel_buffer_object@pbo-drawpixels
+spec@arb_shader_image_load_store@semantics@imageLoad/Tessellation control shader/r8/image2D test
+spec@arb_vertex_attrib_64bit@execution@vs_in@vs-input-double_dvec4-double_dmat3-position
+spec@arb_vertex_attrib_64bit@execution@vs_in@vs-input-position-float_vec4_array3-double_dvec4
+spec@glsl-1.30@linker@interpolation-qualifiers@default-gl_backsecondarycolor-flat-gl_secondarycolor
+spec@glsl-4.00@execution@built-in-functions@fs-op-add-dvec2-dvec2
+spec@glsl-4.20@execution@vs_in@vs-input-ubyte_uint-short_int-double_dvec3-position
+spec@glsl-4.30@execution@built-in-functions@cs-lessthan-vec4-vec4
+spec@glsl-4.30@execution@built-in-functions@cs-op-assign-sub-mat3x2-mat3x2
+spec@glsl-4.30@execution@built-in-functions@cs-op-uplus-mat2
diff --git a/src/gallium/drivers/zink/ci/zink-nvk-skips.txt b/src/gallium/drivers/zink/ci/zink-nvk-skips.txt
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-nvk-skips.txt
diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt
new file mode 100644
index 00000000000..53c26e072e9
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-fails.txt
@@ -0,0 +1,183 @@
+# kopper
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+
+# #6115
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+
+glx@extension string sanity,Fail
+
+# #6322
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
+
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 6,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 8,Fail
+
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash
+
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail
+
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
+
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.5@depth-tex-compare,Fail
+
+spec@!opengl 2.0@vs-point_size-zero,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+
+
+# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support")
+spec@egl 1.4@egl-ext_egl_image_storage,Fail
+
+# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev")
+spec@!opengl 1.1@line-smooth-stipple,Fail
+
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+# Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6,Fail
+spec@arb_sample_shading@samplemask 6 all,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms partition,Fail
+spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8,Fail
+spec@arb_sample_shading@samplemask 8 all,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms partition,Fail
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
+
+# Polygon smoothing isn't supported in Vulkan.
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail
+
+# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104
+spec@arb_viewport_array@display-list,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt
new file mode 100644
index 00000000000..7642429cce0
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt
@@ -0,0 +1,30 @@
+KHR-GL46.packed_pixels.varied_rectangle.rgb16
+dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_clamp_rgba4444_pot
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_10x5_khr_rgba32i.texture2d_array_to_texture2d_array
+dEQP-GLES31.functional.texture.filtering.cube_array.combinations.nearest_nearest_repeat_repeat
+spec@arb_enhanced_layouts@execution@component-layout@sso-vs-gs-fs-array-interleave
+spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store
+spec@arb_texture_cube_map@copyteximage cube samples=6
+spec@arb_texture_cube_map@copyteximage cube samples=8
+spec@arb_texture_rectangle@copyteximage rect samples=6
+spec@arb_texture_rectangle@copyteximage rect samples=8
+spec@arb_timer_query@timestamp-get
+spec@ext_texture_array@copyteximage 1d_array samples=6
+spec@ext_texture_array@copyteximage 1d_array samples=8
+spec@ext_texture_array@copyteximage 2d_array samples=6
+spec@ext_texture_array@copyteximage 2d_array samples=8
+spec@ext_transform_feedback@max-varyings
+spec@ext_transform_feedback@max-varyings@max-varying-arrays-of-arrays
+spec@glsl-1.50@execution@geometry@point-size-out
+spec@!opengl 1.0@rasterpos
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linke
+
+# Marked as flake because it passes with ESO but crashes with pipelines.
+spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -scan 1 50
+
+# Nightly run expectations update
+dEQP-GLES31.functional.copy_image.mixed.viewclass_64_bits_mixed.signed_r11_eac_rgba16ui.cubemap_to_texture3d
+KHR-GL46.packed_pixels.varied_rectangle.rgb10_a2ui
+spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec3-index-wr-before-barrier
+
diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt
new file mode 100644
index 00000000000..4ca4f66b281
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-skips.txt
@@ -0,0 +1,64 @@
+ext_texture_env.*
+spec@arb_shader_image_load_store.invalid
+spec@arb_shader_image_load_store.max-size
+spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64
+.*@execution@vs_in.*
+
+# Kopper regression
+glx@glx-tfp
+
+spec@egl_nok_texture_from_pixmap@basic
+
+# Exclude GLX tests.
+glx@glx.*
+
+# Tests below time out most of the time.
+KHR-GL46.copy_image.functional
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.texture_swizzle.functional
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize
+
+# These need format conversions that gallium doesn't implement yet
+spec@arb_texture_buffer_object@formats.*arb.*
+
+# These randomly hang.
+spec@ext_external_objects@.*
+
+# These are too random.
+spec@arb_shader_clock@execution@clock.*
+
+# These run OOM and might hang?
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.*
+spec@!opengl 1.1@streaming-texture-leak
+spec@arb_uniform_buffer_object@maxuniformblocksize.*
+
+# implicit modifier selection not currently supported
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88
+
+# This subset hangs since a077c14f150 ("zink: Fix resizable BAR detection logic")
+# for very weird reasons; skip it completely until the issue is properly fixed.
+spec@arb_shader_image_load_store.*
diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt
new file mode 100644
index 00000000000..0c6ae03c09d
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt
@@ -0,0 +1,217 @@
+# kopper
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+
+# #6115
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+
+glx@extension string sanity,Fail
+
+# #6322
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
+
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 6,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 8,Fail
+
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -scan 1 50,Crash
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash
+
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail
+
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
+
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.5@depth-tex-compare,Fail
+
+spec@!opengl 2.0@vs-point_size-zero,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+
+
+# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support")
+spec@egl 1.4@egl-ext_egl_image_storage,Fail
+
+# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev")
+spec@!opengl 1.1@line-smooth-stipple,Fail
+
+# Delta over NAVI10
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=4,Fail
+spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=6,Fail
+
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+# Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6,Fail
+spec@arb_sample_shading@samplemask 6 all,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms partition,Fail
+spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8,Fail
+spec@arb_sample_shading@samplemask 8 all,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms partition,Fail
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
+
+# Polygon smoothing isn't supported in Vulkan.
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail
+
+# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104
+spec@arb_viewport_array@display-list,Fail
+
+dEQP-GLES3.functional.shaders.precision.uint.highp_div_fragment,Fail
+spec@arb_sample_shading@arb_sample_shading-builtin-gl-sample-mask-mrt-alpha-to-coverage,Fail
+
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt
new file mode 100644
index 00000000000..aadc1208612
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt
@@ -0,0 +1,44 @@
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_equal_spacing_ccw_point_mode
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_equal_spacing_cw_point_mode
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_even_spacing_ccw_point_mode
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_even_spacing_cw_point_mode
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_odd_spacing_ccw_point_mode
+dEQP-GLES31.functional.tessellation.invariance.primitive_set.triangles_fractional_odd_spacing_cw_point_mode
+KHR-GL46.geometry_shader.primitive_counter.lines_to_line_strip
+KHR-GL46.geometry_shader.primitive_counter.lines_to_line_strip_rp
+KHR-GL46.geometry_shader.primitive_counter.points_to_line_strip
+KHR-GL46.geometry_shader.primitive_counter.points_to_line_strip_rp
+KHR-GL46.geometry_shader.primitive_counter.triangles_to_line_strip_rp
+spec@arb_depth_texture@fbo-depth-gl_depth_component16-blit
+spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store
+spec@arb_sample_shading@arb_sample_shading-builtin-gl-sample-mask-mrt-alpha-to-coverage
+spec@arb_shader_image_load_store@coherency
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/256x256
+spec@arb_shader_image_load_store@shader-mem-barrier
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_timer_query@timestamp-get
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv
+spec@glsl-1.50@execution@geometry@point-size-out
+spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs
+spec@!opengl 1.0@rasterpos
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=4
+spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=6
diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt
new file mode 100644
index 00000000000..47781bd87ae
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-skips.txt
@@ -0,0 +1,34 @@
+ext_texture_env.*
+spec@arb_shader_image_load_store.invalid
+spec@arb_shader_image_load_store.max-size
+spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64
+.*@execution@vs_in.*
+
+# Kopper regression
+glx@glx-tfp
+
+spec@egl_nok_texture_from_pixmap@basic
+
+
+# Exclude GLX tests.
+glx@glx.*
+
+# Tests below time out most of the time.
+KHR-GL46.copy_image.functional
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.texture_swizzle.functional
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize
+
+# These need format conversions that gallium doesn't implement yet
+spec@arb_texture_buffer_object@formats.*arb.*
+
+# These randomly hang.
+spec@ext_external_objects@.*
+
+# These are too random.
+spec@arb_shader_clock@execution@clock.*
+
+# These run OOM and might hang?
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.*
+spec@!opengl 1.1@streaming-texture-leak
+spec@arb_uniform_buffer_object@maxuniformblocksize.*
diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt
new file mode 100644
index 00000000000..324c7132a0e
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt
@@ -0,0 +1,610 @@
+# kopper
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+
+# #6115
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+
+glx@extension string sanity,Fail
+
+# #6322
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
+
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 6,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 8,Fail
+
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail
+
+# implicit modifier selection is broken with radeonsi display server
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
+
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash
+
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail
+
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
+
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.5@depth-tex-compare,Fail
+
+spec@!opengl 2.0@vs-point_size-zero,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
+
+# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support")
+spec@egl 1.4@egl-ext_egl_image_storage,Fail
+
+# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev")
+spec@!opengl 1.1@line-smooth-stipple,Fail
+
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+# Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6,Fail
+spec@arb_sample_shading@samplemask 6 all,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms partition,Fail
+spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8,Fail
+spec@arb_sample_shading@samplemask 8 all,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms partition,Fail
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
+
+# Polygon smoothing isn't supported in Vulkan.
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail
+
+# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104
+spec@arb_viewport_array@display-list,Fail
+
+KHR-GL46.geometry_shader.rendering.rendering.triangles_with_adjacency_input_line_strip_output_triangle_strip_adjacency_drawcall,Fail
+KHR-GL46.geometry_shader.rendering.rendering.triangles_with_adjacency_input_triangle_strip_output_triangle_strip_adjacency_drawcall,Fail
+KHR-GL46.texture_size_promotion.functional,Fail
+dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.greater_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.greater_or_equal_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.less_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.less_or_equal_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.greater_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.greater_or_equal_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.less_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d.nearest_mipmap_nearest.less_or_equal_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.greater_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.greater_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.greater_or_equal_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.less_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.less_or_equal_depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.specification.teximage2d_depth.depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.specification.teximage2d_depth.depth32f_stencil8,Fail
+dEQP-GLES3.functional.texture.specification.teximage3d_depth.depth24_stencil8_2d_array,Fail
+dEQP-GLES3.functional.texture.specification.teximage3d_depth.depth32f_stencil8_2d_array,Fail
+dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth24_stencil8_2d,Fail
+dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth24_stencil8_cube,Fail
+dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth32f_stencil8_2d,Fail
+dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth32f_stencil8_cube,Fail
+dEQP-GLES3.functional.texture.specification.texstorage3d.format.depth24_stencil8_2d_array,Fail
+dEQP-GLES3.functional.texture.specification.texstorage3d.format.depth32f_stencil8_2d_array,Fail
+dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth24_stencil8,Fail
+dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth32f_stencil8,Fail
+dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_2d_array,Fail
+dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_2d_array,Fail
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15,Fail
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1,Fail
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3,Fail
+dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array,Fail
+dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array,Fail
+dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array,Fail
+
+spec@!opengl 1.1@getteximage-depth,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT16,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT24,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D-GL_DEPTH_COMPONENT32,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT16,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT24,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT32,Fail
+spec@!opengl 1.1@texwrap formats bordercolor,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA4- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_ALPHA8- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_R3_G3_B2- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB10- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB10_A2- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB4- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB5- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB5_A1- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB8- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA4- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA8- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA4- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_ALPHA8- border color only,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims cull-back pv-last,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-last,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-last,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-last,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims pv-last,Fail
+spec@arb_depth_buffer_float@fbo-clear-formats,Fail
+spec@arb_depth_buffer_float@fbo-clear-formats@GL_DEPTH32F_STENCIL8,Fail
+spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail
+spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail
+spec@arb_sample_locations@test,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 8- X: 3- Y: 6- Grid: true,Fail
+spec@arb_texture_compression@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB- swizzled- border color only,Fail
+spec@arb_texture_compression@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA- swizzled- border color only,Fail
+spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_BPTC_UNORM- swizzled- border color only,Fail
+spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT- swizzled- border color only,Fail
+spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT- swizzled- border color only,Fail
+spec@arb_texture_compression_bptc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA16F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB16F- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB32F- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA16F- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA32F- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA16F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA32F_ARB- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_R16- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_R8- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG16- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG8- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_R16F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_R32F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_RG16F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_RG32F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R16I- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R16UI- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R32I- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R32UI- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R8I- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_R8UI- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG16I- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG16UI- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG32I- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG32UI- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8I- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-int bordercolor-swizzled@GL_RG8UI- swizzled- border color only,Fail
+spec@arb_texture_rgb10_a2ui@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_rgb10_a2ui@texwrap formats bordercolor-swizzled@GL_RGB10_A2UI- swizzled- border color only,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export-tex,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
+spec@ext_packed_depth_stencil@fbo-clear-formats,Fail
+spec@ext_packed_depth_stencil@fbo-clear-formats@GL_DEPTH24_STENCIL8,Fail
+spec@ext_packed_depth_stencil@fbo-clear-formats@GL_DEPTH_STENCIL,Fail
+spec@ext_packed_float@texwrap formats bordercolor-swizzled,Fail
+spec@ext_packed_float@texwrap formats bordercolor-swizzled@GL_R11F_G11F_B10F- swizzled- border color only,Fail
+spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RED_RGTC1- swizzled- border color only,Fail
+spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RG_RGTC2- swizzled- border color only,Fail
+spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_SIGNED_RED_RGTC1- swizzled- border color only,Fail
+spec@ext_texture_compression_rgtc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_SIGNED_RG_RGTC2- swizzled- border color only,Fail
+spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT- swizzled- border color only,Fail
+spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT- swizzled- border color only,Fail
+spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT- swizzled- border color only,Fail
+spec@ext_texture_compression_s3tc@texwrap formats bordercolor-swizzled@GL_COMPRESSED_RGB_S3TC_DXT1_EXT- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB16I- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB16UI- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB32I- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB32UI- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB8I- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGB8UI- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA16I- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA16UI- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA32I- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA32UI- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA8I- swizzled- border color only,Fail
+spec@ext_texture_integer@texwrap formats bordercolor-swizzled@GL_RGBA8UI- swizzled- border color only,Fail
+spec@ext_texture_shared_exponent@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_shared_exponent@texwrap formats bordercolor-swizzled@GL_RGB9_E5- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_R16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_R8_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RG16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RG8_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGB16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGB8_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGBA16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGBA8_SNORM- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8_ALPHA8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT- swizzled- border color only,Fail
+spec@glsl-1.50@execution@geometry@primitive-types gl_triangle_strip_adjacency,Fail
+spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip_adjacency ffs,Fail
+spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip_adjacency other,Fail
+
+# Regressions from 1f4662cc4ed0c5b87479eb71e53a1320ab1b414b
+spec@ext_texture_array@copyteximage 1d_array,Fail
+spec@ext_texture_array@copyteximage 1d_array samples=2,Fail
+spec@ext_texture_array@copyteximage 1d_array samples=4,Fail
+spec@ext_texture_array@copyteximage 1d_array samples=6,Fail
+spec@ext_texture_array@copyteximage 1d_array samples=8,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt
new file mode 100644
index 00000000000..efc6bf8df8f
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt
@@ -0,0 +1,33 @@
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3
+dEQP-GLES31.functional.fbo.no_attachments.size.16x16
+dEQP-GLES31.functional.texture.specification.teximage3d_depth.depth24_stencil8_cube_array
+dEQP-GLES31.functional.texture.specification.teximage3d_depth.depth32f_stencil8_cube_array
+dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array
+dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array
+dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array
+dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_cube_array
+dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.equal_depth24_stencil8
+dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.greater_depth24_stencil8
+dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.greater_or_equal_depth24_stencil8
+dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.less_depth24_stencil8
+dEQP-GLES3.functional.texture.shadow.2d_array.linear_mipmap_nearest.less_or_equal_depth24_stencil8
+dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_nearest.not_equal_depth24_stencil8
+dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth24_stencil8_cube
+spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glScissor
+spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glViewport
+spec@glsl-1.50@execution@geometry@point-size-out
+spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs
+spec@!opengl 1.0@rasterpos
+
+# Updated by ci-collate, found in this job run: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56164998
+spec@!opengl 1.1@depthstencil-default_fb-blit
+
+# Updated by ci-collate, found in this job run: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56260518
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.17x17
+
+# This test is flaking:
+# Fail: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56268639
+# Success: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/56260518
+dEQP-GLES31.functional.fbo.no_attachments.npot_size.31x31
diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt
new file mode 100644
index 00000000000..dafa035046d
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-skips.txt
@@ -0,0 +1,49 @@
+ext_texture_env.*
+spec@arb_shader_image_load_store.invalid
+spec@arb_shader_image_load_store.max-size
+spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64
+.*@execution@vs_in.*
+
+# Kopper regression
+glx@glx-tfp
+
+spec@egl_nok_texture_from_pixmap@basic
+
+# Exclude GLX tests.
+glx@glx.*
+
+# Tests below time out most of the time.
+KHR-GL46.copy_image.functional
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.texture_swizzle.functional
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize
+
+# These need format conversions that gallium doesn't implement yet.
+spec@arb_texture_buffer_object@formats.*arb.*
+
+# These randomly hang.
+spec@ext_external_objects@.*
+
+# These are too random.
+spec@arb_shader_clock@execution@clock.*
+
+# These run OOM and might hang.
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.*
+spec@!opengl 1.1@streaming-texture-leak
+spec@arb_uniform_buffer_object@maxuniformblocksize.*
+
+# implicit modifier selection not currently supported
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88
+
+# This subset hangs since a077c14f150 ("zink: Fix resizable BAR detection logic")
+# for very weird reasons; skip it completely until the issue is properly fixed.
+spec@arb_shader_image_load_store.*
diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt
new file mode 100644
index 00000000000..230fc86df47
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt
@@ -0,0 +1,188 @@
+# kopper
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+
+# #6115
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+
+glx@extension string sanity,Fail
+
+# #6322
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
+
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
+spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 6,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 8,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 6,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 8,Fail
+
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash
+
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail
+
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
+
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.5@depth-tex-compare,Fail
+
+spec@!opengl 2.0@vs-point_size-zero,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+
+
+# Introduced with the uprev of piglit (70ce1dcacc92 - "ci: Update piglit with s3 support")
+spec@egl 1.4@egl-ext_egl_image_storage,Fail
+
+# Introduced by a8d2b288eee3 ("ci/piglit: 2023-01-19 uprev")
+spec@!opengl 1.1@line-smooth-stipple,Fail
+
+# Delta over NAVI10
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=4,Fail
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=6,Fail
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=8,Fail
+
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+# Regressed between 1080ff39717b92b99afcf51283bec3994deae376..ef01a9cf3b465889fe8084732264dad0580270c3
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6,Fail
+spec@arb_sample_shading@samplemask 6 all,Fail
+spec@arb_sample_shading@samplemask 6 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 6 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 6@noms partition,Fail
+spec@arb_sample_shading@samplemask 6@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8,Fail
+spec@arb_sample_shading@samplemask 8 all,Fail
+spec@arb_sample_shading@samplemask 8 all@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 8 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.125000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 8@noms partition,Fail
+spec@arb_sample_shading@samplemask 8@sample mask_in_one,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 6 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
+
+# Polygon smoothing isn't supported in Vulkan.
+spec@!opengl 1.0@gl-1.0-polygon-line-aa,Fail
+
+# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104
+spec@arb_viewport_array@display-list,Fail
+
diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt
new file mode 100644
index 00000000000..e5576267400
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt
@@ -0,0 +1,41 @@
+dEQP-GLES2.functional.shaders.random.swizzle.vertex.43
+dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_12x10_rgba.srgb8_alpha8_astc_12x10_khr_rgba_astc_12x10_khr.texture3d_to_texture3d
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_4x4_khr_rgba32ui.texture2d_array_to_texture2d_array
+dEQP-GLES31.functional.texture.filtering.cube_array.sizes.8x8x6_nearest
+dEQP-GLES3.functional.texture.filtering.cube.combinations.linear_linear_mirror_mirror
+dEQP-GLES3.functional.texture.shadow.cube.linear_mipmap_nearest.greater_depth_component32f
+object namespace pollution@framebuffer with glgetteximage
+spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store
+spec@arb_instanced_arrays@arb_instanced_arrays-instanced_arrays-vbo
+spec@arb_shader_image_load_store@shader-mem-barrier
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Fragment shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Geometry shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation control shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=16
+spec@arb_shader_image_load_store@shader-mem-barrier@Tessellation evaluation shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'coherent' qualifier memory barrier test/modulus=64
+spec@arb_shader_image_load_store@shader-mem-barrier@Vertex shader/'volatile' qualifier memory barrier test/modulus=64
+spec@arb_tessellation_shader@execution@built-in-functions@tcs-sign-vec3
+spec@arb_texture_multisample@arb_texture_multisample-dsa-texelfetch
+spec@arb_texture_multisample@arb_texture_multisample-dsa-texelfetch@Texture type: GL_RGB9_E5
+spec@arb_timer_query@timestamp-get
+spec@glsl-1.10@execution@built-in-functions@vs-equal-vec2-vec2
+spec@glsl-1.50@execution@built-in-functions@gs-greaterthan-uvec3-uvec3
+spec@glsl-1.50@execution@geometry@point-size-out
+spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs
+spec@!opengl 1.0@rasterpos
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=4
+
+# Marked as flake because it passes with ESO but crashes with pipelines.
+spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -scan 1 50
diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt
new file mode 100644
index 00000000000..4d37b3041b0
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-skips.txt
@@ -0,0 +1,61 @@
+ext_texture_env.*
+spec@arb_shader_image_load_store.invalid
+spec@arb_shader_image_load_store.max-size
+spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64
+.*@execution@vs_in.*
+
+# Kopper regression
+glx@glx-tfp
+
+spec@egl_nok_texture_from_pixmap@basic
+
+
+# Exclude GLX tests.
+glx@glx.*
+
+# Tests below time out most of the time.
+KHR-GL46.copy_image.functional
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.texture_swizzle.functional
+KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize
+
+#these need format conversions that gallium doesn't implement yet
+spec@arb_texture_buffer_object@formats.*arb.*
+
+# These randomly hang.
+spec@ext_external_objects@.*
+
+# These produce nondeterministic results from run to run.
+spec@arb_shader_clock@execution@clock.*
+
+# These run OOM and might hang?
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.*
+spec@!opengl 1.1@streaming-texture-leak
+spec@arb_uniform_buffer_object@maxuniformblocksize.*
+
+# implicit modifier selection not currently supported
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_vyuy
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt
new file mode 100644
index 00000000000..834e02589d1
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt
@@ -0,0 +1,20 @@
+GTF-GL46.gtf30.GL3Tests.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit,Fail
+GTF-GL46.gtf30.GL3Tests.sgis_texture_lod.sgis_texture_lod_basic_lod_selection,Fail
+GTF-GL46.gtf32.GL3Tests.draw_elements_base_vertex.draw_elements_base_vertex_invalid_mode,Fail
+
+KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage,Fail
+
+# Turnip has maxFragmentInputComponents = 124, while GL requires
+# gl_MaxFragmentInputComponents >= 128
+KHR-GL46.limits.max_fragment_input_components,Fail
+
+# https://gerrit.khronos.org/c/vk-gl-cts/+/9672
+KHR-GL46.buffer_storage.map_persistent_draw,Fail
+
+# https://gitlab.freedesktop.org/mesa/mesa/-/issues/6723
+KHR-GL46.copy_image.functional,Fail
+KHR-GL46.texture_view.view_classes,Fail
+
+# Piglit xfb tests
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt
new file mode 100644
index 00000000000..a9107058df7
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-tu-a630-flakes.txt
@@ -0,0 +1,16 @@
+GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels_pixelstore
+KHR-Single-GL46.arrays_of_arrays_gl.ConstructorsAndUnsizedDeclConstructorSizing1
+dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_write_dynamic_read_vertex
+dEQP-GLES3.functional.texture.wrap.astc_12x10_srgb.repeat_mirror_linear_divisible
+dEQP-GLES3.functional.texture.wrap.astc_6x6.repeat_mirror_nearest_divisible
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.rgba_astc_10x6_khr_rgba32i.texture3d_to_texture3d
+dEQP-GLES31.functional.copy_image.mixed.viewclass_128_bits_mixed.srgb8_alpha8_astc_10x5_khr_rgba32f.texture3d_to_texture2d_array
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8_rg8.texture2d_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8i_rg8i.cubemap_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8i_rg8i.texture2d_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8i_rg8i.texture3d_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_16_bits.rg8ui_rg8ui.texture2d_to_renderbuffer
+dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16i_rgb10_a2.cubemap_to_renderbuffer
+dEQP-GLES3.functional.texture.specification.texstorage3d.format.depth_component16_2d_array
+dEQP-GLES3.functional.texture.specification.texstorage2d.format.rgb565_cube
+dEQP-GLES31.functional.fbo.color.texcubearray.r16f
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt
new file mode 100644
index 00000000000..fb03e671d01
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt
@@ -0,0 +1,2 @@
+# takes forever, but passes
+KHR-GL46.texture_swizzle.smoke
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt b/src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt
new file mode 100644
index 00000000000..f2921348eba
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-tu-a750-fails.txt
@@ -0,0 +1,524 @@
+GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_two_buffers,Fail
+dEQP-GLES3.functional.fbo.depth.depth_test_clamp.depth32f_stencil8,Fail
+dEQP-GLES3.functional.fbo.depth.depth_test_clamp.depth_component32f,Fail
+dEQP-GLES3.functional.fbo.depth.depth_write_clamp.depth32f_stencil8,Fail
+dEQP-GLES3.functional.fbo.depth.depth_write_clamp.depth_component32f,Fail
+
+GTF-GL46.gtf30.GL3Tests.sgis_texture_lod.sgis_texture_lod_basic_lod_selection,Fail
+
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2,Crash
+
+# Turnip has maxFragmentInputComponents = 124, while GL requires
+# gl_MaxFragmentInputComponents >= 128
+KHR-GL46.limits.max_fragment_input_components,Fail
+
+# https://gitlab.freedesktop.org/mesa/mesa/-/issues/6723
+KHR-GL46.copy_image.functional,Fail
+KHR-GL46.texture_view.view_classes,Fail
+
+
+dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.lowp_mat4_float_vertex,Fail
+dEQP-GLES3.functional.shaders.matrix.inverse.dynamic.mediump_mat4_float_vertex,Fail
+
+glx@glx-multi-window-single-context,Fail
+glx@glx-multithread-texture,Fail
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_arb_create_context_robustness@invalid reset notification strategy,Fail
+glx@glx_ext_no_config_context@no fbconfig,Fail
+spec@arb_texture_rectangle@fbo-blit rect,Fail
+spec@egl_chromium_sync_control@conformance,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+spec@ext_framebuffer_blit@fbo-blit,Fail
+spec@ext_framebuffer_blit@fbo-copypix,Fail
+spec@ext_framebuffer_blit@fbo-readdrawpix,Fail
+spec@!opengl 1.0@depth-clear-precision-check,Fail
+spec@!opengl 1.0@depth-clear-precision-check@depth24,Fail
+spec@!opengl 1.0@depth-clear-precision-check@depth24_stencil8,Fail
+spec@!opengl 1.0@depth-clear-precision-check@depth32,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@rasterpos,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+spec@!opengl 1.1@line-aa-width,Fail
+spec@!opengl 1.1@line-smooth-stipple,Crash
+spec@!opengl 1.1@linestipple,Crash
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-offset,Fail
+spec@!opengl 2.0@vs-point_size-zero,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims cull-back pv-first,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-first,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-first,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-first,Fail
+spec@!opengl 3.2@gl-3.2-adj-prims pv-first,Fail
+spec@!opengl 3.2@minmax,Fail
+spec@!opengl 3.3@minmax,Fail
+spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+spec@arb_gpu_shader5@arb_gpu_shader5-interpolateatsample-dynamically-nonuniform,Fail
+spec@arb_gpu_shader5@execution@built-in-functions@fs-interpolateatcentroid-array-of-structs,Crash
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_gpu_shader_fp64@execution@glsl-fs-loop-unroll-mul-fp64,Crash
+spec@arb_gpu_shader_fp64@uniform_buffers@fs-ubo-load.indirect.3,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_BLUE_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_GREEN_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_RED_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_type pname checks,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_type pname checks@GL_INTERNALFORMAT_BLUE_TYPE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_type pname checks@GL_INTERNALFORMAT_GREEN_TYPE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_type pname checks@GL_INTERNALFORMAT_RED_TYPE,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2' on GL_PROGRAM_INPUT,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_query_buffer_object@coherency,Fail
+spec@arb_query_buffer_object@coherency@index-buffer-GL_TESS_CONTROL_SHADER_PATCHES,Fail
+spec@arb_query_buffer_object@coherency@indirect-dispatch-GL_TESS_CONTROL_SHADER_PATCHES,Fail
+spec@arb_query_buffer_object@coherency@indirect-draw-GL_TESS_CONTROL_SHADER_PATCHES,Fail
+spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_TESS_CONTROL_SHADER_PATCHES,Fail
+spec@arb_query_buffer_object@qbo,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC-GL_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC-GL_UNSIGNED_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC-GL_UNSIGNED_INT64_ARB,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_AFTER-GL_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT64_ARB,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_BEFORE-GL_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT64_ARB,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC-GL_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC-GL_UNSIGNED_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC-GL_UNSIGNED_INT64_ARB,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT,Fail
+spec@arb_query_buffer_object@qbo@query-GL_TESS_CONTROL_SHADER_PATCHES-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT64_ARB,Fail
+spec@arb_sample_locations@test,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 1- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 2- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 0- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 1- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 2- Y: 6- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 0- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 1- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 2- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 3- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 4- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 5- Grid: true,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: false,Fail
+spec@arb_sample_locations@test@MSAA: 4- X: 3- Y: 6- Grid: true,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_shader_image_load_store@coherency,Fail
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/512x512,Fail
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'volatile' qualifier coherency test/1024x1024,Fail
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/1024x1024,Fail
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'coherent' qualifier coherency test/512x512,Fail
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'volatile' qualifier coherency test/1024x1024,Fail
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Geometry shader/'volatile' qualifier coherency test/512x512,Fail
+spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'coherent' qualifier coherency test/1024x1024,Fail
+spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'coherent' qualifier coherency test/256x256,Fail
+spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'coherent' qualifier coherency test/512x512,Fail
+spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'volatile' qualifier coherency test/1024x1024,Fail
+spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'volatile' qualifier coherency test/256x256,Fail
+spec@arb_shader_image_load_store@coherency@Vertex-Geometry shader/'volatile' qualifier coherency test/512x512,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@arb_tessellation_shader@execution@tcs-input-read-mat,Fail
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+spec@arb_texture_buffer_object@formats (fs- arb),Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_INTENSITY8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE16_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE8_ALPHA8,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (fs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb),Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_ALPHA8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_INTENSITY8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE16_ALPHA16,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE8_ALPHA8,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA16UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA32UI_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8I_EXT,Fail
+spec@arb_texture_buffer_object@formats (vs- arb)@GL_LUMINANCE_ALPHA8UI_EXT,Fail
+spec@arb_texture_rectangle@1-1-linear-texture,Fail
+spec@arb_timer_query@timestamp-get,Fail
+spec@ext_external_objects@vk-depth-display,Fail
+spec@ext_external_objects@vk-depth-display@D16,Fail
+spec@ext_external_objects@vk-depth-display@D24S8,Fail
+spec@ext_external_objects@vk-depth-display@D32S8,Fail
+spec@ext_external_objects@vk-image-display,Fail
+spec@ext_external_objects@vk-image-display-muliple-textures,Fail
+spec@ext_external_objects@vk-image-display-overwrite,Fail
+spec@ext_external_objects@vk-image-overwrite,Fail
+spec@ext_external_objects@vk-image-overwrite@RGB 10 A2 UINT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGB 10 A2 UNORM optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGB 5 A1 UNORM optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 16 INT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 16 SFLOAT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 16 UINT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 16 UNORM optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 32 INT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 32 SFLOAT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 32 UINT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 4 UNORM optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 8 INT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 8 SRGB optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 8 UINT optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-image-overwrite@RGBA 8 UNORM optimal: Failed to create texture from GL memory object.,Fail
+spec@ext_external_objects@vk-semaphores,Fail
+spec@ext_external_objects@vk-semaphores-2,Fail
+spec@ext_external_objects@vk-stencil-display,Fail
+spec@ext_external_objects@vk-stencil-display@D24S8,Fail
+spec@ext_external_objects@vk-stencil-display@D32S8,Fail
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@alpha-to-coverage-dual-src-blend 2,Fail
+spec@ext_framebuffer_multisample@alpha-to-coverage-dual-src-blend 4,Fail
+spec@ext_framebuffer_multisample@alpha-to-coverage-no-draw-buffer-zero 2,Fail
+spec@ext_framebuffer_multisample@alpha-to-coverage-no-draw-buffer-zero 4,Fail
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 2,Fail
+spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 4,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@sample-alpha-to-coverage 2 color,Fail
+spec@ext_framebuffer_multisample@sample-alpha-to-coverage 4 color,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv21,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+spec@ext_transform_feedback@structs struct-array-elem run,Fail
+spec@ext_transform_feedback@structs struct-array-elem run interface,Fail
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Fail
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+spec@ext_transform_feedback@tessellation triangle_fan flat_first,Fail
+spec@ext_transform_feedback@tessellation triangle_strip flat_first,Fail
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion,Crash
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion,Crash
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+spec@glsl-1.30@execution@texelfetch fs sampler3d 1x129x9-98x129x9,Fail
+spec@glsl-1.30@execution@texelfetch fs sampler3d 98x1x9-98x129x9,Fail
+spec@glsl-1.50@built-in constants,Fail
+spec@glsl-1.50@built-in constants@gl_MaxFragmentInputComponents,Fail
+spec@glsl-1.50@execution@geometry@point-size-out,Fail
+spec@glsl-1.50@execution@geometry@primitive-id-restart gl_line_strip_adjacency other,Crash
+spec@glsl-1.50@execution@interface-blocks-complex-vs-fs,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail
+spec@glsl-1.50@execution@redeclare-pervertex-out-subset-gs,Fail
+spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec4-index-wr,Fail
+spec@glsl-3.30@built-in constants,Fail
+spec@glsl-3.30@built-in constants@gl_MaxFragmentInputComponents,Fail
+spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail
+spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail
+spec@khr_texture_compression_astc@array-gl,Fail
+spec@khr_texture_compression_astc@array-gl@12x12 Block Dim,Fail
+spec@khr_texture_compression_astc@array-gl@5x5 Block Dim,Fail
+spec@khr_texture_compression_astc@array-gles,Fail
+spec@khr_texture_compression_astc@array-gles@12x12 Block Dim,Fail
+spec@khr_texture_compression_astc@array-gles@5x5 Block Dim,Fail
+spec@khr_texture_compression_astc@miptree-gl hdr,Fail
+spec@khr_texture_compression_astc@miptree-gl hdr@HDR Profile,Fail
+spec@khr_texture_compression_astc@miptree-gl ldr,Fail
+spec@khr_texture_compression_astc@miptree-gl ldr@LDR Profile,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-sd,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-sd@sRGB skip decode,Fail
+spec@khr_texture_compression_astc@miptree-gles hdr,Fail
+spec@khr_texture_compression_astc@miptree-gles hdr@HDR Profile,Fail
+spec@khr_texture_compression_astc@miptree-gles ldr,Fail
+spec@khr_texture_compression_astc@miptree-gles ldr@LDR Profile,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-sd,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-sd@sRGB skip decode,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl hdr,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl hdr@HDR Profile,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles hdr,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles hdr@HDR Profile,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt b/src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt
new file mode 100644
index 00000000000..698e5b6a711
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-tu-a750-flakes.txt
@@ -0,0 +1,4 @@
+KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS
+spec@arb_shader_image_load_store@coherency@Tessellation evaluation-Fragment shader/'coherent' qualifier coherency test/512x512
+glx@glx-multithread-texture
+glx@glx-visuals-depth
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt b/src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt
new file mode 100644
index 00000000000..0628fe02c29
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-tu-a750-skips.txt
@@ -0,0 +1,23 @@
+GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels_pixelstore
+KHR-GL46.texture_swizzle.smoke
+KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2
+
+# crashes
+KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component
+
+spec@.*dvec.*
+spec@.*dmat.*
+spec@.*int64.*
+spec@.*64bit.*
+spec@arb_texture_buffer_object@texture-buffer-size-clamp.*
+
+# hangs
+spec@arb_texture_barrier@arb_texture_barrier-blending-in-shader.*
+spec@glsl-1.50@execution@geometry@primitive-id-restart gl_line_strip_adjacency ffs
+
+# timeout
+glx@glx-visuals-stencil
+spec@!opengl 1.0@gl-1.0-drawbuffer-modes
+spec@arb_texture_cube_map@cubemap npot
+spec@arb_texture_cube_map_array@arb_texture_cube_map_array-sampler-cube-array-shadow
+spec@egl_nok_texture_from_pixmap@basic
diff --git a/src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt b/src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt
new file mode 100644
index 00000000000..0d05bbda4d2
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-venus-lvp-fails.txt
@@ -0,0 +1,164 @@
+# #6115
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-float-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec2-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec3-index-rd,Crash
+spec@arb_tessellation_shader@execution@variable-indexing@tes-both-input-array-vec4-index-rd,Crash
+
+# #6322
+spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
+
+#kopper regressions/changes
+spec@egl_chromium_sync_control@conformance,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
+spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_ust_test,Fail
+
+
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_x,Fail
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_y,Fail
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min,Fail
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x,Fail
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail
+dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail
+
+# this test tries to be error-compatible with nvidia. spoiler: mesa isn't, and no driver can pass it
+glx@glx_arb_create_context@invalid flag,Fail
+
+glx@glx-swap-pixmap-bad,Fail
+glx@glx-visuals-depth,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-facing,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.2@copyteximage 3d,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
+spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail
+spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
+spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail
+spec@arb_sample_shading@ignore-centroid-qualifier 4,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 2,Fail
+spec@arb_sample_shading@interpolate-at-sample-position 4,Fail
+spec@arb_sample_shading@samplemask 2,Fail
+spec@arb_sample_shading@samplemask 2@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2@noms partition,Fail
+spec@arb_sample_shading@samplemask 2@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all,Fail
+spec@arb_sample_shading@samplemask 2 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 2 all@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4,Fail
+spec@arb_sample_shading@samplemask 4@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4@noms partition,Fail
+spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all,Fail
+spec@arb_sample_shading@samplemask 4 all@0.250000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@0.500000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@1.000000 mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms mask_in_one,Fail
+spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
+spec@arb_sample_shading@samplemask 4 all@sample mask_in_one,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
+
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+spec@egl_mesa_configless_context@basic,Fail
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Fail
+spec@ext_framebuffer_multisample@enable-flag,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 2 non-centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
+spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
+spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail
+spec@arb_shader_image_load_store@early-z,Fail
+spec@arb_shader_image_load_store@early-z@occlusion query test/early-z pass,Fail
+
+spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-load,Crash
+spec@arb_shader_image_load_store@execution@image-array-out-of-bounds-access-store,Crash
+
+#literally no driver can pass these
+spec@!opengl 1.0@rasterpos,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
+spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
+
+
+spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index,Fail
+spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-mixed-const-non-const-uniform-index2,Fail
+spec@arb_arrays_of_arrays@execution@image_store@basic-imagestore-non-const-uniform-index,Fail
+spec@arb_gpu_shader_fp64@execution@conversion,Fail
+spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
+spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail
+spec@glsl-4.00@execution@conversion,Fail
+
+spec@ext_transform_feedback@tessellation quads wireframe,Fail
+
+# Debian 12 CI update, see https://gitlab.freedesktop.org/mesa/mesa/-/issues/9072
+spec@ext_packed_float@multisample-formats 4 gl_ext_packed_float,Crash
+spec@ext_transform_feedback@tessellation quad_strip wireframe,Crash
+spec@!opengl 1.0@gl-1.0-dlist-beginend,Crash
+spec@arb_clip_control@arb_clip_control-depth-precision,Crash
+spec@nv_texture_barrier@blending-in-shader,Crash
+
+spec@arb_viewport_array@display-list,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt b/src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt
new file mode 100644
index 00000000000..a883379893e
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-venus-lvp-flakes.txt
@@ -0,0 +1,40 @@
+dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_l8_pot
+spec@khr_debug@push-pop-group_gl.*
+glx@glx-multi-window-single-context
+
+# "free(): invalid next size (fast)"
+# since it's heap corruption, it may or may not appear in a particular run
+spec@arb_compute_variable_group_size@local-size
+
+# https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20908454
+# "X connection to :99 broken (explicit kill or server shutdown)."
+glx@glx-multi-context-ib-1
+
+# depth visuals
+glx@glx-visuals-depth
+glx@glx-visuals-stencil
+
+# mysterious
+glx@glx-shader-sharing
+
+spec@arb_fragment_program@no-newline
+# glx-destroycontext-1: ../../src/xcb_conn.c:215: write_vec: Assertion `!c->out.queue_len' failed.
+glx@glx-destroycontext-1
+
+glx@glx-multithread-texture
+
+# does not happen very often, but it does occasionally
+KHR-GL46.limits.max_fragment_interpolation_offset
+
+# no output timeout, probably stuck in some X11 connection thing
+spec@ext_framebuffer_multisample@accuracy all_samples depth_resolve depthstencil
+
+# segfault in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48719777 and others
+spec@ext_texture_array@texsubimage array
+
+# crash in https://gitlab.freedesktop.org/mesa/mesa/-/jobs/48476882 and others
+KHR-GL46.layout_location.sampler_2d_shadow
+
+# uprev Piglit in Mesa
+spec@ext_framebuffer_multisample@accuracy all_samples depth_draw small depthstencil
+
diff --git a/src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt b/src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt
new file mode 100644
index 00000000000..7226486adfb
--- /dev/null
+++ b/src/gallium/drivers/zink/ci/zink-venus-lvp-skips.txt
@@ -0,0 +1,47 @@
+# Note: skips lists for CI are just lists of lines that, when
+# non-zero-length and not starting with '#', are treated as regexes
+# that delete matching lines from the test list. Be careful.
+
+KHR-GL32.texture_size_promotion.functional
+
+# this is just broken.
+KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks
+KHR-GL46.shader_ballot_tests.ShaderBallotFunctionRead
+
+# ignores copied from the old runner script
+spec@arb_map_buffer_alignment@arb_map_buffer_alignment-map-invalidate-range
+spec@arb_timer_query.*
+spec@arb_sample_shading@builtin-gl-sample-mask
+spec@glsl-1.30@execution@tex-miplevel-selection.*
+
+# This test doesn't even seem to exist, but piglit adds it to a group...?
+spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match
+
+# This one seems to have a typo in the name, and doesn't really ever run?
+spec@egl_ext_protected_content@conformance
+
+# has a race where probes periodically return black
+# cf. https://gitlab.freedesktop.org/mesa/mesa/-/jobs/10624521
+glx@glx-multi-context-single-window
+
+# This one takes too long, but passes. There are other tests that don't
+# try all the combinations, so that's probably enough.
+spec@arb_compute_shader@local-id-explosion
+
+# I can't reproduce these crashes locally
+# even after running them in loops for 4+ hours, so disable for now
+.*tex-miplevel-selection.*
+
+# these are insanely long
+KHR-GL46.copy_image.functional
+KHR-GL46.texture_swizzle.smoke
+KHR-GL46.texture_swizzle.functional
+
+# Kopper regression
+glx@glx-tfp
+
+# These tests started hitting timeouts when we upgraded LLVM from v11 to v13
+spec@arb_texture_rg@fbo-blending-formats
+
+# these need format conversions that gallium doesn't implement yet
+spec@arb_texture_buffer_object@formats.*arb.*
diff --git a/src/gallium/drivers/zink/driinfo_zink.h b/src/gallium/drivers/zink/driinfo_zink.h
index e1cf6d7d559..cdf1596cce0 100644
--- a/src/gallium/drivers/zink/driinfo_zink.h
+++ b/src/gallium/drivers/zink/driinfo_zink.h
@@ -6,5 +6,10 @@ DRI_CONF_SECTION_DEBUG
DRI_CONF_SECTION_END
DRI_CONF_SECTION_PERFORMANCE
+DRI_CONF_MESA_GLTHREAD_DRIVER(true)
+DRI_CONF_OPT_B(zink_shader_object_enable, false, "Enable support for EXT_shader_object")
+DRI_CONF_SECTION_END
+DRI_CONF_SECTION_QUALITY
+ DRI_CONF_OPT_B(zink_emulate_point_smooth, false, "Enable support for emulated GL_POINT_SMOOTH")
DRI_CONF_SECTION_END
diff --git a/src/gallium/drivers/zink/meson.build b/src/gallium/drivers/zink/meson.build
index 8da0092bbce..db68907f256 100644
--- a/src/gallium/drivers/zink/meson.build
+++ b/src/gallium/drivers/zink/meson.build
@@ -19,7 +19,7 @@
# SOFTWARE.
files_libzink = files(
- 'nir_lower_dynamic_bo_access.c',
+ 'zink_lower_cubemap_to_array.c',
'nir_to_spirv/nir_to_spirv.c',
'nir_to_spirv/spirv_builder.c',
'zink_batch.c',
@@ -28,8 +28,8 @@ files_libzink = files(
'zink_clear.c',
'zink_compiler.c',
'zink_context.c',
+ 'zink_kopper.c',
'zink_descriptors.c',
- 'zink_descriptors_lazy.c',
'zink_draw.cpp',
'zink_fence.c',
'zink_format.c',
@@ -42,6 +42,7 @@ files_libzink = files(
'zink_screen.c',
'zink_state.c',
'zink_surface.c',
+ 'zink_synchronization.cpp',
)
zink_device_info = custom_target(
@@ -49,7 +50,7 @@ zink_device_info = custom_target(
input : ['zink_device_info.py'],
output : ['zink_device_info.h', 'zink_device_info.c'],
command : [
- prog_python, '@INPUT@', '@OUTPUT@', join_paths(meson.source_root(), 'src/vulkan/registry/vk.xml')
+ prog_python, '@INPUT@', '@OUTPUT@', vk_api_xml
]
)
@@ -58,7 +59,7 @@ zink_instance = custom_target(
input : ['zink_instance.py'],
output : ['zink_instance.h', 'zink_instance.c'],
command : [
- prog_python, '@INPUT@', '@OUTPUT@', join_paths(meson.source_root(), 'src/vulkan/registry/vk.xml')
+ prog_python, '@INPUT@', '@OUTPUT@', vk_api_xml
]
)
@@ -67,33 +68,53 @@ zink_nir_algebraic_c = custom_target(
input : 'nir_to_spirv/zink_nir_algebraic.py',
output : 'zink_nir_algebraic.c',
command : [
- prog_python, '@INPUT@',
- '-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
+ prog_python, '@INPUT@', '-p', dir_compiler_nir,
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
zink_c_args = []
inc_zink_vk = []
-if with_swrast_vk
- zink_c_args += '-DZINK_WITH_SWRAST_VK'
-endif
+if host_machine.system() == 'darwin'
+
+ # MoltenVK options
+ if with_moltenvk_dir != ''
+ fs = import('fs')
+ # Vulkan SDK 1.3.250 to 1.3.268 support
+ moltenvk_includes = join_paths(with_moltenvk_dir, 'MoltenVK', 'include')
+ if not fs.is_dir(moltenvk_includes)
+ # Vulkan SDK 1.3.275 onwards support
+ moltenvk_includes = join_paths(with_moltenvk_dir, 'macos', 'include')
+ if not fs.is_dir(moltenvk_includes)
+ # MoltenVK from brew support
+ moltenvk_includes = join_paths(with_moltenvk_dir, 'include')
+ if not fs.is_dir(moltenvk_includes)
+ error(f'moltenvk includes cannot be found in moltenvk-dir="@with_moltenvk_dir@"')
+ endif
+ endif
+ endif
+ inc_zink_vk += include_directories(moltenvk_includes)
+ else
+ error('moltenvk-dir is required but not set.')
+ endif
-# MoltenVK options
-if with_moltenvk_dir != ''
- inc_zink_vk = [inc_zink_vk, include_directories( join_paths(with_moltenvk_dir, 'include') )]
zink_c_args += ['-x','objective-c'] # Put compiler into objective-C mode to allow for MacOS types, like IOSurface and CAMetalLayer
zink_c_args += ['-iframework' , 'Foundation']
+
endif
libzink = static_library(
'zink',
[files_libzink, zink_device_info, zink_instance, zink_nir_algebraic_c, vk_dispatch_table],
gnu_symbol_visibility : 'hidden',
- include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_vulkan_wsi, inc_vulkan_util, inc_zink_vk],
- dependencies: [dep_vulkan, idep_nir_headers, idep_mesautil, idep_vulkan_util_headers, dep_libdrm],
+ include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_vulkan_util, inc_zink_vk],
+ link_args : [ld_args_build_id],
+ dependencies: [
+ idep_nir_headers, idep_mesautil, idep_vulkan_util_headers,
+ idep_vulkan_wsi_defines, idep_vulkan_util, dep_libdrm, vulkan_wsi_deps
+ ],
c_args: zink_c_args,
)
diff --git a/src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c b/src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c
deleted file mode 100644
index cc38565b155..00000000000
--- a/src/gallium/drivers/zink/nir_lower_dynamic_bo_access.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright © 2020 Mike Blumenkrantz
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-bool nir_lower_dynamic_bo_access(nir_shader *shader);
-/**
- * This pass converts dynamic UBO/SSBO block indices to constant indices by generating
- * conditional chains which reduce to single values.
- *
- * This is needed by anything which intends to convert GLSL-like shaders to SPIRV,
- * as SPIRV requires explicit load points for UBO/SSBO variables and has no instruction for
- * loading based on an offset in the underlying driver's binding table
- */
-
-
-/* generate a single ssa value which conditionally selects the right value that
- * was previously loaded by the load_ubo conditional chain
- */
-static nir_ssa_def *
-recursive_generate_bo_ssa_def(nir_builder *b, nir_intrinsic_instr *instr, nir_ssa_def *index, unsigned start, unsigned end)
-{
- if (start == end - 1) {
- nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, instr->intrinsic);
- new_instr->src[0] = nir_src_for_ssa(nir_imm_int(b, start));
- for (unsigned i = 0; i < nir_intrinsic_infos[instr->intrinsic].num_srcs; i++) {
- if (i)
- nir_src_copy(&new_instr->src[i], &instr->src[i]);
- }
- if (instr->intrinsic != nir_intrinsic_load_ubo_vec4) {
- nir_intrinsic_set_align(new_instr, nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr));
- if (instr->intrinsic != nir_intrinsic_load_ssbo)
- nir_intrinsic_set_range(new_instr, nir_intrinsic_range(instr));
- }
- new_instr->num_components = instr->num_components;
- nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
- nir_dest_num_components(instr->dest),
- nir_dest_bit_size(instr->dest), NULL);
- nir_builder_instr_insert(b, &new_instr->instr);
- return &new_instr->dest.ssa;
- }
-
- unsigned mid = start + (end - start) / 2;
- return nir_build_alu(b, nir_op_bcsel, nir_build_alu(b, nir_op_ilt, index, nir_imm_int(b, mid), NULL, NULL),
- recursive_generate_bo_ssa_def(b, instr, index, start, mid),
- recursive_generate_bo_ssa_def(b, instr, index, mid, end),
- NULL
- );
-}
-
-static void
-generate_store_ssbo_ssa_def(nir_builder *b, nir_intrinsic_instr *instr, nir_ssa_def *index, unsigned start, unsigned end)
-{
- if (start == end - 1) {
- nir_intrinsic_instr *new_instr = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
- new_instr->src[1] = nir_src_for_ssa(nir_imm_int(b, start));
- nir_builder_instr_insert(b, &new_instr->instr);
- } else {
- int mid = start + (end - start) / 2;
- nir_ssa_def *mid_idx = nir_imm_int(b, mid);
- nir_push_if(b, nir_ilt(b, index, mid_idx));
- generate_store_ssbo_ssa_def(b, instr, index, start, mid);
- nir_push_else(b, NULL);
- generate_store_ssbo_ssa_def(b, instr, index, mid, end);
- nir_pop_if(b, NULL);
- }
-}
-
-static bool
-lower_dynamic_bo_access_instr(nir_builder *b,
- nir_instr *instr_,
- UNUSED void *cb_data)
-{
- if (instr_->type != nir_instr_type_intrinsic)
- return false;
-
- nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);
-
- if (instr->intrinsic != nir_intrinsic_load_ubo &&
- instr->intrinsic != nir_intrinsic_load_ubo_vec4 &&
- instr->intrinsic != nir_intrinsic_get_ssbo_size &&
- instr->intrinsic != nir_intrinsic_load_ssbo &&
- instr->intrinsic != nir_intrinsic_store_ssbo)
- return false;
- /* block index src is 1 for this op */
- unsigned block_idx = instr->intrinsic == nir_intrinsic_store_ssbo;
- if (nir_src_is_const(instr->src[block_idx]))
- return false;
- b->cursor = nir_after_instr(&instr->instr);
- bool ssbo_mode = instr->intrinsic != nir_intrinsic_load_ubo && instr->intrinsic != nir_intrinsic_load_ubo_vec4;
- unsigned first_idx = UINT_MAX, last_idx;
- if (ssbo_mode) {
- nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_ssbo)
- first_idx = MIN2(first_idx, var->data.driver_location);
- last_idx = first_idx + b->shader->info.num_ssbos;
- } else {
- /* skip 0 index if uniform_0 is one we created previously */
- first_idx = !b->shader->info.first_ubo_is_default_ubo;
- last_idx = first_idx + b->shader->info.num_ubos;
- }
-
- if (instr->intrinsic != nir_intrinsic_store_ssbo) {
- /* now create the composite dest with a bcsel chain based on the original value */
- nir_ssa_def *new_dest = recursive_generate_bo_ssa_def(b, instr,
- instr->src[block_idx].ssa,
- first_idx, last_idx);
-
- /* now use the composite dest in all cases where the original dest (from the dynamic index)
- * was used and remove the dynamically-indexed load_*bo instruction
- */
- nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, new_dest,
- &instr->instr);
- } else
- generate_store_ssbo_ssa_def(b, instr, instr->src[block_idx].ssa, first_idx, last_idx);
- nir_instr_remove(&instr->instr);
-
- return true;
-}
-
-bool
-nir_lower_dynamic_bo_access(nir_shader *shader)
-{
- return nir_shader_instructions_pass(shader,
- lower_dynamic_bo_access_instr,
- nir_metadata_dominance,
- NULL);
-}
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
index f62aad28eb3..88ced74699f 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -40,59 +40,69 @@ struct ntv_context {
*/
bool spirv_1_4_interfaces;
+ bool explicit_lod; //whether to set lod=0 for texture()
+
struct spirv_builder builder;
+ nir_shader *nir;
struct hash_table *glsl_types;
+ struct hash_table *bo_struct_types;
+ struct hash_table *bo_array_types;
SpvId GLSL_std_450;
gl_shader_stage stage;
- const struct zink_so_info *so_info;
+ const struct zink_shader_info *sinfo;
- SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32
+ SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64
nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS];
- SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32
- nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS];
- SpvId image_types[PIPE_MAX_SAMPLERS];
- SpvId images[PIPE_MAX_SAMPLERS];
- SpvId sampler_types[PIPE_MAX_SAMPLERS];
- SpvId samplers[PIPE_MAX_SAMPLERS];
- unsigned char sampler_array_sizes[PIPE_MAX_SAMPLERS];
- unsigned samplers_used : PIPE_MAX_SAMPLERS;
+ SpvId ssbos[5]; //8, 16, 32, unused, 64
+ nir_variable *ssbo_vars;
+
+ SpvId images[PIPE_MAX_SHADER_IMAGES];
+ struct hash_table image_types;
+ SpvId samplers[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ SpvId bindless_samplers[2];
+ SpvId cl_samplers[PIPE_MAX_SAMPLERS];
+ nir_variable *sampler_var[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /* driver_location -> variable */
+ nir_variable *bindless_sampler_var[2];
+ unsigned last_sampler;
+ unsigned bindless_set_idx;
+ nir_variable *image_var[PIPE_MAX_SHADER_IMAGES]; /* driver_location -> variable */
+
SpvId entry_ifaces[PIPE_MAX_SHADER_INPUTS * 4 + PIPE_MAX_SHADER_OUTPUTS * 4];
size_t num_entry_ifaces;
SpvId *defs;
+ nir_alu_type *def_types;
+ SpvId *resident_defs;
size_t num_defs;
- SpvId *regs;
- size_t num_regs;
-
struct hash_table *vars; /* nir_variable -> SpvId */
- struct hash_table *image_vars; /* SpvId -> nir_variable */
- struct hash_table *so_outputs; /* pipe_stream_output -> SpvId */
- unsigned outputs[VARYING_SLOT_MAX * 4];
- const struct glsl_type *so_output_gl_types[VARYING_SLOT_MAX * 4];
- SpvId so_output_types[VARYING_SLOT_MAX * 4];
const SpvId *block_ids;
size_t num_blocks;
bool block_started;
SpvId loop_break, loop_cont;
+ SpvId shared_block_var[5]; //8, 16, 32, unused, 64
+ SpvId shared_block_arr_type[5]; //8, 16, 32, unused, 64
+ SpvId scratch_block_var[5]; //8, 16, 32, unused, 64
+
SpvId front_face_var, instance_id_var, vertex_id_var,
primitive_id_var, invocation_id_var, // geometry
sample_mask_type, sample_id_var, sample_pos_var, sample_mask_in_var,
tess_patch_vertices_in, tess_coord_var, // tess
- push_const_var,
+ push_const_var, point_coord_var,
workgroup_id_var, num_workgroups_var,
local_invocation_id_var, global_invocation_id_var,
local_invocation_index_var, helper_invocation_var,
local_group_size_var,
- shared_block_var,
base_vertex_var, base_instance_var, draw_id_var;
+ SpvId shared_mem_size;
+
SpvId subgroup_eq_mask_var,
subgroup_ge_mask_var,
subgroup_gt_mask_var,
@@ -101,6 +111,9 @@ struct ntv_context {
subgroup_le_mask_var,
subgroup_lt_mask_var,
subgroup_size_var;
+
+ SpvId discard_func;
+ SpvId float_array_type[2];
};
static SpvId
@@ -108,10 +121,6 @@ get_fvec_constant(struct ntv_context *ctx, unsigned bit_size,
unsigned num_components, double value);
static SpvId
-get_uvec_constant(struct ntv_context *ctx, unsigned bit_size,
- unsigned num_components, uint64_t value);
-
-static SpvId
get_ivec_constant(struct ntv_context *ctx, unsigned bit_size,
unsigned num_components, int64_t value);
@@ -126,6 +135,128 @@ static SpvId
emit_triop(struct ntv_context *ctx, SpvOp op, SpvId type,
SpvId src0, SpvId src1, SpvId src2);
+static bool
+alu_op_is_typeless(nir_op op)
+{
+ switch (op) {
+ case nir_op_mov:
+ case nir_op_vec16:
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ case nir_op_vec5:
+ case nir_op_vec8:
+ case nir_op_bcsel:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static nir_alu_type
+get_nir_alu_type(const struct glsl_type *type)
+{
+ return nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_without_array_or_matrix(type))));
+}
+
+static nir_alu_type
+infer_nir_alu_type_from_uses_ssa(nir_def *ssa);
+
+static nir_alu_type
+infer_nir_alu_type_from_use(nir_src *src)
+{
+ nir_instr *instr = nir_src_parent_instr(src);
+ nir_alu_type atype = nir_type_invalid;
+ switch (instr->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ if (alu->op == nir_op_bcsel) {
+ if (nir_srcs_equal(alu->src[0].src, *src)) {
+ /* special case: the first src in bcsel is always bool */
+ return nir_type_bool;
+ }
+ }
+ /* ignore typeless ops */
+ if (alu_op_is_typeless(alu->op)) {
+ atype = infer_nir_alu_type_from_uses_ssa(&alu->def);
+ break;
+ }
+ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+ if (!nir_srcs_equal(alu->src[i].src, *src))
+ continue;
+ atype = nir_op_infos[alu->op].input_types[i];
+ break;
+ }
+ break;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (!nir_srcs_equal(tex->src[i].src, *src))
+ continue;
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ case nir_tex_src_lod:
+ if (tex->op == nir_texop_txf ||
+ tex->op == nir_texop_txf_ms ||
+ tex->op == nir_texop_txs)
+ atype = nir_type_int;
+ else
+ atype = nir_type_float;
+ break;
+ case nir_tex_src_projector:
+ case nir_tex_src_bias:
+ case nir_tex_src_min_lod:
+ case nir_tex_src_comparator:
+ case nir_tex_src_ddx:
+ case nir_tex_src_ddy:
+ atype = nir_type_float;
+ break;
+ case nir_tex_src_offset:
+ case nir_tex_src_ms_index:
+ case nir_tex_src_texture_offset:
+ case nir_tex_src_sampler_offset:
+ case nir_tex_src_sampler_handle:
+ case nir_tex_src_texture_handle:
+ atype = nir_type_int;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_deref) {
+ atype = get_nir_alu_type(nir_instr_as_deref(instr)->type);
+ } else if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_store_deref) {
+ atype = get_nir_alu_type(nir_src_as_deref(nir_instr_as_intrinsic(instr)->src[0])->type);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return nir_alu_type_get_base_type(atype);
+}
+
+static nir_alu_type
+infer_nir_alu_type_from_uses_ssa(nir_def *ssa)
+{
+ nir_alu_type atype = nir_type_invalid;
+ /* try to infer a type: if it's wrong then whatever, but at least we tried */
+ nir_foreach_use_including_if(src, ssa) {
+ if (nir_src_is_if(src))
+ return nir_type_bool;
+ atype = infer_nir_alu_type_from_use(src);
+ if (atype)
+ break;
+ }
+ return atype ? atype : nir_type_uint;
+}
+
static SpvId
get_bvec_type(struct ntv_context *ctx, int num_components)
{
@@ -138,17 +269,24 @@ get_bvec_type(struct ntv_context *ctx, int num_components)
return bool_type;
}
+static SpvId
+find_image_type(struct ntv_context *ctx, nir_variable *var)
+{
+ struct hash_entry *he = _mesa_hash_table_search(&ctx->image_types, var);
+ return he ? (intptr_t)he->data : 0;
+}
+
static SpvScope
-get_scope(nir_scope scope)
+get_scope(mesa_scope scope)
{
SpvScope conv[] = {
- [NIR_SCOPE_NONE] = 0,
- [NIR_SCOPE_INVOCATION] = SpvScopeInvocation,
- [NIR_SCOPE_SUBGROUP] = SpvScopeSubgroup,
- [NIR_SCOPE_SHADER_CALL] = SpvScopeShaderCallKHR,
- [NIR_SCOPE_WORKGROUP] = SpvScopeWorkgroup,
- [NIR_SCOPE_QUEUE_FAMILY] = SpvScopeQueueFamily,
- [NIR_SCOPE_DEVICE] = SpvScopeDevice,
+ [SCOPE_NONE] = 0,
+ [SCOPE_INVOCATION] = SpvScopeInvocation,
+ [SCOPE_SUBGROUP] = SpvScopeSubgroup,
+ [SCOPE_SHADER_CALL] = SpvScopeShaderCallKHR,
+ [SCOPE_WORKGROUP] = SpvScopeWorkgroup,
+ [SCOPE_QUEUE_FAMILY] = SpvScopeQueueFamily,
+ [SCOPE_DEVICE] = SpvScopeDevice,
};
return conv[scope];
}
@@ -163,9 +301,7 @@ block_label(struct ntv_context *ctx, nir_block *block)
static void
emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id)
{
- unsigned access = var->data.access;
- while (access) {
- unsigned bit = u_bit_scan(&access);
+ u_foreach_bit(bit, var->data.access) {
switch (1 << bit) {
case ACCESS_COHERENT:
/* SpvDecorationCoherent can't be used with vulkan memory model */
@@ -186,43 +322,79 @@ emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform);
break;
case ACCESS_CAN_REORDER:
- case ACCESS_STREAM_CACHE_POLICY:
+ case ACCESS_NON_TEMPORAL:
/* no equivalent */
break;
default:
unreachable("unknown access bit");
}
}
+ /* The Simple, GLSL, and Vulkan memory models can assume that aliasing is generally
+ * not present between the memory object declarations. Specifically, the consumer
+ * is free to assume aliasing is not present between memory object declarations,
+ * unless the memory object declarations explicitly indicate they alias.
+ * ...
+ * Applying Restrict is allowed, but has no effect.
+ * ...
+ * Only those memory object declarations decorated with Aliased or AliasedPointer may alias each other.
+ *
+ * - SPIRV 2.18.2 Aliasing
+ *
+ * thus if the variable isn't marked restrict, assume it may alias
+ */
+ if (!(var->data.access & ACCESS_RESTRICT))
+ spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationAliased);
}
static SpvOp
-get_atomic_op(nir_intrinsic_op op)
+get_atomic_op(struct ntv_context *ctx, unsigned bit_size, nir_atomic_op op)
{
switch (op) {
-#define CASE_ATOMIC_OP(type) \
- case nir_intrinsic_ssbo_atomic_##type: \
- case nir_intrinsic_image_deref_atomic_##type: \
- case nir_intrinsic_shared_atomic_##type
-
- CASE_ATOMIC_OP(add):
+#define ATOMIC_FCAP(NAME) \
+ do {\
+ if (bit_size == 16) \
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat16##NAME##EXT); \
+ if (bit_size == 32) \
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat32##NAME##EXT); \
+ if (bit_size == 64) \
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat64##NAME##EXT); \
+ } while (0)
+
+ case nir_atomic_op_fadd:
+ ATOMIC_FCAP(Add);
+ if (bit_size == 16)
+ spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float16_add");
+ else
+ spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_add");
+ return SpvOpAtomicFAddEXT;
+ case nir_atomic_op_fmax:
+ ATOMIC_FCAP(MinMax);
+ spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_min_max");
+ return SpvOpAtomicFMaxEXT;
+ case nir_atomic_op_fmin:
+ ATOMIC_FCAP(MinMax);
+ spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_min_max");
+ return SpvOpAtomicFMinEXT;
+
+ case nir_atomic_op_iadd:
return SpvOpAtomicIAdd;
- CASE_ATOMIC_OP(umin):
+ case nir_atomic_op_umin:
return SpvOpAtomicUMin;
- CASE_ATOMIC_OP(imin):
+ case nir_atomic_op_imin:
return SpvOpAtomicSMin;
- CASE_ATOMIC_OP(umax):
+ case nir_atomic_op_umax:
return SpvOpAtomicUMax;
- CASE_ATOMIC_OP(imax):
+ case nir_atomic_op_imax:
return SpvOpAtomicSMax;
- CASE_ATOMIC_OP(and):
+ case nir_atomic_op_iand:
return SpvOpAtomicAnd;
- CASE_ATOMIC_OP(or):
+ case nir_atomic_op_ior:
return SpvOpAtomicOr;
- CASE_ATOMIC_OP(xor):
+ case nir_atomic_op_ixor:
return SpvOpAtomicXor;
- CASE_ATOMIC_OP(exchange):
+ case nir_atomic_op_xchg:
return SpvOpAtomicExchange;
- CASE_ATOMIC_OP(comp_swap):
+ case nir_atomic_op_cmpxchg:
return SpvOpAtomicCompareExchange;
default:
debug_printf("%s - ", nir_intrinsic_infos[op].name);
@@ -230,7 +402,7 @@ get_atomic_op(nir_intrinsic_op op)
}
return 0;
}
-#undef CASE_ATOMIC_OP
+
static SpvId
emit_float_const(struct ntv_context *ctx, int bit_size, double value)
{
@@ -294,10 +466,37 @@ get_uvec_type(struct ntv_context *ctx, unsigned bit_size, unsigned num_component
return uint_type;
}
+static SpvId
+get_alu_type(struct ntv_context *ctx, nir_alu_type type, unsigned num_components, unsigned bit_size)
+{
+ if (bit_size == 1)
+ return get_bvec_type(ctx, num_components);
+
+ type = nir_alu_type_get_base_type(type);
+ switch (nir_alu_type_get_base_type(type)) {
+ case nir_type_bool:
+ return get_bvec_type(ctx, num_components);
+
+ case nir_type_int:
+ return get_ivec_type(ctx, bit_size, num_components);
+
+ case nir_type_uint:
+ return get_uvec_type(ctx, bit_size, num_components);
+
+ case nir_type_float:
+ return get_fvec_type(ctx, bit_size, num_components);
+
+ default:
+ unreachable("unsupported nir_alu_type");
+ }
+}
+
static SpvStorageClass
get_storage_class(struct nir_variable *var)
{
switch (var->data.mode) {
+ case nir_var_function_temp:
+ return SpvStorageClassFunction;
case nir_var_mem_push_const:
return SpvStorageClassPushConstant;
case nir_var_shader_in:
@@ -305,7 +504,12 @@ get_storage_class(struct nir_variable *var)
case nir_var_shader_out:
return SpvStorageClassOutput;
case nir_var_uniform:
+ case nir_var_image:
return SpvStorageClassUniformConstant;
+ case nir_var_mem_ubo:
+ return SpvStorageClassUniform;
+ case nir_var_mem_ssbo:
+ return SpvStorageClassStorageBuffer;
default:
unreachable("Unsupported nir_variable_mode");
}
@@ -313,10 +517,10 @@ get_storage_class(struct nir_variable *var)
}
static SpvId
-get_dest_uvec_type(struct ntv_context *ctx, nir_dest *dest)
+get_def_uvec_type(struct ntv_context *ctx, nir_def *def)
{
- unsigned bit_size = nir_dest_bit_size(*dest);
- return get_uvec_type(ctx, bit_size, nir_dest_num_components(*dest));
+ unsigned bit_size = def->bit_size;
+ return get_uvec_type(ctx, bit_size, def->num_components);
}
static SpvId
@@ -346,7 +550,15 @@ get_glsl_basetype(struct ntv_context *ctx, enum glsl_base_type type)
case GLSL_TYPE_UINT64:
return spirv_builder_type_uint(&ctx->builder, 64);
- /* TODO: handle more types */
+
+ case GLSL_TYPE_UINT16:
+ return spirv_builder_type_uint(&ctx->builder, 16);
+ case GLSL_TYPE_INT16:
+ return spirv_builder_type_int(&ctx->builder, 16);
+ case GLSL_TYPE_INT8:
+ return spirv_builder_type_int(&ctx->builder, 8);
+ case GLSL_TYPE_UINT8:
+ return spirv_builder_type_uint(&ctx->builder, 8);
default:
unreachable("unknown GLSL type");
@@ -413,8 +625,11 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type)
types[i] = get_glsl_type(ctx, glsl_get_struct_field(type, i));
ret = spirv_builder_type_struct(&ctx->builder, types,
glsl_get_length(type));
- for (unsigned i = 0; i < glsl_get_length(type); i++)
- spirv_builder_emit_member_offset(&ctx->builder, ret, i, glsl_get_struct_field_offset(type, i));
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ int32_t offset = glsl_get_struct_field_offset(type, i);
+ if (offset >= 0)
+ spirv_builder_emit_member_offset(&ctx->builder, ret, i, offset);
+ }
} else
unreachable("Unhandled GLSL type");
@@ -423,21 +638,99 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type)
}
static void
-create_shared_block(struct ntv_context *ctx, unsigned shared_size)
+create_scratch_block(struct ntv_context *ctx, unsigned scratch_size, unsigned bit_size)
{
- SpvId type = spirv_builder_type_uint(&ctx->builder, 32);
- SpvId array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, shared_size / 4));
- spirv_builder_emit_array_stride(&ctx->builder, array, 4);
+ unsigned idx = bit_size >> 4;
+ SpvId type = spirv_builder_type_uint(&ctx->builder, bit_size);
+ unsigned block_size = scratch_size / (bit_size / 8);
+ assert(block_size);
+ SpvId array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, block_size));
+ spirv_builder_emit_array_stride(&ctx->builder, array, bit_size / 8);
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
- SpvStorageClassWorkgroup,
+ SpvStorageClassPrivate,
array);
- ctx->shared_block_var = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassWorkgroup);
+ ctx->scratch_block_var[idx] = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassPrivate);
+ if (ctx->spirv_1_4_interfaces) {
+ assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
+ ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->scratch_block_var[idx];
+ }
+}
+
+static SpvId
+get_scratch_block(struct ntv_context *ctx, unsigned bit_size)
+{
+ unsigned idx = bit_size >> 4;
+ if (!ctx->scratch_block_var[idx])
+ create_scratch_block(ctx, ctx->nir->scratch_size, bit_size);
+ return ctx->scratch_block_var[idx];
+}
+
+static void
+create_shared_block(struct ntv_context *ctx, unsigned bit_size)
+{
+ unsigned idx = bit_size >> 4;
+ SpvId type = spirv_builder_type_uint(&ctx->builder, bit_size);
+ SpvId array;
+
+ assert(gl_shader_stage_is_compute(ctx->nir->info.stage));
+ if (ctx->nir->info.cs.has_variable_shared_mem) {
+ assert(ctx->shared_mem_size);
+ SpvId const_shared_size = emit_uint_const(ctx, 32, ctx->nir->info.shared_size);
+ SpvId shared_mem_size = spirv_builder_emit_triop(&ctx->builder, SpvOpSpecConstantOp, spirv_builder_type_uint(&ctx->builder, 32), SpvOpIAdd, const_shared_size, ctx->shared_mem_size);
+ shared_mem_size = spirv_builder_emit_triop(&ctx->builder, SpvOpSpecConstantOp, spirv_builder_type_uint(&ctx->builder, 32), SpvOpUDiv, shared_mem_size, emit_uint_const(ctx, 32, bit_size / 8));
+ array = spirv_builder_type_array(&ctx->builder, type, shared_mem_size);
+ } else {
+ unsigned block_size = ctx->nir->info.shared_size / (bit_size / 8);
+ assert(block_size);
+ array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, block_size));
+ }
+
+ ctx->shared_block_arr_type[idx] = array;
+ spirv_builder_emit_array_stride(&ctx->builder, array, bit_size / 8);
+
+ /* Create wrapper struct for Block, Offset and Aliased decorations. */
+ SpvId block = spirv_builder_type_struct(&ctx->builder, &array, 1);
+
+ SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassWorkgroup,
+ block);
+ ctx->shared_block_var[idx] = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassWorkgroup);
if (ctx->spirv_1_4_interfaces) {
assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
- ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->shared_block_var;
+ ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->shared_block_var[idx];
+ }
+ /* Alias our shared memory blocks */
+ if (ctx->sinfo->have_workgroup_memory_explicit_layout) {
+ spirv_builder_emit_member_offset(&ctx->builder, block, 0, 0);
+ spirv_builder_emit_decoration(&ctx->builder, block, SpvDecorationBlock);
+ spirv_builder_emit_decoration(&ctx->builder, ctx->shared_block_var[idx], SpvDecorationAliased);
}
}
+static SpvId
+get_shared_block(struct ntv_context *ctx, unsigned bit_size)
+{
+ unsigned idx = bit_size >> 4;
+ if (!ctx->shared_block_var[idx])
+ create_shared_block(ctx, bit_size);
+ if (ctx->sinfo->have_workgroup_memory_explicit_layout) {
+ spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_workgroup_memory_explicit_layout");
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayoutKHR);
+ if (ctx->shared_block_var[0])
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR);
+ if (ctx->shared_block_var[1])
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR);
+ }
+
+ SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassWorkgroup,
+ ctx->shared_block_arr_type[idx]);
+ SpvId zero = emit_uint_const(ctx, 32, 0);
+
+ return spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+ ctx->shared_block_var[idx], &zero, 1);
+}
+
#define HANDLE_EMIT_BUILTIN(SLOT, BUILTIN) \
case VARYING_SLOT_##SLOT: \
spirv_builder_emit_builtin(&ctx->builder, var_id, SpvBuiltIn##BUILTIN); \
@@ -505,7 +798,6 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var)
else if (ctx->stage == MESA_SHADER_FRAGMENT) {
switch (var->data.location) {
HANDLE_EMIT_BUILTIN(POS, FragCoord);
- HANDLE_EMIT_BUILTIN(PNTC, PointCoord);
HANDLE_EMIT_BUILTIN(LAYER, Layer);
HANDLE_EMIT_BUILTIN(PRIMITIVE_ID, PrimitiveId);
HANDLE_EMIT_BUILTIN(CLIP_DIST0, ClipDistance);
@@ -521,6 +813,7 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var)
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationCentroid);
else if (var->data.sample)
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationSample);
+ emit_interpolation(ctx, var_id, var->data.interpolation);
} else if (ctx->stage < MESA_SHADER_FRAGMENT) {
switch (var->data.location) {
HANDLE_EMIT_BUILTIN(POS, Position);
@@ -550,8 +843,6 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var)
if (var->data.patch)
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch);
- emit_interpolation(ctx, var_id, var->data.interpolation);
-
_mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id);
assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
@@ -574,6 +865,11 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
if (var->name)
spirv_builder_emit_name(&ctx->builder, var_id, var->name);
+ if (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW) {
+ spirv_builder_emit_decoration(&ctx->builder, var_id,
+ SpvDecorationRelaxedPrecision);
+ }
+
if (ctx->stage != MESA_SHADER_FRAGMENT) {
switch (var->data.location) {
HANDLE_EMIT_BUILTIN(POS, Position);
@@ -587,16 +883,12 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
HANDLE_EMIT_BUILTIN(TESS_LEVEL_INNER, TessLevelInner);
default:
- spirv_builder_emit_location(&ctx->builder, var_id,
- var->data.driver_location);
- }
- /* tcs can't do xfb */
- if (ctx->stage != MESA_SHADER_TESS_CTRL) {
- unsigned idx = var->data.location << 2 | var->data.location_frac;
- ctx->outputs[idx] = var_id;
- ctx->so_output_gl_types[idx] = var->type;
- ctx->so_output_types[idx] = var_type;
+ /* non-xfb psiz output will have location -1 */
+ if (var->data.location >= 0)
+ spirv_builder_emit_location(&ctx->builder, var_id,
+ var->data.driver_location);
}
+ emit_interpolation(ctx, var_id, var->data.interpolation);
} else {
if (var->data.location >= FRAG_RESULT_DATA0) {
spirv_builder_emit_location(&ctx->builder, var_id,
@@ -633,12 +925,10 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
spirv_builder_emit_component(&ctx->builder, var_id,
var->data.location_frac);
- emit_interpolation(ctx, var_id, var->data.interpolation);
-
if (var->data.patch)
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch);
- if (var->data.explicit_xfb_buffer) {
+ if (var->data.explicit_xfb_buffer && ctx->nir->xfb_info) {
spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset);
spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer);
spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride);
@@ -652,6 +942,41 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
}
+static void
+emit_shader_temp(struct ntv_context *ctx, struct nir_variable *var)
+{
+ SpvId var_type = get_glsl_type(ctx, var->type);
+
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassPrivate,
+ var_type);
+ SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
+ SpvStorageClassPrivate);
+ if (var->name)
+ spirv_builder_emit_name(&ctx->builder, var_id, var->name);
+
+ _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id);
+
+ assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
+ ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
+}
+
+static void
+emit_temp(struct ntv_context *ctx, struct nir_variable *var)
+{
+ SpvId var_type = get_glsl_type(ctx, var->type);
+
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassFunction,
+ var_type);
+ SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
+ SpvStorageClassFunction);
+ if (var->name)
+ spirv_builder_emit_name(&ctx->builder, var_id, var->name);
+
+ _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id);
+}
+
static SpvDim
type_to_dim(enum glsl_sampler_dim gdim, bool *is_ms)
{
@@ -674,6 +999,9 @@ type_to_dim(enum glsl_sampler_dim gdim, bool *is_ms)
case GLSL_SAMPLER_DIM_MS:
*is_ms = true;
return SpvDim2D;
+ case GLSL_SAMPLER_DIM_SUBPASS_MS:
+ *is_ms = true;
+ return SpvDimSubpassData;
case GLSL_SAMPLER_DIM_SUBPASS:
return SpvDimSubpassData;
default:
@@ -800,13 +1128,12 @@ get_image_format(struct ntv_context *ctx, enum pipe_format format)
return ret;
}
-static void
-emit_image(struct ntv_context *ctx, struct nir_variable *var)
+static SpvId
+get_bare_image_type(struct ntv_context *ctx, struct nir_variable *var, bool is_sampler)
{
const struct glsl_type *type = glsl_without_array(var->type);
bool is_ms;
- bool is_sampler = glsl_type_is_sampler(type);
if (var->data.fb_fetch_output) {
spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInputAttachment);
@@ -818,29 +1145,64 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var)
}
SpvDim dimension = type_to_dim(glsl_get_sampler_dim(type), &is_ms);
+ if (dimension == SpvDim1D) {
+ if (is_sampler)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampled1D);
+ else
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImage1D);
+ }
+ if (dimension == SpvDimBuffer) {
+ if (is_sampler)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampledBuffer);
+ else
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageBuffer);
+ }
+
bool arrayed = glsl_sampler_type_is_array(type);
if (dimension == SpvDimCube && arrayed)
spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageCubeArray);
+ if (arrayed && !is_sampler && is_ms)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageMSArray);
SpvId result_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type));
- SpvId image_type = spirv_builder_type_image(&ctx->builder, result_type,
+ return spirv_builder_type_image(&ctx->builder, result_type,
dimension, false,
arrayed,
is_ms, is_sampler ? 1 : 2,
get_image_format(ctx, var->data.image.format));
+}
+
+static SpvId
+get_image_type(struct ntv_context *ctx, struct nir_variable *var,
+ bool is_sampler, bool is_buffer)
+{
+ SpvId image_type = get_bare_image_type(ctx, var, is_sampler);
+ return is_sampler && ctx->stage != MESA_SHADER_KERNEL && !is_buffer ?
+ spirv_builder_type_sampled_image(&ctx->builder, image_type) :
+ image_type;
+}
- SpvId var_type = is_sampler ? spirv_builder_type_sampled_image(&ctx->builder, image_type) : image_type;
+static SpvId
+emit_image(struct ntv_context *ctx, struct nir_variable *var, SpvId image_type)
+{
+ if (var->data.bindless)
+ return 0;
+ const struct glsl_type *type = glsl_without_array(var->type);
+
+ bool is_sampler = glsl_type_is_sampler(type);
+ bool is_buffer = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF;
+ SpvId var_type = is_sampler && ctx->stage != MESA_SHADER_KERNEL && !is_buffer ?
+ spirv_builder_type_sampled_image(&ctx->builder, image_type) : image_type;
+
+ bool mediump = (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW);
int index = var->data.driver_location;
- assert(!is_sampler || (!(ctx->samplers_used & (1 << index))));
- assert(!is_sampler || !ctx->sampler_types[index]);
- assert(is_sampler || !ctx->image_types[index]);
+ assert(!find_image_type(ctx, var));
if (glsl_type_is_array(var->type)) {
var_type = spirv_builder_type_array(&ctx->builder, var_type,
emit_uint_const(ctx, 32, glsl_get_aoa_size(var->type)));
spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(void*));
- ctx->sampler_array_sizes[index] = glsl_get_aoa_size(var->type);
}
SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassUniformConstant,
@@ -849,25 +1211,32 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var)
SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
SpvStorageClassUniformConstant);
+ if (mediump) {
+ spirv_builder_emit_decoration(&ctx->builder, var_id,
+ SpvDecorationRelaxedPrecision);
+ }
+
if (var->name)
spirv_builder_emit_name(&ctx->builder, var_id, var->name);
if (var->data.fb_fetch_output)
spirv_builder_emit_input_attachment_index(&ctx->builder, var_id, var->data.index);
+ _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id);
if (is_sampler) {
- ctx->sampler_types[index] = image_type;
- ctx->samplers[index] = var_id;
- ctx->samplers_used |= 1 << index;
+ if (var->data.descriptor_set == ctx->bindless_set_idx) {
+ assert(!ctx->bindless_samplers[index]);
+ ctx->bindless_samplers[index] = var_id;
+ } else {
+ assert(!ctx->samplers[index]);
+ ctx->samplers[index] = var_id;
+ }
} else {
- ctx->image_types[index] = image_type;
+ assert(!ctx->images[index]);
ctx->images[index] = var_id;
- _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id);
- uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t));
- *key = var_id;
- _mesa_hash_table_insert(ctx->image_vars, key, var);
emit_access_decorations(ctx, var, var_id);
}
+ _mesa_hash_table_insert(&ctx->image_types, var, (void *)(intptr_t)image_type);
if (ctx->spirv_1_4_interfaces) {
assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
@@ -875,6 +1244,30 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var)
spirv_builder_emit_descriptor_set(&ctx->builder, var_id, var->data.descriptor_set);
spirv_builder_emit_binding(&ctx->builder, var_id, var->data.binding);
+ return var_id;
+}
+
+static void
+emit_sampler(struct ntv_context *ctx, unsigned sampler_index, unsigned desc_set)
+{
+ SpvId type = spirv_builder_type_sampler(&ctx->builder);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassUniformConstant,
+ type);
+
+ SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
+ SpvStorageClassUniformConstant);
+ char buf[128];
+ snprintf(buf, sizeof(buf), "sampler_%u", sampler_index);
+ spirv_builder_emit_name(&ctx->builder, var_id, buf);
+ spirv_builder_emit_descriptor_set(&ctx->builder, var_id, desc_set);
+ spirv_builder_emit_binding(&ctx->builder, var_id, sampler_index);
+ ctx->cl_samplers[sampler_index] = var_id;
+ if (ctx->spirv_1_4_interfaces) {
+ assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
+ ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
+ }
+
}
static SpvId
@@ -887,19 +1280,22 @@ get_sized_uint_array_type(struct ntv_context *ctx, unsigned array_size, unsigned
return array_type;
}
+/* get array<struct(array_type <--this one)> */
static SpvId
-get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bitsize)
+get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var)
{
+ struct hash_entry *he = _mesa_hash_table_search(ctx->bo_array_types, var);
+ if (he)
+ return (SpvId)(uintptr_t)he->data;
+ unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0)));
assert(bitsize);
SpvId array_type;
- const struct glsl_type *type = var->type;
- if (!glsl_type_is_unsized_array(type)) {
- type = glsl_get_struct_field(var->interface_type, 0);
- if (!glsl_type_is_unsized_array(type)) {
- uint32_t array_size = glsl_get_length(type) * (bitsize / 4);
- assert(array_size);
- return get_sized_uint_array_type(ctx, array_size, bitsize);
- }
+ const struct glsl_type *type = glsl_without_array(var->type);
+ const struct glsl_type *first_type = glsl_get_struct_field(type, 0);
+ if (!glsl_type_is_unsized_array(first_type)) {
+ uint32_t array_size = glsl_get_length(first_type);
+ assert(array_size);
+ return get_sized_uint_array_type(ctx, array_size, bitsize);
}
SpvId uint_type = spirv_builder_type_uint(&ctx->builder, bitsize);
array_type = spirv_builder_type_runtime_array(&ctx->builder, uint_type);
@@ -907,19 +1303,25 @@ get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bi
return array_type;
}
+/* get array<struct(array_type) <--this one> */
static SpvId
-get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bitsize)
+get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var)
{
- SpvId array_type = get_bo_array_type(ctx, var, bitsize);
+ struct hash_entry *he = _mesa_hash_table_search(ctx->bo_struct_types, var);
+ if (he)
+ return (SpvId)(uintptr_t)he->data;
+ const struct glsl_type *bare_type = glsl_without_array(var->type);
+ unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(bare_type, 0)));
+ SpvId array_type = get_bo_array_type(ctx, var);
+ _mesa_hash_table_insert(ctx->bo_array_types, var, (void *)(uintptr_t)array_type);
bool ssbo = var->data.mode == nir_var_mem_ssbo;
// wrap UBO-array in a struct
SpvId runtime_array = 0;
- if (ssbo && glsl_get_length(var->interface_type) > 1) {
- const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, glsl_get_length(var->interface_type) - 1);
+ if (ssbo && glsl_get_length(bare_type) > 1) {
+ const struct glsl_type *last_member = glsl_get_struct_field(bare_type, glsl_get_length(bare_type) - 1);
if (glsl_type_is_unsized_array(last_member)) {
- bool is_64bit = glsl_type_is_64bit(glsl_without_array(last_member));
- runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, is_64bit ? 64 : bitsize, 1));
+ runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, bitsize, 1));
spirv_builder_emit_array_stride(&ctx->builder, runtime_array, glsl_get_explicit_stride(last_member));
}
}
@@ -934,36 +1336,39 @@ get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var, unsigned b
spirv_builder_emit_decoration(&ctx->builder, struct_type,
SpvDecorationBlock);
spirv_builder_emit_member_offset(&ctx->builder, struct_type, 0, 0);
- if (runtime_array) {
- spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1,
- glsl_get_struct_field_offset(var->interface_type,
- glsl_get_length(var->interface_type) - 1));
- }
+ if (runtime_array)
+ spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1, 0);
- return spirv_builder_type_pointer(&ctx->builder,
- ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform,
- struct_type);
+ return struct_type;
}
static void
-emit_bo(struct ntv_context *ctx, struct nir_variable *var, unsigned force_bitsize)
+emit_bo(struct ntv_context *ctx, struct nir_variable *var, bool aliased)
{
+ unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0)));
bool ssbo = var->data.mode == nir_var_mem_ssbo;
- unsigned bitsize = force_bitsize ? force_bitsize : 32;
- unsigned idx = bitsize >> 4;
- assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
-
- SpvId pointer_type = get_bo_struct_type(ctx, var, bitsize);
-
+ SpvId struct_type = get_bo_struct_type(ctx, var);
+ _mesa_hash_table_insert(ctx->bo_struct_types, var, (void *)(uintptr_t)struct_type);
+ SpvId array_length = emit_uint_const(ctx, 32, glsl_get_length(var->type));
+ SpvId array_type = spirv_builder_type_array(&ctx->builder, struct_type, array_length);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform,
+ array_type);
SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform);
if (var->name)
spirv_builder_emit_name(&ctx->builder, var_id, var->name);
+ if (aliased)
+ spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationAliased);
+
+ unsigned idx = bitsize >> 4;
+ assert(idx < ARRAY_SIZE(ctx->ssbos));
if (ssbo) {
- assert(!ctx->ssbos[var->data.driver_location][idx]);
- ctx->ssbos[var->data.driver_location][idx] = var_id;
- ctx->ssbo_vars[var->data.driver_location] = var;
+ assert(!ctx->ssbos[idx]);
+ ctx->ssbos[idx] = var_id;
+ if (bitsize == 32)
+ ctx->ssbo_vars = var;
} else {
assert(!ctx->ubos[var->data.driver_location][idx]);
ctx->ubos[var->data.driver_location][idx] = var_id;
@@ -973,79 +1378,60 @@ emit_bo(struct ntv_context *ctx, struct nir_variable *var, unsigned force_bitsiz
assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
}
+ _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id);
spirv_builder_emit_descriptor_set(&ctx->builder, var_id, var->data.descriptor_set);
spirv_builder_emit_binding(&ctx->builder, var_id, var->data.binding);
}
-static void
-emit_uniform(struct ntv_context *ctx, struct nir_variable *var)
-{
- if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
- emit_bo(ctx, var, 0);
- else {
- assert(var->data.mode == nir_var_uniform);
- const struct glsl_type *type = glsl_without_array(var->type);
- if (glsl_type_is_sampler(type) || glsl_type_is_image(type))
- emit_image(ctx, var);
- }
-}
-
static SpvId
get_vec_from_bit_size(struct ntv_context *ctx, uint32_t bit_size, uint32_t num_components)
{
if (bit_size == 1)
return get_bvec_type(ctx, num_components);
- if (bit_size == 8 || bit_size == 16 || bit_size == 32 || bit_size == 64)
- return get_uvec_type(ctx, bit_size, num_components);
- unreachable("unhandled register bit size");
- return 0;
+ return get_uvec_type(ctx, bit_size, num_components);
}
static SpvId
-get_src_ssa(struct ntv_context *ctx, const nir_ssa_def *ssa)
+get_src_ssa(struct ntv_context *ctx, const nir_def *ssa, nir_alu_type *atype)
{
assert(ssa->index < ctx->num_defs);
assert(ctx->defs[ssa->index] != 0);
+ *atype = ctx->def_types[ssa->index];
return ctx->defs[ssa->index];
}
-static SpvId
-get_var_from_reg(struct ntv_context *ctx, nir_register *reg)
+static void
+init_reg(struct ntv_context *ctx, nir_intrinsic_instr *decl, nir_alu_type atype)
{
- assert(reg->index < ctx->num_regs);
- assert(ctx->regs[reg->index] != 0);
- return ctx->regs[reg->index];
-}
+ unsigned index = decl->def.index;
+ unsigned num_components = nir_intrinsic_num_components(decl);
+ unsigned bit_size = nir_intrinsic_bit_size(decl);
-static SpvId
-get_src_reg(struct ntv_context *ctx, const nir_reg_src *reg)
-{
- assert(reg->reg);
- assert(!reg->indirect);
- assert(!reg->base_offset);
+ if (ctx->defs[index])
+ return;
+
+ SpvId type = get_alu_type(ctx, atype, num_components, bit_size);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassFunction,
+ type);
+ SpvId var = spirv_builder_emit_var(&ctx->builder, pointer_type,
+ SpvStorageClassFunction);
- SpvId var = get_var_from_reg(ctx, reg->reg);
- SpvId type = get_vec_from_bit_size(ctx, reg->reg->bit_size, reg->reg->num_components);
- return spirv_builder_emit_load(&ctx->builder, type, var);
+ ctx->defs[index] = var;
+ ctx->def_types[index] = nir_alu_type_get_base_type(atype);
}
static SpvId
-get_src(struct ntv_context *ctx, nir_src *src)
+get_src(struct ntv_context *ctx, nir_src *src, nir_alu_type *atype)
{
- if (src->is_ssa)
- return get_src_ssa(ctx, src->ssa);
- else
- return get_src_reg(ctx, &src->reg);
+ return get_src_ssa(ctx, src->ssa, atype);
}
static SpvId
-get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
+get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, nir_alu_type *atype)
{
- assert(!alu->src[src].negate);
- assert(!alu->src[src].abs);
-
- SpvId def = get_src(ctx, &alu->src[src].src);
+ SpvId def = get_src(ctx, &alu->src[src].src, atype);
unsigned used_channels = 0;
bool need_swizzle = false;
@@ -1068,10 +1454,7 @@ get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
return def;
int bit_size = nir_src_bit_size(alu->src[src].src);
- assert(bit_size == 1 || bit_size == 8 || bit_size == 16 || bit_size == 32 || bit_size == 64);
-
- SpvId raw_type = bit_size == 1 ? spirv_builder_type_bool(&ctx->builder) :
- spirv_builder_type_uint(&ctx->builder, bit_size);
+ SpvId raw_type = get_alu_type(ctx, *atype, 1, bit_size);
if (used_channels == 1) {
uint32_t indices[] = { alu->src[src].swizzle[0] };
@@ -1111,14 +1494,6 @@ get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
}
}
-static void
-store_ssa_def(struct ntv_context *ctx, nir_ssa_def *ssa, SpvId result)
-{
- assert(result != 0);
- assert(ssa->index < ctx->num_defs);
- ctx->defs[ssa->index] = result;
-}
-
static SpvId
emit_select(struct ntv_context *ctx, SpvId type, SpvId cond,
SpvId if_true, SpvId if_false)
@@ -1127,14 +1502,6 @@ emit_select(struct ntv_context *ctx, SpvId type, SpvId cond,
}
static SpvId
-uvec_to_bvec(struct ntv_context *ctx, SpvId value, unsigned num_components)
-{
- SpvId type = get_bvec_type(ctx, num_components);
- SpvId zero = get_uvec_constant(ctx, 32, num_components, 0);
- return emit_binop(ctx, SpvOpINotEqual, type, value, zero);
-}
-
-static SpvId
emit_bitcast(struct ntv_context *ctx, SpvId type, SpvId value)
{
return emit_unop(ctx, SpvOpBitcast, type, value);
@@ -1164,50 +1531,22 @@ bitcast_to_fvec(struct ntv_context *ctx, SpvId value, unsigned bit_size,
return emit_bitcast(ctx, type, value);
}
-static void
-store_reg_def(struct ntv_context *ctx, nir_reg_dest *reg, SpvId result)
+static SpvId
+cast_src_to_type(struct ntv_context *ctx, SpvId value, nir_src src, nir_alu_type atype)
{
- SpvId var = get_var_from_reg(ctx, reg->reg);
- assert(var);
- spirv_builder_emit_store(&ctx->builder, var, result);
+ atype = nir_alu_type_get_base_type(atype);
+ unsigned num_components = nir_src_num_components(src);
+ unsigned bit_size = nir_src_bit_size(src);
+ return emit_bitcast(ctx, get_alu_type(ctx, atype, num_components, bit_size), value);
}
static void
-store_dest_raw(struct ntv_context *ctx, nir_dest *dest, SpvId result)
+store_def(struct ntv_context *ctx, unsigned def_index, SpvId result, nir_alu_type type)
{
- if (dest->is_ssa)
- store_ssa_def(ctx, &dest->ssa, result);
- else
- store_reg_def(ctx, &dest->reg, result);
-}
-
-static SpvId
-store_dest(struct ntv_context *ctx, nir_dest *dest, SpvId result, nir_alu_type type)
-{
- unsigned num_components = nir_dest_num_components(*dest);
- unsigned bit_size = nir_dest_bit_size(*dest);
-
- if (bit_size != 1) {
- switch (nir_alu_type_get_base_type(type)) {
- case nir_type_bool:
- assert("bool should have bit-size 1");
- break;
-
- case nir_type_uint:
- break; /* nothing to do! */
-
- case nir_type_int:
- case nir_type_float:
- result = bitcast_to_uvec(ctx, result, bit_size, num_components);
- break;
-
- default:
- unreachable("unsupported nir_alu_type");
- }
- }
-
- store_dest_raw(ctx, dest, result);
- return result;
+ assert(result != 0);
+ assert(def_index < ctx->num_defs);
+ ctx->def_types[def_index] = nir_alu_type_get_base_type(type);
+ ctx->defs[def_index] = result;
}
static SpvId
@@ -1216,178 +1555,20 @@ emit_unop(struct ntv_context *ctx, SpvOp op, SpvId type, SpvId src)
return spirv_builder_emit_unop(&ctx->builder, op, type, src);
}
-/* return the intended xfb output vec type based on base type and vector size */
-static SpvId
-get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_components)
-{
- const struct glsl_type *out_type = NULL;
- /* index is based on component, so we might have to go back a few slots to get to the base */
- while (!out_type)
- out_type = ctx->so_output_gl_types[register_index--];
- enum glsl_base_type base_type = glsl_get_base_type(out_type);
- if (base_type == GLSL_TYPE_ARRAY)
- base_type = glsl_get_base_type(glsl_without_array(out_type));
-
- switch (base_type) {
- case GLSL_TYPE_BOOL:
- return get_bvec_type(ctx, num_components);
-
- case GLSL_TYPE_FLOAT:
- return get_fvec_type(ctx, 32, num_components);
-
- case GLSL_TYPE_INT:
- return get_ivec_type(ctx, 32, num_components);
-
- case GLSL_TYPE_UINT:
- return get_uvec_type(ctx, 32, num_components);
-
- default:
- break;
- }
- unreachable("unknown type");
- return 0;
-}
-
-/* for streamout create new outputs, as streamout can be done on individual components,
- from complete outputs, so we just can't use the created packed outputs */
-static void
-emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info,
- unsigned first_so)
-{
- unsigned output = 0;
- for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) {
- struct pipe_stream_output so_output = so_info->so_info.output[i];
- unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component;
- SpvId out_type = get_output_type(ctx, slot, so_output.num_components);
- SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
- SpvStorageClassOutput,
- out_type);
- SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
- SpvStorageClassOutput);
- char name[10];
-
- snprintf(name, 10, "xfb%d", output);
- spirv_builder_emit_name(&ctx->builder, var_id, name);
- spirv_builder_emit_offset(&ctx->builder, var_id, (so_output.dst_offset * 4));
- spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, so_output.output_buffer);
- spirv_builder_emit_xfb_stride(&ctx->builder, var_id, so_info->so_info.stride[so_output.output_buffer] * 4);
- if (so_output.stream)
- spirv_builder_emit_stream(&ctx->builder, var_id, so_output.stream);
-
- /* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn,
- * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted
- * outputs.
- */
- uint32_t location = first_so + i;
- assert(location < VARYING_SLOT_VAR0);
- spirv_builder_emit_location(&ctx->builder, var_id, location);
-
- /* note: gl_ClipDistance[4] can be the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here,
- * so this is still the 0 component
- */
- if (so_output.start_component)
- spirv_builder_emit_component(&ctx->builder, var_id, so_output.start_component);
-
- uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t));
- *key = (uint32_t)so_output.register_index << 2 | so_output.start_component;
- _mesa_hash_table_insert(ctx->so_outputs, key, (void *)(intptr_t)var_id);
-
- assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
- ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
- output += align(so_output.num_components, 4) / 4;
- }
-}
-
-static void
-emit_so_outputs(struct ntv_context *ctx,
- const struct zink_so_info *so_info)
-{
- for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) {
- uint32_t components[NIR_MAX_VEC_COMPONENTS];
- unsigned slot = so_info->so_info_slots[i];
- struct pipe_stream_output so_output = so_info->so_info.output[i];
- uint32_t so_key = (uint32_t) so_output.register_index << 2 | so_output.start_component;
- uint32_t location = (uint32_t) slot << 2 | so_output.start_component;
- struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key);
- assert(he);
- SpvId so_output_var_id = (SpvId)(intptr_t)he->data;
-
- SpvId type = get_output_type(ctx, location, so_output.num_components);
- SpvId output = 0;
- /* index is based on component, so we might have to go back a few slots to get to the base */
- UNUSED uint32_t orig_location = location;
- while (!output)
- output = ctx->outputs[location--];
- location++;
- SpvId output_type = ctx->so_output_types[location];
- const struct glsl_type *out_type = ctx->so_output_gl_types[location];
-
- SpvId src = spirv_builder_emit_load(&ctx->builder, output_type, output);
-
- SpvId result;
-
- for (unsigned c = 0; c < so_output.num_components; c++) {
- components[c] = so_output.start_component + c;
- /* this is the second half of a 2 * vec4 array */
- if (slot == VARYING_SLOT_CLIP_DIST1)
- components[c] += 4;
- }
-
- /* if we're emitting a scalar or the type we're emitting matches the output's original type and we're
- * emitting the same number of components, then we can skip any sort of conversion here
- */
- if (glsl_type_is_scalar(out_type) || (type == output_type && glsl_get_length(out_type) == so_output.num_components))
- result = src;
- else {
- /* OpCompositeExtract can only extract scalars for our use here */
- if (so_output.num_components == 1) {
- result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, components, so_output.num_components);
- } else if (glsl_type_is_vector(out_type)) {
- /* OpVectorShuffle can select vector members into a differently-sized vector */
- result = spirv_builder_emit_vector_shuffle(&ctx->builder, type,
- src, src,
- components, so_output.num_components);
- result = emit_bitcast(ctx, type, result);
- } else {
- /* for arrays, we need to manually extract each desired member
- * and re-pack them into the desired output type
- */
- for (unsigned c = 0; c < so_output.num_components; c++) {
- uint32_t member[2];
- unsigned member_idx = 0;
- if (glsl_type_is_matrix(out_type)) {
- member_idx = 1;
- member[0] = so_output.register_index;
- }
- member[member_idx] = so_output.start_component + c;
- SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type)));
-
- if (slot == VARYING_SLOT_CLIP_DIST1)
- member[member_idx] += 4;
- components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1 + member_idx);
- }
- result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
- }
- }
-
- spirv_builder_emit_store(&ctx->builder, so_output_var_id, result);
- }
-}
-
static SpvId
emit_atomic(struct ntv_context *ctx, SpvId op, SpvId type, SpvId src0, SpvId src1, SpvId src2)
{
if (op == SpvOpAtomicLoad)
- return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup),
+ return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice),
emit_uint_const(ctx, 32, 0));
if (op == SpvOpAtomicCompareExchange)
- return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup),
+ return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice),
emit_uint_const(ctx, 32, 0),
emit_uint_const(ctx, 32, 0),
/* these params are intentionally swapped */
src2, src1);
- return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup),
+ return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice),
emit_uint_const(ctx, 32, 0), src1);
}
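/* Sketch for reference (not from the patch): the SPIR-V operand layout this
 * helper assumes, with scope and semantics passed as 32-bit constant ids:
 *
 *    OpAtomicLoad            %type %ptr %scope %semantics
 *    OpAtomicIAdd etc.       %type %ptr %scope %semantics %value
 *    OpAtomicCompareExchange %type %ptr %scope %eq-sem %neq-sem %value %comparator
 *
 * SpvScopeDevice is the appropriate scope here because these atomics target
 * SSBO and image memory, which is shared across the whole device, whereas
 * SpvScopeWorkgroup only covers a single workgroup.
 */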
@@ -1453,26 +1634,6 @@ get_fvec_constant(struct ntv_context *ctx, unsigned bit_size,
}
static SpvId
-get_uvec_constant(struct ntv_context *ctx, unsigned bit_size,
- unsigned num_components, uint64_t value)
-{
- assert(bit_size == 32 || bit_size == 64);
-
- SpvId result = emit_uint_const(ctx, bit_size, value);
- if (num_components == 1)
- return result;
-
- assert(num_components > 1);
- SpvId components[NIR_MAX_VEC_COMPONENTS];
- for (int i = 0; i < num_components; i++)
- components[i] = result;
-
- SpvId type = get_uvec_type(ctx, bit_size, num_components);
- return spirv_builder_const_composite(&ctx->builder, type, components,
- num_components);
-}
-
-static SpvId
get_ivec_constant(struct ntv_context *ctx, unsigned bit_size,
unsigned num_components, int64_t value)
{
@@ -1498,36 +1659,36 @@ alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
if (nir_op_infos[instr->op].input_sizes[src] > 0)
return nir_op_infos[instr->op].input_sizes[src];
- if (instr->dest.dest.is_ssa)
- return instr->dest.dest.ssa.num_components;
- else
- return instr->dest.dest.reg.reg->num_components;
+ return instr->def.num_components;
}
static SpvId
-get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
+get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, SpvId *raw_value, nir_alu_type *atype)
{
- SpvId raw_value = get_alu_src_raw(ctx, alu, src);
+ *raw_value = get_alu_src_raw(ctx, alu, src, atype);
unsigned num_components = alu_instr_src_components(alu, src);
unsigned bit_size = nir_src_bit_size(alu->src[src].src);
- nir_alu_type type = nir_op_infos[alu->op].input_types[src];
+ nir_alu_type type = alu_op_is_typeless(alu->op) ? *atype : nir_op_infos[alu->op].input_types[src];
+ type = nir_alu_type_get_base_type(type);
+ if (type == *atype)
+ return *raw_value;
if (bit_size == 1)
- return raw_value;
+ return *raw_value;
else {
switch (nir_alu_type_get_base_type(type)) {
case nir_type_bool:
unreachable("bool should have bit-size 1");
case nir_type_int:
- return bitcast_to_ivec(ctx, raw_value, bit_size, num_components);
+ return bitcast_to_ivec(ctx, *raw_value, bit_size, num_components);
case nir_type_uint:
- return raw_value;
+ return bitcast_to_uvec(ctx, *raw_value, bit_size, num_components);
case nir_type_float:
- return bitcast_to_fvec(ctx, raw_value, bit_size, num_components);
+ return bitcast_to_fvec(ctx, *raw_value, bit_size, num_components);
default:
unreachable("unknown nir_alu_type");
@@ -1535,39 +1696,16 @@ get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
}
}
-static SpvId
-store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result)
+static void
+store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result, nir_alu_type atype)
{
- assert(!alu->dest.saturate);
- return store_dest(ctx, &alu->dest.dest, result,
- nir_op_infos[alu->op].output_type);
+ store_def(ctx, alu->def.index, result, atype);
}
static SpvId
-get_dest_type(struct ntv_context *ctx, nir_dest *dest, nir_alu_type type)
+get_def_type(struct ntv_context *ctx, nir_def *def, nir_alu_type type)
{
- unsigned num_components = nir_dest_num_components(*dest);
- unsigned bit_size = nir_dest_bit_size(*dest);
-
- if (bit_size == 1)
- return get_bvec_type(ctx, num_components);
-
- switch (nir_alu_type_get_base_type(type)) {
- case nir_type_bool:
- unreachable("bool should have bit-size 1");
-
- case nir_type_int:
- return get_ivec_type(ctx, bit_size, num_components);
-
- case nir_type_uint:
- return get_uvec_type(ctx, bit_size, num_components);
-
- case nir_type_float:
- return get_fvec_type(ctx, bit_size, num_components);
-
- default:
- unreachable("unsupported nir_alu_type");
- }
+ return get_alu_type(ctx, type, def->num_components, def->bit_size);
}
static bool
@@ -1588,14 +1726,66 @@ needs_derivative_control(nir_alu_instr *alu)
static void
emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
{
+ bool is_bcsel = alu->op == nir_op_bcsel;
+ nir_alu_type stype[NIR_MAX_VEC_COMPONENTS] = {0};
SpvId src[NIR_MAX_VEC_COMPONENTS];
+ SpvId raw_src[NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
- src[i] = get_alu_src(ctx, alu, i);
+ src[i] = get_alu_src(ctx, alu, i, &raw_src[i], &stype[i]);
+
+ nir_alu_type typeless_type = stype[is_bcsel];
+ if (nir_op_infos[alu->op].num_inputs > 1 &&
+ alu_op_is_typeless(alu->op) &&
+ nir_src_bit_size(alu->src[is_bcsel].src) != 1) {
+ unsigned uint_count = 0;
+ unsigned int_count = 0;
+ unsigned float_count = 0;
+ for (unsigned i = is_bcsel; i < nir_op_infos[alu->op].num_inputs; i++) {
+ if (stype[i] == nir_type_bool)
+ break;
+ switch (stype[i]) {
+ case nir_type_uint:
+ uint_count++;
+ break;
+ case nir_type_int:
+ int_count++;
+ break;
+ case nir_type_float:
+ float_count++;
+ break;
+ default:
+ unreachable("this shouldn't happen");
+ }
+ }
+ if (uint_count > int_count && uint_count > float_count)
+ typeless_type = nir_type_uint;
+ else if (int_count > uint_count && int_count > float_count)
+ typeless_type = nir_type_int;
+ else if (float_count > uint_count && float_count > int_count)
+ typeless_type = nir_type_float;
+ else if (float_count == uint_count || uint_count == int_count)
+ typeless_type = nir_type_uint;
+ else if (float_count == int_count)
+ typeless_type = nir_type_float;
+ else
+ typeless_type = nir_type_uint;
+ assert(typeless_type != nir_type_bool);
+ for (unsigned i = is_bcsel; i < nir_op_infos[alu->op].num_inputs; i++) {
+ unsigned num_components = alu_instr_src_components(alu, i);
+ unsigned bit_size = nir_src_bit_size(alu->src[i].src);
+ SpvId type = get_alu_type(ctx, typeless_type, num_components, bit_size);
+ if (stype[i] != typeless_type) {
+ src[i] = emit_bitcast(ctx, type, src[i]);
+ }
+ }
+ }
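/* Worked example of the rule above (illustrative only): for
 *
 *    bcsel(cond, x, y)   with x typed float32 and y typed uint32
 *
 * the boolean condition is skipped because the scan starts at is_bcsel == 1,
 * so float_count == uint_count == 1.  None of the strict-majority branches
 * hit, the float/uint tie falls back to nir_type_uint, and only x is bitcast
 * to a uvec before the OpSelect is emitted.
 */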
- SpvId dest_type = get_dest_type(ctx, &alu->dest.dest,
- nir_op_infos[alu->op].output_type);
- unsigned bit_size = nir_dest_bit_size(alu->dest.dest);
- unsigned num_components = nir_dest_num_components(alu->dest.dest);
+ unsigned bit_size = alu->def.bit_size;
+ unsigned num_components = alu->def.num_components;
+ nir_alu_type atype = bit_size == 1 ?
+ nir_type_bool :
+ (alu_op_is_typeless(alu->op) ? typeless_type : nir_op_infos[alu->op].output_type);
+ SpvId dest_type = get_def_type(ctx, &alu->def, atype);
if (needs_derivative_control(alu))
spirv_builder_emit_cap(&ctx->builder, SpvCapabilityDerivativeControl);
@@ -1621,6 +1811,8 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
UNOP(nir_op_fddy, SpvOpDPdy)
UNOP(nir_op_fddy_coarse, SpvOpDPdyCoarse)
UNOP(nir_op_fddy_fine, SpvOpDPdyFine)
+ UNOP(nir_op_f2i8, SpvOpConvertFToS)
+ UNOP(nir_op_f2u8, SpvOpConvertFToU)
UNOP(nir_op_f2i16, SpvOpConvertFToS)
UNOP(nir_op_f2u16, SpvOpConvertFToU)
UNOP(nir_op_f2i32, SpvOpConvertFToS)
@@ -1629,6 +1821,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
UNOP(nir_op_i2f32, SpvOpConvertSToF)
UNOP(nir_op_u2f16, SpvOpConvertUToF)
UNOP(nir_op_u2f32, SpvOpConvertUToF)
+ UNOP(nir_op_i2i8, SpvOpSConvert)
UNOP(nir_op_i2i16, SpvOpSConvert)
UNOP(nir_op_i2i32, SpvOpSConvert)
UNOP(nir_op_u2u8, SpvOpUConvert)
@@ -1647,6 +1840,12 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
UNOP(nir_op_bit_count, SpvOpBitCount)
#undef UNOP
+ case nir_op_f2f16_rtz:
+ assert(nir_op_infos[alu->op].num_inputs == 1);
+ result = emit_unop(ctx, SpvOpFConvert, dest_type, src[0]);
+ spirv_builder_emit_rounding_mode(&ctx->builder, result, SpvFPRoundingModeRTZ);
+ break;
+
case nir_op_inot:
if (bit_size == 1)
result = emit_unop(ctx, SpvOpLogicalNot, dest_type, src[0]);
@@ -1654,6 +1853,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
result = emit_unop(ctx, SpvOpNot, dest_type, src[0]);
break;
+ case nir_op_b2i8:
case nir_op_b2i16:
case nir_op_b2i32:
case nir_op_b2i64:
@@ -1672,12 +1872,25 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
get_fvec_constant(ctx, bit_size, num_components, 0));
break;
+ case nir_op_uclz:
+ assert(nir_op_infos[alu->op].num_inputs == 1);
+ result = emit_unop(ctx, SpvOpUCountLeadingZerosINTEL, dest_type, src[0]);
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityIntegerFunctions2INTEL);
+ spirv_builder_emit_extension(&ctx->builder, "SPV_INTEL_shader_integer_functions2");
+ break;
#define BUILTIN_UNOP(nir_op, spirv_op) \
case nir_op: \
assert(nir_op_infos[alu->op].num_inputs == 1); \
result = emit_builtin_unop(ctx, spirv_op, dest_type, src[0]); \
break;
+#define BUILTIN_UNOPF(nir_op, spirv_op) \
+ case nir_op: \
+ assert(nir_op_infos[alu->op].num_inputs == 1); \
+ result = emit_builtin_unop(ctx, spirv_op, get_def_type(ctx, &alu->def, nir_type_float), src[0]); \
+ atype = nir_type_float; \
+ break;
+
BUILTIN_UNOP(nir_op_iabs, GLSLstd450SAbs)
BUILTIN_UNOP(nir_op_fabs, GLSLstd450FAbs)
BUILTIN_UNOP(nir_op_fsqrt, GLSLstd450Sqrt)
@@ -1696,31 +1909,27 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
BUILTIN_UNOP(nir_op_ufind_msb, GLSLstd450FindUMsb)
BUILTIN_UNOP(nir_op_find_lsb, GLSLstd450FindILsb)
BUILTIN_UNOP(nir_op_ifind_msb, GLSLstd450FindSMsb)
- BUILTIN_UNOP(nir_op_pack_half_2x16, GLSLstd450PackHalf2x16)
- BUILTIN_UNOP(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16)
- BUILTIN_UNOP(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32)
-#undef BUILTIN_UNOP
- case nir_op_frcp:
+ case nir_op_pack_half_2x16:
assert(nir_op_infos[alu->op].num_inputs == 1);
- result = emit_binop(ctx, SpvOpFDiv, dest_type,
- get_fvec_constant(ctx, bit_size, num_components, 1),
- src[0]);
+ result = emit_builtin_unop(ctx, GLSLstd450PackHalf2x16, get_def_type(ctx, &alu->def, nir_type_uint), src[0]);
break;
- case nir_op_f2b1:
+ case nir_op_unpack_64_2x32:
assert(nir_op_infos[alu->op].num_inputs == 1);
- result = emit_binop(ctx, SpvOpFOrdNotEqual, dest_type, src[0],
- get_fvec_constant(ctx,
- nir_src_bit_size(alu->src[0].src),
- num_components, 0));
+ result = emit_builtin_unop(ctx, GLSLstd450UnpackDouble2x32, get_def_type(ctx, &alu->def, nir_type_uint), src[0]);
break;
- case nir_op_i2b1:
+
+ BUILTIN_UNOPF(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16)
+ BUILTIN_UNOPF(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32)
+#undef BUILTIN_UNOP
+#undef BUILTIN_UNOPF
+
+ case nir_op_frcp:
assert(nir_op_infos[alu->op].num_inputs == 1);
- result = emit_binop(ctx, SpvOpINotEqual, dest_type, src[0],
- get_ivec_constant(ctx,
- nir_src_bit_size(alu->src[0].src),
- num_components, 0));
+ result = emit_binop(ctx, SpvOpFDiv, dest_type,
+ get_fvec_constant(ctx, bit_size, num_components, 1),
+ src[0]);
break;
@@ -1736,6 +1945,8 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
BINOP(nir_op_idiv, SpvOpSDiv)
BINOP(nir_op_udiv, SpvOpUDiv)
BINOP(nir_op_umod, SpvOpUMod)
+ BINOP(nir_op_imod, SpvOpSMod)
+ BINOP(nir_op_irem, SpvOpSRem)
BINOP(nir_op_fadd, SpvOpFAdd)
BINOP(nir_op_fsub, SpvOpFSub)
BINOP(nir_op_fmul, SpvOpFMul)
@@ -1747,12 +1958,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
BINOP(nir_op_uge, SpvOpUGreaterThanEqual)
BINOP(nir_op_flt, SpvOpFOrdLessThan)
BINOP(nir_op_fge, SpvOpFOrdGreaterThanEqual)
- BINOP(nir_op_feq, SpvOpFOrdEqual)
- BINOP(nir_op_fneu, SpvOpFUnordNotEqual)
- BINOP(nir_op_ishl, SpvOpShiftLeftLogical)
- BINOP(nir_op_ishr, SpvOpShiftRightArithmetic)
- BINOP(nir_op_ushr, SpvOpShiftRightLogical)
- BINOP(nir_op_ixor, SpvOpBitwiseXor)
BINOP(nir_op_frem, SpvOpFRem)
#undef BINOP
@@ -1769,8 +1974,26 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
BINOP_LOG(nir_op_ior, SpvOpBitwiseOr, SpvOpLogicalOr)
BINOP_LOG(nir_op_ieq, SpvOpIEqual, SpvOpLogicalEqual)
BINOP_LOG(nir_op_ine, SpvOpINotEqual, SpvOpLogicalNotEqual)
+ BINOP_LOG(nir_op_ixor, SpvOpBitwiseXor, SpvOpLogicalNotEqual)
#undef BINOP_LOG
+#define BINOP_SHIFT(nir_op, spirv_op) \
+ case nir_op: { \
+ assert(nir_op_infos[alu->op].num_inputs == 2); \
+ int shift_bit_size = nir_src_bit_size(alu->src[1].src); \
+ nir_alu_type shift_nir_type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[1]); \
+ SpvId shift_type = get_alu_type(ctx, shift_nir_type, num_components, shift_bit_size); \
+ SpvId shift_mask = get_ivec_constant(ctx, shift_bit_size, num_components, bit_size - 1); \
+ SpvId shift_count = emit_binop(ctx, SpvOpBitwiseAnd, shift_type, src[1], shift_mask); \
+ result = emit_binop(ctx, spirv_op, dest_type, src[0], shift_count); \
+ break; \
+ }
+
+ BINOP_SHIFT(nir_op_ishl, SpvOpShiftLeftLogical)
+ BINOP_SHIFT(nir_op_ishr, SpvOpShiftRightArithmetic)
+ BINOP_SHIFT(nir_op_ushr, SpvOpShiftRightLogical)
+#undef BINOP_SHIFT
+
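/* Scalar sketch of the semantics the mask above preserves (illustration only,
 * not part of the patch): NIR defines shift counts modulo the bit width,
 * whereas SPIR-V leaves shifts by counts >= the bit width undefined, hence
 * the `& (bit_size - 1)` applied to src[1] before the shift is emitted:
 *
 *    static uint32_t nir_style_ishl32(uint32_t base, uint32_t count)
 *    {
 *       return base << (count & 31);   // count taken modulo 32, per NIR
 *    }
 */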
#define BUILTIN_BINOP(nir_op, spirv_op) \
case nir_op: \
assert(nir_op_infos[alu->op].num_inputs == 2); \
@@ -1783,8 +2006,31 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
BUILTIN_BINOP(nir_op_imax, GLSLstd450SMax)
BUILTIN_BINOP(nir_op_umin, GLSLstd450UMin)
BUILTIN_BINOP(nir_op_umax, GLSLstd450UMax)
+ BUILTIN_BINOP(nir_op_ldexp, GLSLstd450Ldexp)
#undef BUILTIN_BINOP
+#define INTEL_BINOP(nir_op, spirv_op) \
+ case nir_op: \
+ assert(nir_op_infos[alu->op].num_inputs == 2); \
+ result = emit_binop(ctx, spirv_op, dest_type, src[0], src[1]); \
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityIntegerFunctions2INTEL); \
+ spirv_builder_emit_extension(&ctx->builder, "SPV_INTEL_shader_integer_functions2"); \
+ break;
+
+ INTEL_BINOP(nir_op_uabs_isub, SpvOpAbsISubINTEL)
+ INTEL_BINOP(nir_op_uabs_usub, SpvOpAbsUSubINTEL)
+ INTEL_BINOP(nir_op_iadd_sat, SpvOpIAddSatINTEL)
+ INTEL_BINOP(nir_op_uadd_sat, SpvOpUAddSatINTEL)
+ INTEL_BINOP(nir_op_ihadd, SpvOpIAverageINTEL)
+ INTEL_BINOP(nir_op_uhadd, SpvOpUAverageINTEL)
+ INTEL_BINOP(nir_op_irhadd, SpvOpIAverageRoundedINTEL)
+ INTEL_BINOP(nir_op_urhadd, SpvOpUAverageRoundedINTEL)
+ INTEL_BINOP(nir_op_isub_sat, SpvOpISubSatINTEL)
+ INTEL_BINOP(nir_op_usub_sat, SpvOpUSubSatINTEL)
+ INTEL_BINOP(nir_op_imul_32x16, SpvOpIMul32x16INTEL)
+ INTEL_BINOP(nir_op_umul_32x16, SpvOpUMul32x16INTEL)
+#undef INTEL_BINOP
+
case nir_op_fdot2:
case nir_op_fdot3:
case nir_op_fdot4:
@@ -1799,6 +2045,23 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
case nir_op_sge:
unreachable("should already be lowered away");
+ case nir_op_fneu:
+ assert(nir_op_infos[alu->op].num_inputs == 2);
+ if (raw_src[0] == raw_src[1])
+ result = emit_unop(ctx, SpvOpIsNan, dest_type, src[0]);
+ else
+ result = emit_binop(ctx, SpvOpFUnordNotEqual, dest_type, src[0], src[1]);
+ break;
+
+ case nir_op_feq:
+ assert(nir_op_infos[alu->op].num_inputs == 2);
+ if (raw_src[0] == raw_src[1])
+ result = emit_unop(ctx, SpvOpLogicalNot, dest_type,
+ emit_unop(ctx, SpvOpIsNan, dest_type, src[0]));
+ else
+ result = emit_binop(ctx, SpvOpFOrdEqual, dest_type, src[0], src[1]);
+ break;
+
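/* Why the raw_src[0] == raw_src[1] special case above works (illustrative
 * note): with IEEE semantics fneu(x, x) is true exactly when x is NaN, and
 * feq(x, x) is its negation, so comparing a value against itself can be
 * emitted as OpIsNan / !OpIsNan instead of a full unordered/ordered compare.
 */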
case nir_op_flrp:
assert(nir_op_infos[alu->op].num_inputs == 3);
result = emit_builtin_triop(ctx, GLSLstd450FMix, dest_type,
@@ -1841,6 +2104,84 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
result = spirv_builder_emit_quadop(&ctx->builder, SpvOpBitFieldInsert, dest_type, src[0], src[1], src[2], src[3]);
break;
+ /* those are all simple bitcasts, we could do better, but it doesn't matter */
+ case nir_op_pack_32_4x8:
+ case nir_op_pack_32_2x16:
+ case nir_op_pack_64_4x16:
+ case nir_op_unpack_32_4x8:
+ case nir_op_unpack_32_2x16:
+ case nir_op_unpack_64_4x16: {
+ result = emit_bitcast(ctx, dest_type, src[0]);
+ break;
+ }
+
+ case nir_op_pack_32_2x16_split:
+ case nir_op_pack_64_2x32_split: {
+ nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]);
+ if (num_components <= 2) {
+ SpvId components[] = {src[0], src[1]};
+ SpvId vec_type = get_alu_type(ctx, type, num_components * 2, nir_src_bit_size(alu->src[0].src));
+ result = spirv_builder_emit_composite_construct(&ctx->builder, vec_type, components, 2);
+ result = emit_bitcast(ctx, dest_type, result);
+ } else {
+ SpvId components[NIR_MAX_VEC_COMPONENTS];
+ SpvId conv_type = get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src));
+ SpvId vec_type = get_alu_type(ctx, type, 2, nir_src_bit_size(alu->src[0].src));
+ SpvId dest_scalar_type = get_alu_type(ctx, nir_op_infos[alu->op].output_type, 1, bit_size);
+ for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) {
+ SpvId conv[2];
+ conv[0] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, src[0], &i, 1);
+ conv[1] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, src[1], &i, 1);
+ SpvId vec = spirv_builder_emit_composite_construct(&ctx->builder, vec_type, conv, 2);
+ components[i] = emit_bitcast(ctx, dest_scalar_type, vec);
+ }
+ result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components);
+ }
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_x:
+ case nir_op_unpack_64_2x32_split_x: {
+ nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]);
+ SpvId vec_type = get_alu_type(ctx, type, 2, bit_size);
+ unsigned idx = 0;
+ if (num_components == 1) {
+ SpvId vec = emit_bitcast(ctx, vec_type, src[0]);
+ result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, vec, &idx, 1);
+ } else {
+ SpvId components[NIR_MAX_VEC_COMPONENTS];
+ for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) {
+ SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)), src[0], &i, 1);
+ conv = emit_bitcast(ctx, vec_type, conv);
+ SpvId conv_type = get_alu_type(ctx, type, 1, bit_size);
+ components[i] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, conv, &idx, 1);
+ }
+ result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components);
+ }
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_y:
+ case nir_op_unpack_64_2x32_split_y: {
+ nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]);
+ SpvId vec_type = get_alu_type(ctx, type, 2, bit_size);
+ unsigned idx = 1;
+ if (num_components == 1) {
+ SpvId vec = emit_bitcast(ctx, vec_type, src[0]);
+ result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, vec, &idx, 1);
+ } else {
+ SpvId components[NIR_MAX_VEC_COMPONENTS];
+ for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) {
+ SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)), src[0], &i, 1);
+ conv = emit_bitcast(ctx, vec_type, conv);
+ SpvId conv_type = get_alu_type(ctx, type, 1, bit_size);
+ components[i] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, conv, &idx, 1);
+ }
+ result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components);
+ }
+ break;
+ }
+
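/* Worked example for the split pack/unpack cases above (illustrative only):
 * a scalar nir_op_pack_64_2x32_split(lo, hi) becomes
 *
 *    %v = OpCompositeConstruct %uvec2 %lo %hi
 *    %r = OpBitcast %uint64 %v
 *
 * and nir_op_unpack_64_2x32_split_y is the inverse: bitcast the 64-bit value
 * to a uvec2 and OpCompositeExtract component 1.  The vectorized paths do the
 * same thing once per component.
 */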
default:
fprintf(stderr, "emit_alu: not implemented (%s)\n",
nir_op_infos[alu->op].name);
@@ -1851,7 +2192,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
if (alu->exact)
spirv_builder_emit_decoration(&ctx->builder, result, SpvDecorationNoContraction);
- store_alu_result(ctx, alu, result);
+ store_alu_result(ctx, alu, result, atype);
}
static void
@@ -1861,273 +2202,99 @@ emit_load_const(struct ntv_context *ctx, nir_load_const_instr *load_const)
unsigned num_components = load_const->def.num_components;
SpvId components[NIR_MAX_VEC_COMPONENTS];
+ nir_alu_type atype;
if (bit_size == 1) {
+ atype = nir_type_bool;
for (int i = 0; i < num_components; i++)
components[i] = spirv_builder_const_bool(&ctx->builder,
load_const->value[i].b);
} else {
+ atype = infer_nir_alu_type_from_uses_ssa(&load_const->def);
for (int i = 0; i < num_components; i++) {
- uint64_t tmp = nir_const_value_as_uint(load_const->value[i],
- bit_size);
- components[i] = emit_uint_const(ctx, bit_size, tmp);
+ switch (atype) {
+ case nir_type_uint: {
+ uint64_t tmp = nir_const_value_as_uint(load_const->value[i], bit_size);
+ components[i] = emit_uint_const(ctx, bit_size, tmp);
+ break;
+ }
+ case nir_type_int: {
+ int64_t tmp = nir_const_value_as_int(load_const->value[i], bit_size);
+ components[i] = emit_int_const(ctx, bit_size, tmp);
+ break;
+ }
+ case nir_type_float: {
+ double tmp = nir_const_value_as_float(load_const->value[i], bit_size);
+ components[i] = emit_float_const(ctx, bit_size, tmp);
+ break;
+ }
+ default:
+ unreachable("this shouldn't happen!");
+ }
}
}
if (num_components > 1) {
- SpvId type = get_vec_from_bit_size(ctx, bit_size,
- num_components);
+ SpvId type = get_alu_type(ctx, atype, num_components, bit_size);
SpvId value = spirv_builder_const_composite(&ctx->builder,
type, components,
num_components);
- store_ssa_def(ctx, &load_const->def, value);
+ store_def(ctx, load_const->def.index, value, atype);
} else {
assert(num_components == 1);
- store_ssa_def(ctx, &load_const->def, components[0]);
- }
-}
-
-static void
-emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
-{
- nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]);
- bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo;
- assert(const_block_index); // no dynamic indexing for now
-
- unsigned idx = 0;
- unsigned bit_size = nir_dest_bit_size(intr->dest);
- idx = MIN2(bit_size, 32) >> 4;
- if (ssbo) {
- assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
- if (!ctx->ssbos[const_block_index->u32][idx])
- emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
- } else {
- assert(idx < ARRAY_SIZE(ctx->ubos[0]));
- if (!ctx->ubos[const_block_index->u32][idx])
- emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
- }
- SpvId bo = ssbo ? ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx];
- SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1);
- SpvId one = emit_uint_const(ctx, 32, 1);
-
- /* number of components being loaded */
- unsigned num_components = nir_dest_num_components(intr->dest);
- /* we need to grab 2x32 to fill the 64bit value */
- if (bit_size == 64)
- num_components *= 2;
- SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
- SpvId result;
-
- /* destination type for the load */
- SpvId type = get_dest_uvec_type(ctx, &intr->dest);
- /* an id of an array member in bytes */
- SpvId uint_size = emit_uint_const(ctx, 32, MIN2(bit_size, 32) / 8);
-
- /* we grab a single array member at a time, so it's a pointer to a uint */
- SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
- ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform,
- uint_type);
-
- /* our generated uniform has a memory layout like
- *
- * struct {
- * uint base[array_size];
- * };
- *
- * where 'array_size' is set as though every member of the ubo takes up a vec4,
- * even if it's only a vec2 or a float.
- *
- * first, access 'base'
- */
- SpvId member = emit_uint_const(ctx, 32, 0);
- /* this is the offset (in bytes) that we're accessing:
- * it may be a const value or it may be dynamic in the shader
- */
- SpvId offset = get_src(ctx, &intr->src[1]);
- /* calculate the byte offset in the array */
- SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size);
- /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type:
- * index 0 is accessing 'base'
- * index 1 is accessing 'base[index 1]'
- *
- * we must perform the access this way in case src[1] is dynamic because there's
- * no other spirv method for using an id to access a member of a composite, as
- * (composite|vector)_extract both take literals
- */
- for (unsigned i = 0; i < num_components; i++) {
- SpvId indices[2] = { member, vec_offset };
- SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
- bo, indices,
- ARRAY_SIZE(indices));
- /* load a single value into the constituents array */
- if (ssbo && nir_intrinsic_access(intr) & ACCESS_COHERENT)
- constituents[i] = emit_atomic(ctx, SpvOpAtomicLoad, uint_type, ptr, 0, 0);
- else
- constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, ptr);
- /* increment to the next member index for the next load */
- vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one);
- }
-
- /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
- * by creating uvec2 composites and bitcasting them to u64 values
- */
- if (bit_size == 64) {
- num_components /= 2;
- type = get_uvec_type(ctx, 64, num_components);
- SpvId u64_type = get_uvec_type(ctx, 64, 1);
- for (unsigned i = 0; i < num_components; i++) {
- constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
- constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
- }
- }
- /* if loading more than 1 value, reassemble the results into the desired type,
- * otherwise just use the loaded result
- */
- if (num_components > 1) {
- result = spirv_builder_emit_composite_construct(&ctx->builder,
- type,
- constituents,
- num_components);
- } else
- result = constituents[0];
-
- /* explicitly convert to a bool vector if the destination type is a bool */
- if (nir_dest_bit_size(intr->dest) == 1)
- result = uvec_to_bvec(ctx, result, num_components);
-
- store_dest(ctx, &intr->dest, result, nir_type_uint);
-}
-
-static void
-emit_store_ssbo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
-{
- /* TODO: would be great to refactor this in with emit_load_bo() */
-
- nir_const_value *const_block_index = nir_src_as_const_value(intr->src[1]);
- assert(const_block_index);
-
- unsigned idx = MIN2(nir_src_bit_size(intr->src[0]), 32) >> 4;
- assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
- if (!ctx->ssbos[const_block_index->u32][idx])
- emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_src_bit_size(intr->src[0]));
- SpvId bo = ctx->ssbos[const_block_index->u32][idx];
-
- unsigned bit_size = nir_src_bit_size(intr->src[0]);
- SpvId uint_type = get_uvec_type(ctx, 32, 1);
- SpvId one = emit_uint_const(ctx, 32, 1);
-
- /* number of components being stored */
- unsigned wrmask = nir_intrinsic_write_mask(intr);
- unsigned num_components = util_bitcount(wrmask);
-
- /* we need to grab 2x32 to fill the 64bit value */
- bool is_64bit = bit_size == 64;
-
- /* an id of an array member in bytes */
- SpvId uint_size = emit_uint_const(ctx, 32, MIN2(bit_size, 32) / 8);
- /* we grab a single array member at a time, so it's a pointer to a uint */
- SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
- SpvStorageClassStorageBuffer,
- get_uvec_type(ctx, MIN2(bit_size, 32), 1));
-
- /* our generated uniform has a memory layout like
- *
- * struct {
- * uint base[array_size];
- * };
- *
- * where 'array_size' is set as though every member of the ubo takes up a vec4,
- * even if it's only a vec2 or a float.
- *
- * first, access 'base'
- */
- SpvId member = emit_uint_const(ctx, 32, 0);
- /* this is the offset (in bytes) that we're accessing:
- * it may be a const value or it may be dynamic in the shader
- */
- SpvId offset = get_src(ctx, &intr->src[2]);
- /* calculate byte offset */
- SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size);
-
- SpvId value = get_src(ctx, &intr->src[0]);
- /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type:
- * index 0 is accessing 'base'
- * index 1 is accessing 'base[index 1]'
- * index 2 is accessing 'base[index 1][index 2]'
- *
- * we must perform the access this way in case src[1] is dynamic because there's
- * no other spirv method for using an id to access a member of a composite, as
- * (composite|vector)_extract both take literals
- */
- unsigned write_count = 0;
- SpvId src_base_type = get_uvec_type(ctx, bit_size, 1);
- for (unsigned i = 0; write_count < num_components; i++) {
- if (wrmask & (1 << i)) {
- SpvId component = nir_src_num_components(intr->src[0]) > 1 ?
- spirv_builder_emit_composite_extract(&ctx->builder, src_base_type, value, &i, 1) :
- value;
- SpvId component_split;
- if (is_64bit)
- component_split = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), component);
- for (unsigned j = 0; j < 1 + !!is_64bit; j++) {
- if (j)
- vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one);
- SpvId indices[] = { member, vec_offset };
- SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
- bo, indices,
- ARRAY_SIZE(indices));
- if (is_64bit)
- component = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, component_split, &j, 1);
- if (nir_intrinsic_access(intr) & ACCESS_COHERENT)
- spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeWorkgroup, 0, component);
- else
- spirv_builder_emit_store(&ctx->builder, ptr, component);
- }
- write_count++;
- } else if (is_64bit)
- /* we're doing 32bit stores here, so we need to increment correctly here */
- vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one);
-
- /* increment to the next vec4 member index for the next store */
- vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one);
+ store_def(ctx, load_const->def.index, components[0], atype);
}
}
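/* Illustrative example of the inference above (not part of the patch): a
 * 32-bit constant 0x3f800000 whose only uses are float-typed ALU sources is
 * now emitted directly as an OpConstant float 1.0 and recorded with
 * nir_type_float, instead of always being emitted as a uint constant and
 * bitcast at every use.
 */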
static void
emit_discard(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- assert(ctx->block_started);
- spirv_builder_emit_kill(&ctx->builder);
- /* discard is weird in NIR, so let's just create an unreachable block after
- it and hope that the vulkan driver will DCE any instructions in it. */
- spirv_builder_label(&ctx->builder, spirv_builder_new_id(&ctx->builder));
+ assert(ctx->discard_func);
+ SpvId type_void = spirv_builder_type_void(&ctx->builder);
+ spirv_builder_function_call(&ctx->builder, type_void,
+ ctx->discard_func, NULL, 0);
}
static void
emit_load_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId ptr = get_src(ctx, intr->src);
+ nir_alu_type atype;
+ SpvId ptr = get_src(ctx, intr->src, &atype);
+
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ SpvId type;
+ if (glsl_type_is_image(deref->type)) {
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const struct glsl_type *gtype = glsl_without_array(var->type);
+ type = get_image_type(ctx, var,
+ glsl_type_is_sampler(gtype),
+ glsl_get_sampler_dim(gtype) == GLSL_SAMPLER_DIM_BUF);
+ atype = nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(gtype));
+ } else {
+ type = get_glsl_type(ctx, deref->type);
+ atype = get_nir_alu_type(deref->type);
+ }
+ SpvId result;
- SpvId result = spirv_builder_emit_load(&ctx->builder,
- get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type),
- ptr);
- unsigned num_components = nir_dest_num_components(intr->dest);
- unsigned bit_size = nir_dest_bit_size(intr->dest);
- result = bitcast_to_uvec(ctx, result, bit_size, num_components);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ if (nir_intrinsic_access(intr) & ACCESS_COHERENT)
+ result = emit_atomic(ctx, SpvOpAtomicLoad, type, ptr, 0, 0);
+ else
+ result = spirv_builder_emit_load(&ctx->builder, type, ptr);
+ store_def(ctx, intr->def.index, result, atype);
}
static void
emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId ptr = get_src(ctx, &intr->src[0]);
- SpvId src = get_src(ctx, &intr->src[1]);
+ nir_alu_type ptype, stype;
+ SpvId ptr = get_src(ctx, &intr->src[0], &ptype);
+ SpvId src = get_src(ctx, &intr->src[1], &stype);
const struct glsl_type *gtype = nir_src_as_deref(intr->src[0])->type;
SpvId type = get_glsl_type(ctx, gtype);
- nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
- unsigned num_writes = util_bitcount(nir_intrinsic_write_mask(intr));
+ nir_variable *var = nir_intrinsic_get_var(intr, 0);
unsigned wrmask = nir_intrinsic_write_mask(intr);
- if (num_writes && num_writes != intr->num_components) {
+ if (!glsl_type_is_scalar(gtype) &&
+ wrmask != BITFIELD_MASK(glsl_type_is_array(gtype) ? glsl_get_aoa_size(gtype) : glsl_get_vector_elements(gtype))) {
/* no idea what we do if this fails */
assert(glsl_type_is_array(gtype) || glsl_type_is_vector(gtype));
@@ -2136,17 +2303,18 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr)
SpvId member_type;
if (glsl_type_is_vector(gtype)) {
result_type = get_glsl_basetype(ctx, glsl_get_base_type(gtype));
- member_type = get_uvec_type(ctx, 32, 1);
+ member_type = get_alu_type(ctx, stype, 1, glsl_get_bit_size(gtype));
} else
member_type = result_type = get_glsl_type(ctx, glsl_get_array_element(gtype));
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
- SpvStorageClassOutput,
+ get_storage_class(var),
result_type);
for (unsigned i = 0; i < 4; i++)
- if ((wrmask >> i) & 1) {
+ if (wrmask & BITFIELD_BIT(i)) {
SpvId idx = emit_uint_const(ctx, 32, i);
SpvId val = spirv_builder_emit_composite_extract(&ctx->builder, member_type, src, &i, 1);
- val = emit_bitcast(ctx, result_type, val);
+ if (stype != ptype)
+ val = emit_bitcast(ctx, result_type, val);
SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
ptr, &idx, 1);
spirv_builder_emit_store(&ctx->builder, member, val);
@@ -2155,99 +2323,161 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr)
}
SpvId result;
- if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.location == FRAG_RESULT_SAMPLE_MASK) {
+ if (ctx->stage == MESA_SHADER_FRAGMENT &&
+ var->data.mode == nir_var_shader_out &&
+ var->data.location == FRAG_RESULT_SAMPLE_MASK) {
src = emit_bitcast(ctx, type, src);
/* SampleMask is always an array in spirv, so we need to construct it into one */
result = spirv_builder_emit_composite_construct(&ctx->builder, ctx->sample_mask_type, &src, 1);
- } else
- result = emit_bitcast(ctx, type, src);
- spirv_builder_emit_store(&ctx->builder, ptr, result);
+ } else {
+ if (ptype == stype)
+ result = src;
+ else
+ result = emit_bitcast(ctx, type, src);
+ }
+ if (nir_intrinsic_access(intr) & ACCESS_COHERENT)
+ spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeDevice, 0, result);
+ else
+ spirv_builder_emit_store(&ctx->builder, ptr, result);
}
static void
emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint);
- unsigned num_components = nir_dest_num_components(intr->dest);
- unsigned bit_size = nir_dest_bit_size(intr->dest);
- bool qword = bit_size == 64;
- SpvId uint_type = get_uvec_type(ctx, 32, 1);
+ SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint);
+ unsigned num_components = intr->def.num_components;
+ unsigned bit_size = intr->def.bit_size;
+ SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassWorkgroup,
uint_type);
- SpvId offset = emit_binop(ctx, SpvOpUDiv, uint_type, get_src(ctx, &intr->src[0]), emit_uint_const(ctx, 32, 4));
+ nir_alu_type atype;
+ SpvId offset = get_src(ctx, &intr->src[0], &atype);
+ if (atype == nir_type_float)
+ offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1);
SpvId constituents[NIR_MAX_VEC_COMPONENTS];
+ SpvId shared_block = get_shared_block(ctx, bit_size);
/* need to convert array -> vec */
for (unsigned i = 0; i < num_components; i++) {
- SpvId parts[2];
- for (unsigned j = 0; j < 1 + !!qword; j++) {
- SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
- ctx->shared_block_var, &offset, 1);
- parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
- offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
- }
- if (qword)
- constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2);
- else
- constituents[i] = parts[0];
+ SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+ shared_block, &offset, 1);
+ constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
+ offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, 1));
}
SpvId result;
if (num_components > 1)
result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, constituents, num_components);
else
- result = bitcast_to_uvec(ctx, constituents[0], bit_size, num_components);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ result = constituents[0];
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static void
emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId src = get_src(ctx, &intr->src[0]);
- bool qword = nir_src_bit_size(intr->src[0]) == 64;
+ nir_alu_type atype;
+ SpvId src = get_src(ctx, &intr->src[0], &atype);
- unsigned num_writes = util_bitcount(nir_intrinsic_write_mask(intr));
unsigned wrmask = nir_intrinsic_write_mask(intr);
- /* this is a partial write, so we have to loop and do a per-component write */
- SpvId uint_type = get_uvec_type(ctx, 32, 1);
+ unsigned bit_size = nir_src_bit_size(intr->src[0]);
+ SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassWorkgroup,
uint_type);
- SpvId offset = emit_binop(ctx, SpvOpUDiv, uint_type, get_src(ctx, &intr->src[1]), emit_uint_const(ctx, 32, 4));
-
- for (unsigned i = 0; num_writes; i++) {
- if ((wrmask >> i) & 1) {
- for (unsigned j = 0; j < 1 + !!qword; j++) {
- unsigned comp = ((1 + !!qword) * i) + j;
- SpvId shared_offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, comp));
- SpvId val = src;
- if (nir_src_num_components(intr->src[0]) != 1 || qword)
- val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &comp, 1);
- SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
- ctx->shared_block_var, &shared_offset, 1);
- spirv_builder_emit_store(&ctx->builder, member, val);
- }
- num_writes--;
- }
+ nir_alu_type otype;
+ SpvId offset = get_src(ctx, &intr->src[1], &otype);
+ if (otype == nir_type_float)
+ offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1);
+ SpvId shared_block = get_shared_block(ctx, bit_size);
+ /* this is a partial write, so we have to loop and do a per-component write */
+ u_foreach_bit(i, wrmask) {
+ SpvId shared_offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, i));
+ SpvId val = src;
+ if (nir_src_num_components(intr->src[0]) != 1)
+ val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &i, 1);
+ if (atype != nir_type_uint)
+ val = emit_bitcast(ctx, get_alu_type(ctx, nir_type_uint, 1, bit_size), val);
+ SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+ shared_block, &shared_offset, 1);
+ spirv_builder_emit_store(&ctx->builder, member, val);
+ }
+}
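/* Note on the loop above (illustrative): u_foreach_bit(i, wrmask) visits only
 * the set bit positions of the writemask, e.g. wrmask == 0b1011 stores
 * components 0, 1 and 3, so partial writes no longer need the manual
 * write-count bookkeeping of the old loop.
 */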
+
+static void
+emit_load_scratch(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint);
+ unsigned num_components = intr->def.num_components;
+ unsigned bit_size = intr->def.bit_size;
+ SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
+ SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassPrivate,
+ uint_type);
+ nir_alu_type atype;
+ SpvId offset = get_src(ctx, &intr->src[0], &atype);
+ if (atype != nir_type_uint)
+ offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1);
+ SpvId constituents[NIR_MAX_VEC_COMPONENTS];
+ SpvId scratch_block = get_scratch_block(ctx, bit_size);
+ /* need to convert array -> vec */
+ for (unsigned i = 0; i < num_components; i++) {
+ SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+ scratch_block, &offset, 1);
+ constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
+ offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, 1));
+ }
+ SpvId result;
+ if (num_components > 1)
+ result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, constituents, num_components);
+ else
+ result = constituents[0];
+ store_def(ctx, intr->def.index, result, nir_type_uint);
+}
+
+static void
+emit_store_scratch(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ nir_alu_type atype;
+ SpvId src = get_src(ctx, &intr->src[0], &atype);
+
+ unsigned wrmask = nir_intrinsic_write_mask(intr);
+ unsigned bit_size = nir_src_bit_size(intr->src[0]);
+ SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
+ SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassPrivate,
+ uint_type);
+ nir_alu_type otype;
+ SpvId offset = get_src(ctx, &intr->src[1], &otype);
+ if (otype != nir_type_uint)
+ offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[1]), 1);
+ SpvId scratch_block = get_scratch_block(ctx, bit_size);
+ /* this is a partial write, so we have to loop and do a per-component write */
+ u_foreach_bit(i, wrmask) {
+ SpvId scratch_offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, i));
+ SpvId val = src;
+ if (nir_src_num_components(intr->src[0]) != 1)
+ val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &i, 1);
+ if (atype != nir_type_uint)
+ val = emit_bitcast(ctx, get_alu_type(ctx, nir_type_uint, 1, bit_size), val);
+ SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+ scratch_block, &scratch_offset, 1);
+ spirv_builder_emit_store(&ctx->builder, member, val);
}
}
static void
emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- unsigned bit_size = nir_dest_bit_size(intr->dest);
SpvId uint_type = get_uvec_type(ctx, 32, 1);
SpvId load_type = get_uvec_type(ctx, 32, 1);
/* number of components being loaded */
- unsigned num_components = nir_dest_num_components(intr->dest);
- /* we need to grab 2x32 to fill the 64bit value */
- if (bit_size == 64)
- num_components *= 2;
+ unsigned num_components = intr->def.num_components;
SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
SpvId result;
/* destination type for the load */
- SpvId type = get_dest_uvec_type(ctx, &intr->dest);
+ SpvId type = get_def_uvec_type(ctx, &intr->def);
SpvId one = emit_uint_const(ctx, 32, 1);
/* we grab a single array member at a time, so it's a pointer to a uint */
@@ -2255,9 +2485,12 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
SpvStorageClassPushConstant,
load_type);
- SpvId member = get_src(ctx, &intr->src[0]);
+ nir_alu_type atype;
+ SpvId member = get_src(ctx, &intr->src[0], &atype);
+ if (atype == nir_type_float)
+ member = bitcast_to_uvec(ctx, member, nir_src_bit_size(intr->src[0]), 1);
/* reuse the offset from ZINK_PUSH_CONST_OFFSET */
- SpvId offset = emit_uint_const(ctx, 32, 0);
+ SpvId offset = emit_uint_const(ctx, 32, nir_intrinsic_component(intr));
/* OpAccessChain takes an array of indices that drill into a hierarchy based on the type:
* index 0 is accessing 'base'
* index 1 is accessing 'base[index 1]'
@@ -2274,18 +2507,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
}
- /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
- * by creating uvec2 composites and bitcasting them to u64 values
- */
- if (bit_size == 64) {
- num_components /= 2;
- type = get_uvec_type(ctx, 64, num_components);
- SpvId u64_type = get_uvec_type(ctx, 64, 1);
- for (unsigned i = 0; i < num_components; i++) {
- constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
- constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
- }
- }
/* if loading more than 1 value, reassemble the results into the desired type,
* otherwise just use the loaded result
*/
@@ -2297,7 +2518,84 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
} else
result = constituents[0];
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
+}
+
+static void
+emit_load_global(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ bool coherent = ctx->sinfo->have_vulkan_memory_model && nir_intrinsic_access(intr) & ACCESS_COHERENT;
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses);
+ SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassPhysicalStorageBuffer,
+ dest_type);
+ nir_alu_type atype;
+ SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[0], &atype));
+ SpvId result = spirv_builder_emit_load_aligned(&ctx->builder, dest_type, ptr, intr->def.bit_size / 8, coherent);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
+}
+
+static void
+emit_store_global(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ bool coherent = ctx->sinfo->have_vulkan_memory_model && nir_intrinsic_access(intr) & ACCESS_COHERENT;
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses);
+ unsigned bit_size = nir_src_bit_size(intr->src[0]);
+ SpvId dest_type = get_uvec_type(ctx, bit_size, 1);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassPhysicalStorageBuffer,
+ dest_type);
+ nir_alu_type atype;
+ SpvId param = get_src(ctx, &intr->src[0], &atype);
+ if (atype != nir_type_uint)
+ param = emit_bitcast(ctx, dest_type, param);
+ SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[1], &atype));
+ spirv_builder_emit_store_aligned(&ctx->builder, ptr, param, bit_size / 8, coherent);
+}
+
+static void
+emit_load_reg(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ assert(nir_intrinsic_base(intr) == 0 && "no array registers");
+
+ nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[0].ssa);
+ unsigned num_components = nir_intrinsic_num_components(decl);
+ unsigned bit_size = nir_intrinsic_bit_size(decl);
+ unsigned index = decl->def.index;
+ assert(index < ctx->num_defs);
+
+ init_reg(ctx, decl, nir_type_uint);
+ assert(ctx->defs[index] != 0);
+
+ nir_alu_type atype = ctx->def_types[index];
+ SpvId var = ctx->defs[index];
+ SpvId type = get_alu_type(ctx, atype, num_components, bit_size);
+ SpvId result = spirv_builder_emit_load(&ctx->builder, type, var);
+ store_def(ctx, intr->def.index, result, atype);
+}
+
+static void
+emit_store_reg(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ nir_alu_type atype;
+ SpvId param = get_src(ctx, &intr->src[0], &atype);
+
+ nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[1].ssa);
+ unsigned index = decl->def.index;
+ unsigned num_components = nir_intrinsic_num_components(decl);
+ unsigned bit_size = nir_intrinsic_bit_size(decl);
+
+ atype = nir_alu_type_get_base_type(atype);
+ init_reg(ctx, decl, atype);
+ SpvId var = ctx->defs[index];
+ nir_alu_type vtype = ctx->def_types[index];
+ if (atype != vtype) {
+ assert(vtype != nir_type_bool);
+ param = emit_bitcast(ctx, get_alu_type(ctx, vtype, num_components, bit_size), param);
+ }
+ assert(var);
+ spirv_builder_emit_store(&ctx->builder, var, param);
}
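/* Sketch of the NIR register intrinsics these two handlers consume
 * (illustrative, simplified):
 *
 *    %r = @decl_reg num_components=4 bit_size=32
 *    %v = @load_reg (%r)
 *         @store_reg (%v2, %r) wrmask=0xf
 *
 * init_reg() lazily creates one OpVariable per decl_reg, keyed by the decl's
 * def index in ctx->defs/ctx->def_types; load_reg/store_reg then become plain
 * OpLoad/OpStore on that variable, with a bitcast when the stored value's
 * base type differs from the register's recorded type.
 */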
static SpvId
@@ -2313,6 +2611,17 @@ create_builtin_var(struct ntv_context *ctx, SpvId var_type,
spirv_builder_emit_name(&ctx->builder, var, name);
spirv_builder_emit_builtin(&ctx->builder, var, builtin);
+ if (ctx->stage == MESA_SHADER_FRAGMENT) {
+ switch (builtin) {
+ case SpvBuiltInSampleId:
+ case SpvBuiltInSubgroupLocalInvocationId:
+ spirv_builder_emit_decoration(&ctx->builder, var, SpvDecorationFlat);
+ break;
+ default:
+ break;
+ }
+ }
+
assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
ctx->entry_ifaces[ctx->num_entry_ifaces++] = var;
return var;
@@ -2330,37 +2639,39 @@ emit_load_front_face(struct ntv_context *ctx, nir_intrinsic_instr *intr)
SpvId result = spirv_builder_emit_load(&ctx->builder, var_type,
ctx->front_face_var);
- assert(1 == nir_dest_num_components(intr->dest));
- store_dest(ctx, &intr->dest, result, nir_type_bool);
+ assert(1 == intr->def.num_components);
+ store_def(ctx, intr->def.index, result, nir_type_bool);
}
static void
emit_load_uint_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *var_id, const char *var_name, SpvBuiltIn builtin)
{
SpvId var_type = spirv_builder_type_uint(&ctx->builder, 32);
- if (builtin == SpvBuiltInSampleMask) {
- /* gl_SampleMaskIn is an array[1] in spirv... */
- var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, 1));
- spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(uint32_t));
- }
if (!*var_id) {
+ if (builtin == SpvBuiltInSampleMask) {
+ /* gl_SampleMaskIn is an array[1] in spirv... */
+ var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, 1));
+ spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(uint32_t));
+ }
*var_id = create_builtin_var(ctx, var_type,
SpvStorageClassInput,
var_name,
builtin);
- if (builtin == SpvBuiltInSampleMask) {
- SpvId zero = emit_uint_const(ctx, 32, 0);
- var_type = spirv_builder_type_uint(&ctx->builder, 32);
- SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
- SpvStorageClassInput,
- var_type);
- *var_id = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, *var_id, &zero, 1);
- }
}
- SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, *var_id);
- assert(1 == nir_dest_num_components(intr->dest));
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ SpvId load_var = *var_id;
+ if (builtin == SpvBuiltInSampleMask) {
+ SpvId zero = emit_uint_const(ctx, 32, 0);
+ var_type = spirv_builder_type_uint(&ctx->builder, 32);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassInput,
+ var_type);
+ load_var = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, load_var, &zero, 1);
+ }
+
+ SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, load_var);
+ assert(1 == intr->def.num_components);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static void
@@ -2370,16 +2681,19 @@ emit_load_vec_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *v
switch (type) {
case nir_type_bool:
- var_type = get_bvec_type(ctx, nir_dest_num_components(intr->dest));
+ var_type = get_bvec_type(ctx, intr->def.num_components);
break;
case nir_type_int:
- var_type = get_ivec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest));
+ var_type = get_ivec_type(ctx, intr->def.bit_size,
+ intr->def.num_components);
break;
case nir_type_uint:
- var_type = get_uvec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest));
+ var_type = get_uvec_type(ctx, intr->def.bit_size,
+ intr->def.num_components);
break;
case nir_type_float:
- var_type = get_fvec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest));
+ var_type = get_fvec_type(ctx, intr->def.bit_size,
+ intr->def.num_components);
break;
default:
unreachable("unknown type passed");
@@ -2391,7 +2705,7 @@ emit_load_vec_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *v
builtin);
SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, *var_id);
- store_dest(ctx, &intr->dest, result, type);
+ store_def(ctx, intr->def.index, result, type);
}
static void
@@ -2399,133 +2713,182 @@ emit_interpolate(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
SpvId op;
spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInterpolationFunction);
+ SpvId src1 = 0;
+ nir_alu_type atype;
switch (intr->intrinsic) {
case nir_intrinsic_interp_deref_at_centroid:
op = GLSLstd450InterpolateAtCentroid;
break;
case nir_intrinsic_interp_deref_at_sample:
op = GLSLstd450InterpolateAtSample;
+ src1 = get_src(ctx, &intr->src[1], &atype);
+ if (atype != nir_type_int)
+ src1 = emit_bitcast(ctx, get_ivec_type(ctx, 32, 1), src1);
break;
case nir_intrinsic_interp_deref_at_offset:
op = GLSLstd450InterpolateAtOffset;
+ src1 = get_src(ctx, &intr->src[1], &atype);
+ /*
+ The offset operand must be a vector of 2 components of 32-bit floating-point type.
+ - InterpolateAtOffset spec
+ */
+ if (atype != nir_type_float)
+ src1 = emit_bitcast(ctx, get_fvec_type(ctx, 32, 2), src1);
break;
default:
unreachable("unknown interp op");
}
- SpvId ptr = get_src(ctx, &intr->src[0]);
+ nir_alu_type ptype;
+ SpvId ptr = get_src(ctx, &intr->src[0], &ptype);
SpvId result;
+ const struct glsl_type *gtype = nir_src_as_deref(intr->src[0])->type;
+ assert(glsl_get_vector_elements(gtype) == intr->num_components);
+ assert(ptype == get_nir_alu_type(gtype));
if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid)
- result = emit_builtin_unop(ctx, op, get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), ptr);
+ result = emit_builtin_unop(ctx, op, get_glsl_type(ctx, gtype), ptr);
else
- result = emit_builtin_binop(ctx, op, get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type),
- ptr, get_src(ctx, &intr->src[1]));
- unsigned num_components = nir_dest_num_components(intr->dest);
- unsigned bit_size = nir_dest_bit_size(intr->dest);
- result = bitcast_to_uvec(ctx, result, bit_size, num_components);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ result = emit_builtin_binop(ctx, op, get_glsl_type(ctx, gtype), ptr, src1);
+ store_def(ctx, intr->def.index, result, ptype);
}
static void
-handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2)
+handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2, nir_alu_type type)
{
- SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32);
- SpvId result = emit_atomic(ctx, get_atomic_op(intr->intrinsic), dest_type, ptr, param, param2);
+ SpvId dest_type = get_def_type(ctx, &intr->def, type);
+ SpvId result = emit_atomic(ctx,
+ get_atomic_op(ctx, intr->def.bit_size, nir_intrinsic_atomic_op(intr)),
+ dest_type, ptr, param, param2);
assert(result);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ store_def(ctx, intr->def.index, result, type);
}
static void
-emit_ssbo_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+emit_deref_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId ssbo;
- SpvId param;
- SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32);
+ nir_alu_type atype;
+ nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint;
+ SpvId ptr = get_src(ctx, &intr->src[0], &atype);
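+ /* float atomics need a pointer to a float-typed element: if the deref
+ * pointer was emitted with a uint element type, bitcast it to a float
+ * pointer of the same bit size before emitting the atomic op
+ */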
+ if (atype != ret_type && ret_type == nir_type_float) {
+ unsigned bit_size = nir_src_bit_size(intr->src[0]);
+ SpvId *float_array_type = &ctx->float_array_type[bit_size == 32 ? 0 : 1];
+ if (!*float_array_type) {
+ *float_array_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassStorageBuffer,
+ spirv_builder_type_float(&ctx->builder, bit_size));
+ }
+ ptr = emit_unop(ctx, SpvOpBitcast, *float_array_type, ptr);
+ }
- nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]);
- assert(const_block_index); // no dynamic indexing for now
- unsigned bit_size = MIN2(nir_src_bit_size(intr->src[0]), 32);
- unsigned idx = bit_size >> 4;
- assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
- if (!ctx->ssbos[const_block_index->u32][idx])
- emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
- ssbo = ctx->ssbos[const_block_index->u32][idx];
- param = get_src(ctx, &intr->src[2]);
-
- SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
- SpvStorageClassStorageBuffer,
- dest_type);
- SpvId uint_type = get_uvec_type(ctx, 32, 1);
- /* an id of the array stride in bytes */
- SpvId uint_size = emit_uint_const(ctx, 32, bit_size / 8);
- SpvId member = emit_uint_const(ctx, 32, 0);
- SpvId offset = get_src(ctx, &intr->src[1]);
- SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size);
- SpvId indices[] = { member, vec_offset };
- SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
- ssbo, indices,
- ARRAY_SIZE(indices));
+ SpvId param = get_src(ctx, &intr->src[1], &atype);
+ if (atype != ret_type)
+ param = cast_src_to_type(ctx, param, intr->src[1], ret_type);
SpvId param2 = 0;
- if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
- param2 = get_src(ctx, &intr->src[3]);
+ if (nir_src_bit_size(intr->src[1]) == 64)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics);
- handle_atomic_op(ctx, intr, ptr, param, param2);
+ if (intr->intrinsic == nir_intrinsic_deref_atomic_swap) {
+ param2 = get_src(ctx, &intr->src[2], &atype);
+ if (atype != ret_type)
+ param2 = cast_src_to_type(ctx, param2, intr->src[2], ret_type);
+ }
+
+ handle_atomic_op(ctx, intr, ptr, param, param2, ret_type);
}
static void
emit_shared_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32);
- SpvId param = get_src(ctx, &intr->src[1]);
+ unsigned bit_size = nir_src_bit_size(intr->src[1]);
+ SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint);
+ nir_alu_type atype;
+ nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint;
+ SpvId param = get_src(ctx, &intr->src[1], &atype);
+ if (atype != ret_type)
+ param = cast_src_to_type(ctx, param, intr->src[1], ret_type);
SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassWorkgroup,
dest_type);
- SpvId offset = emit_binop(ctx, SpvOpUDiv, get_uvec_type(ctx, 32, 1), get_src(ctx, &intr->src[0]), emit_uint_const(ctx, 32, 4));
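+ /* shared memory is modeled as a typed array per bit size, so turn the
+ * byte offset from NIR into an element index and access-chain into the
+ * matching shared block
+ */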
+ SpvId offset = get_src(ctx, &intr->src[0], &atype);
+ if (atype != nir_type_uint)
+ offset = cast_src_to_type(ctx, offset, intr->src[0], nir_type_uint);
+ offset = emit_binop(ctx, SpvOpUDiv, get_uvec_type(ctx, 32, 1), offset, emit_uint_const(ctx, 32, bit_size / 8));
+ SpvId shared_block = get_shared_block(ctx, bit_size);
SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
- ctx->shared_block_var, &offset, 1);
+ shared_block, &offset, 1);
+ if (nir_src_bit_size(intr->src[1]) == 64)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics);
+ SpvId param2 = 0;
+
+ if (intr->intrinsic == nir_intrinsic_shared_atomic_swap) {
+ param2 = get_src(ctx, &intr->src[2], &atype);
+ if (atype != ret_type)
+ param2 = cast_src_to_type(ctx, param2, intr->src[2], ret_type);
+ }
+
+ handle_atomic_op(ctx, intr, ptr, param, param2, ret_type);
+}
+
+static void
+emit_global_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ unsigned bit_size = nir_src_bit_size(intr->src[1]);
+ SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint);
+ nir_alu_type atype;
+ nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint;
+ SpvId param = get_src(ctx, &intr->src[1], &atype);
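+ /* global atomics operate on raw device addresses: declare the
+ * PhysicalStorageBuffer capability and cast the address operand to a
+ * pointer of the destination type before emitting the atomic
+ */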
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses);
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassPhysicalStorageBuffer,
+ dest_type);
+ SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[0], &atype));
+
+ if (bit_size == 64)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics);
SpvId param2 = 0;
- if (intr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
- param2 = get_src(ctx, &intr->src[2]);
+ if (intr->intrinsic == nir_intrinsic_global_atomic_swap)
+ param2 = get_src(ctx, &intr->src[2], &atype);
- handle_atomic_op(ctx, intr, ptr, param, param2);
+ handle_atomic_op(ctx, intr, ptr, param, param2, ret_type);
}
static void
emit_get_ssbo_size(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
SpvId uint_type = get_uvec_type(ctx, 32, 1);
- nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]);
- assert(const_block_index); // no dynamic indexing for now
- nir_variable *var = ctx->ssbo_vars[const_block_index->u32];
+ nir_variable *var = ctx->ssbo_vars;
+ const struct glsl_type *bare_type = glsl_without_array(var->type);
+ unsigned last_member_idx = glsl_get_length(bare_type) - 1;
+ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+ SpvStorageClassStorageBuffer,
+ get_bo_struct_type(ctx, var));
+ nir_alu_type atype;
+ SpvId bo = get_src(ctx, &intr->src[0], &atype);
+ if (atype == nir_type_float)
+ bo = bitcast_to_uvec(ctx, bo, nir_src_bit_size(intr->src[0]), 1);
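+ /* index the ssbo array variable with the (integer) buffer id from src[0],
+ * then query the runtime array in the struct's last member via OpArrayLength
+ */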
+ SpvId indices[] = { bo };
+ SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
+ ctx->ssbos[2], indices,
+ ARRAY_SIZE(indices));
SpvId result = spirv_builder_emit_binop(&ctx->builder, SpvOpArrayLength, uint_type,
- ctx->ssbos[const_block_index->u32][2], 1);
+ ptr, last_member_idx);
/* this is going to be converted by nir to:
length = (buffer_size - offset) / stride
* so we need to un-convert it to avoid having the calculation performed twice
*/
- unsigned last_member_idx = glsl_get_length(var->interface_type) - 1;
- const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, last_member_idx);
+ const struct glsl_type *last_member = glsl_get_struct_field(bare_type, last_member_idx);
/* multiply by stride */
result = emit_binop(ctx, SpvOpIMul, uint_type, result, emit_uint_const(ctx, 32, glsl_get_explicit_stride(last_member)));
/* get total ssbo size by adding offset */
result = emit_binop(ctx, SpvOpIAdd, uint_type, result,
emit_uint_const(ctx, 32,
- glsl_get_struct_field_offset(var->interface_type, last_member_idx)));
- store_dest(ctx, &intr->dest, result, nir_type_uint);
-}
-
-static inline nir_variable *
-get_var_from_image(struct ntv_context *ctx, SpvId var_id)
-{
- struct hash_entry *he = _mesa_hash_table_search(ctx->image_vars, &var_id);
- assert(he);
- return he->data;
+ glsl_get_struct_field_offset(bare_type, last_member_idx)));
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static SpvId
@@ -2534,16 +2897,17 @@ get_image_coords(struct ntv_context *ctx, const struct glsl_type *type, nir_src
uint32_t num_coords = glsl_get_sampler_coordinate_components(type);
uint32_t src_components = nir_src_num_components(*src);
- SpvId spv = get_src(ctx, src);
+ nir_alu_type atype;
+ SpvId spv = get_src(ctx, src, &atype);
if (num_coords == src_components)
return spv;
/* need to extract the coord dimensions that the image can use */
- SpvId vec_type = get_uvec_type(ctx, 32, num_coords);
+ SpvId vec_type = get_alu_type(ctx, atype, num_coords, 32);
if (num_coords == 1)
return spirv_builder_emit_vector_extract(&ctx->builder, vec_type, spv, 0);
uint32_t constituents[4];
- SpvId zero = emit_uint_const(ctx, nir_src_bit_size(*src), 0);
+ SpvId zero = atype == nir_type_uint ? emit_uint_const(ctx, nir_src_bit_size(*src), 0) : emit_float_const(ctx, nir_src_bit_size(*src), 0);
assert(num_coords < ARRAY_SIZE(constituents));
for (unsigned i = 0; i < num_coords; i++)
constituents[i] = i < src_components ? i : zero;
@@ -2553,81 +2917,165 @@ get_image_coords(struct ntv_context *ctx, const struct glsl_type *type, nir_src
static void
emit_image_deref_store(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId img_var = get_src(ctx, &intr->src[0]);
- nir_variable *var = get_var_from_image(ctx, img_var);
- SpvId img_type = ctx->image_types[var->data.driver_location];
+ nir_alu_type atype;
+ SpvId img_var = get_src(ctx, &intr->src[0], &atype);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ SpvId img_type = find_image_type(ctx, var);
const struct glsl_type *type = glsl_without_array(var->type);
SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type));
SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var);
SpvId coord = get_image_coords(ctx, type, &intr->src[1]);
- SpvId texel = get_src(ctx, &intr->src[3]);
- SpvId sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ? get_src(ctx, &intr->src[2]) : 0;
- assert(nir_src_bit_size(intr->src[3]) == glsl_base_type_bit_size(glsl_get_sampler_result_type(type)));
+ SpvId texel = get_src(ctx, &intr->src[3], &atype);
/* texel type must match image type */
- texel = emit_bitcast(ctx,
- spirv_builder_type_vector(&ctx->builder, base_type, 4),
- texel);
+ if (atype != nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(type)))
+ texel = emit_bitcast(ctx,
+ spirv_builder_type_vector(&ctx->builder, base_type, 4),
+ texel);
+ bool use_sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
+ SpvId sample = use_sample ? get_src(ctx, &intr->src[2], &atype) : 0;
+ assert(nir_src_bit_size(intr->src[3]) == glsl_base_type_bit_size(glsl_get_sampler_result_type(type)));
spirv_builder_emit_image_write(&ctx->builder, img, coord, texel, 0, sample, 0);
}
+static SpvId
+extract_sparse_load(struct ntv_context *ctx, SpvId result, SpvId dest_type, nir_def *def)
+{
+ /* Result Type must be an OpTypeStruct with two members.
+ * The first member’s type must be an integer type scalar.
+ * It holds a Residency Code that can be passed to OpImageSparseTexelsResident
+ * - OpImageSparseRead spec
+ */
+ uint32_t idx = 0;
+ SpvId resident = spirv_builder_emit_composite_extract(&ctx->builder, spirv_builder_type_uint(&ctx->builder, 32), result, &idx, 1);
+ idx = 1;
+ /* normal vec4 return */
+ if (def->num_components == 4)
+ result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, result, &idx, 1);
+ else {
+ /* shadow */
+ assert(def->num_components == 1);
+ SpvId type = spirv_builder_type_float(&ctx->builder, def->bit_size);
+ SpvId val[2];
+ /* pad to 2 components: the upcoming is_sparse_texels_resident instr will always use the
+ * separate residency value, but the shader still expects this return to be a vec2,
+ * so give it a vec2
+ */
+ val[0] = spirv_builder_emit_composite_extract(&ctx->builder, type, result, &idx, 1);
+ val[1] = emit_float_const(ctx, def->bit_size, 0);
+ result = spirv_builder_emit_composite_construct(&ctx->builder, get_fvec_type(ctx, def->bit_size, 2), val, 2);
+ }
+ assert(resident != 0);
+ assert(def->index < ctx->num_defs);
+ ctx->resident_defs[def->index] = resident;
+ return result;
+}
+
static void
emit_image_deref_load(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId img_var = get_src(ctx, &intr->src[0]);
- nir_variable *var = get_var_from_image(ctx, img_var);
- SpvId img_type = ctx->image_types[var->data.driver_location];
+ bool sparse = intr->intrinsic == nir_intrinsic_image_deref_sparse_load;
+ nir_alu_type atype;
+ SpvId img_var = get_src(ctx, &intr->src[0], &atype);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ bool mediump = (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW);
+ SpvId img_type = find_image_type(ctx, var);
const struct glsl_type *type = glsl_without_array(var->type);
SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type));
SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var);
SpvId coord = get_image_coords(ctx, type, &intr->src[1]);
- SpvId sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ? get_src(ctx, &intr->src[2]) : 0;
+ bool use_sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
+ SpvId sample = use_sample ? get_src(ctx, &intr->src[2], &atype) : 0;
+ SpvId dest_type = spirv_builder_type_vector(&ctx->builder, base_type,
+ intr->def.num_components);
SpvId result = spirv_builder_emit_image_read(&ctx->builder,
- spirv_builder_type_vector(&ctx->builder, base_type, nir_dest_num_components(intr->dest)),
- img, coord, 0, sample, 0);
- store_dest(ctx, &intr->dest, result, nir_type_float);
+ dest_type,
+ img, coord, 0, sample, 0, sparse);
+ if (sparse)
+ result = extract_sparse_load(ctx, result, dest_type, &intr->def);
+
+ if (!sparse && mediump) {
+ spirv_builder_emit_decoration(&ctx->builder, result,
+ SpvDecorationRelaxedPrecision);
+ }
+
+ store_def(ctx, intr->def.index, result, nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(type)));
}
static void
emit_image_deref_size(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId img_var = get_src(ctx, &intr->src[0]);
- nir_variable *var = get_var_from_image(ctx, img_var);
- SpvId img_type = ctx->image_types[var->data.driver_location];
+ nir_alu_type atype;
+ SpvId img_var = get_src(ctx, &intr->src[0], &atype);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ SpvId img_type = find_image_type(ctx, var);
const struct glsl_type *type = glsl_without_array(var->type);
SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var);
- SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, get_uvec_type(ctx, 32, glsl_get_sampler_coordinate_components(type)), img, 0);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ unsigned num_components = glsl_get_sampler_coordinate_components(type);
+ /* SPIRV requires 2 components for non-array cube size */
+ if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && !glsl_sampler_type_is_array(type))
+ num_components = 2;
+
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery);
+ SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, get_uvec_type(ctx, 32, num_components), img, 0);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static void
emit_image_deref_samples(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId img_var = get_src(ctx, &intr->src[0]);
- nir_variable *var = get_var_from_image(ctx, img_var);
- SpvId img_type = ctx->image_types[var->data.driver_location];
+ nir_alu_type atype;
+ SpvId img_var = get_src(ctx, &intr->src[0], &atype);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ SpvId img_type = find_image_type(ctx, var);
SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var);
- SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, get_dest_type(ctx, &intr->dest, nir_type_uint), img);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery);
+ SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, get_def_type(ctx, &intr->def, nir_type_uint), img);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static void
emit_image_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- SpvId img_var = get_src(ctx, &intr->src[0]);
- SpvId param = get_src(ctx, &intr->src[3]);
- nir_variable *var = get_var_from_image(ctx, img_var);
+ nir_alu_type atype, ptype;
+ SpvId param = get_src(ctx, &intr->src[3], &ptype);
+ SpvId img_var = get_src(ctx, &intr->src[0], &atype);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
const struct glsl_type *type = glsl_without_array(var->type);
bool is_ms;
type_to_dim(glsl_get_sampler_dim(type), &is_ms);
- SpvId sample = is_ms ? get_src(ctx, &intr->src[2]) : emit_uint_const(ctx, 32, 0);
+ SpvId sample = is_ms ? get_src(ctx, &intr->src[2], &atype) : emit_uint_const(ctx, 32, 0);
SpvId coord = get_image_coords(ctx, type, &intr->src[1]);
- SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type));
+ enum glsl_base_type glsl_result_type = glsl_get_sampler_result_type(type);
+ SpvId base_type = get_glsl_basetype(ctx, glsl_result_type);
SpvId texel = spirv_builder_emit_image_texel_pointer(&ctx->builder, base_type, img_var, coord, sample);
SpvId param2 = 0;
- if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
- param2 = get_src(ctx, &intr->src[4]);
- handle_atomic_op(ctx, intr, texel, param, param2);
+ /* The type of Value must be the same as Result Type.
+ * The type of the value pointed to by Pointer must be the same as Result Type.
+ */
+ nir_alu_type ntype = nir_get_nir_type_for_glsl_base_type(glsl_result_type);
+ if (ptype != ntype) {
+ SpvId cast_type = get_def_type(ctx, &intr->def, ntype);
+ param = emit_bitcast(ctx, cast_type, param);
+ }
+
+ if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) {
+ param2 = get_src(ctx, &intr->src[4], &ptype);
+ if (ptype != ntype) {
+ SpvId cast_type = get_def_type(ctx, &intr->def, ntype);
+ param2 = emit_bitcast(ctx, cast_type, param2);
+ }
+ }
+
+ handle_atomic_op(ctx, intr, texel, param, param2, ntype);
}
static void
@@ -2635,9 +3083,10 @@ emit_ballot(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR);
spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot");
- SpvId type = get_dest_uvec_type(ctx, &intr->dest);
- SpvId result = emit_unop(ctx, SpvOpSubgroupBallotKHR, type, get_src(ctx, &intr->src[0]));
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ SpvId type = get_def_uvec_type(ctx, &intr->def);
+ nir_alu_type atype;
+ SpvId result = emit_unop(ctx, SpvOpSubgroupBallotKHR, type, get_src(ctx, &intr->src[0], &atype));
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static void
@@ -2645,9 +3094,11 @@ emit_read_first_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR);
spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot");
- SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint);
- SpvId result = emit_unop(ctx, SpvOpSubgroupFirstInvocationKHR, type, get_src(ctx, &intr->src[0]));
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ nir_alu_type atype;
+ SpvId src = get_src(ctx, &intr->src[0], &atype);
+ SpvId type = get_def_type(ctx, &intr->def, atype);
+ SpvId result = emit_unop(ctx, SpvOpSubgroupFirstInvocationKHR, type, src);
+ store_def(ctx, intr->def.index, result, atype);
}
static void
@@ -2655,11 +3106,13 @@ emit_read_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR);
spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot");
- SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint);
+ nir_alu_type atype, itype;
+ SpvId src = get_src(ctx, &intr->src[0], &atype);
+ SpvId type = get_def_type(ctx, &intr->def, atype);
SpvId result = emit_binop(ctx, SpvOpSubgroupReadInvocationKHR, type,
- get_src(ctx, &intr->src[0]),
- get_src(ctx, &intr->src[1]));
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ src,
+ get_src(ctx, &intr->src[1], &itype));
+ store_def(ctx, intr->def.index, result, atype);
}
static void
@@ -2669,9 +3122,25 @@ emit_shader_clock(struct ntv_context *ctx, nir_intrinsic_instr *intr)
spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_clock");
SpvScope scope = get_scope(nir_intrinsic_memory_scope(intr));
- SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint);
+ SpvId type = get_def_type(ctx, &intr->def, nir_type_uint);
SpvId result = spirv_builder_emit_unop_const(&ctx->builder, SpvOpReadClockKHR, type, scope);
- store_dest(ctx, &intr->dest, result, nir_type_uint);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
+}
+
+static void
+emit_is_sparse_texels_resident(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySparseResidency);
+
+ SpvId type = get_def_type(ctx, &intr->def, nir_type_uint);
+
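+ /* the residency code was stashed in resident_defs[] by the sparse
+ * load/sample that produced this def; look it up by ssa index
+ */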
+ unsigned index = intr->src[0].ssa->index;
+ assert(index < ctx->num_defs);
+ assert(ctx->resident_defs[index] != 0);
+ SpvId resident = ctx->resident_defs[index];
+
+ SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageSparseTexelsResident, type, resident);
+ store_def(ctx, intr->def.index, result, nir_type_uint);
}
static void
@@ -2693,27 +3162,85 @@ emit_vote(struct ntv_context *ctx, nir_intrinsic_instr *intr)
default:
unreachable("unknown vote intrinsic");
}
- SpvId result = spirv_builder_emit_vote(&ctx->builder, op, get_src(ctx, &intr->src[0]));
- store_dest_raw(ctx, &intr->dest, result);
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityGroupNonUniformVote);
+ nir_alu_type atype;
+ SpvId result = spirv_builder_emit_vote(&ctx->builder, op, get_src(ctx, &intr->src[0], &atype));
+ store_def(ctx, intr->def.index, result, nir_type_bool);
+}
+
+static void
+emit_is_helper_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ spirv_builder_emit_extension(&ctx->builder,
+ "SPV_EXT_demote_to_helper_invocation");
+ SpvId result = spirv_is_helper_invocation(&ctx->builder);
+ store_def(ctx, intr->def.index, result, nir_type_bool);
+}
+
+static void
+emit_barrier(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+ SpvScope scope = get_scope(nir_intrinsic_execution_scope(intr));
+ SpvScope mem_scope = get_scope(nir_intrinsic_memory_scope(intr));
+ SpvMemorySemanticsMask semantics = 0;
+
+ if (nir_intrinsic_memory_scope(intr) != SCOPE_NONE) {
+ nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
+
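+ /* map each NIR memory mode named by the barrier onto the corresponding
+ * SPIR-V memory semantics bit; an empty mode mask selects all of them
+ */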
+ if (modes & nir_var_image)
+ semantics |= SpvMemorySemanticsImageMemoryMask;
+
+ if (modes & nir_var_mem_shared)
+ semantics |= SpvMemorySemanticsWorkgroupMemoryMask;
+
+ if (modes & (nir_var_mem_ssbo | nir_var_mem_global))
+ semantics |= SpvMemorySemanticsUniformMemoryMask;
+
+ if (modes & nir_var_mem_global)
+ semantics |= SpvMemorySemanticsCrossWorkgroupMemoryMask;
+
+ if (modes & (nir_var_shader_out | nir_var_mem_task_payload))
+ semantics |= SpvMemorySemanticsOutputMemoryMask;
+
+ if (!modes)
+ semantics = SpvMemorySemanticsWorkgroupMemoryMask |
+ SpvMemorySemanticsUniformMemoryMask |
+ SpvMemorySemanticsImageMemoryMask |
+ SpvMemorySemanticsCrossWorkgroupMemoryMask;
+ semantics |= SpvMemorySemanticsAcquireReleaseMask;
+ }
+
+ if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE)
+ spirv_builder_emit_control_barrier(&ctx->builder, scope, mem_scope, semantics);
+ else
+ spirv_builder_emit_memory_barrier(&ctx->builder, mem_scope, semantics);
}
static void
emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
switch (intr->intrinsic) {
- case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_ssbo:
- emit_load_bo(ctx, intr);
+ case nir_intrinsic_decl_reg:
+ /* Nothing to do */
+ break;
+
+ case nir_intrinsic_load_reg:
+ emit_load_reg(ctx, intr);
break;
- case nir_intrinsic_store_ssbo:
- emit_store_ssbo(ctx, intr);
+ case nir_intrinsic_store_reg:
+ emit_store_reg(ctx, intr);
break;
case nir_intrinsic_discard:
emit_discard(ctx, intr);
break;
+ case nir_intrinsic_demote:
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityDemoteToHelperInvocation);
+ spirv_builder_emit_demote(&ctx->builder);
+ break;
+
case nir_intrinsic_load_deref:
emit_load_deref(ctx, intr);
break;
@@ -2722,10 +3249,19 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_store_deref(ctx, intr);
break;
- case nir_intrinsic_load_push_constant:
+ case nir_intrinsic_load_push_constant_zink:
emit_load_push_const(ctx, intr);
break;
+ case nir_intrinsic_load_global:
+ case nir_intrinsic_load_global_constant:
+ emit_load_global(ctx, intr);
+ break;
+
+ case nir_intrinsic_store_global:
+ emit_store_global(ctx, intr);
+ break;
+
case nir_intrinsic_load_front_face:
emit_load_front_face(ctx, intr);
break;
@@ -2759,9 +3295,15 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_sample_id:
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampleRateShading);
emit_load_uint_input(ctx, intr, &ctx->sample_id_var, "gl_SampleId", SpvBuiltInSampleId);
break;
+ case nir_intrinsic_load_point_coord_maybe_flipped:
+ case nir_intrinsic_load_point_coord:
+ emit_load_vec_input(ctx, intr, &ctx->point_coord_var, "gl_PointCoord", SpvBuiltInPointCoord, nir_type_float);
+ break;
+
case nir_intrinsic_load_sample_pos:
emit_load_vec_input(ctx, intr, &ctx->sample_pos_var, "gl_SamplePosition", SpvBuiltInSamplePosition, nir_type_float);
break;
@@ -2770,21 +3312,15 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_load_uint_input(ctx, intr, &ctx->sample_mask_in_var, "gl_SampleMaskIn", SpvBuiltInSampleMask);
break;
- case nir_intrinsic_emit_vertex_with_counter:
- /* geometry shader emits copied xfb outputs just prior to EmitVertex(),
- * since that's the end of the shader
- */
- if (ctx->so_info)
- emit_so_outputs(ctx, ctx->so_info);
- spirv_builder_emit_vertex(&ctx->builder, nir_intrinsic_stream_id(intr));
- break;
-
- case nir_intrinsic_set_vertex_and_primitive_count:
- /* do nothing */
+ case nir_intrinsic_emit_vertex:
+ if (ctx->nir->info.gs.vertices_out) // skip vertex emission if !vertices_out
+ spirv_builder_emit_vertex(&ctx->builder, nir_intrinsic_stream_id(intr),
+ ctx->nir->info.stage == MESA_SHADER_GEOMETRY && util_bitcount(ctx->nir->info.gs.active_stream_mask) > 1);
break;
- case nir_intrinsic_end_primitive_with_counter:
- spirv_builder_end_primitive(&ctx->builder, nir_intrinsic_stream_id(intr));
+ case nir_intrinsic_end_primitive:
+ spirv_builder_end_primitive(&ctx->builder, nir_intrinsic_stream_id(intr),
+ ctx->nir->info.stage == MESA_SHADER_GEOMETRY && util_bitcount(ctx->nir->info.gs.active_stream_mask) > 1);
break;
case nir_intrinsic_load_helper_invocation:
@@ -2801,39 +3337,8 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
SpvBuiltInTessCoord, nir_type_float);
break;
- case nir_intrinsic_memory_barrier_tcs_patch:
- spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup,
- SpvMemorySemanticsOutputMemoryMask | SpvMemorySemanticsReleaseMask);
- break;
-
- case nir_intrinsic_memory_barrier:
- spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup,
- SpvMemorySemanticsImageMemoryMask | SpvMemorySemanticsUniformMemoryMask |
- SpvMemorySemanticsMakeVisibleMask | SpvMemorySemanticsAcquireReleaseMask);
- break;
-
- case nir_intrinsic_memory_barrier_image:
- spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeDevice,
- SpvMemorySemanticsImageMemoryMask |
- SpvMemorySemanticsAcquireReleaseMask);
- break;
-
- case nir_intrinsic_group_memory_barrier:
- spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup,
- SpvMemorySemanticsWorkgroupMemoryMask |
- SpvMemorySemanticsAcquireReleaseMask);
- break;
-
- case nir_intrinsic_memory_barrier_shared:
- spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup,
- SpvMemorySemanticsWorkgroupMemoryMask |
- SpvMemorySemanticsAcquireReleaseMask);
- break;
-
- case nir_intrinsic_control_barrier:
- spirv_builder_emit_control_barrier(&ctx->builder, SpvScopeWorkgroup,
- SpvScopeWorkgroup,
- SpvMemorySemanticsWorkgroupMemoryMask | SpvMemorySemanticsAcquireMask);
+ case nir_intrinsic_barrier:
+ emit_barrier(ctx, intr);
break;
case nir_intrinsic_interp_deref_at_centroid:
@@ -2842,38 +3347,21 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_interpolate(ctx, intr);
break;
- case nir_intrinsic_memory_barrier_buffer:
- spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeDevice,
- SpvMemorySemanticsUniformMemoryMask |
- SpvMemorySemanticsAcquireReleaseMask);
- break;
-
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- emit_ssbo_atomic_intrinsic(ctx, intr);
- break;
-
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_shared_atomic_imin:
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_shared_atomic_imax:
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_deref_atomic:
+ case nir_intrinsic_deref_atomic_swap:
+ emit_deref_atomic_intrinsic(ctx, intr);
+ break;
+
+ case nir_intrinsic_shared_atomic:
+ case nir_intrinsic_shared_atomic_swap:
emit_shared_atomic_intrinsic(ctx, intr);
break;
+ case nir_intrinsic_global_atomic:
+ case nir_intrinsic_global_atomic_swap:
+ emit_global_atomic_intrinsic(ctx, intr);
+ break;
+
case nir_intrinsic_begin_invocation_interlock:
case nir_intrinsic_end_invocation_interlock:
spirv_builder_emit_interlock(&ctx->builder, intr->intrinsic == nir_intrinsic_end_invocation_interlock);
@@ -2887,6 +3375,7 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_image_deref_store(ctx, intr);
break;
+ case nir_intrinsic_image_deref_sparse_load:
case nir_intrinsic_image_deref_load:
emit_image_deref_load(ctx, intr);
break;
@@ -2899,16 +3388,8 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_image_deref_samples(ctx, intr);
break;
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic:
+ case nir_intrinsic_image_deref_atomic_swap:
emit_image_intrinsic(ctx, intr);
break;
@@ -2959,7 +3440,7 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
case nir_intrinsic_load_workgroup_size:
assert(ctx->local_group_size_var);
- store_dest(ctx, &intr->dest, ctx->local_group_size_var, nir_type_uint);
+ store_def(ctx, intr->def.index, ctx->local_group_size_var, nir_type_uint);
break;
case nir_intrinsic_load_shared:
@@ -2970,6 +3451,14 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_store_shared(ctx, intr);
break;
+ case nir_intrinsic_load_scratch:
+ emit_load_scratch(ctx, intr);
+ break;
+
+ case nir_intrinsic_store_scratch:
+ emit_store_scratch(ctx, intr);
+ break;
+
case nir_intrinsic_shader_clock:
emit_shader_clock(ctx, intr);
break;
@@ -2981,6 +3470,14 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_vote(ctx, intr);
break;
+ case nir_intrinsic_is_sparse_resident_zink:
+ emit_is_sparse_texels_resident(ctx, intr);
+ break;
+
+ case nir_intrinsic_is_helper_invocation:
+ emit_is_helper_invocation(ctx, intr);
+ break;
+
default:
fprintf(stderr, "emit_intrinsic: not implemented (%s)\n",
nir_intrinsic_infos[intr->intrinsic].name);
@@ -2989,20 +3486,24 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
}
static void
-emit_undef(struct ntv_context *ctx, nir_ssa_undef_instr *undef)
+emit_undef(struct ntv_context *ctx, nir_undef_instr *undef)
{
SpvId type = undef->def.bit_size == 1 ? get_bvec_type(ctx, undef->def.num_components) :
get_uvec_type(ctx, undef->def.bit_size,
undef->def.num_components);
- store_ssa_def(ctx, &undef->def,
- spirv_builder_emit_undef(&ctx->builder, type));
+ store_def(ctx, undef->def.index,
+ spirv_builder_emit_undef(&ctx->builder, type),
+ undef->def.bit_size == 1 ? nir_type_bool : nir_type_uint);
}
static SpvId
get_src_float(struct ntv_context *ctx, nir_src *src)
{
- SpvId def = get_src(ctx, src);
+ nir_alu_type atype;
+ SpvId def = get_src(ctx, src, &atype);
+ if (atype == nir_type_float)
+ return def;
unsigned num_components = nir_src_num_components(*src);
unsigned bit_size = nir_src_bit_size(*src);
return bitcast_to_fvec(ctx, def, bit_size, num_components);
@@ -3011,7 +3512,10 @@ get_src_float(struct ntv_context *ctx, nir_src *src)
static SpvId
get_src_int(struct ntv_context *ctx, nir_src *src)
{
- SpvId def = get_src(ctx, src);
+ nir_alu_type atype;
+ SpvId def = get_src(ctx, src, &atype);
+ if (atype == nir_type_int)
+ return def;
unsigned num_components = nir_src_num_components(*src);
unsigned bit_size = nir_src_bit_size(*src);
return bitcast_to_ivec(ctx, def, bit_size, num_components);
@@ -3027,44 +3531,37 @@ tex_instr_is_lod_allowed(nir_tex_instr *tex)
return (tex->sampler_dim == GLSL_SAMPLER_DIM_1D ||
tex->sampler_dim == GLSL_SAMPLER_DIM_2D ||
tex->sampler_dim == GLSL_SAMPLER_DIM_3D ||
- tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
+ tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
+ /* External images are interpreted as 2D in type_to_dim,
+ * so LOD is allowed */
+ tex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL ||
+ /* RECT will always become 2D, so this is fine */
+ tex->sampler_dim == GLSL_SAMPLER_DIM_RECT);
}
static void
-emit_tex(struct ntv_context *ctx, nir_tex_instr *tex)
+get_tex_srcs(struct ntv_context *ctx, nir_tex_instr *tex,
+ nir_variable **bindless_var, unsigned *coord_components,
+ struct spriv_tex_src *tex_src)
{
- assert(tex->op == nir_texop_tex ||
- tex->op == nir_texop_txb ||
- tex->op == nir_texop_txl ||
- tex->op == nir_texop_txd ||
- tex->op == nir_texop_txf ||
- tex->op == nir_texop_txf_ms ||
- tex->op == nir_texop_txs ||
- tex->op == nir_texop_lod ||
- tex->op == nir_texop_tg4 ||
- tex->op == nir_texop_texture_samples ||
- tex->op == nir_texop_query_levels);
- assert(tex->texture_index == tex->sampler_index);
-
- SpvId coord = 0, proj = 0, bias = 0, lod = 0, dref = 0, dx = 0, dy = 0,
- const_offset = 0, offset = 0, sample = 0, tex_offset = 0;
- unsigned coord_components = 0;
+ tex_src->sparse = tex->is_sparse;
+ nir_alu_type atype;
for (unsigned i = 0; i < tex->num_srcs; i++) {
nir_const_value *cv;
switch (tex->src[i].src_type) {
case nir_tex_src_coord:
if (tex->op == nir_texop_txf ||
tex->op == nir_texop_txf_ms)
- coord = get_src_int(ctx, &tex->src[i].src);
+ tex_src->coord = get_src_int(ctx, &tex->src[i].src);
else
- coord = get_src_float(ctx, &tex->src[i].src);
- coord_components = nir_src_num_components(tex->src[i].src);
+ tex_src->coord = get_src_float(ctx, &tex->src[i].src);
+ *coord_components = nir_src_num_components(tex->src[i].src);
break;
case nir_tex_src_projector:
assert(nir_src_num_components(tex->src[i].src) == 1);
- proj = get_src_float(ctx, &tex->src[i].src);
- assert(proj != 0);
+ tex_src->proj = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->proj != 0);
break;
case nir_tex_src_offset:
@@ -3074,27 +3571,33 @@ emit_tex(struct ntv_context *ctx, nir_tex_instr *tex)
unsigned num_components = nir_src_num_components(tex->src[i].src);
SpvId components[NIR_MAX_VEC_COMPONENTS];
- for (int i = 0; i < num_components; ++i) {
- int64_t tmp = nir_const_value_as_int(cv[i], bit_size);
- components[i] = emit_int_const(ctx, bit_size, tmp);
+ for (int j = 0; j < num_components; ++j) {
+ int64_t tmp = nir_const_value_as_int(cv[j], bit_size);
+ components[j] = emit_int_const(ctx, bit_size, tmp);
}
if (num_components > 1) {
SpvId type = get_ivec_type(ctx, bit_size, num_components);
- const_offset = spirv_builder_const_composite(&ctx->builder,
- type,
- components,
- num_components);
+ tex_src->const_offset = spirv_builder_const_composite(&ctx->builder,
+ type,
+ components,
+ num_components);
} else
- const_offset = components[0];
+ tex_src->const_offset = components[0];
} else
- offset = get_src_int(ctx, &tex->src[i].src);
+ tex_src->offset = get_src_int(ctx, &tex->src[i].src);
break;
case nir_tex_src_bias:
assert(tex->op == nir_texop_txb);
- bias = get_src_float(ctx, &tex->src[i].src);
- assert(bias != 0);
+ tex_src->bias = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->bias != 0);
+ break;
+
+ case nir_tex_src_min_lod:
+ assert(nir_src_num_components(tex->src[i].src) == 1);
+ tex_src->min_lod = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->min_lod != 0);
break;
case nir_tex_src_lod:
@@ -3102,201 +3605,320 @@ emit_tex(struct ntv_context *ctx, nir_tex_instr *tex)
if (tex->op == nir_texop_txf ||
tex->op == nir_texop_txf_ms ||
tex->op == nir_texop_txs)
- lod = get_src_int(ctx, &tex->src[i].src);
+ tex_src->lod = get_src_int(ctx, &tex->src[i].src);
else
- lod = get_src_float(ctx, &tex->src[i].src);
- assert(lod != 0);
+ tex_src->lod = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->lod != 0);
break;
case nir_tex_src_ms_index:
assert(nir_src_num_components(tex->src[i].src) == 1);
- sample = get_src_int(ctx, &tex->src[i].src);
+ tex_src->sample = get_src_int(ctx, &tex->src[i].src);
break;
case nir_tex_src_comparator:
assert(nir_src_num_components(tex->src[i].src) == 1);
- dref = get_src_float(ctx, &tex->src[i].src);
- assert(dref != 0);
+ tex_src->dref = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->dref != 0);
break;
case nir_tex_src_ddx:
- dx = get_src_float(ctx, &tex->src[i].src);
- assert(dx != 0);
+ tex_src->dx = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->dx != 0);
break;
case nir_tex_src_ddy:
- dy = get_src_float(ctx, &tex->src[i].src);
- assert(dy != 0);
+ tex_src->dy = get_src_float(ctx, &tex->src[i].src);
+ assert(tex_src->dy != 0);
break;
case nir_tex_src_texture_offset:
- tex_offset = get_src_int(ctx, &tex->src[i].src);
+ tex_src->tex_offset = get_src_int(ctx, &tex->src[i].src);
break;
case nir_tex_src_sampler_offset:
+ case nir_tex_src_sampler_handle:
/* don't care */
break;
+ case nir_tex_src_texture_handle:
+ tex_src->bindless = get_src(ctx, &tex->src[i].src, &atype);
+ *bindless_var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[i].src));
+ break;
+
default:
fprintf(stderr, "texture source: %d\n", tex->src[i].src_type);
unreachable("unknown texture source");
}
}
+}
- unsigned texture_index = tex->texture_index;
- if (!tex_offset) {
- /* convert constant index back to base + offset */
- unsigned last_sampler = util_last_bit(ctx->samplers_used);
- for (unsigned i = 0; i < last_sampler; i++) {
- if (!ctx->sampler_array_sizes[i]) {
- if (i == texture_index)
- /* this is a non-array sampler, so we don't need an access chain */
+static void
+find_sampler_and_texture_index(struct ntv_context *ctx, struct spriv_tex_src *tex_src,
+ nir_variable *bindless_var,
+ nir_variable **var, uint32_t *texture_index)
+{
+ *var = bindless_var ? bindless_var : ctx->sampler_var[*texture_index];
+ nir_variable **sampler_var = tex_src->bindless ? ctx->bindless_sampler_var : ctx->sampler_var;
+ if (!bindless_var && (!tex_src->tex_offset || !var)) {
+ if (sampler_var[*texture_index]) {
+ if (glsl_type_is_array(sampler_var[*texture_index]->type))
+ tex_src->tex_offset = emit_uint_const(ctx, 32, 0);
+ } else {
+ /* convert constant index back to base + offset */
+ for (int i = *texture_index; i >= 0; i--) {
+ if (sampler_var[i]) {
+ assert(glsl_type_is_array(sampler_var[i]->type));
+ if (!tex_src->tex_offset)
+ tex_src->tex_offset = emit_uint_const(ctx, 32, *texture_index - i);
+ *var = sampler_var[i];
+ *texture_index = i;
break;
- } else if (texture_index <= i + ctx->sampler_array_sizes[i] - 1) {
- /* this is the first member of a sampler array */
- tex_offset = emit_uint_const(ctx, 32, texture_index - i);
- texture_index = i;
- break;
+ }
}
}
}
- SpvId image_type = ctx->sampler_types[texture_index];
- assert(image_type);
- SpvId sampled_type = spirv_builder_type_sampled_image(&ctx->builder,
- image_type);
- assert(sampled_type);
- assert(ctx->samplers_used & (1u << texture_index));
- SpvId sampler_id = ctx->samplers[texture_index];
- if (tex_offset) {
- SpvId ptr = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, sampled_type);
- sampler_id = spirv_builder_emit_access_chain(&ctx->builder, ptr, sampler_id, &tex_offset, 1);
- }
- SpvId load = spirv_builder_emit_load(&ctx->builder, sampled_type, sampler_id);
-
- SpvId dest_type = get_dest_type(ctx, &tex->dest, tex->dest_type);
+}
- if (!tex_instr_is_lod_allowed(tex))
- lod = 0;
- if (tex->op == nir_texop_txs) {
- SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load);
- SpvId result = spirv_builder_emit_image_query_size(&ctx->builder,
- dest_type, image,
- lod);
- store_dest(ctx, &tex->dest, result, tex->dest_type);
- return;
- }
- if (tex->op == nir_texop_query_levels) {
- SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load);
- SpvId result = spirv_builder_emit_image_query_levels(&ctx->builder,
- dest_type, image);
- store_dest(ctx, &tex->dest, result, tex->dest_type);
- return;
- }
- if (tex->op == nir_texop_texture_samples) {
- SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load);
- SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples,
- dest_type, image);
- store_dest(ctx, &tex->dest, result, tex->dest_type);
- return;
+static SpvId
+get_texture_load(struct ntv_context *ctx, SpvId sampler_id, nir_tex_instr *tex,
+ SpvId image_type, SpvId sampled_type)
+{
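+ /* kernels bind images and samplers separately, so load each one and
+ * combine them with OpSampledImage only when the op actually needs a
+ * sampler; other stages load the combined sampled image directly
+ */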
+ if (ctx->stage == MESA_SHADER_KERNEL) {
+ SpvId image_load = spirv_builder_emit_load(&ctx->builder, image_type, sampler_id);
+ if (nir_tex_instr_need_sampler(tex)) {
+ SpvId sampler_load = spirv_builder_emit_load(&ctx->builder, spirv_builder_type_sampler(&ctx->builder),
+ ctx->cl_samplers[tex->sampler_index]);
+ return spirv_builder_emit_sampled_image(&ctx->builder, sampled_type, image_load, sampler_load);
+ } else {
+ return image_load;
+ }
+ } else {
+ return spirv_builder_emit_load(&ctx->builder, sampled_type, sampler_id);
}
+}
- if (proj && coord_components > 0) {
- SpvId constituents[NIR_MAX_VEC_COMPONENTS + 1];
- if (coord_components == 1)
- constituents[0] = coord;
- else {
- assert(coord_components > 1);
- SpvId float_type = spirv_builder_type_float(&ctx->builder, 32);
- for (uint32_t i = 0; i < coord_components; ++i)
- constituents[i] = spirv_builder_emit_composite_extract(&ctx->builder,
- float_type,
- coord,
- &i, 1);
- }
+static SpvId
+get_texop_dest_type(struct ntv_context *ctx, const nir_tex_instr *tex)
+{
+ SpvId actual_dest_type;
+ unsigned num_components = tex->def.num_components;
+ switch (nir_alu_type_get_base_type(tex->dest_type)) {
+ case nir_type_int:
+ actual_dest_type = get_ivec_type(ctx, 32, num_components);
+ break;
- constituents[coord_components++] = proj;
+ case nir_type_uint:
+ actual_dest_type = get_uvec_type(ctx, 32, num_components);
+ break;
- SpvId vec_type = get_fvec_type(ctx, 32, coord_components);
- coord = spirv_builder_emit_composite_construct(&ctx->builder,
- vec_type,
- constituents,
- coord_components);
- }
- if (tex->op == nir_texop_lod) {
- SpvId result = spirv_builder_emit_image_query_lod(&ctx->builder,
- dest_type, load,
- coord);
- store_dest(ctx, &tex->dest, result, tex->dest_type);
- return;
+ case nir_type_float:
+ actual_dest_type = get_fvec_type(ctx, 32, num_components);
+ break;
+
+ default:
+ unreachable("unexpected nir_alu_type");
}
- SpvId actual_dest_type;
- if (dref)
- actual_dest_type =
- spirv_builder_type_float(&ctx->builder,
- nir_dest_bit_size(tex->dest));
+
+ return actual_dest_type;
+}
+
+static void
+move_tex_proj_to_coord(struct ntv_context *ctx, unsigned coord_components, struct spriv_tex_src *tex_src)
+{
+ SpvId constituents[NIR_MAX_VEC_COMPONENTS + 1];
+ if (coord_components == 1)
+ constituents[0] = tex_src->coord;
else {
- unsigned num_components = nir_dest_num_components(tex->dest);
- switch (nir_alu_type_get_base_type(tex->dest_type)) {
- case nir_type_int:
- actual_dest_type = get_ivec_type(ctx, 32, num_components);
- break;
+ assert(coord_components > 1);
+ SpvId float_type = spirv_builder_type_float(&ctx->builder, 32);
+ for (uint32_t i = 0; i < coord_components; ++i)
+ constituents[i] = spirv_builder_emit_composite_extract(&ctx->builder,
+ float_type,
+ tex_src->coord,
+ &i, 1);
+ }
- case nir_type_uint:
- actual_dest_type = get_uvec_type(ctx, 32, num_components);
- break;
+ constituents[coord_components++] = tex_src->proj;
- case nir_type_float:
- actual_dest_type = get_fvec_type(ctx, 32, num_components);
- break;
+ SpvId vec_type = get_fvec_type(ctx, 32, coord_components);
+ tex_src->coord = spirv_builder_emit_composite_construct(&ctx->builder,
+ vec_type,
+ constituents,
+ coord_components);
+}
- default:
- unreachable("unexpected nir_alu_type");
- }
- }
+static SpvId
+get_tex_image_to_load(struct ntv_context *ctx, SpvId image_type, bool is_buffer, SpvId load)
+{
+ return is_buffer || ctx->stage == MESA_SHADER_KERNEL ?
+ load :
+ spirv_builder_emit_image(&ctx->builder, image_type, load);
+}
+
+static SpvId
+emit_tex_readop(struct ntv_context *ctx, nir_variable *bindless_var, SpvId load,
+ struct spriv_tex_src *tex_src, SpvId dest_type, bool is_buffer,
+ nir_variable *var, SpvId image_type, nir_tex_instr *tex)
+{
+ SpvId actual_dest_type = get_texop_dest_type(ctx, tex);
SpvId result;
- if (offset)
+ if (tex_src->offset)
spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageGatherExtended);
+ if (tex_src->min_lod)
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityMinLod);
if (tex->op == nir_texop_txf ||
tex->op == nir_texop_txf_ms ||
tex->op == nir_texop_tg4) {
- SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load);
+ SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load);
if (tex->op == nir_texop_tg4) {
- if (const_offset)
+ if (tex_src->const_offset)
spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageGatherExtended);
- result = spirv_builder_emit_image_gather(&ctx->builder, dest_type,
- load, coord, emit_uint_const(ctx, 32, tex->component),
- lod, sample, const_offset, offset, dref);
- } else
+ result = spirv_builder_emit_image_gather(&ctx->builder, actual_dest_type,
+ load, tex_src, emit_uint_const(ctx, 32, tex->component));
+ actual_dest_type = dest_type;
+ } else {
+ assert(tex->op == nir_texop_txf_ms || !tex_src->sample);
+ bool is_ms;
+ type_to_dim(glsl_get_sampler_dim(glsl_without_array(var->type)), &is_ms);
+ assert(is_ms || !tex_src->sample);
result = spirv_builder_emit_image_fetch(&ctx->builder, actual_dest_type,
- image, coord, lod, sample, const_offset, offset);
+ image, tex_src);
+ }
} else {
+ if (tex->op == nir_texop_txl)
+ tex_src->min_lod = 0;
result = spirv_builder_emit_image_sample(&ctx->builder,
actual_dest_type, load,
- coord,
- proj != 0,
- lod, bias, dref, dx, dy,
- const_offset, offset);
+ tex_src);
}
- spirv_builder_emit_decoration(&ctx->builder, result,
- SpvDecorationRelaxedPrecision);
-
- if (dref && nir_dest_num_components(tex->dest) > 1 && tex->op != nir_texop_tg4) {
- SpvId components[4] = { result, result, result, result };
- result = spirv_builder_emit_composite_construct(&ctx->builder,
- dest_type,
- components,
- 4);
+ if (!bindless_var && (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW)) {
+ spirv_builder_emit_decoration(&ctx->builder, result,
+ SpvDecorationRelaxedPrecision);
}
- if (nir_dest_bit_size(tex->dest) != 32) {
+ if (tex->is_sparse)
+ result = extract_sparse_load(ctx, result, actual_dest_type, &tex->def);
+
+ if (tex->def.bit_size != 32) {
/* convert FP32 to FP16 */
result = emit_unop(ctx, SpvOpFConvert, dest_type, result);
}
- store_dest(ctx, &tex->dest, result, tex->dest_type);
+ return result;
+}
+
+static void
+emit_tex(struct ntv_context *ctx, nir_tex_instr *tex)
+{
+ assert(tex->op == nir_texop_tex ||
+ tex->op == nir_texop_txb ||
+ tex->op == nir_texop_txl ||
+ tex->op == nir_texop_txd ||
+ tex->op == nir_texop_txf ||
+ tex->op == nir_texop_txf_ms ||
+ tex->op == nir_texop_txs ||
+ tex->op == nir_texop_lod ||
+ tex->op == nir_texop_tg4 ||
+ tex->op == nir_texop_texture_samples ||
+ tex->op == nir_texop_query_levels);
+ assert(tex->texture_index == tex->sampler_index || ctx->stage == MESA_SHADER_KERNEL);
+
+ struct spriv_tex_src tex_src = {0};
+ unsigned coord_components = 0;
+ nir_variable *bindless_var = NULL;
+ nir_variable *var = NULL;
+ uint32_t texture_index = tex->texture_index;
+
+ get_tex_srcs(ctx, tex, &bindless_var, &coord_components, &tex_src);
+ find_sampler_and_texture_index(ctx, &tex_src, bindless_var, &var, &texture_index);
+
+ assert(var);
+ SpvId image_type = find_image_type(ctx, var);
+ assert(image_type);
+
+ bool is_buffer = glsl_get_sampler_dim(glsl_without_array(var->type)) ==
+ GLSL_SAMPLER_DIM_BUF;
+ SpvId sampled_type = is_buffer ? image_type :
+ spirv_builder_type_sampled_image(&ctx->builder, image_type);
+ assert(sampled_type);
+
+ SpvId sampler_id = tex_src.bindless ? tex_src.bindless : ctx->samplers[texture_index];
+ if (tex_src.tex_offset) {
+ SpvId ptr = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, sampled_type);
+ sampler_id = spirv_builder_emit_access_chain(&ctx->builder, ptr, sampler_id, &tex_src.tex_offset, 1);
+ }
+
+ SpvId load = get_texture_load(ctx, sampler_id, tex, image_type, sampled_type);
+
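+ /* sparse ops carry an extra residency component in the NIR def; hide it
+ * while building the SPIR-V result type and restore it after the store
+ */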
+ if (tex->is_sparse)
+ tex->def.num_components--;
+ SpvId dest_type = get_def_type(ctx, &tex->def, tex->dest_type);
+
+ if (nir_tex_instr_is_query(tex))
+ spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery);
+
+ if (!tex_instr_is_lod_allowed(tex))
+ tex_src.lod = 0;
+ else if (ctx->stage != MESA_SHADER_FRAGMENT &&
+ tex->op == nir_texop_tex && ctx->explicit_lod && !tex_src.lod)
+ tex_src.lod = emit_float_const(ctx, 32, 0.0);
+
+ if (tex_src.proj && coord_components > 0)
+ move_tex_proj_to_coord(ctx, coord_components, &tex_src);
+
+ SpvId result = 0;
+
+ switch (tex->op) {
+ case nir_texop_txs: {
+ SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load);
+ /* Its Dim operand must be one of 1D, 2D, 3D, or Cube
+ * - OpImageQuerySizeLod specification
+ *
+ * Additionally, if its Dim is 1D, 2D, 3D, or Cube,
+ * it must also have either an MS of 1 or a Sampled of 0 or 2.
+ * - OpImageQuerySize specification
+ *
+ * all spirv samplers use these types
+ */
+ if (!tex_src.lod && tex_instr_is_lod_allowed(tex))
+ tex_src.lod = emit_uint_const(ctx, 32, 0);
+ result = spirv_builder_emit_image_query_size(&ctx->builder,
+ dest_type, image,
+ tex_src.lod);
+ break;
+ }
+ case nir_texop_query_levels: {
+ SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load);
+ result = spirv_builder_emit_image_query_levels(&ctx->builder,
+ dest_type, image);
+ break;
+ }
+ case nir_texop_texture_samples: {
+ SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load);
+ result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples,
+ dest_type, image);
+ break;
+ }
+ case nir_texop_lod: {
+ result = spirv_builder_emit_image_query_lod(&ctx->builder,
+ dest_type, load,
+ tex_src.coord);
+ break;
+ }
+ default:
+ result = emit_tex_readop(ctx, bindless_var, load, &tex_src,
+ dest_type, is_buffer, var, image_type, tex);
+ break;
+ }
+
+ store_def(ctx, tex->def.index, result, tex->dest_type);
+
+ if (tex->is_sparse)
+ tex->def.num_components++;
}
static void
@@ -3356,7 +3978,7 @@ emit_deref_var(struct ntv_context *ctx, nir_deref_instr *deref)
struct hash_entry *he = _mesa_hash_table_search(ctx->vars, deref->var);
assert(he);
SpvId result = (SpvId)(intptr_t)he->data;
- store_dest_raw(ctx, &deref->dest, result);
+ store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type));
}
static void
@@ -3365,21 +3987,63 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref)
assert(deref->deref_type == nir_deref_type_array);
nir_variable *var = nir_deref_instr_get_variable(deref);
+ if (!nir_src_is_always_uniform(deref->arr.index)) {
+ if (deref->modes & nir_var_mem_ubo)
+ spirv_builder_emit_cap(&ctx->builder,
+ SpvCapabilityUniformBufferArrayDynamicIndexing);
+
+ if (deref->modes & nir_var_mem_ssbo)
+ spirv_builder_emit_cap(&ctx->builder,
+ SpvCapabilityStorageBufferArrayDynamicIndexing);
+
+ if (deref->modes & (nir_var_uniform | nir_var_image)) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
+
+ if (glsl_type_is_sampler(type))
+ spirv_builder_emit_cap(&ctx->builder,
+ SpvCapabilitySampledImageArrayDynamicIndexing);
+ else
+ spirv_builder_emit_cap(&ctx->builder,
+ SpvCapabilityStorageImageArrayDynamicIndexing);
+ }
+ }
+
SpvStorageClass storage_class = get_storage_class(var);
- SpvId base, type;
+ SpvId type;
+ nir_alu_type atype = nir_type_uint;
+
+ SpvId base = get_src(ctx, &deref->parent, &atype);
+
switch (var->data.mode) {
+
+ case nir_var_mem_ubo:
+ case nir_var_mem_ssbo:
+ base = get_src(ctx, &deref->parent, &atype);
+ /* this is either the array<buffers> deref or the array<uint> deref */
+ if (glsl_type_is_struct_or_ifc(deref->type)) {
+ /* array<buffers> */
+ type = get_bo_struct_type(ctx, var);
+ break;
+ }
+ /* array<uint> */
+ FALLTHROUGH;
+ case nir_var_function_temp:
case nir_var_shader_in:
case nir_var_shader_out:
- base = get_src(ctx, &deref->parent);
+ base = get_src(ctx, &deref->parent, &atype);
type = get_glsl_type(ctx, deref->type);
break;
- case nir_var_uniform: {
- assert(glsl_type_is_image(glsl_without_array(var->type)));
+ case nir_var_uniform:
+ case nir_var_image: {
struct hash_entry *he = _mesa_hash_table_search(ctx->vars, var);
assert(he);
base = (SpvId)(intptr_t)he->data;
- type = ctx->image_types[var->data.driver_location];
+ const struct glsl_type *gtype = glsl_without_array(var->type);
+ type = get_image_type(ctx, var,
+ glsl_type_is_sampler(gtype),
+ glsl_get_sampler_dim(gtype) == GLSL_SAMPLER_DIM_BUF);
break;
}
@@ -3387,7 +4051,30 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref)
unreachable("Unsupported nir_variable_mode\n");
}
- SpvId index = get_src(ctx, &deref->arr.index);
+ nir_alu_type itype;
+ SpvId index = get_src(ctx, &deref->arr.index, &itype);
+ if (itype == nir_type_float)
+ index = emit_bitcast(ctx, get_uvec_type(ctx, 32, 1), index);
+
+ if (var->data.mode == nir_var_uniform || var->data.mode == nir_var_image) {
+ nir_deref_instr *aoa_deref = nir_src_as_deref(deref->parent);
+ uint32_t inner_stride = glsl_array_size(aoa_deref->type);
+
+ while (aoa_deref->deref_type != nir_deref_type_var) {
+ assert(aoa_deref->deref_type == nir_deref_type_array);
+
+ SpvId aoa_index = get_src(ctx, &aoa_deref->arr.index, &itype);
+ if (itype == nir_type_float)
+ aoa_index = emit_bitcast(ctx, get_uvec_type(ctx, 32, 1), aoa_index);
+
+ aoa_deref = nir_src_as_deref(aoa_deref->parent);
+
+ uint32_t stride = glsl_get_aoa_size(aoa_deref->type) / inner_stride;
+ aoa_index = emit_binop(ctx, SpvOpIMul, get_uvec_type(ctx, 32, 1), aoa_index,
+ emit_uint_const(ctx, 32, stride));
+ index = emit_binop(ctx, SpvOpIAdd, get_uvec_type(ctx, 32, 1), index, aoa_index);
+ }
+ }
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
storage_class,
@@ -3398,14 +4085,7 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref)
base,
&index, 1);
/* uint is a bit of a lie here, it's really just an opaque type */
- store_dest(ctx, &deref->dest, result, nir_type_uint);
-
- /* image ops always need to be able to get the variable to check out sampler types and such */
- if (glsl_type_is_image(glsl_without_array(var->type))) {
- uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t));
- *key = result;
- _mesa_hash_table_insert(ctx->image_vars, key, var);
- }
+ store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type));
}
static void
@@ -3417,17 +4097,21 @@ emit_deref_struct(struct ntv_context *ctx, nir_deref_instr *deref)
SpvStorageClass storage_class = get_storage_class(var);
SpvId index = emit_uint_const(ctx, 32, deref->strct.index);
+ SpvId type = (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) ?
+ get_bo_array_type(ctx, var) :
+ get_glsl_type(ctx, deref->type);
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
storage_class,
- get_glsl_type(ctx, deref->type));
+ type);
+ nir_alu_type atype;
SpvId result = spirv_builder_emit_access_chain(&ctx->builder,
ptr_type,
- get_src(ctx, &deref->parent),
+ get_src(ctx, &deref->parent, &atype),
&index, 1);
/* uint is a bit of a lie here, it's really just an opaque type */
- store_dest(ctx, &deref->dest, result, nir_type_uint);
+ store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type));
}
static void
@@ -3466,8 +4150,8 @@ emit_block(struct ntv_context *ctx, struct nir_block *block)
case nir_instr_type_load_const:
emit_load_const(ctx, nir_instr_as_load_const(instr));
break;
- case nir_instr_type_ssa_undef:
- emit_undef(ctx, nir_instr_as_ssa_undef(instr));
+ case nir_instr_type_undef:
+ emit_undef(ctx, nir_instr_as_undef(instr));
break;
case nir_instr_type_tex:
emit_tex(ctx, nir_instr_as_tex(instr));
@@ -3498,7 +4182,8 @@ static SpvId
get_src_bool(struct ntv_context *ctx, nir_src *src)
{
assert(nir_src_bit_size(*src) == 1);
- return get_src(ctx, src);
+ nir_alu_type atype;
+ return get_src(ctx, src, &atype);
}
static void
@@ -3538,6 +4223,7 @@ emit_if(struct ntv_context *ctx, nir_if *if_stmt)
static void
emit_loop(struct ntv_context *ctx, nir_loop *loop)
{
+ assert(!nir_loop_has_continue_construct(loop));
SpvId header_id = spirv_builder_new_id(&ctx->builder);
SpvId begin_id = block_label(ctx, nir_loop_first_block(loop));
SpvId break_id = spirv_builder_new_id(&ctx->builder);
@@ -3592,35 +4278,33 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list)
}
static SpvExecutionMode
-get_input_prim_type_mode(uint16_t type)
+get_input_prim_type_mode(enum mesa_prim type)
{
switch (type) {
- case GL_POINTS:
+ case MESA_PRIM_POINTS:
return SpvExecutionModeInputPoints;
- case GL_LINES:
- case GL_LINE_LOOP:
- case GL_LINE_STRIP:
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_LINE_LOOP:
+ case MESA_PRIM_LINE_STRIP:
return SpvExecutionModeInputLines;
- case GL_TRIANGLE_STRIP:
- case GL_TRIANGLES:
- case GL_TRIANGLE_FAN:
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLE_FAN:
return SpvExecutionModeTriangles;
- case GL_QUADS:
- case GL_QUAD_STRIP:
+ case MESA_PRIM_QUADS:
+ case MESA_PRIM_QUAD_STRIP:
return SpvExecutionModeQuads;
break;
- case GL_POLYGON:
+ case MESA_PRIM_POLYGON:
unreachable("handle polygons in gs");
break;
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
+ case MESA_PRIM_LINES_ADJACENCY:
+ case MESA_PRIM_LINE_STRIP_ADJACENCY:
return SpvExecutionModeInputLinesAdjacency;
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
+ case MESA_PRIM_TRIANGLES_ADJACENCY:
+ case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
return SpvExecutionModeInputTrianglesAdjacency;
break;
- case GL_ISOLINES:
- return SpvExecutionModeIsolines;
default:
debug_printf("unknown geometry shader input mode %u\n", type);
unreachable("error!");
@@ -3630,38 +4314,36 @@ get_input_prim_type_mode(uint16_t type)
return 0;
}
static SpvExecutionMode
-get_output_prim_type_mode(uint16_t type)
+get_output_prim_type_mode(enum mesa_prim type)
{
switch (type) {
- case GL_POINTS:
+ case MESA_PRIM_POINTS:
return SpvExecutionModeOutputPoints;
- case GL_LINES:
- case GL_LINE_LOOP:
- unreachable("GL_LINES/LINE_LOOP passed as gs output");
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_LINE_LOOP:
+ unreachable("MESA_PRIM_LINES/LINE_LOOP passed as gs output");
break;
- case GL_LINE_STRIP:
+ case MESA_PRIM_LINE_STRIP:
return SpvExecutionModeOutputLineStrip;
- case GL_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_STRIP:
return SpvExecutionModeOutputTriangleStrip;
- case GL_TRIANGLES:
- case GL_TRIANGLE_FAN: //FIXME: not sure if right for output
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLE_FAN: //FIXME: not sure if right for output
return SpvExecutionModeTriangles;
- case GL_QUADS:
- case GL_QUAD_STRIP:
+ case MESA_PRIM_QUADS:
+ case MESA_PRIM_QUAD_STRIP:
return SpvExecutionModeQuads;
- case GL_POLYGON:
+ case MESA_PRIM_POLYGON:
unreachable("handle polygons in gs");
break;
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
+ case MESA_PRIM_LINES_ADJACENCY:
+ case MESA_PRIM_LINE_STRIP_ADJACENCY:
unreachable("handle line adjacency in gs");
break;
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
+ case MESA_PRIM_TRIANGLES_ADJACENCY:
+ case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
unreachable("handle triangle adjacency in gs");
break;
- case GL_ISOLINES:
- return SpvExecutionModeIsolines;
default:
debug_printf("unknown geometry shader output mode %u\n", type);
unreachable("error!");
@@ -3690,12 +4372,12 @@ get_depth_layout_mode(enum gl_frag_depth_layout depth_layout)
}
static SpvExecutionMode
-get_primitive_mode(uint16_t primitive_mode)
+get_primitive_mode(enum tess_primitive_mode primitive_mode)
{
switch (primitive_mode) {
- case GL_TRIANGLES: return SpvExecutionModeTriangles;
- case GL_QUADS: return SpvExecutionModeQuads;
- case GL_ISOLINES: return SpvExecutionModeIsolines;
+ case TESS_PRIMITIVE_TRIANGLES: return SpvExecutionModeTriangles;
+ case TESS_PRIMITIVE_QUADS: return SpvExecutionModeQuads;
+ case TESS_PRIMITIVE_ISOLINES: return SpvExecutionModeIsolines;
default:
unreachable("unknown tess prim type!");
}
@@ -3717,39 +4399,43 @@ get_spacing(enum gl_tess_spacing spacing)
}
struct spirv_shader *
-nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_version)
+nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *sinfo, uint32_t spirv_version)
{
struct spirv_shader *ret = NULL;
struct ntv_context ctx = {0};
ctx.mem_ctx = ralloc_context(NULL);
+ ctx.nir = s;
ctx.builder.mem_ctx = ctx.mem_ctx;
assert(spirv_version >= SPIRV_VERSION(1, 0));
ctx.spirv_1_4_interfaces = spirv_version >= SPIRV_VERSION(1, 4);
+ ctx.bindless_set_idx = sinfo->bindless_set_idx;
ctx.glsl_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
- if (!ctx.glsl_types)
+ ctx.bo_array_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
+ ctx.bo_struct_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
+ if (!ctx.glsl_types || !ctx.bo_array_types || !ctx.bo_struct_types ||
+ !_mesa_hash_table_init(&ctx.image_types, ctx.mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal))
goto fail;
spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShader);
- if (s->info.image_buffers != 0)
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageBuffer);
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampledBuffer);
switch (s->info.stage) {
case MESA_SHADER_FRAGMENT:
- if (s->info.fs.post_depth_coverage &&
- BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN))
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleMaskPostDepthCoverage);
if (s->info.fs.uses_sample_shading)
spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleRateShading);
+ if (s->info.fs.uses_demote && spirv_version < SPIRV_VERSION(1, 6))
+ spirv_builder_emit_extension(&ctx.builder,
+ "SPV_EXT_demote_to_helper_invocation");
break;
case MESA_SHADER_VERTEX:
if (BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
+ BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_DRAW_ID) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) {
- spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_draw_parameters");
+ if (spirv_version < SPIRV_VERSION(1, 3))
+ spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_draw_parameters");
spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDrawParameters);
}
break;
@@ -3783,9 +4469,16 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShaderViewportIndexLayerEXT);
}
}
+ } else if (s->info.stage == MESA_SHADER_FRAGMENT) {
+ /* incredibly, this is legal and intended.
+ * https://github.com/KhronosGroup/SPIRV-Registry/issues/95
+ */
+ if (s->info.inputs_read & (BITFIELD64_BIT(VARYING_SLOT_LAYER) |
+ BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID)))
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityGeometry);
}
- if (s->info.num_ssbos)
+ if (s->info.num_ssbos && spirv_version < SPIRV_VERSION(1, 1))
spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_storage_buffer_storage_class");
if (s->info.stage < MESA_SHADER_FRAGMENT &&
@@ -3796,46 +4489,40 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
spirv_builder_emit_cap(&ctx.builder, SpvCapabilityMultiViewport);
}
- if (s->info.num_textures) {
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampled1D);
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageQuery);
- }
-
- if (s->info.num_images) {
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImage1D);
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageQuery);
+ if (s->info.stage > MESA_SHADER_VERTEX &&
+ s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)) {
+ if (s->info.stage < MESA_SHADER_GEOMETRY)
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShaderViewportIndex);
+ else
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityMultiViewport);
}
- if (s->info.bit_sizes_int & 8)
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt8);
- if (s->info.bit_sizes_int & 16)
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt16);
- if (s->info.bit_sizes_int & 64)
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt64);
-
- if (s->info.bit_sizes_float & 16)
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityFloat16);
- if (s->info.bit_sizes_float & 64)
- spirv_builder_emit_cap(&ctx.builder, SpvCapabilityFloat64);
-
ctx.stage = s->info.stage;
- ctx.so_info = so_info;
+ ctx.sinfo = sinfo;
ctx.GLSL_std_450 = spirv_builder_import(&ctx.builder, "GLSL.std.450");
+ ctx.explicit_lod = true;
spirv_builder_emit_source(&ctx.builder, SpvSourceLanguageUnknown, 0);
- if (s->info.stage == MESA_SHADER_COMPUTE) {
- SpvAddressingModel model;
+ SpvAddressingModel model = SpvAddressingModelLogical;
+ if (gl_shader_stage_is_compute(s->info.stage)) {
if (s->info.cs.ptr_size == 32)
model = SpvAddressingModelPhysical32;
- else if (s->info.cs.ptr_size == 64)
- model = SpvAddressingModelPhysical64;
- else
+ else if (s->info.cs.ptr_size == 64) {
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityPhysicalStorageBufferAddresses);
+ model = SpvAddressingModelPhysicalStorageBuffer64;
+ } else
model = SpvAddressingModelLogical;
+ }
+
+ if (ctx.sinfo->have_vulkan_memory_model) {
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityVulkanMemoryModel);
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityVulkanMemoryModelDeviceScope);
+ spirv_builder_emit_mem_model(&ctx.builder, model,
+ SpvMemoryModelVulkan);
+ } else {
spirv_builder_emit_mem_model(&ctx.builder, model,
SpvMemoryModelGLSL450);
- } else
- spirv_builder_emit_mem_model(&ctx.builder, SpvAddressingModelLogical,
- SpvMemoryModelGLSL450);
+ }
if (s->info.stage == MESA_SHADER_FRAGMENT &&
s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
@@ -3861,6 +4548,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
exec_model = SpvExecutionModelFragment;
break;
case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
exec_model = SpvExecutionModelGLCompute;
break;
default:
@@ -3868,46 +4556,143 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
}
SpvId type_void = spirv_builder_type_void(&ctx.builder);
- SpvId type_main = spirv_builder_type_function(&ctx.builder, type_void,
- NULL, 0);
+ SpvId type_void_func = spirv_builder_type_function(&ctx.builder, type_void,
+ NULL, 0);
SpvId entry_point = spirv_builder_new_id(&ctx.builder);
spirv_builder_emit_name(&ctx.builder, entry_point, "main");
ctx.vars = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- ctx.image_vars = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32,
- _mesa_key_u32_equal);
-
- ctx.so_outputs = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32,
- _mesa_key_u32_equal);
-
nir_foreach_variable_with_modes(var, s, nir_var_mem_push_const)
input_var_init(&ctx, var);
nir_foreach_shader_in_variable(var, s)
emit_input(&ctx, var);
- int max_output = -1;
+ int max_output = 0;
nir_foreach_shader_out_variable(var, s) {
/* ignore SPIR-V built-ins, tagged with a sentinel value */
if (var->data.driver_location != UINT_MAX) {
assert(var->data.driver_location < INT_MAX);
- max_output = MAX2(max_output, (int)var->data.driver_location);
+ unsigned extent = glsl_count_attribute_slots(var->type, false);
+ max_output = MAX2(max_output, (int)var->data.driver_location + extent);
}
emit_output(&ctx, var);
}
+ uint32_t tcs_vertices_out_word = 0;
+
+ unsigned ubo_counter[2] = {0};
+ nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo)
+ ubo_counter[var->data.driver_location != 0]++;
+ nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo)
+ emit_bo(&ctx, var, ubo_counter[var->data.driver_location != 0] > 1);
+
+ unsigned ssbo_counter = 0;
+ nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo)
+ ssbo_counter++;
+ nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo)
+ emit_bo(&ctx, var, ssbo_counter > 1);
+
+ nir_foreach_variable_with_modes(var, s, nir_var_image)
+ ctx.image_var[var->data.driver_location] = var;
+ nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
+ if (glsl_type_is_sampler(glsl_without_array(var->type))) {
+ if (var->data.descriptor_set == ctx.bindless_set_idx)
+ ctx.bindless_sampler_var[var->data.driver_location] = var;
+ else
+ ctx.sampler_var[var->data.driver_location] = var;
+ ctx.last_sampler = MAX2(ctx.last_sampler, var->data.driver_location);
+ }
+ }
+ if (sinfo->sampler_mask) {
+ assert(s->info.stage == MESA_SHADER_KERNEL);
+ int desc_set = -1;
+ nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
+ if (glsl_type_is_sampler(glsl_without_array(var->type))) {
+ desc_set = var->data.descriptor_set;
+ break;
+ }
+ }
+ assert(desc_set != -1);
+ u_foreach_bit(sampler, sinfo->sampler_mask)
+ emit_sampler(&ctx, sampler, desc_set);
+ }
+ nir_foreach_variable_with_modes(var, s, nir_var_image | nir_var_uniform) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ if (glsl_type_is_sampler(type))
+ emit_image(&ctx, var, get_bare_image_type(&ctx, var, true));
+ else if (glsl_type_is_image(type))
+ emit_image(&ctx, var, get_bare_image_type(&ctx, var, false));
+ }
+
+ if (sinfo->float_controls.flush_denorms) {
+ unsigned execution_mode = s->info.float_controls_execution_mode;
+ bool flush_16_bit = nir_is_denorm_flush_to_zero(execution_mode, 16);
+ bool flush_32_bit = nir_is_denorm_flush_to_zero(execution_mode, 32);
+ bool flush_64_bit = nir_is_denorm_flush_to_zero(execution_mode, 64);
+ bool preserve_16_bit = nir_is_denorm_preserve(execution_mode, 16);
+ bool preserve_32_bit = nir_is_denorm_preserve(execution_mode, 32);
+ bool preserve_64_bit = nir_is_denorm_preserve(execution_mode, 64);
+ bool emit_cap_flush = false;
+ bool emit_cap_preserve = false;
+
+ if (!sinfo->float_controls.denorms_all_independence) {
+ bool flush = flush_16_bit && flush_64_bit;
+ bool preserve = preserve_16_bit && preserve_64_bit;
+
+ if (!sinfo->float_controls.denorms_32_bit_independence) {
+ flush = flush && flush_32_bit;
+ preserve = preserve && preserve_32_bit;
+
+ flush_32_bit = flush;
+ preserve_32_bit = preserve;
+ }
+
+ flush_16_bit = flush;
+ flush_64_bit = flush;
+ preserve_16_bit = preserve;
+ preserve_64_bit = preserve;
+ }
+
+ if (flush_16_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(0)) {
+ emit_cap_flush = true;
+ spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeDenormFlushToZero, 16);
+ }
+ if (flush_32_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(1)) {
+ emit_cap_flush = true;
+ spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeDenormFlushToZero, 32);
+ }
+ if (flush_64_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(2)) {
+ emit_cap_flush = true;
+ spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeDenormFlushToZero, 64);
+ }
- if (so_info)
- emit_so_info(&ctx, so_info, max_output + 1);
+ if (preserve_16_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(0)) {
+ emit_cap_preserve = true;
+ spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeDenormPreserve, 16);
+ }
+ if (preserve_32_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(1)) {
+ emit_cap_preserve = true;
+ spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeDenormPreserve, 32);
+ }
+ if (preserve_64_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(2)) {
+ emit_cap_preserve = true;
+ spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeDenormPreserve, 64);
+ }
- /* we have to reverse iterate to match what's done in zink_compiler.c */
- foreach_list_typed_reverse(nir_variable, var, node, &s->variables)
- if (_nir_shader_variable_has_mode(var, nir_var_uniform |
- nir_var_mem_ubo |
- nir_var_mem_ssbo))
- emit_uniform(&ctx, var);
+ if (emit_cap_flush)
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDenormFlushToZero);
+ if (emit_cap_preserve)
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDenormPreserve);
+ }
switch (s->info.stage) {
case MESA_SHADER_FRAGMENT:
@@ -3924,6 +4709,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
SpvExecutionModeEarlyFragmentTests);
if (s->info.fs.post_depth_coverage) {
spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_post_depth_coverage");
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleMaskPostDepthCoverage);
spirv_builder_emit_exec_mode(&ctx.builder, entry_point,
SpvExecutionModePostDepthCoverage);
}
@@ -3945,13 +4731,13 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModeSampleInterlockUnorderedEXT);
break;
case MESA_SHADER_TESS_CTRL:
- spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
- SpvExecutionModeOutputVertices,
- s->info.tess.tcs_vertices_out);
+ tcs_vertices_out_word = spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
+ SpvExecutionModeOutputVertices,
+ s->info.tess.tcs_vertices_out);
break;
case MESA_SHADER_TESS_EVAL:
spirv_builder_emit_exec_mode(&ctx.builder, entry_point,
- get_primitive_mode(s->info.tess.primitive_mode));
+ get_primitive_mode(s->info.tess._primitive_mode));
spirv_builder_emit_exec_mode(&ctx.builder, entry_point,
s->info.tess.ccw ? SpvExecutionModeVertexOrderCcw
: SpvExecutionModeVertexOrderCw);
@@ -3970,12 +4756,10 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
s->info.gs.invocations);
spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point,
SpvExecutionModeOutputVertices,
- s->info.gs.vertices_out);
+ MAX2(s->info.gs.vertices_out, 1));
break;
+ case MESA_SHADER_KERNEL:
case MESA_SHADER_COMPUTE:
- if (s->info.shared_size)
- create_shared_block(&ctx, s->info.shared_size);
-
if (s->info.workgroup_size[0] || s->info.workgroup_size[1] || s->info.workgroup_size[2])
spirv_builder_emit_exec_mode_literal3(&ctx.builder, entry_point, SpvExecutionModeLocalSize,
(uint32_t[3]){(uint32_t)s->info.workgroup_size[0], (uint32_t)s->info.workgroup_size[1],
@@ -3990,9 +4774,31 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
spirv_builder_emit_name(&ctx.builder, sizes[i], names[i]);
}
SpvId var_type = get_uvec_type(&ctx, 32, 3);
+ // Even when using LocalSizeId, this needs to be initialized for nir_intrinsic_load_workgroup_size
ctx.local_group_size_var = spirv_builder_spec_const_composite(&ctx.builder, var_type, sizes, 3);
- spirv_builder_emit_name(&ctx.builder, ctx.local_group_size_var, "gl_LocalGroupSize");
- spirv_builder_emit_builtin(&ctx.builder, ctx.local_group_size_var, SpvBuiltInWorkgroupSize);
+ spirv_builder_emit_name(&ctx.builder, ctx.local_group_size_var, "gl_LocalGroupSizeARB");
+
+ /* WorkgroupSize is deprecated in SPIR-V 1.6 */
+ if (spirv_version >= SPIRV_VERSION(1, 6)) {
+ spirv_builder_emit_exec_mode_id3(&ctx.builder, entry_point,
+ SpvExecutionModeLocalSizeId,
+ sizes);
+ } else {
+ spirv_builder_emit_builtin(&ctx.builder, ctx.local_group_size_var, SpvBuiltInWorkgroupSize);
+ }
+ }
+ if (s->info.cs.has_variable_shared_mem) {
+ ctx.shared_mem_size = spirv_builder_spec_const_uint(&ctx.builder, 32);
+ spirv_builder_emit_specid(&ctx.builder, ctx.shared_mem_size, ZINK_VARIABLE_SHARED_MEM);
+ spirv_builder_emit_name(&ctx.builder, ctx.shared_mem_size, "variable_shared_mem");
+ }
+ if (s->info.cs.derivative_group) {
+ SpvCapability caps[] = { 0, SpvCapabilityComputeDerivativeGroupQuadsNV, SpvCapabilityComputeDerivativeGroupLinearNV };
+ SpvExecutionMode modes[] = { 0, SpvExecutionModeDerivativeGroupQuadsNV, SpvExecutionModeDerivativeGroupLinearNV };
+ spirv_builder_emit_extension(&ctx.builder, "SPV_NV_compute_shader_derivatives");
+ spirv_builder_emit_cap(&ctx.builder, caps[s->info.cs.derivative_group]);
+ spirv_builder_emit_exec_mode(&ctx.builder, entry_point, modes[s->info.cs.derivative_group]);
+ ctx.explicit_lod = false;
}
break;
default:
@@ -4002,31 +4808,55 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySubgroupBallotKHR);
spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_ballot");
}
- if (s->info.has_transform_feedback_varyings) {
+ if (s->info.has_transform_feedback_varyings && s->info.stage != MESA_SHADER_FRAGMENT) {
spirv_builder_emit_cap(&ctx.builder, SpvCapabilityTransformFeedback);
spirv_builder_emit_exec_mode(&ctx.builder, entry_point,
SpvExecutionModeXfb);
}
+
+ if (s->info.stage == MESA_SHADER_FRAGMENT && s->info.fs.uses_discard) {
+ ctx.discard_func = spirv_builder_new_id(&ctx.builder);
+ spirv_builder_emit_name(&ctx.builder, ctx.discard_func, "discard");
+ spirv_builder_function(&ctx.builder, ctx.discard_func, type_void,
+ SpvFunctionControlMaskNone,
+ type_void_func);
+ SpvId label = spirv_builder_new_id(&ctx.builder);
+ spirv_builder_label(&ctx.builder, label);
+
+ /* kill is deprecated in SPIR-V 1.6, use terminate instead */
+ if (spirv_version >= SPIRV_VERSION(1, 6))
+ spirv_builder_emit_terminate(&ctx.builder);
+ else
+ spirv_builder_emit_kill(&ctx.builder);
+
+ spirv_builder_function_end(&ctx.builder);
+ }
+
spirv_builder_function(&ctx.builder, entry_point, type_void,
- SpvFunctionControlMaskNone,
- type_main);
+ SpvFunctionControlMaskNone,
+ type_void_func);
nir_function_impl *entry = nir_shader_get_entrypoint(s);
nir_metadata_require(entry, nir_metadata_block_index);
- ctx.defs = ralloc_array_size(ctx.mem_ctx,
- sizeof(SpvId), entry->ssa_alloc);
- if (!ctx.defs)
+ ctx.defs = rzalloc_array_size(ctx.mem_ctx,
+ sizeof(SpvId), entry->ssa_alloc);
+ ctx.def_types = ralloc_array_size(ctx.mem_ctx,
+ sizeof(nir_alu_type), entry->ssa_alloc);
+ if (!ctx.defs || !ctx.def_types)
goto fail;
+ if (sinfo->have_sparse) {
+ spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySparseResidency);
+ /* this could be huge, so only allocate if needed, since it's extremely unlikely
+ * to ever be used by anything except the CTS
+ */
+ ctx.resident_defs = rzalloc_array_size(ctx.mem_ctx,
+ sizeof(SpvId), entry->ssa_alloc);
+ if (!ctx.resident_defs)
+ goto fail;
+ }
ctx.num_defs = entry->ssa_alloc;
- nir_index_local_regs(entry);
- ctx.regs = ralloc_array_size(ctx.mem_ctx,
- sizeof(SpvId), entry->reg_alloc);
- if (!ctx.regs)
- goto fail;
- ctx.num_regs = entry->reg_alloc;
-
SpvId *block_ids = ralloc_array_size(ctx.mem_ctx,
sizeof(SpvId), entry->num_blocks);
if (!block_ids)
@@ -4040,22 +4870,21 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
/* emit a block only for the variable declarations */
start_block(&ctx, spirv_builder_new_id(&ctx.builder));
- foreach_list_typed(nir_register, reg, node, &entry->registers) {
- SpvId type = get_vec_from_bit_size(&ctx, reg->bit_size, reg->num_components);
- SpvId pointer_type = spirv_builder_type_pointer(&ctx.builder,
- SpvStorageClassFunction,
- type);
- SpvId var = spirv_builder_emit_var(&ctx.builder, pointer_type,
- SpvStorageClassFunction);
+ spirv_builder_begin_local_vars(&ctx.builder);
- ctx.regs[reg->index] = var;
+ nir_foreach_reg_decl(reg, entry) {
+ if (nir_intrinsic_bit_size(reg) == 1)
+ init_reg(&ctx, reg, nir_type_bool);
}
- emit_cf_list(&ctx, &entry->body);
+ nir_foreach_variable_with_modes(var, s, nir_var_shader_temp)
+ emit_shader_temp(&ctx, var);
- /* vertex/tess shader emits copied xfb outputs at the end of the shader */
- if (so_info && (ctx.stage == MESA_SHADER_VERTEX || ctx.stage == MESA_SHADER_TESS_EVAL))
- emit_so_outputs(&ctx, so_info);
+ nir_foreach_function_temp_variable(var, entry)
+ emit_temp(&ctx, var);
+
+
+ emit_cf_list(&ctx, &entry->body);
spirv_builder_return(&ctx.builder); // doesn't belong here, but whatevz
spirv_builder_function_end(&ctx.builder);
@@ -4074,7 +4903,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t
if (!ret->words)
goto fail;
- ret->num_words = spirv_builder_get_words(&ctx.builder, ret->words, num_words, spirv_version);
+ ret->num_words = spirv_builder_get_words(&ctx.builder, ret->words, num_words, spirv_version, &tcs_vertices_out_word);
+ ret->tcs_vertices_out_word = tcs_vertices_out_word;
assert(ret->num_words == num_words);
ralloc_free(ctx.mem_ctx);
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
index 04b559473e4..67a56464d19 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
@@ -26,7 +26,7 @@
#include <stdlib.h>
#include <stdint.h>
-#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_core.h>
#include "compiler/nir/nir.h"
#include "compiler/shader_enums.h"
@@ -39,13 +39,14 @@
struct spirv_shader {
uint32_t *words;
size_t num_words;
+ uint32_t tcs_vertices_out_word;
};
struct nir_shader;
struct pipe_stream_output_info;
struct spirv_shader *
-nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
+nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *so_info,
uint32_t spirv_version);
void
diff --git a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c
index 339af44f4c6..515d659e9af 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.c
@@ -32,6 +32,7 @@
#include "util/hash_table.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
+#include "vk_util.h"
#include <stdbool.h>
#include <inttypes.h>
@@ -62,11 +63,12 @@ spirv_buffer_prepare(struct spirv_buffer *b, void *mem_ctx, size_t needed)
return spirv_buffer_grow(b, mem_ctx, needed);
}
-static inline void
+static inline uint32_t
spirv_buffer_emit_word(struct spirv_buffer *b, uint32_t word)
{
assert(b->num_words < b->room);
- b->words[b->num_words++] = word;
+ b->words[b->num_words] = word;
+ return b->num_words++;
}
static int
@@ -146,10 +148,10 @@ spirv_builder_emit_entry_point(struct spirv_builder *b,
b->entry_points.words[pos] |= (3 + len + num_interfaces) << 16;
spirv_buffer_prepare(&b->entry_points, b->mem_ctx, num_interfaces);
for (int i = 0; i < num_interfaces; ++i)
- spirv_buffer_emit_word(&b->entry_points, interfaces[i]);
+ spirv_buffer_emit_word(&b->entry_points, interfaces[i]);
}
-void
+uint32_t
spirv_builder_emit_exec_mode_literal(struct spirv_builder *b, SpvId entry_point,
SpvExecutionMode exec_mode, uint32_t param)
{
@@ -157,7 +159,7 @@ spirv_builder_emit_exec_mode_literal(struct spirv_builder *b, SpvId entry_point,
spirv_buffer_emit_word(&b->exec_modes, SpvOpExecutionMode | (4 << 16));
spirv_buffer_emit_word(&b->exec_modes, entry_point);
spirv_buffer_emit_word(&b->exec_modes, exec_mode);
- spirv_buffer_emit_word(&b->exec_modes, param);
+ return spirv_buffer_emit_word(&b->exec_modes, param);
}
void
@@ -173,6 +175,18 @@ spirv_builder_emit_exec_mode_literal3(struct spirv_builder *b, SpvId entry_point
}
void
+spirv_builder_emit_exec_mode_id3(struct spirv_builder *b, SpvId entry_point,
+ SpvExecutionMode exec_mode, SpvId param[3])
+{
+ spirv_buffer_prepare(&b->exec_modes, b->mem_ctx, 6);
+ spirv_buffer_emit_word(&b->exec_modes, SpvOpExecutionModeId | (6 << 16));
+ spirv_buffer_emit_word(&b->exec_modes, entry_point);
+ spirv_buffer_emit_word(&b->exec_modes, exec_mode);
+ for (unsigned i = 0; i < 3; i++)
+ spirv_buffer_emit_word(&b->exec_modes, param[i]);
+}
+
+void
spirv_builder_emit_exec_mode(struct spirv_builder *b, SpvId entry_point,
SpvExecutionMode exec_mode)
{
@@ -216,6 +230,14 @@ spirv_builder_emit_decoration(struct spirv_builder *b, SpvId target,
}
void
+spirv_builder_emit_rounding_mode(struct spirv_builder *b, SpvId target,
+ SpvFPRoundingMode rounding)
+{
+ uint32_t args[] = { rounding };
+ emit_decoration(b, target, SpvDecorationFPRoundingMode, args, ARRAY_SIZE(args));
+}
+
+void
spirv_builder_emit_input_attachment_index(struct spirv_builder *b, SpvId target, uint32_t id)
{
uint32_t args[] = { id };
@@ -254,32 +276,32 @@ spirv_builder_emit_builtin(struct spirv_builder *b, SpvId target,
}
void
-spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream)
+spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream, bool multistream)
{
unsigned words = 1;
SpvOp op = SpvOpEmitVertex;
- if (stream > 0) {
+ if (multistream) {
op = SpvOpEmitStreamVertex;
words++;
}
spirv_buffer_prepare(&b->instructions, b->mem_ctx, words);
spirv_buffer_emit_word(&b->instructions, op | (words << 16));
- if (stream)
+ if (multistream)
spirv_buffer_emit_word(&b->instructions, spirv_builder_const_uint(b, 32, stream));
}
void
-spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream)
+spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream, bool multistream)
{
unsigned words = 1;
SpvOp op = SpvOpEndPrimitive;
- if (stream > 0) {
+ if (multistream || stream > 0) {
op = SpvOpEndStreamPrimitive;
words++;
}
spirv_buffer_prepare(&b->instructions, b->mem_ctx, words);
spirv_buffer_emit_word(&b->instructions, op | (words << 16));
- if (stream)
+ if (multistream || stream > 0)
spirv_buffer_emit_word(&b->instructions, spirv_builder_const_uint(b, 32, stream));
}
@@ -403,6 +425,28 @@ spirv_builder_function_end(struct spirv_builder *b)
spirv_buffer_emit_word(&b->instructions, SpvOpFunctionEnd | (1 << 16));
}
+SpvId
+spirv_builder_function_call(struct spirv_builder *b, SpvId result_type,
+ SpvId function, const SpvId arguments[],
+ size_t num_arguments)
+{
+ SpvId result = spirv_builder_new_id(b);
+
+ int words = 4 + num_arguments;
+ spirv_buffer_prepare(&b->instructions, b->mem_ctx, words);
+ spirv_buffer_emit_word(&b->instructions,
+ SpvOpFunctionCall | (words << 16));
+ spirv_buffer_emit_word(&b->instructions, result_type);
+ spirv_buffer_emit_word(&b->instructions, result);
+ spirv_buffer_emit_word(&b->instructions, function);
+
+ for (int i = 0; i < num_arguments; ++i)
+ spirv_buffer_emit_word(&b->instructions, arguments[i]);
+
+ return result;
+}
+
+
void
spirv_builder_label(struct spirv_builder *b, SpvId label)
{
@@ -425,6 +469,17 @@ spirv_builder_emit_load(struct spirv_builder *b, SpvId result_type,
return spirv_builder_emit_unop(b, SpvOpLoad, result_type, pointer);
}
+SpvId
+spirv_builder_emit_load_aligned(struct spirv_builder *b, SpvId result_type, SpvId pointer, unsigned alignment, bool coherent)
+{
+ if (coherent) {
+ SpvId scope = spirv_builder_const_int(b, 32, SpvScopeDevice);
+ return spirv_builder_emit_quadop(b, SpvOpLoad, result_type, pointer, SpvMemoryAccessAlignedMask | SpvMemoryAccessNonPrivatePointerMask | SpvMemoryAccessMakePointerVisibleMask, alignment, scope);
+ } else {
+ return spirv_builder_emit_triop(b, SpvOpLoad, result_type, pointer, SpvMemoryAccessAlignedMask, alignment);
+ }
+}
+
void
spirv_builder_emit_store(struct spirv_builder *b, SpvId pointer, SpvId object)
{
@@ -435,6 +490,30 @@ spirv_builder_emit_store(struct spirv_builder *b, SpvId pointer, SpvId object)
}
void
+spirv_builder_emit_store_aligned(struct spirv_builder *b, SpvId pointer, SpvId object, unsigned alignment, bool coherent)
+{
+ unsigned size = 5;
+ SpvMemoryAccessMask mask = SpvMemoryAccessAlignedMask;
+
+ if (coherent) {
+ mask |= SpvMemoryAccessNonPrivatePointerMask | SpvMemoryAccessMakePointerAvailableMask;
+ size++;
+ }
+
+ spirv_buffer_prepare(&b->instructions, b->mem_ctx, size);
+ spirv_buffer_emit_word(&b->instructions, SpvOpStore | (size << 16));
+ spirv_buffer_emit_word(&b->instructions, pointer);
+ spirv_buffer_emit_word(&b->instructions, object);
+ spirv_buffer_emit_word(&b->instructions, mask);
+ spirv_buffer_emit_word(&b->instructions, alignment);
+
+ if (coherent) {
+ SpvId scope = spirv_builder_const_int(b, 32, SpvScopeDevice);
+ spirv_buffer_emit_word(&b->instructions, scope);
+ }
+}
+
+void
spirv_builder_emit_atomic_store(struct spirv_builder *b, SpvId pointer, SpvScope scope,
SpvMemorySemanticsMask semantics, SpvId object)
{
@@ -492,12 +571,13 @@ SpvId
spirv_builder_emit_unop(struct spirv_builder *b, SpvOp op, SpvId result_type,
SpvId operand)
{
+ struct spirv_buffer *buf = op == SpvOpSpecConstant ? &b->types_const_defs : &b->instructions;
SpvId result = spirv_builder_new_id(b);
- spirv_buffer_prepare(&b->instructions, b->mem_ctx, 4);
- spirv_buffer_emit_word(&b->instructions, op | (4 << 16));
- spirv_buffer_emit_word(&b->instructions, result_type);
- spirv_buffer_emit_word(&b->instructions, result);
- spirv_buffer_emit_word(&b->instructions, operand);
+ spirv_buffer_prepare(buf, b->mem_ctx, 4);
+ spirv_buffer_emit_word(buf, op | (4 << 16));
+ spirv_buffer_emit_word(buf, result_type);
+ spirv_buffer_emit_word(buf, result);
+ spirv_buffer_emit_word(buf, operand);
return result;
}
@@ -519,14 +599,16 @@ SpvId
spirv_builder_emit_triop(struct spirv_builder *b, SpvOp op, SpvId result_type,
SpvId operand0, SpvId operand1, SpvId operand2)
{
+ struct spirv_buffer *buf = op == SpvOpSpecConstantOp ? &b->types_const_defs : &b->instructions;
+
SpvId result = spirv_builder_new_id(b);
- spirv_buffer_prepare(&b->instructions, b->mem_ctx, 6);
- spirv_buffer_emit_word(&b->instructions, op | (6 << 16));
- spirv_buffer_emit_word(&b->instructions, result_type);
- spirv_buffer_emit_word(&b->instructions, result);
- spirv_buffer_emit_word(&b->instructions, operand0);
- spirv_buffer_emit_word(&b->instructions, operand1);
- spirv_buffer_emit_word(&b->instructions, operand2);
+ spirv_buffer_prepare(buf, b->mem_ctx, 6);
+ spirv_buffer_emit_word(buf, op | (6 << 16));
+ spirv_buffer_emit_word(buf, result_type);
+ spirv_buffer_emit_word(buf, result);
+ spirv_buffer_emit_word(buf, operand0);
+ spirv_buffer_emit_word(buf, operand1);
+ spirv_buffer_emit_word(buf, operand2);
return result;
}
@@ -534,15 +616,17 @@ SpvId
spirv_builder_emit_quadop(struct spirv_builder *b, SpvOp op, SpvId result_type,
SpvId operand0, SpvId operand1, SpvId operand2, SpvId operand3)
{
+ struct spirv_buffer *buf = op == SpvOpSpecConstantOp ? &b->types_const_defs : &b->instructions;
+
SpvId result = spirv_builder_new_id(b);
- spirv_buffer_prepare(&b->instructions, b->mem_ctx, 7);
- spirv_buffer_emit_word(&b->instructions, op | (7 << 16));
- spirv_buffer_emit_word(&b->instructions, result_type);
- spirv_buffer_emit_word(&b->instructions, result);
- spirv_buffer_emit_word(&b->instructions, operand0);
- spirv_buffer_emit_word(&b->instructions, operand1);
- spirv_buffer_emit_word(&b->instructions, operand2);
- spirv_buffer_emit_word(&b->instructions, operand3);
+ spirv_buffer_prepare(buf, b->mem_ctx, 7);
+ spirv_buffer_emit_word(buf, op | (7 << 16));
+ spirv_buffer_emit_word(buf, result_type);
+ spirv_buffer_emit_word(buf, result);
+ spirv_buffer_emit_word(buf, operand0);
+ spirv_buffer_emit_word(buf, operand1);
+ spirv_buffer_emit_word(buf, operand2);
+ spirv_buffer_emit_word(buf, operand3);
return result;
}
@@ -551,17 +635,19 @@ spirv_builder_emit_hexop(struct spirv_builder *b, SpvOp op, SpvId result_type,
SpvId operand0, SpvId operand1, SpvId operand2, SpvId operand3,
SpvId operand4, SpvId operand5)
{
+ struct spirv_buffer *buf = op == SpvOpSpecConstantOp ? &b->types_const_defs : &b->instructions;
+
SpvId result = spirv_builder_new_id(b);
- spirv_buffer_prepare(&b->instructions, b->mem_ctx, 9);
- spirv_buffer_emit_word(&b->instructions, op | (9 << 16));
- spirv_buffer_emit_word(&b->instructions, result_type);
- spirv_buffer_emit_word(&b->instructions, result);
- spirv_buffer_emit_word(&b->instructions, operand0);
- spirv_buffer_emit_word(&b->instructions, operand1);
- spirv_buffer_emit_word(&b->instructions, operand2);
- spirv_buffer_emit_word(&b->instructions, operand3);
- spirv_buffer_emit_word(&b->instructions, operand4);
- spirv_buffer_emit_word(&b->instructions, operand5);
+ spirv_buffer_prepare(buf, b->mem_ctx, 9);
+ spirv_buffer_emit_word(buf, op | (9 << 16));
+ spirv_buffer_emit_word(buf, result_type);
+ spirv_buffer_emit_word(buf, result);
+ spirv_buffer_emit_word(buf, operand0);
+ spirv_buffer_emit_word(buf, operand1);
+ spirv_buffer_emit_word(buf, operand2);
+ spirv_buffer_emit_word(buf, operand3);
+ spirv_buffer_emit_word(buf, operand4);
+ spirv_buffer_emit_word(buf, operand5);
return result;
}
@@ -735,63 +821,112 @@ spirv_builder_emit_kill(struct spirv_builder *b)
spirv_buffer_emit_word(&b->instructions, SpvOpKill | (1 << 16));
}
+void
+spirv_builder_emit_terminate(struct spirv_builder *b)
+{
+ spirv_buffer_prepare(&b->instructions, b->mem_ctx, 1);
+ spirv_buffer_emit_word(&b->instructions, SpvOpTerminateInvocation | (1 << 16));
+}
+
+void
+spirv_builder_emit_demote(struct spirv_builder *b)
+{
+ spirv_buffer_prepare(&b->instructions, b->mem_ctx, 1);
+ spirv_buffer_emit_word(&b->instructions, SpvOpDemoteToHelperInvocation | (1 << 16));
+}
+
+SpvId
+spirv_is_helper_invocation(struct spirv_builder *b)
+{
+ SpvId result = spirv_builder_new_id(b);
+ SpvId result_type = spirv_builder_type_bool(b);
+
+ int words = 3;
+ spirv_buffer_prepare(&b->instructions, b->mem_ctx, words);
+ spirv_buffer_emit_word(&b->instructions, SpvOpIsHelperInvocationEXT | (words << 16));
+ spirv_buffer_emit_word(&b->instructions, result_type);
+ spirv_buffer_emit_word(&b->instructions, result);
+ return result;
+}
+
SpvId
spirv_builder_emit_vote(struct spirv_builder *b, SpvOp op, SpvId src)
{
return spirv_builder_emit_binop(b, op, spirv_builder_type_bool(b),
- spirv_builder_const_uint(b, 32, SpvScopeWorkgroup), src);
+ spirv_builder_const_uint(b, 32, SpvScopeSubgroup), src);
+}
+
+static SpvId
+sparse_wrap_result_type(struct spirv_builder *b, SpvId result_type)
+{
+ SpvId types[2];
+ types[0] = spirv_builder_type_uint(b, 32);
+ types[1] = result_type;
+ return spirv_builder_type_struct(b, types, 2);
}
SpvId
spirv_builder_emit_image_sample(struct spirv_builder *b,
SpvId result_type,
SpvId sampled_image,
- SpvId coordinate,
- bool proj,
- SpvId lod,
- SpvId bias,
- SpvId dref,
- SpvId dx,
- SpvId dy,
- SpvId const_offset,
- SpvId offset)
+ const struct spriv_tex_src *src)
{
SpvId result = spirv_builder_new_id(b);
- int opcode = SpvOpImageSampleImplicitLod;
+ bool proj = src->proj != 0;
+
int operands = 5;
- if (proj)
- opcode += SpvOpImageSampleProjImplicitLod - SpvOpImageSampleImplicitLod;
- if (lod || (dx && dy))
- opcode += SpvOpImageSampleExplicitLod - SpvOpImageSampleImplicitLod;
- if (dref) {
- opcode += SpvOpImageSampleDrefImplicitLod - SpvOpImageSampleImplicitLod;
- operands++;
+ int opcode;
+ if (src->sparse) {
+ opcode = SpvOpImageSparseSampleImplicitLod;
+ if (proj)
+ opcode += SpvOpImageSparseSampleProjImplicitLod - SpvOpImageSparseSampleImplicitLod;
+ if (src->lod || (src->dx && src->dy))
+ opcode += SpvOpImageSparseSampleExplicitLod - SpvOpImageSparseSampleImplicitLod;
+ if (src->dref) {
+ opcode += SpvOpImageSparseSampleDrefImplicitLod - SpvOpImageSparseSampleImplicitLod;
+ operands++;
+ }
+ result_type = sparse_wrap_result_type(b, result_type);
+ } else {
+ opcode = SpvOpImageSampleImplicitLod;
+ if (proj)
+ opcode += SpvOpImageSampleProjImplicitLod - SpvOpImageSampleImplicitLod;
+ if (src->lod || (src->dx && src->dy))
+ opcode += SpvOpImageSampleExplicitLod - SpvOpImageSampleImplicitLod;
+ if (src->dref) {
+ opcode += SpvOpImageSampleDrefImplicitLod - SpvOpImageSampleImplicitLod;
+ operands++;
+ }
}
SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone;
- SpvId extra_operands[5];
+ SpvId extra_operands[6];
int num_extra_operands = 1;
- if (bias) {
- extra_operands[num_extra_operands++] = bias;
+ if (src->bias) {
+ extra_operands[num_extra_operands++] = src->bias;
operand_mask |= SpvImageOperandsBiasMask;
}
- if (lod) {
- extra_operands[num_extra_operands++] = lod;
+ if (src->lod) {
+ extra_operands[num_extra_operands++] = src->lod;
operand_mask |= SpvImageOperandsLodMask;
- } else if (dx && dy) {
- extra_operands[num_extra_operands++] = dx;
- extra_operands[num_extra_operands++] = dy;
+ } else if (src->dx && src->dy) {
+ extra_operands[num_extra_operands++] = src->dx;
+ extra_operands[num_extra_operands++] = src->dy;
operand_mask |= SpvImageOperandsGradMask;
}
- assert(!(const_offset && offset));
- if (const_offset) {
- extra_operands[num_extra_operands++] = const_offset;
+ assert(!(src->const_offset && src->offset));
+ if (src->const_offset) {
+ extra_operands[num_extra_operands++] = src->const_offset;
operand_mask |= SpvImageOperandsConstOffsetMask;
- } else if (offset) {
- extra_operands[num_extra_operands++] = offset;
+ } else if (src->offset) {
+ extra_operands[num_extra_operands++] = src->offset;
operand_mask |= SpvImageOperandsOffsetMask;
}
+ if (src->min_lod) {
+ extra_operands[num_extra_operands++] = src->min_lod;
+ operand_mask |= SpvImageOperandsMinLodMask;
+ }
/* finalize num_extra_operands / extra_operands */
extra_operands[0] = operand_mask;
@@ -801,9 +936,9 @@ spirv_builder_emit_image_sample(struct spirv_builder *b,
spirv_buffer_emit_word(&b->instructions, result_type);
spirv_buffer_emit_word(&b->instructions, result);
spirv_buffer_emit_word(&b->instructions, sampled_image);
- spirv_buffer_emit_word(&b->instructions, coordinate);
- if (dref)
- spirv_buffer_emit_word(&b->instructions, dref);
+ spirv_buffer_emit_word(&b->instructions, src->coord);
+ if (src->dref)
+ spirv_buffer_emit_word(&b->instructions, src->dref);
for (int i = 0; i < num_extra_operands; ++i)
spirv_buffer_emit_word(&b->instructions, extra_operands[i]);
return result;
@@ -842,13 +977,16 @@ spirv_builder_emit_image_read(struct spirv_builder *b,
SpvId coordinate,
SpvId lod,
SpvId sample,
- SpvId offset)
+ SpvId offset,
+ bool sparse)
{
SpvId result = spirv_builder_new_id(b);
SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone;
SpvId extra_operands[5];
int num_extra_operands = 1;
+ if (sparse)
+ result_type = sparse_wrap_result_type(b, result_type);
if (lod) {
extra_operands[num_extra_operands++] = lod;
operand_mask |= SpvImageOperandsLodMask;
@@ -865,7 +1003,7 @@ spirv_builder_emit_image_read(struct spirv_builder *b,
extra_operands[0] = operand_mask;
spirv_buffer_prepare(&b->instructions, b->mem_ctx, 5 + num_extra_operands);
- spirv_buffer_emit_word(&b->instructions, SpvOpImageRead |
+ spirv_buffer_emit_word(&b->instructions, (sparse ? SpvOpImageSparseRead : SpvOpImageRead) |
((5 + num_extra_operands) << 16));
spirv_buffer_emit_word(&b->instructions, result_type);
spirv_buffer_emit_word(&b->instructions, result);
@@ -915,40 +1053,37 @@ spirv_builder_emit_image_write(struct spirv_builder *b,
SpvId
spirv_builder_emit_image_gather(struct spirv_builder *b,
- SpvId result_type,
- SpvId image,
- SpvId coordinate,
- SpvId component,
- SpvId lod,
- SpvId sample,
- SpvId const_offset,
- SpvId offset,
- SpvId dref)
+ SpvId result_type,
+ SpvId image,
+ const struct spriv_tex_src *src,
+ SpvId component)
{
SpvId result = spirv_builder_new_id(b);
- SpvId op = SpvOpImageGather;
+ SpvId op = src->sparse ? SpvOpImageSparseGather : SpvOpImageGather;
SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone;
SpvId extra_operands[4];
int num_extra_operands = 1;
- if (lod) {
- extra_operands[num_extra_operands++] = lod;
+ if (src->lod) {
+ extra_operands[num_extra_operands++] = src->lod;
operand_mask |= SpvImageOperandsLodMask;
}
- if (sample) {
- extra_operands[num_extra_operands++] = sample;
+ if (src->sample) {
+ extra_operands[num_extra_operands++] = src->sample;
operand_mask |= SpvImageOperandsSampleMask;
}
- assert(!(const_offset && offset));
- if (const_offset) {
- extra_operands[num_extra_operands++] = const_offset;
+ assert(!(src->const_offset && src->offset));
+ if (src->const_offset) {
+ extra_operands[num_extra_operands++] = src->const_offset;
operand_mask |= SpvImageOperandsConstOffsetMask;
- } else if (offset) {
- extra_operands[num_extra_operands++] = offset;
+ } else if (src->offset) {
+ extra_operands[num_extra_operands++] = src->offset;
operand_mask |= SpvImageOperandsOffsetMask;
}
- if (dref)
- op = SpvOpImageDrefGather;
+ if (src->dref)
+ op = src->sparse ? SpvOpImageSparseDrefGather : SpvOpImageDrefGather;
+ if (src->sparse)
+ result_type = sparse_wrap_result_type(b, result_type);
/* finalize num_extra_operands / extra_operands */
extra_operands[0] = operand_mask;
@@ -958,9 +1093,9 @@ spirv_builder_emit_image_gather(struct spirv_builder *b,
spirv_buffer_emit_word(&b->instructions, result_type);
spirv_buffer_emit_word(&b->instructions, result);
spirv_buffer_emit_word(&b->instructions, image);
- spirv_buffer_emit_word(&b->instructions, coordinate);
- if (dref)
- spirv_buffer_emit_word(&b->instructions, dref);
+ spirv_buffer_emit_word(&b->instructions, src->coord);
+ if (src->dref)
+ spirv_buffer_emit_word(&b->instructions, src->dref);
else
spirv_buffer_emit_word(&b->instructions, component);
for (int i = 0; i < num_extra_operands; ++i)
@@ -972,44 +1107,42 @@ SpvId
spirv_builder_emit_image_fetch(struct spirv_builder *b,
SpvId result_type,
SpvId image,
- SpvId coordinate,
- SpvId lod,
- SpvId sample,
- SpvId const_offset,
- SpvId offset)
+ const struct spriv_tex_src *src)
{
SpvId result = spirv_builder_new_id(b);
SpvImageOperandsMask operand_mask = SpvImageOperandsMaskNone;
SpvId extra_operands[4];
int num_extra_operands = 1;
- if (lod) {
- extra_operands[num_extra_operands++] = lod;
+ if (src->lod) {
+ extra_operands[num_extra_operands++] = src->lod;
operand_mask |= SpvImageOperandsLodMask;
}
- if (sample) {
- extra_operands[num_extra_operands++] = sample;
+ if (src->sample) {
+ extra_operands[num_extra_operands++] = src->sample;
operand_mask |= SpvImageOperandsSampleMask;
}
- assert(!(const_offset && offset));
- if (const_offset) {
- extra_operands[num_extra_operands++] = const_offset;
+ assert(!(src->const_offset && src->offset));
+ if (src->const_offset) {
+ extra_operands[num_extra_operands++] = src->const_offset;
operand_mask |= SpvImageOperandsConstOffsetMask;
- } else if (offset) {
- extra_operands[num_extra_operands++] = offset;
+ } else if (src->offset) {
+ extra_operands[num_extra_operands++] = src->offset;
operand_mask |= SpvImageOperandsOffsetMask;
}
+ if (src->sparse)
+ result_type = sparse_wrap_result_type(b, result_type);
/* finalize num_extra_operands / extra_operands */
extra_operands[0] = operand_mask;
spirv_buffer_prepare(&b->instructions, b->mem_ctx, 5 + num_extra_operands);
- spirv_buffer_emit_word(&b->instructions, SpvOpImageFetch |
+ spirv_buffer_emit_word(&b->instructions, (src->sparse ? SpvOpImageSparseFetch : SpvOpImageFetch) |
((5 + num_extra_operands) << 16));
spirv_buffer_emit_word(&b->instructions, result_type);
spirv_buffer_emit_word(&b->instructions, result);
spirv_buffer_emit_word(&b->instructions, image);
- spirv_buffer_emit_word(&b->instructions, coordinate);
+ spirv_buffer_emit_word(&b->instructions, src->coord);
for (int i = 0; i < num_extra_operands; ++i)
spirv_buffer_emit_word(&b->instructions, extra_operands[i]);
return result;
@@ -1197,6 +1330,12 @@ SpvId
spirv_builder_type_int(struct spirv_builder *b, unsigned width)
{
uint32_t args[] = { width, 1 };
+ if (width == 8)
+ spirv_builder_emit_cap(b, SpvCapabilityInt8);
+ else if (width == 16)
+ spirv_builder_emit_cap(b, SpvCapabilityInt16);
+ else if (width == 64)
+ spirv_builder_emit_cap(b, SpvCapabilityInt64);
return get_type_def(b, SpvOpTypeInt, args, ARRAY_SIZE(args));
}
@@ -1204,6 +1343,12 @@ SpvId
spirv_builder_type_uint(struct spirv_builder *b, unsigned width)
{
uint32_t args[] = { width, 0 };
+ if (width == 8)
+ spirv_builder_emit_cap(b, SpvCapabilityInt8);
+ else if (width == 16)
+ spirv_builder_emit_cap(b, SpvCapabilityInt16);
+ else if (width == 64)
+ spirv_builder_emit_cap(b, SpvCapabilityInt64);
return get_type_def(b, SpvOpTypeInt, args, ARRAY_SIZE(args));
}
@@ -1211,6 +1356,10 @@ SpvId
spirv_builder_type_float(struct spirv_builder *b, unsigned width)
{
uint32_t args[] = { width };
+ if (width == 16)
+ spirv_builder_emit_cap(b, SpvCapabilityFloat16);
+ else if (width == 64)
+ spirv_builder_emit_cap(b, SpvCapabilityFloat64);
return get_type_def(b, SpvOpTypeFloat, args, ARRAY_SIZE(args));
}
@@ -1224,10 +1373,18 @@ spirv_builder_type_image(struct spirv_builder *b, SpvId sampled_type,
sampled_type, dim, depth ? 1 : 0, arrayed ? 1 : 0, ms ? 1 : 0, sampled,
image_format
};
+ if (sampled == 2 && ms && dim != SpvDimSubpassData)
+ spirv_builder_emit_cap(b, SpvCapabilityStorageImageMultisample);
return get_type_def(b, SpvOpTypeImage, args, ARRAY_SIZE(args));
}
SpvId
+spirv_builder_emit_sampled_image(struct spirv_builder *b, SpvId result_type, SpvId image, SpvId sampler)
+{
+ return spirv_builder_emit_binop(b, SpvOpSampledImage, result_type, image, sampler);
+}
+
+SpvId
spirv_builder_type_sampled_image(struct spirv_builder *b, SpvId image_type)
{
uint32_t args[] = { image_type };
@@ -1235,6 +1392,13 @@ spirv_builder_type_sampled_image(struct spirv_builder *b, SpvId image_type)
}
SpvId
+spirv_builder_type_sampler(struct spirv_builder *b)
+{
+ uint32_t args[1] = {0};
+ return get_type_def(b, SpvOpTypeSampler, args, 0);
+}
+
+SpvId
spirv_builder_type_pointer(struct spirv_builder *b,
SpvStorageClass storage_class, SpvId type)
{
@@ -1416,7 +1580,7 @@ spirv_builder_const_bool(struct spirv_builder *b, bool val)
SpvId
spirv_builder_const_int(struct spirv_builder *b, int width, int64_t val)
{
- assert(width >= 16);
+ assert(width >= 8);
SpvId type = spirv_builder_type_int(b, width);
if (width <= 32)
return emit_constant_32(b, type, val);
@@ -1428,6 +1592,12 @@ SpvId
spirv_builder_const_uint(struct spirv_builder *b, int width, uint64_t val)
{
assert(width >= 8);
+ if (width == 8)
+ spirv_builder_emit_cap(b, SpvCapabilityInt8);
+ else if (width == 16)
+ spirv_builder_emit_cap(b, SpvCapabilityInt16);
+ else if (width == 64)
+ spirv_builder_emit_cap(b, SpvCapabilityInt64);
SpvId type = spirv_builder_type_uint(b, width);
if (width <= 32)
return emit_constant_32(b, type, val);
@@ -1439,7 +1609,17 @@ SpvId
spirv_builder_spec_const_uint(struct spirv_builder *b, int width)
{
assert(width <= 32);
- return spirv_builder_emit_unop(b, SpvOpSpecConstant, spirv_builder_type_uint(b, width), 0);
+ SpvId const_type = spirv_builder_type_uint(b, width);
+ SpvId result = spirv_builder_new_id(b);
+ spirv_buffer_prepare(&b->types_const_defs, b->mem_ctx, 4);
+ spirv_buffer_emit_word(&b->types_const_defs, SpvOpSpecConstant | (4 << 16));
+ spirv_buffer_emit_word(&b->types_const_defs, const_type);
+ spirv_buffer_emit_word(&b->types_const_defs, result);
+ /* this is the default value for spec constants;
+ * if any users need a different default, add a param to pass for it
+ */
+ spirv_buffer_emit_word(&b->types_const_defs, 1);
+ return result;
}
SpvId
@@ -1447,12 +1627,15 @@ spirv_builder_const_float(struct spirv_builder *b, int width, double val)
{
assert(width >= 16);
SpvId type = spirv_builder_type_float(b, width);
- if (width == 16)
+ if (width == 16) {
+ spirv_builder_emit_cap(b, SpvCapabilityFloat16);
return emit_constant_32(b, type, _mesa_float_to_half(val));
- else if (width == 32)
+ } else if (width == 32)
return emit_constant_32(b, type, u_bitcast_f2u(val));
- else if (width == 64)
+ else if (width == 64) {
+ spirv_builder_emit_cap(b, SpvCapabilityFloat64);
return emit_constant_64(b, type, u_bitcast_d2u(val));
+ }
unreachable("unhandled float-width");
}
@@ -1492,7 +1675,7 @@ spirv_builder_emit_var(struct spirv_builder *b, SpvId type,
{
assert(storage_class != SpvStorageClassGeneric);
struct spirv_buffer *buf = storage_class != SpvStorageClassFunction ?
- &b->types_const_defs : &b->instructions;
+ &b->types_const_defs : &b->local_vars;
SpvId ret = spirv_builder_new_id(b);
spirv_buffer_prepare(buf, b->mem_ctx, 4);
@@ -1549,12 +1732,14 @@ spirv_builder_get_num_words(struct spirv_builder *b)
b->debug_names.num_words +
b->decorations.num_words +
b->types_const_defs.num_words +
+ b->local_vars.num_words +
b->instructions.num_words;
}
size_t
spirv_builder_get_words(struct spirv_builder *b, uint32_t *words,
- size_t num_words, uint32_t spirv_version)
+ size_t num_words, uint32_t spirv_version,
+ uint32_t *tcs_vertices_out_word)
{
assert(num_words >= spirv_builder_get_num_words(b));
@@ -1581,15 +1766,31 @@ spirv_builder_get_words(struct spirv_builder *b, uint32_t *words,
&b->debug_names,
&b->decorations,
&b->types_const_defs,
- &b->instructions
};
for (int i = 0; i < ARRAY_SIZE(buffers); ++i) {
const struct spirv_buffer *buffer = buffers[i];
- for (int j = 0; j < buffer->num_words; ++j)
- words[written++] = buffer->words[j];
+
+ if (buffer == &b->exec_modes && *tcs_vertices_out_word > 0)
+ *tcs_vertices_out_word += written;
+
+ memcpy(words + written, buffer->words,
+ buffer->num_words * sizeof(uint32_t));
+ written += buffer->num_words;
}
+ typed_memcpy(&words[written], b->instructions.words, b->local_vars_begin);
+ written += b->local_vars_begin;
+ typed_memcpy(&words[written], b->local_vars.words, b->local_vars.num_words);
+ written += b->local_vars.num_words;
+ typed_memcpy(&words[written], &b->instructions.words[b->local_vars_begin], (b->instructions.num_words - b->local_vars_begin));
+ written += b->instructions.num_words - b->local_vars_begin;
assert(written == spirv_builder_get_num_words(b));
return written;
}
+
+void
+spirv_builder_begin_local_vars(struct spirv_builder *b)
+{
+ b->local_vars_begin = b->instructions.num_words;
+}
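/* Illustrative sketch of the intended call pattern (assumed caller, ids are
 * placeholders): begin_local_vars() is expected to be called right after the
 * OpFunction/entry OpLabel words are emitted, so that Function-storage
 * variables emitted later from arbitrary points land in b->local_vars and
 * spirv_builder_get_words() splices them back in at local_vars_begin, i.e. at
 * the top of the entry block where SPIR-V requires them:
 *
 *    // ... emit OpFunction + entry OpLabel into b->instructions ...
 *    spirv_builder_begin_local_vars(b);
 *    // ... later, possibly deep inside control flow ...
 *    SpvId var = spirv_builder_emit_var(b, ptr_type, SpvStorageClassFunction);
 */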
diff --git a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h
index d18c101b394..dbdf9d1fba6 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h
+++ b/src/gallium/drivers/zink/nir_to_spirv/spirv_builder.h
@@ -53,11 +53,30 @@ struct spirv_builder {
struct spirv_buffer decorations;
struct spirv_buffer types_const_defs;
+ struct spirv_buffer local_vars;
struct hash_table *types;
struct hash_table *consts;
struct spirv_buffer instructions;
SpvId prev_id;
+ unsigned local_vars_begin;
+};
+
+struct spriv_tex_src {
+ SpvId coord;
+ SpvId proj;
+ SpvId bias;
+ SpvId lod;
+ SpvId dref;
+ SpvId dx;
+ SpvId dy;
+ SpvId const_offset;
+ SpvId offset;
+ SpvId sample;
+ SpvId tex_offset;
+ SpvId bindless;
+ SpvId min_lod;
+ bool sparse;
};
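/* Illustrative sketch, ids are placeholders: the struct replaces the long
 * per-operand parameter lists further down, so a simple biased 2D sample
 * becomes:
 *
 *    struct spriv_tex_src src = {0};
 *    src.coord = coord_id;
 *    src.bias = bias_id;
 *    SpvId result = spirv_builder_emit_image_sample(b, dest_type, sampled_image, &src);
 */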
static inline SpvId
@@ -90,6 +109,10 @@ spirv_builder_emit_decoration(struct spirv_builder *b, SpvId target,
SpvDecoration decoration);
void
+spirv_builder_emit_rounding_mode(struct spirv_builder *b, SpvId target,
+ SpvFPRoundingMode rounding);
+
+void
spirv_builder_emit_input_attachment_index(struct spirv_builder *b, SpvId target, uint32_t id);
void
@@ -146,13 +169,16 @@ spirv_builder_emit_entry_point(struct spirv_builder *b,
SpvExecutionModel exec_model, SpvId entry_point,
const char *name, const SpvId interfaces[],
size_t num_interfaces);
-void
+uint32_t
spirv_builder_emit_exec_mode_literal(struct spirv_builder *b, SpvId entry_point,
SpvExecutionMode exec_mode, uint32_t param);
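/* Illustrative sketch (assumed caller pattern, names are placeholders): the
 * new uint32_t return value is the word offset of the emitted literal inside
 * the exec_modes buffer; spirv_builder_get_words() relocates it into the
 * final binary so e.g. a tess-control OutputVertices literal can be patched
 * after the words are written:
 *
 *    uint32_t vertices_word =
 *       spirv_builder_emit_exec_mode_literal(b, entry, SpvExecutionModeOutputVertices, 3);
 *    ...
 *    spirv_builder_get_words(b, words, num_words, version, &vertices_word);
 *    if (vertices_word)
 *       words[vertices_word] = patch_vertices;
 */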
void
spirv_builder_emit_exec_mode_literal3(struct spirv_builder *b, SpvId entry_point,
SpvExecutionMode exec_mode, uint32_t param[3]);
void
+spirv_builder_emit_exec_mode_id3(struct spirv_builder *b, SpvId entry_point,
+ SpvExecutionMode exec_mode, SpvId param[3]);
+void
spirv_builder_emit_exec_mode(struct spirv_builder *b, SpvId entry_point,
SpvExecutionMode exec_mode);
@@ -178,12 +204,16 @@ SpvId
spirv_builder_emit_load(struct spirv_builder *b, SpvId result_type,
SpvId pointer);
+SpvId
+spirv_builder_emit_load_aligned(struct spirv_builder *b, SpvId result_type, SpvId pointer, unsigned alignment, bool coherent);
void
spirv_builder_emit_atomic_store(struct spirv_builder *b, SpvId pointer, SpvScope scope,
SpvMemorySemanticsMask semantics, SpvId object);
void
spirv_builder_emit_store(struct spirv_builder *b, SpvId pointer, SpvId object);
+void
+spirv_builder_emit_store_aligned(struct spirv_builder *b, SpvId pointer, SpvId object, unsigned alignment, bool coherent);
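/* Illustrative sketch, ids are placeholders: the aligned variants presumably
 * add an Aligned memory-operand mask (and, when 'coherent' is set, the
 * Vulkan-memory-model visibility/availability operands) to the access, e.g.
 * for scalar-layout SSBO loads/stores:
 *
 *    SpvId val = spirv_builder_emit_load_aligned(b, uint_type, src_ptr, 4, false);
 *    spirv_builder_emit_store_aligned(b, dst_ptr, val, 4, false);
 */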
SpvId
spirv_builder_emit_access_chain(struct spirv_builder *b, SpvId result_type,
@@ -268,6 +298,15 @@ spirv_builder_set_phi_operand(struct spirv_builder *b, size_t position,
void
spirv_builder_emit_kill(struct spirv_builder *b);
+void
+spirv_builder_emit_terminate(struct spirv_builder *b);
+
+void
+spirv_builder_emit_demote(struct spirv_builder *b);
+
+SpvId
+spirv_is_helper_invocation(struct spirv_builder *b);
+
SpvId
spirv_builder_emit_vote(struct spirv_builder *b, SpvOp op, SpvId src);
@@ -275,15 +314,7 @@ SpvId
spirv_builder_emit_image_sample(struct spirv_builder *b,
SpvId result_type,
SpvId sampled_image,
- SpvId coordinate,
- bool proj,
- SpvId lod,
- SpvId bias,
- SpvId dref,
- SpvId dx,
- SpvId dy,
- SpvId const_offset,
- SpvId offset);
+ const struct spriv_tex_src *src);
SpvId
spirv_builder_emit_image(struct spirv_builder *b, SpvId result_type,
@@ -303,7 +334,8 @@ spirv_builder_emit_image_read(struct spirv_builder *b,
SpvId coordinate,
SpvId lod,
SpvId sample,
- SpvId offset);
+ SpvId offset,
+ bool sparse);
void
spirv_builder_emit_image_write(struct spirv_builder *b,
@@ -318,22 +350,13 @@ SpvId
spirv_builder_emit_image_fetch(struct spirv_builder *b,
SpvId result_type,
SpvId image,
- SpvId coordinate,
- SpvId lod,
- SpvId sample,
- SpvId const_offset,
- SpvId offset);
+ const struct spriv_tex_src *src);
SpvId
spirv_builder_emit_image_gather(struct spirv_builder *b,
SpvId result_type,
SpvId image,
- SpvId coordinate,
- SpvId component,
- SpvId lod,
- SpvId sample,
- SpvId const_offset,
- SpvId offset,
- SpvId dref);
+ const struct spriv_tex_src *src,
+ SpvId component);
SpvId
spirv_builder_emit_image_query_size(struct spirv_builder *b,
@@ -379,6 +402,10 @@ spirv_builder_type_image(struct spirv_builder *b, SpvId sampled_type,
SpvId
spirv_builder_type_sampled_image(struct spirv_builder *b, SpvId image_type);
+SpvId
+spirv_builder_type_sampler(struct spirv_builder *b);
+SpvId
+spirv_builder_emit_sampled_image(struct spirv_builder *b, SpvId result_type, SpvId image, SpvId sampler);
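/* Illustrative sketch, ids are placeholders: with a standalone sampler type,
 * an image and a sampler can be combined at the use site instead of requiring
 * a pre-baked combined image/sampler:
 *
 *    SpvId sampler_type = spirv_builder_type_sampler(b);
 *    SpvId sampler = spirv_builder_emit_load(b, sampler_type, sampler_var);
 *    SpvId combined_type = spirv_builder_type_sampled_image(b, image_type);
 *    SpvId combined = spirv_builder_emit_sampled_image(b, combined_type, image, sampler);
 */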
SpvId
spirv_builder_type_pointer(struct spirv_builder *b,
@@ -409,6 +436,11 @@ spirv_builder_type_function(struct spirv_builder *b, SpvId return_type,
size_t num_parameter_types);
SpvId
+spirv_builder_function_call(struct spirv_builder *b, SpvId result_type,
+ SpvId function, const SpvId arguments[],
+ size_t num_arguments);
+
+SpvId
spirv_builder_const_bool(struct spirv_builder *b, bool val);
SpvId
@@ -451,10 +483,13 @@ spirv_builder_get_num_words(struct spirv_builder *b);
size_t
spirv_builder_get_words(struct spirv_builder *b, uint32_t *words,
- size_t num_words, uint32_t spirv_version);
+ size_t num_words, uint32_t spirv_version,
+ uint32_t *tcs_vertices_out_word);
void
-spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream);
+spirv_builder_emit_vertex(struct spirv_builder *b, uint32_t stream, bool multistream);
+void
+spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream, bool multistream);
void
-spirv_builder_end_primitive(struct spirv_builder *b, uint32_t stream);
+spirv_builder_begin_local_vars(struct spirv_builder *b);
#endif
diff --git a/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py b/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py
index af2419cf9d4..20ed4cfb565 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py
+++ b/src/gallium/drivers/zink/nir_to_spirv/zink_nir_algebraic.py
@@ -25,7 +25,7 @@ import sys
lower_b2b = [
(('b2b32', 'a'), ('b2i32', 'a')),
- (('b2b1', 'a'), ('i2b1', 'a')),
+ (('b2b1', 'a'), ('ine', 'a', 0)),
]
def main():
diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c
index f60590ca25e..6da1d571c74 100644
--- a/src/gallium/drivers/zink/zink_batch.c
+++ b/src/gallium/drivers/zink/zink_batch.c
@@ -1,23 +1,19 @@
#include "zink_batch.h"
-
#include "zink_context.h"
-#include "zink_fence.h"
+#include "zink_descriptors.h"
#include "zink_framebuffer.h"
-#include "zink_query.h"
+#include "zink_kopper.h"
#include "zink_program.h"
-#include "zink_render_pass.h"
+#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_surface.h"
-#include "util/hash_table.h"
-#include "util/u_debug.h"
-#include "util/set.h"
-
#ifdef VK_USE_PLATFORM_METAL_EXT
#include "QuartzCore/CAMetalLayer.h"
#endif
-#include "wsi_common.h"
+
+#define MAX_VIEW_COUNT 500
void
debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr)
@@ -25,91 +21,221 @@ debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr)
sprintf(buf, "zink_batch_state");
}
+/* this resets the batch usage and tracking for a resource object */
+static void
+reset_obj(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_resource_object *obj)
+{
+ /* if no batch usage exists after removing the usage from 'bs', this resource is considered fully idle */
+ if (!zink_resource_object_usage_unset(obj, bs)) {
+ /* the resource is idle, so reset all access/reordering info */
+ obj->unordered_read = true;
+ obj->unordered_write = true;
+ obj->access = 0;
+ obj->unordered_access = 0;
+ obj->last_write = 0;
+ obj->access_stage = 0;
+ obj->unordered_access_stage = 0;
+ obj->copies_need_reset = true;
+ obj->unsync_access = true;
+ /* also prune dead view objects */
+ simple_mtx_lock(&obj->view_lock);
+ if (obj->is_buffer) {
+ while (util_dynarray_contains(&obj->views, VkBufferView))
+ VKSCR(DestroyBufferView)(screen->dev, util_dynarray_pop(&obj->views, VkBufferView), NULL);
+ } else {
+ while (util_dynarray_contains(&obj->views, VkImageView))
+ VKSCR(DestroyImageView)(screen->dev, util_dynarray_pop(&obj->views, VkImageView), NULL);
+ }
+ obj->view_prune_count = 0;
+ obj->view_prune_timeline = 0;
+ simple_mtx_unlock(&obj->view_lock);
+ if (obj->dt)
+ zink_kopper_prune_batch_usage(obj->dt, &bs->usage);
+ } else if (util_dynarray_num_elements(&obj->views, VkBufferView) > MAX_VIEW_COUNT && !zink_bo_has_unflushed_usage(obj->bo)) {
+ /* avoid ballooning from too many views on always-used resources: */
+ simple_mtx_lock(&obj->view_lock);
+ /* ensure no existing view pruning is queued, double check elements in case pruning just finished */
+ if (!obj->view_prune_timeline && util_dynarray_num_elements(&obj->views, VkBufferView) > MAX_VIEW_COUNT) {
+ /* prune all existing views */
+ obj->view_prune_count = util_dynarray_num_elements(&obj->views, VkBufferView);
+ /* prune them when the views will definitely not be in use */
+ obj->view_prune_timeline = MAX2(obj->bo->reads.u ? obj->bo->reads.u->usage : 0,
+ obj->bo->writes.u ? obj->bo->writes.u->usage : 0);
+ }
+ simple_mtx_unlock(&obj->view_lock);
+ }
+ /* resource objects are not unrefed here;
+ * this is typically the last ref on a resource object, and destruction will
+ * usually trigger an ioctl, so defer deletion to the submit thread to avoid blocking
+ */
+ util_dynarray_append(&bs->unref_resources, struct zink_resource_object*, obj);
+}
+
+/* reset all the resource objects in a given batch object list */
+static void
+reset_obj_list(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_batch_obj_list *list)
+{
+ for (unsigned i = 0; i < list->num_buffers; i++)
+ reset_obj(screen, bs, list->objs[i]);
+ list->num_buffers = 0;
+}
+
+/* reset a given batch state */
void
zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (VKSCR(ResetCommandPool)(screen->dev, bs->cmdpool, 0) != VK_SUCCESS)
- debug_printf("vkResetCommandPool failed\n");
+ VkResult result = VKSCR(ResetCommandPool)(screen->dev, bs->cmdpool, 0);
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkResetCommandPool failed (%s)", vk_Result_to_str(result));
+ result = VKSCR(ResetCommandPool)(screen->dev, bs->unsynchronized_cmdpool, 0);
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkResetCommandPool failed (%s)", vk_Result_to_str(result));
+
+ /* unref/reset all used resources */
+ reset_obj_list(screen, bs, &bs->real_objs);
+ reset_obj_list(screen, bs, &bs->slab_objs);
+ reset_obj_list(screen, bs, &bs->sparse_objs);
+ while (util_dynarray_contains(&bs->swapchain_obj, struct zink_resource_object*)) {
+ struct zink_resource_object *obj = util_dynarray_pop(&bs->swapchain_obj, struct zink_resource_object*);
+ reset_obj(screen, bs, obj);
+ }
- /* unref all used resources */
- set_foreach_remove(bs->resources, entry) {
- struct zink_resource_object *obj = (struct zink_resource_object *)entry->key;
- if (!zink_resource_object_usage_unset(obj, bs)) {
- obj->unordered_barrier = false;
- obj->access = 0;
- obj->access_stage = 0;
+ /* this is where bindless texture/buffer ids get recycled */
+ for (unsigned i = 0; i < 2; i++) {
+ while (util_dynarray_contains(&bs->bindless_releases[i], uint32_t)) {
+ uint32_t handle = util_dynarray_pop(&bs->bindless_releases[i], uint32_t);
+ bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle);
+ struct util_idalloc *ids = i ? &ctx->di.bindless[is_buffer].img_slots : &ctx->di.bindless[is_buffer].tex_slots;
+ util_idalloc_free(ids, is_buffer ? handle - ZINK_MAX_BINDLESS_HANDLES : handle);
}
- util_dynarray_append(&bs->unref_resources, struct zink_resource_object*, obj);
}
- set_foreach_remove(bs->active_queries, entry) {
+ /* queries must only be destroyed once they are inactive */
+ set_foreach_remove(&bs->active_queries, entry) {
struct zink_query *query = (void*)entry->key;
- zink_prune_query(screen, bs, query);
- }
-
- set_foreach_remove(bs->surfaces, entry) {
- struct zink_surface *surf = (struct zink_surface *)entry->key;
- zink_batch_usage_unset(&surf->batch_uses, bs);
- zink_surface_reference(screen, &surf, NULL);
+ zink_prune_query(bs, query);
}
- set_foreach_remove(bs->bufferviews, entry) {
- struct zink_buffer_view *buffer_view = (struct zink_buffer_view *)entry->key;
- zink_batch_usage_unset(&buffer_view->batch_uses, bs);
- zink_buffer_view_reference(screen, &buffer_view, NULL);
- }
-
- util_dynarray_foreach(&bs->dead_framebuffers, struct zink_framebuffer*, fb) {
- zink_framebuffer_reference(screen, fb, NULL);
- }
- util_dynarray_clear(&bs->dead_framebuffers);
+ util_dynarray_foreach(&bs->dead_querypools, VkQueryPool, pool)
+ VKSCR(DestroyQueryPool)(screen->dev, *pool, NULL);
+ util_dynarray_clear(&bs->dead_querypools);
+
+ util_dynarray_foreach(&bs->dgc.pipelines, VkPipeline, pipeline)
+ VKSCR(DestroyPipeline)(screen->dev, *pipeline, NULL);
+ util_dynarray_clear(&bs->dgc.pipelines);
+ util_dynarray_foreach(&bs->dgc.layouts, VkIndirectCommandsLayoutNV, iclayout)
+ VKSCR(DestroyIndirectCommandsLayoutNV)(screen->dev, *iclayout, NULL);
+ util_dynarray_clear(&bs->dgc.layouts);
+
+ /* samplers are appended to the batch state in which they are destroyed
+ * to ensure deferred deletion without destroying in-use objects
+ */
util_dynarray_foreach(&bs->zombie_samplers, VkSampler, samp) {
VKSCR(DestroySampler)(screen->dev, *samp, NULL);
}
util_dynarray_clear(&bs->zombie_samplers);
- util_dynarray_clear(&bs->persistent_resources);
- screen->batch_descriptor_reset(screen, bs);
+ zink_batch_descriptor_reset(screen, bs);
+
+ util_dynarray_foreach(&bs->freed_sparse_backing_bos, struct zink_bo, bo) {
+ zink_bo_unref(screen, bo);
+ }
+ util_dynarray_clear(&bs->freed_sparse_backing_bos);
- set_foreach_remove(bs->programs, entry) {
+ /* programs are refcounted and batch-tracked */
+ set_foreach_remove(&bs->programs, entry) {
struct zink_program *pg = (struct zink_program*)entry->key;
zink_batch_usage_unset(&pg->batch_uses, bs);
- if (pg->is_compute) {
- struct zink_compute_program *comp = (struct zink_compute_program*)pg;
- zink_compute_program_reference(screen, &comp, NULL);
- } else {
- struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
- zink_gfx_program_reference(screen, &prog, NULL);
- }
+ zink_program_reference(screen, &pg, NULL);
}
- pipe_resource_reference(&bs->flush_res, NULL);
-
bs->resource_size = 0;
+ bs->signal_semaphore = VK_NULL_HANDLE;
+ util_dynarray_clear(&bs->wait_semaphore_stages);
+
+ bs->present = VK_NULL_HANDLE;
+ /* check the arrays first to avoid locking unnecessarily */
+ if (util_dynarray_contains(&bs->acquires, VkSemaphore) || util_dynarray_contains(&bs->wait_semaphores, VkSemaphore)) {
+ simple_mtx_lock(&screen->semaphores_lock);
+ util_dynarray_append_dynarray(&screen->semaphores, &bs->acquires);
+ util_dynarray_clear(&bs->acquires);
+ util_dynarray_append_dynarray(&screen->semaphores, &bs->wait_semaphores);
+ util_dynarray_clear(&bs->wait_semaphores);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ }
+ if (util_dynarray_contains(&bs->signal_semaphores, VkSemaphore) || util_dynarray_contains(&bs->fd_wait_semaphores, VkSemaphore)) {
+ simple_mtx_lock(&screen->semaphores_lock);
+ util_dynarray_append_dynarray(&screen->fd_semaphores, &bs->signal_semaphores);
+ util_dynarray_clear(&bs->signal_semaphores);
+ util_dynarray_append_dynarray(&screen->fd_semaphores, &bs->fd_wait_semaphores);
+ util_dynarray_clear(&bs->fd_wait_semaphores);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ }
+ bs->swapchain = NULL;
+
+ util_dynarray_foreach(&bs->fences, struct zink_tc_fence*, mfence)
+ zink_fence_reference(screen, mfence, NULL);
+ util_dynarray_clear(&bs->fences);
+
+ bs->unordered_write_access = VK_ACCESS_NONE;
+ bs->unordered_write_stages = VK_PIPELINE_STAGE_NONE;
+ /* only increment batch generation if previously in-use to avoid false detection of batch completion */
+ if (bs->fence.submitted)
+ bs->usage.submit_count++;
/* only reset submitted here so that tc fence desync can pick up the 'completed' flag
* before the state is reused
*/
bs->fence.submitted = false;
bs->has_barriers = false;
- bs->scanout_flush = false;
+ bs->has_unsync = false;
if (bs->fence.batch_id)
zink_screen_update_last_finished(screen, bs->fence.batch_id);
- bs->submit_count++;
bs->fence.batch_id = 0;
bs->usage.usage = 0;
+ bs->next = NULL;
+ bs->last_added_obj = NULL;
}
+/* this is where deferred resource unrefs occur */
static void
unref_resources(struct zink_screen *screen, struct zink_batch_state *bs)
{
while (util_dynarray_contains(&bs->unref_resources, struct zink_resource_object*)) {
struct zink_resource_object *obj = util_dynarray_pop(&bs->unref_resources, struct zink_resource_object*);
+ /* view pruning may be deferred to avoid ballooning */
+ if (obj->view_prune_timeline && zink_screen_check_last_finished(screen, obj->view_prune_timeline)) {
+ simple_mtx_lock(&obj->view_lock);
+ /* check again under the lock in case another context got here first */
+ if (obj->view_prune_timeline && zink_screen_check_last_finished(screen, obj->view_prune_timeline)) {
+ /* prune `view_prune_count` views */
+ if (obj->is_buffer) {
+ VkBufferView *views = obj->views.data;
+ for (unsigned i = 0; i < obj->view_prune_count; i++)
+ VKSCR(DestroyBufferView)(screen->dev, views[i], NULL);
+ } else {
+ VkImageView *views = obj->views.data;
+ for (unsigned i = 0; i < obj->view_prune_count; i++)
+ VKSCR(DestroyImageView)(screen->dev, views[i], NULL);
+ }
+ size_t offset = obj->view_prune_count * sizeof(VkBufferView);
+ uint8_t *data = obj->views.data;
+ /* shift the view array to the start */
+ memcpy(data, data + offset, obj->views.size - offset);
+ /* adjust the array size */
+ obj->views.size -= offset;
+ obj->view_prune_count = 0;
+ obj->view_prune_timeline = 0;
+ }
+ simple_mtx_unlock(&obj->view_lock);
+ }
+ /* this is typically where resource objects get destroyed */
zink_resource_object_reference(screen, &obj, NULL);
}
}
+/* utility for resetting a batch state; called on context destruction */
void
zink_clear_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
{
@@ -118,20 +244,37 @@ zink_clear_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
unref_resources(zink_screen(ctx->base.screen), bs);
}
+/* utility for managing the singly-linked batch state list */
+static void
+pop_batch_state(struct zink_context *ctx)
+{
+ const struct zink_batch_state *bs = ctx->batch_states;
+ ctx->batch_states = bs->next;
+ ctx->batch_states_count--;
+ if (ctx->last_batch_state == bs)
+ ctx->last_batch_state = NULL;
+}
+
+/* reset all batch states and append to the free state list
+ * only usable after a full stall
+ */
void
zink_batch_reset_all(struct zink_context *ctx)
{
- simple_mtx_lock(&ctx->batch_mtx);
- hash_table_foreach(&ctx->batch_states, entry) {
- struct zink_batch_state *bs = entry->data;
+ while (ctx->batch_states) {
+ struct zink_batch_state *bs = ctx->batch_states;
bs->fence.completed = true;
+ pop_batch_state(ctx);
zink_reset_batch_state(ctx, bs);
- _mesa_hash_table_remove(&ctx->batch_states, entry);
- util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, bs);
+ if (ctx->last_free_batch_state)
+ ctx->last_free_batch_state->next = bs;
+ else
+ ctx->free_batch_states = bs;
+ ctx->last_free_batch_state = bs;
}
- simple_mtx_unlock(&ctx->batch_mtx);
}
+/* called only on context destruction */
void
zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs)
{
@@ -143,79 +286,131 @@ zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs
cnd_destroy(&bs->usage.flush);
mtx_destroy(&bs->usage.mtx);
- if (bs->fence.fence)
- VKSCR(DestroyFence)(screen->dev, bs->fence.fence, NULL);
-
if (bs->cmdbuf)
VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->cmdbuf);
- if (bs->barrier_cmdbuf)
- VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->barrier_cmdbuf);
+ if (bs->reordered_cmdbuf)
+ VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->reordered_cmdbuf);
if (bs->cmdpool)
VKSCR(DestroyCommandPool)(screen->dev, bs->cmdpool, NULL);
-
+ if (bs->unsynchronized_cmdbuf)
+ VKSCR(FreeCommandBuffers)(screen->dev, bs->unsynchronized_cmdpool, 1, &bs->unsynchronized_cmdbuf);
+ if (bs->unsynchronized_cmdpool)
+ VKSCR(DestroyCommandPool)(screen->dev, bs->unsynchronized_cmdpool, NULL);
+ free(bs->real_objs.objs);
+ free(bs->slab_objs.objs);
+ free(bs->sparse_objs.objs);
+ util_dynarray_fini(&bs->freed_sparse_backing_bos);
+ util_dynarray_fini(&bs->dead_querypools);
+ util_dynarray_fini(&bs->dgc.pipelines);
+ util_dynarray_fini(&bs->dgc.layouts);
+ util_dynarray_fini(&bs->swapchain_obj);
util_dynarray_fini(&bs->zombie_samplers);
- util_dynarray_fini(&bs->dead_framebuffers);
util_dynarray_fini(&bs->unref_resources);
- _mesa_set_destroy(bs->surfaces, NULL);
- _mesa_set_destroy(bs->bufferviews, NULL);
- _mesa_set_destroy(bs->programs, NULL);
- _mesa_set_destroy(bs->active_queries, NULL);
- screen->batch_descriptor_deinit(screen, bs);
+ util_dynarray_fini(&bs->bindless_releases[0]);
+ util_dynarray_fini(&bs->bindless_releases[1]);
+ util_dynarray_fini(&bs->acquires);
+ util_dynarray_fini(&bs->acquire_flags);
+ unsigned num_mfences = util_dynarray_num_elements(&bs->fence.mfences, void *);
+ struct zink_tc_fence **mfence = bs->fence.mfences.data;
+ for (unsigned i = 0; i < num_mfences; i++) {
+ mfence[i]->fence = NULL;
+ }
+ util_dynarray_fini(&bs->fence.mfences);
+ zink_batch_descriptor_deinit(screen, bs);
ralloc_free(bs);
}
+/* batch states are created:
+ * - on context creation
+ * - dynamically up to a threshold if no free ones are available
+ */
static struct zink_batch_state *
create_batch_state(struct zink_context *ctx)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
struct zink_batch_state *bs = rzalloc(NULL, struct zink_batch_state);
- bs->have_timelines = ctx->have_timelines;
VkCommandPoolCreateInfo cpci = {0};
cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cpci.queueFamilyIndex = screen->gfx_queue;
- cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
- if (VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool) != VK_SUCCESS)
- goto fail;
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
+ goto fail;
+ }
+ );
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
+ goto fail;
+ }
+ );
+
+ VkCommandBuffer cmdbufs[2];
VkCommandBufferAllocateInfo cbai = {0};
cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
- cbai.commandPool = bs->cmdpool;
cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
- cbai.commandBufferCount = 1;
+ cbai.commandPool = bs->cmdpool;
+ cbai.commandBufferCount = 2;
- if (VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->cmdbuf) != VK_SUCCESS)
- goto fail;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
+ goto fail;
+ }
+ );
- if (VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->barrier_cmdbuf) != VK_SUCCESS)
- goto fail;
+ bs->cmdbuf = cmdbufs[0];
+ bs->reordered_cmdbuf = cmdbufs[1];
+
+ cbai.commandPool = bs->unsynchronized_cmdpool;
+ cbai.commandBufferCount = 1;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
+ goto fail;
+ }
+ );
#define SET_CREATE_OR_FAIL(ptr) \
- ptr = _mesa_pointer_set_create(bs); \
- if (!ptr) \
+ if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \
goto fail
bs->ctx = ctx;
- SET_CREATE_OR_FAIL(bs->resources);
- SET_CREATE_OR_FAIL(bs->surfaces);
- SET_CREATE_OR_FAIL(bs->bufferviews);
- SET_CREATE_OR_FAIL(bs->programs);
- SET_CREATE_OR_FAIL(bs->active_queries);
+ SET_CREATE_OR_FAIL(&bs->programs);
+ SET_CREATE_OR_FAIL(&bs->active_queries);
+ SET_CREATE_OR_FAIL(&bs->dmabuf_exports);
+ util_dynarray_init(&bs->signal_semaphores, NULL);
+ util_dynarray_init(&bs->wait_semaphores, NULL);
+ util_dynarray_init(&bs->fd_wait_semaphores, NULL);
+ util_dynarray_init(&bs->fences, NULL);
+ util_dynarray_init(&bs->dead_querypools, NULL);
+ util_dynarray_init(&bs->dgc.pipelines, NULL);
+ util_dynarray_init(&bs->dgc.layouts, NULL);
+ util_dynarray_init(&bs->wait_semaphore_stages, NULL);
+ util_dynarray_init(&bs->fd_wait_semaphore_stages, NULL);
util_dynarray_init(&bs->zombie_samplers, NULL);
- util_dynarray_init(&bs->dead_framebuffers, NULL);
- util_dynarray_init(&bs->persistent_resources, NULL);
+ util_dynarray_init(&bs->freed_sparse_backing_bos, NULL);
util_dynarray_init(&bs->unref_resources, NULL);
+ util_dynarray_init(&bs->acquires, NULL);
+ util_dynarray_init(&bs->acquire_flags, NULL);
+ util_dynarray_init(&bs->bindless_releases[0], NULL);
+ util_dynarray_init(&bs->bindless_releases[1], NULL);
+ util_dynarray_init(&bs->swapchain_obj, NULL);
+ util_dynarray_init(&bs->fence.mfences, NULL);
cnd_init(&bs->usage.flush);
mtx_init(&bs->usage.mtx, mtx_plain);
+ simple_mtx_init(&bs->exportable_lock, mtx_plain);
+ memset(&bs->buffer_indices_hashlist, -1, sizeof(bs->buffer_indices_hashlist));
- if (!screen->batch_descriptor_init(screen, bs))
- goto fail;
-
- VkFenceCreateInfo fci = {0};
- fci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-
- if (VKSCR(CreateFence)(screen->dev, &fci, NULL, &bs->fence.fence) != VK_SUCCESS)
+ if (!zink_batch_descriptor_init(screen, bs))
goto fail;
util_queue_fence_init(&bs->flush_completed);
@@ -226,65 +421,78 @@ fail:
return NULL;
}
+/* a batch state is considered "free" if it is both submitted and completed */
static inline bool
-find_unused_state(struct hash_entry *entry)
+find_unused_state(struct zink_batch_state *bs)
{
- struct zink_fence *fence = entry->data;
+ struct zink_fence *fence = &bs->fence;
/* we can't reset these from fence_finish because threads */
bool completed = p_atomic_read(&fence->completed);
bool submitted = p_atomic_read(&fence->submitted);
return submitted && completed;
}
+/* find a "free" batch state */
static struct zink_batch_state *
get_batch_state(struct zink_context *ctx, struct zink_batch *batch)
{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
struct zink_batch_state *bs = NULL;
- simple_mtx_lock(&ctx->batch_mtx);
- if (util_dynarray_num_elements(&ctx->free_batch_states, struct zink_batch_state*))
- bs = util_dynarray_pop(&ctx->free_batch_states, struct zink_batch_state*);
+ /* try from the ones that are known to be free first */
+ if (ctx->free_batch_states) {
+ bs = ctx->free_batch_states;
+ ctx->free_batch_states = bs->next;
+ if (bs == ctx->last_free_batch_state)
+ ctx->last_free_batch_state = NULL;
+ }
+ /* try from the ones that are given back to the screen next */
if (!bs) {
- hash_table_foreach(&ctx->batch_states, he) {
- struct zink_fence *fence = he->data;
- if (zink_screen_check_last_finished(zink_screen(ctx->base.screen), fence->batch_id) || find_unused_state(he)) {
- bs = he->data;
- _mesa_hash_table_remove(&ctx->batch_states, he);
- break;
- }
+ simple_mtx_lock(&screen->free_batch_states_lock);
+ if (screen->free_batch_states) {
+ bs = screen->free_batch_states;
+ bs->ctx = ctx;
+ screen->free_batch_states = bs->next;
+ if (bs == screen->last_free_batch_state)
+ screen->last_free_batch_state = NULL;
+ }
+ simple_mtx_unlock(&screen->free_batch_states_lock);
+ }
+ /* states are stored sequentially, so if the first one doesn't work, none of them will */
+ if (!bs && ctx->batch_states && ctx->batch_states->next) {
+ /* only a submitted state can be reused */
+ if (p_atomic_read(&ctx->batch_states->fence.submitted) &&
+ /* a submitted state must have completed before it can be reused */
+ (zink_screen_check_last_finished(screen, ctx->batch_states->fence.batch_id) ||
+ p_atomic_read(&ctx->batch_states->fence.completed))) {
+ bs = ctx->batch_states;
+ pop_batch_state(ctx);
}
}
- simple_mtx_unlock(&ctx->batch_mtx);
if (bs) {
- if (bs->fence.submitted && !bs->fence.completed)
- /* this fence is already done, so we need vulkan to release the cmdbuf */
- zink_vkfence_wait(zink_screen(ctx->base.screen), &bs->fence, PIPE_TIMEOUT_INFINITE);
zink_reset_batch_state(ctx, bs);
} else {
if (!batch->state) {
/* this is batch init, so create a few more states for later use */
for (int i = 0; i < 3; i++) {
struct zink_batch_state *state = create_batch_state(ctx);
- util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, state);
+ if (ctx->last_free_batch_state)
+ ctx->last_free_batch_state->next = state;
+ else
+ ctx->free_batch_states = state;
+ ctx->last_free_batch_state = state;
}
}
+ /* no batch states were available: make a new one */
bs = create_batch_state(ctx);
}
return bs;
}
+/* reset the batch object: get a new state and unset 'has_work' to disable flushing */
void
zink_reset_batch(struct zink_context *ctx, struct zink_batch *batch)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
-
- if (ctx->have_timelines && screen->last_finished > ctx->curr_batch && ctx->curr_batch == 1) {
- if (!zink_screen_init_semaphore(screen)) {
- debug_printf("timeline init failed, things are about to go dramatically wrong.");
- ctx->have_timelines = false;
- }
- }
-
batch->state = get_batch_state(ctx, batch);
assert(batch->state);
@@ -292,8 +500,34 @@ zink_reset_batch(struct zink_context *ctx, struct zink_batch *batch)
}
void
+zink_batch_bind_db(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_batch *batch = &ctx->batch;
+ unsigned count = 1;
+ VkDescriptorBufferBindingInfoEXT infos[2] = {0};
+ infos[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
+ infos[0].address = batch->state->dd.db->obj->bda;
+ infos[0].usage = batch->state->dd.db->obj->vkusage;
+ assert(infos[0].usage);
+
+ if (ctx->dd.bindless_init) {
+ infos[1].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
+ infos[1].address = ctx->dd.db.bindless_db->obj->bda;
+ infos[1].usage = ctx->dd.db.bindless_db->obj->vkusage;
+ assert(infos[1].usage);
+ count++;
+ }
+ VKSCR(CmdBindDescriptorBuffersEXT)(batch->state->cmdbuf, count, infos);
+ VKSCR(CmdBindDescriptorBuffersEXT)(batch->state->reordered_cmdbuf, count, infos);
+ batch->state->dd.db_bound = true;
+}
+
+/* called on context creation and after flushing an old batch */
+void
zink_start_batch(struct zink_context *ctx, struct zink_batch *batch)
{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
zink_reset_batch(ctx, batch);
batch->state->usage.unflushed = true;
@@ -301,109 +535,240 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch)
VkCommandBufferBeginInfo cbbi = {0};
cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
- if (VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi) != VK_SUCCESS)
- debug_printf("vkBeginCommandBuffer failed\n");
- if (VKCTX(BeginCommandBuffer)(batch->state->barrier_cmdbuf, &cbbi) != VK_SUCCESS)
- debug_printf("vkBeginCommandBuffer failed\n");
- batch->state->fence.batch_id = ctx->curr_batch;
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi),
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
+ );
+ VRAM_ALLOC_LOOP(result,
+ VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi),
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
+ );
+ VRAM_ALLOC_LOOP(result,
+ VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi),
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
+ );
+
batch->state->fence.completed = false;
- if (ctx->last_fence) {
- struct zink_batch_state *last_state = zink_batch_state(ctx->last_fence);
+ if (ctx->last_batch_state) {
+ struct zink_batch_state *last_state = ctx->last_batch_state;
batch->last_batch_usage = &last_state->usage;
}
- if (!ctx->queries_disabled)
- zink_resume_queries(ctx, batch);
+#ifdef HAVE_RENDERDOC_APP_H
+ if (VKCTX(CmdInsertDebugUtilsLabelEXT) && screen->renderdoc_api) {
+ VkDebugUtilsLabelEXT capture_label;
+ /* Magic fallback which lets us bridge the Wine barrier over to Linux RenderDoc. */
+ capture_label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
+ capture_label.pNext = NULL;
+ capture_label.pLabelName = "vr-marker,frame_end,type,application";
+ memset(capture_label.color, 0, sizeof(capture_label.color));
+ VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->unsynchronized_cmdbuf, &capture_label);
+ VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->reordered_cmdbuf, &capture_label);
+ VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->cmdbuf, &capture_label);
+ }
+
+ unsigned renderdoc_frame = p_atomic_read(&screen->renderdoc_frame);
+ if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY) && screen->renderdoc_api && !screen->renderdoc_capturing &&
+ ((screen->renderdoc_capture_all && screen->screen_id == 1) || (renderdoc_frame >= screen->renderdoc_capture_start && renderdoc_frame <= screen->renderdoc_capture_end))) {
+ screen->renderdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL);
+ screen->renderdoc_capturing = true;
+ }
+#endif
+
+ /* descriptor buffers must always be bound at the start of a batch */
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && !(ctx->flags & ZINK_CONTEXT_COPY_ONLY))
+ zink_batch_bind_db(ctx);
+ /* zero init for unordered blits */
+ if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) {
+ VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->cmdbuf, 0);
+ VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->reordered_cmdbuf, 0);
+ VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->unsynchronized_cmdbuf, 0);
+ }
}
+/* common operations to run post submit; split out for clarity */
static void
post_submit(void *data, void *gdata, int thread_index)
{
struct zink_batch_state *bs = data;
+ struct zink_screen *screen = zink_screen(bs->ctx->base.screen);
if (bs->is_device_lost) {
if (bs->ctx->reset.reset)
bs->ctx->reset.reset(bs->ctx->reset.data, PIPE_GUILTY_CONTEXT_RESET);
- zink_screen(bs->ctx->base.screen)->device_lost = true;
+ else if (screen->abort_on_hang && !screen->robust_ctx_count)
+ /* if nothing can save us, abort */
+ abort();
+ screen->device_lost = true;
+ } else if (bs->ctx->batch_states_count > 5000) {
+ /* throttle in case something crazy is happening */
+ zink_screen_timeline_wait(screen, bs->fence.batch_id - 2500, OS_TIMEOUT_INFINITE);
}
+ /* this resets the buffer hashlist for the state's next use */
+ memset(&bs->buffer_indices_hashlist, -1, sizeof(bs->buffer_indices_hashlist));
}
+typedef enum {
+ ZINK_SUBMIT_WAIT_ACQUIRE,
+ ZINK_SUBMIT_WAIT_FD,
+ ZINK_SUBMIT_CMDBUF,
+ ZINK_SUBMIT_SIGNAL,
+ ZINK_SUBMIT_MAX
+} zink_submit;
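/* Illustrative summary of how the VkSubmitInfo array below is filled (see
 * submit_queue); abbreviated enum names, same order as above:
 *   si[WAIT_ACQUIRE]  waits on bs->acquires (typically swapchain acquires), no cmdbufs
 *   si[WAIT_FD]       waits on imported fd semaphores, no cmdbufs
 *   si[CMDBUF]        unsynchronized + reordered + main cmdbufs, signals export semaphores
 *   si[SIGNAL]        signals the timeline (batch_id) plus present/extra semaphores
 * Leading wait-only entries with zero semaphores are skipped by advancing 'submit'.
 */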
+
static void
submit_queue(void *data, void *gdata, int thread_index)
{
struct zink_batch_state *bs = data;
struct zink_context *ctx = bs->ctx;
struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkSubmitInfo si = {0};
-
- simple_mtx_lock(&ctx->batch_mtx);
+ VkSubmitInfo si[ZINK_SUBMIT_MAX] = {0};
+ VkSubmitInfo *submit = si;
+ int num_si = ZINK_SUBMIT_MAX;
while (!bs->fence.batch_id)
- bs->fence.batch_id = p_atomic_inc_return(&screen->curr_batch);
- _mesa_hash_table_insert_pre_hashed(&ctx->batch_states, bs->fence.batch_id, (void*)(uintptr_t)bs->fence.batch_id, bs);
+ bs->fence.batch_id = (uint32_t)p_atomic_inc_return(&screen->curr_batch);
bs->usage.usage = bs->fence.batch_id;
bs->usage.unflushed = false;
- simple_mtx_unlock(&ctx->batch_mtx);
-
- VKSCR(ResetFences)(screen->dev, 1, &bs->fence.fence);
uint64_t batch_id = bs->fence.batch_id;
- si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
- si.waitSemaphoreCount = 0;
- si.pWaitSemaphores = NULL;
- si.signalSemaphoreCount = 0;
- si.pSignalSemaphores = NULL;
- si.pWaitDstStageMask = NULL;
- si.commandBufferCount = bs->has_barriers ? 2 : 1;
- VkCommandBuffer cmdbufs[2] = {
- bs->barrier_cmdbuf,
- bs->cmdbuf,
- };
- si.pCommandBuffers = bs->has_barriers ? cmdbufs : &cmdbufs[1];
+ /* first submit is just for acquire waits since they have a separate array */
+ for (unsigned i = 0; i < ARRAY_SIZE(si); i++)
+ si[i].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ si[ZINK_SUBMIT_WAIT_ACQUIRE].waitSemaphoreCount = util_dynarray_num_elements(&bs->acquires, VkSemaphore);
+ si[ZINK_SUBMIT_WAIT_ACQUIRE].pWaitSemaphores = bs->acquires.data;
+ while (util_dynarray_num_elements(&bs->acquire_flags, VkPipelineStageFlags) < si[ZINK_SUBMIT_WAIT_ACQUIRE].waitSemaphoreCount) {
+ VkPipelineStageFlags mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ util_dynarray_append(&bs->acquire_flags, VkPipelineStageFlags, mask);
+ }
+ assert(util_dynarray_num_elements(&bs->acquires, VkSemaphore) <= util_dynarray_num_elements(&bs->acquire_flags, VkPipelineStageFlags));
+ si[ZINK_SUBMIT_WAIT_ACQUIRE].pWaitDstStageMask = bs->acquire_flags.data;
+
+ si[ZINK_SUBMIT_WAIT_FD].waitSemaphoreCount = util_dynarray_num_elements(&bs->fd_wait_semaphores, VkSemaphore);
+ si[ZINK_SUBMIT_WAIT_FD].pWaitSemaphores = bs->fd_wait_semaphores.data;
+ while (util_dynarray_num_elements(&bs->fd_wait_semaphore_stages, VkPipelineStageFlags) < si[ZINK_SUBMIT_WAIT_FD].waitSemaphoreCount) {
+ VkPipelineStageFlags mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ util_dynarray_append(&bs->fd_wait_semaphore_stages, VkPipelineStageFlags, mask);
+ }
+ assert(util_dynarray_num_elements(&bs->fd_wait_semaphores, VkSemaphore) <= util_dynarray_num_elements(&bs->fd_wait_semaphore_stages, VkPipelineStageFlags));
+ si[ZINK_SUBMIT_WAIT_FD].pWaitDstStageMask = bs->fd_wait_semaphore_stages.data;
+
+ if (si[ZINK_SUBMIT_WAIT_ACQUIRE].waitSemaphoreCount == 0) {
+ num_si--;
+ submit++;
+ if (si[ZINK_SUBMIT_WAIT_FD].waitSemaphoreCount == 0) {
+ num_si--;
+ submit++;
+ }
+ }
+ /* then the real submit */
+ si[ZINK_SUBMIT_CMDBUF].waitSemaphoreCount = util_dynarray_num_elements(&bs->wait_semaphores, VkSemaphore);
+ si[ZINK_SUBMIT_CMDBUF].pWaitSemaphores = bs->wait_semaphores.data;
+ si[ZINK_SUBMIT_CMDBUF].pWaitDstStageMask = bs->wait_semaphore_stages.data;
+ VkCommandBuffer cmdbufs[3];
+ unsigned c = 0;
+ if (bs->has_unsync)
+ cmdbufs[c++] = bs->unsynchronized_cmdbuf;
+ if (bs->has_barriers)
+ cmdbufs[c++] = bs->reordered_cmdbuf;
+ cmdbufs[c++] = bs->cmdbuf;
+ si[ZINK_SUBMIT_CMDBUF].pCommandBuffers = cmdbufs;
+ si[ZINK_SUBMIT_CMDBUF].commandBufferCount = c;
+ /* assorted signal submit from wsi/externals */
+ si[ZINK_SUBMIT_CMDBUF].signalSemaphoreCount = util_dynarray_num_elements(&bs->signal_semaphores, VkSemaphore);
+ si[ZINK_SUBMIT_CMDBUF].pSignalSemaphores = bs->signal_semaphores.data;
+
+ /* then the signal submit with the timeline (fence) semaphore */
+ VkSemaphore signals[3];
+ si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount = !!bs->signal_semaphore;
+ signals[0] = bs->signal_semaphore;
+ si[ZINK_SUBMIT_SIGNAL].pSignalSemaphores = signals;
VkTimelineSemaphoreSubmitInfo tsi = {0};
- if (bs->have_timelines) {
- tsi.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
- si.pNext = &tsi;
- tsi.signalSemaphoreValueCount = 1;
- tsi.pSignalSemaphoreValues = &batch_id;
- si.signalSemaphoreCount = 1;
- si.pSignalSemaphores = &screen->sem;
+ uint64_t signal_values[2] = {0};
+ tsi.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
+ si[ZINK_SUBMIT_SIGNAL].pNext = &tsi;
+ tsi.pSignalSemaphoreValues = signal_values;
+ signal_values[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount] = batch_id;
+ signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = screen->sem;
+ tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
+
+ if (bs->present)
+ signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = bs->present;
+ tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
+
+
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(EndCommandBuffer)(bs->cmdbuf),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
+ bs->is_device_lost = true;
+ goto end;
+ }
+ );
+ if (bs->has_barriers) {
+ if (bs->unordered_write_access) {
+ VkMemoryBarrier mb;
+ mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ mb.pNext = NULL;
+ mb.srcAccessMask = bs->unordered_write_access;
+ mb.dstAccessMask = VK_ACCESS_NONE;
+ VKSCR(CmdPipelineBarrier)(bs->reordered_cmdbuf,
+ bs->unordered_write_stages,
+ screen->info.have_KHR_synchronization2 ? VK_PIPELINE_STAGE_NONE : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ 0, 1, &mb, 0, NULL, 0, NULL);
+ }
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
+ bs->is_device_lost = true;
+ goto end;
+ }
+ );
+ }
+ if (bs->has_unsync) {
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
+ bs->is_device_lost = true;
+ goto end;
+ }
+ );
}
- struct wsi_memory_signal_submit_info mem_signal = {
- .sType = VK_STRUCTURE_TYPE_WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA,
- .pNext = si.pNext,
- };
+ if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount)
+ num_si--;
- if (bs->flush_res && screen->needs_mesa_flush_wsi) {
- struct zink_resource *flush_res = zink_resource(bs->flush_res);
- mem_signal.memory = zink_bo_get_mem(flush_res->scanout_obj ? flush_res->scanout_obj->bo : flush_res->obj->bo);
- si.pNext = &mem_signal;
- }
+ simple_mtx_lock(&screen->queue_lock);
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result));
+ bs->is_device_lost = true;
+ }
+ );
+ simple_mtx_unlock(&screen->queue_lock);
- if (VKSCR(EndCommandBuffer)(bs->cmdbuf) != VK_SUCCESS) {
- debug_printf("vkEndCommandBuffer failed\n");
- bs->is_device_lost = true;
- goto end;
- }
- if (VKSCR(EndCommandBuffer)(bs->barrier_cmdbuf) != VK_SUCCESS) {
- debug_printf("vkEndCommandBuffer failed\n");
- bs->is_device_lost = true;
- goto end;
- }
+ unsigned i = 0;
+ VkSemaphore *sem = bs->signal_semaphores.data;
+ set_foreach(&bs->dmabuf_exports, entry) {
+ struct zink_resource *res = (void*)entry->key;
+ for (; res; res = zink_resource(res->base.b.next))
+ zink_screen_import_dmabuf_semaphore(screen, res, sem[i++]);
- while (util_dynarray_contains(&bs->persistent_resources, struct zink_resource_object*)) {
- struct zink_resource_object *obj = util_dynarray_pop(&bs->persistent_resources, struct zink_resource_object*);
- VkMappedMemoryRange range = zink_resource_init_mem_range(screen, obj, 0, obj->size);
- VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range);
+ struct pipe_resource *pres = (void*)entry->key;
+ pipe_resource_reference(&pres, NULL);
}
+ _mesa_set_clear(&bs->dmabuf_exports, NULL);
- if (VKSCR(QueueSubmit)(bs->queue, 1, &si, bs->fence.fence) != VK_SUCCESS) {
- debug_printf("ZINK: vkQueueSubmit() failed\n");
- bs->is_device_lost = true;
- }
- bs->submit_count++;
+ bs->usage.submit_count++;
end:
cnd_broadcast(&bs->usage.flush);
@@ -411,212 +776,175 @@ end:
unref_resources(screen, bs);
}
-
-/* TODO: remove for wsi */
-static void
-copy_scanout(struct zink_batch_state *bs, struct zink_resource *res)
-{
- if (!bs->scanout_flush)
- return;
- struct zink_context *ctx = bs->ctx;
-
- VkImageCopy region = {0};
- struct pipe_box box = {0, 0, 0,
- u_minify(res->base.b.width0, 0),
- u_minify(res->base.b.height0, 0), res->base.b.array_size};
- box.depth = util_num_layers(&res->base.b, 0);
- struct pipe_box *src_box = &box;
- unsigned dstz = 0;
-
- region.srcSubresource.aspectMask = res->aspect;
- region.srcSubresource.mipLevel = 0;
- switch (res->base.b.target) {
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_1D_ARRAY:
- /* these use layer */
- region.srcSubresource.baseArrayLayer = src_box->z;
- region.srcSubresource.layerCount = src_box->depth;
- region.srcOffset.z = 0;
- region.extent.depth = 1;
- break;
- case PIPE_TEXTURE_3D:
- /* this uses depth */
- region.srcSubresource.baseArrayLayer = 0;
- region.srcSubresource.layerCount = 1;
- region.srcOffset.z = src_box->z;
- region.extent.depth = src_box->depth;
- break;
- default:
- /* these must only copy one layer */
- region.srcSubresource.baseArrayLayer = 0;
- region.srcSubresource.layerCount = 1;
- region.srcOffset.z = 0;
- region.extent.depth = 1;
- }
-
- region.srcOffset.x = src_box->x;
- region.srcOffset.y = src_box->y;
-
- region.dstSubresource.aspectMask = res->aspect;
- region.dstSubresource.mipLevel = 0;
- switch (res->base.b.target) {
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_1D_ARRAY:
- /* these use layer */
- region.dstSubresource.baseArrayLayer = dstz;
- region.dstSubresource.layerCount = src_box->depth;
- region.dstOffset.z = 0;
- break;
- case PIPE_TEXTURE_3D:
- /* this uses depth */
- region.dstSubresource.baseArrayLayer = 0;
- region.dstSubresource.layerCount = 1;
- region.dstOffset.z = dstz;
- break;
- default:
- /* these must only copy one layer */
- region.dstSubresource.baseArrayLayer = 0;
- region.dstSubresource.layerCount = 1;
- region.dstOffset.z = 0;
- }
-
- region.dstOffset.x = 0;
- region.dstOffset.y = 0;
- region.extent.width = src_box->width;
- region.extent.height = src_box->height;
-
- VkImageMemoryBarrier imb1;
- zink_resource_image_barrier_init(&imb1, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
- VKCTX(CmdPipelineBarrier)(
- bs->cmdbuf,
- res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0,
- 0, NULL,
- 0, NULL,
- 1, &imb1
- );
-
- VkImageSubresourceRange isr = {
- res->aspect,
- 0, VK_REMAINING_MIP_LEVELS,
- 0, VK_REMAINING_ARRAY_LAYERS
- };
- VkImageMemoryBarrier imb = {
- VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- NULL,
- 0,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- res->scanout_obj_init ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_UNDEFINED,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- VK_QUEUE_FAMILY_IGNORED,
- VK_QUEUE_FAMILY_IGNORED,
- res->scanout_obj->image,
- isr
- };
- VKCTX(CmdPipelineBarrier)(
- bs->cmdbuf,
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0,
- 0, NULL,
- 0, NULL,
- 1, &imb
- );
-
- VKCTX(CmdCopyImage)(bs->cmdbuf, res->obj->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
- res->scanout_obj->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- 1, &region);
- imb.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- imb.dstAccessMask = 0;
- imb.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
- imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
- VKCTX(CmdPipelineBarrier)(
- bs->cmdbuf,
- VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- 0,
- 0, NULL,
- 0, NULL,
- 1, &imb
- );
- /* separate flag to avoid annoying validation errors for new scanout objs */
- res->scanout_obj_init = true;
-}
-
+/* called during flush */
void
zink_end_batch(struct zink_context *ctx, struct zink_batch *batch)
{
- if (batch->state->flush_res)
- copy_scanout(batch->state, zink_resource(batch->state->flush_res));
if (!ctx->queries_disabled)
zink_suspend_queries(ctx, batch);
- tc_driver_internal_flush_notify(ctx->tc);
struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (ctx->tc && !ctx->track_renderpasses)
+ tc_driver_internal_flush_notify(ctx->tc);
+ struct zink_batch_state *bs;
+
+ /* oom flushing is triggered to handle stupid piglit tests like streaming-texture-leak */
+ if (ctx->oom_flush || ctx->batch_states_count > 25) {
+ assert(!ctx->batch_states_count || ctx->batch_states);
+ while (ctx->batch_states) {
+ bs = ctx->batch_states;
+ struct zink_fence *fence = &bs->fence;
+ /* once an incomplete state is reached, no more will be complete */
+ if (!zink_check_batch_completion(ctx, fence->batch_id))
+ break;
- ctx->last_fence = &batch->state->fence;
- if (ctx->oom_flush || _mesa_hash_table_num_entries(&ctx->batch_states) > 10) {
- simple_mtx_lock(&ctx->batch_mtx);
- hash_table_foreach(&ctx->batch_states, he) {
- struct zink_fence *fence = he->data;
- struct zink_batch_state *bs = he->data;
- if (zink_check_batch_completion(ctx, fence->batch_id, true)) {
- zink_reset_batch_state(ctx, he->data);
- _mesa_hash_table_remove(&ctx->batch_states, he);
- util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, bs);
- }
+ pop_batch_state(ctx);
+ zink_reset_batch_state(ctx, bs);
+ if (ctx->last_free_batch_state)
+ ctx->last_free_batch_state->next = bs;
+ else
+ ctx->free_batch_states = bs;
+ ctx->last_free_batch_state = bs;
}
- simple_mtx_unlock(&ctx->batch_mtx);
- if (_mesa_hash_table_num_entries(&ctx->batch_states) > 50)
+ if (ctx->batch_states_count > 50)
ctx->oom_flush = true;
}
+
+ bs = batch->state;
+ if (ctx->last_batch_state)
+ ctx->last_batch_state->next = bs;
+ else {
+ assert(!ctx->batch_states);
+ ctx->batch_states = bs;
+ }
+ ctx->last_batch_state = bs;
+ ctx->batch_states_count++;
batch->work_count = 0;
+ /* this is swapchain presentation semaphore handling */
+ if (batch->swapchain) {
+ if (zink_kopper_acquired(batch->swapchain->obj->dt, batch->swapchain->obj->dt_idx) && !batch->swapchain->obj->present) {
+ batch->state->present = zink_kopper_present(screen, batch->swapchain);
+ batch->state->swapchain = batch->swapchain;
+ }
+ batch->swapchain = NULL;
+ }
+
if (screen->device_lost)
return;
- if (screen->threaded) {
- batch->state->queue = screen->thread_queue;
- util_queue_add_job(&screen->flush_queue, batch->state, &batch->state->flush_completed,
+ if (ctx->tc) {
+ set_foreach(&bs->active_queries, entry)
+ zink_query_sync(ctx, (void*)entry->key);
+ }
+
+ set_foreach(&bs->dmabuf_exports, entry) {
+ struct zink_resource *res = (void*)entry->key;
+ if (screen->info.have_KHR_synchronization2) {
+ VkImageMemoryBarrier2 imb;
+ zink_resource_image_barrier2_init(&imb, res, res->layout, 0, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+ imb.srcQueueFamilyIndex = screen->gfx_queue;
+ imb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_FOREIGN_EXT;
+ VkDependencyInfo dep = {
+ VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ NULL,
+ 0,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ 1,
+ &imb
+ };
+ VKCTX(CmdPipelineBarrier2)(bs->cmdbuf, &dep);
+ } else {
+ VkImageMemoryBarrier imb;
+ zink_resource_image_barrier_init(&imb, res, res->layout, 0, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+ imb.srcQueueFamilyIndex = screen->gfx_queue;
+ imb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_FOREIGN_EXT;
+ VKCTX(CmdPipelineBarrier)(
+ bs->cmdbuf,
+ res->obj->access_stage,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0,
+ 0, NULL,
+ 0, NULL,
+ 1, &imb
+ );
+ }
+ res->queue = VK_QUEUE_FAMILY_FOREIGN_EXT;
+
+ for (; res; res = zink_resource(res->base.b.next)) {
+ VkSemaphore sem = zink_create_exportable_semaphore(screen);
+ if (sem)
+ util_dynarray_append(&ctx->batch.state->signal_semaphores, VkSemaphore, sem);
+ }
+ }
+
+ if (screen->threaded_submit) {
+ util_queue_add_job(&screen->flush_queue, bs, &bs->flush_completed,
submit_queue, post_submit, 0);
} else {
- batch->state->queue = screen->queue;
- submit_queue(batch->state, NULL, 0);
- post_submit(batch->state, NULL, 0);
+ submit_queue(bs, NULL, 0);
+ post_submit(bs, NULL, 0);
+ }
+#ifdef HAVE_RENDERDOC_APP_H
+ if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY) && screen->renderdoc_capturing && p_atomic_read(&screen->renderdoc_frame) > screen->renderdoc_capture_end) {
+ screen->renderdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL);
+ screen->renderdoc_capturing = false;
}
+#endif
}
-void
-zink_batch_resource_usage_set(struct zink_batch *batch, struct zink_resource *res, bool write)
+static int
+batch_find_resource(struct zink_batch_state *bs, struct zink_resource_object *obj, struct zink_batch_obj_list *list)
{
- zink_resource_usage_set(res, batch->state, write);
- if (write && res->scanout_obj)
- batch->state->scanout_flush = true;
- /* multiple array entries are fine */
- if (!res->obj->coherent && res->obj->persistent_maps)
- util_dynarray_append(&batch->state->persistent_resources, struct zink_resource_object*, res->obj);
-
- batch->has_work = true;
+ unsigned hash = obj->bo->unique_id & (BUFFER_HASHLIST_SIZE-1);
+ int buffer_index = bs->buffer_indices_hashlist[hash];
+
+ /* not found or found */
+ if (buffer_index < 0 || (buffer_index < list->num_buffers && list->objs[buffer_index] == obj))
+ return buffer_index;
+
+ /* Hash collision, look for the BO in the list of list->objs linearly. */
+ for (int i = list->num_buffers - 1; i >= 0; i--) {
+ if (list->objs[i] == obj) {
+ /* Put this buffer in the hash list.
+ * This will prevent additional hash collisions if there are
+ * several consecutive lookup_buffer calls for the same buffer.
+ *
+ * Example: Assuming list->objs A,B,C collide in the hash list,
+ * the following sequence of list->objs:
+ * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
+ * will collide here: ^ and here: ^,
+ * meaning that we should get very few collisions in the end. */
+ bs->buffer_indices_hashlist[hash] = i & (BUFFER_HASHLIST_SIZE-1);
+ return i;
+ }
+ }
+ return -1;
}
void
zink_batch_reference_resource_rw(struct zink_batch *batch, struct zink_resource *res, bool write)
{
- /* if the resource already has usage of any sort set for this batch, we can skip hashing */
- if (!zink_batch_usage_matches(res->obj->reads, batch->state) &&
- !zink_batch_usage_matches(res->obj->writes, batch->state)) {
+ /* if the resource already has usage of any sort set for this batch, */
+ if (!zink_resource_usage_matches(res, batch->state) ||
+ /* or if it's bound somewhere */
+ !zink_resource_has_binds(res))
+ /* then it already has a batch ref and doesn't need one here */
zink_batch_reference_resource(batch, res);
- }
- zink_batch_resource_usage_set(batch, res, write);
+ zink_batch_resource_usage_set(batch, res, write, res->obj->is_buffer);
}
-bool
+void
+zink_batch_add_wait_semaphore(struct zink_batch *batch, VkSemaphore sem)
+{
+ util_dynarray_append(&batch->state->acquires, VkSemaphore, sem);
+}
+
+static bool
batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr)
{
bool found = false;
@@ -624,6 +952,7 @@ batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr)
return !found;
}
+/* this is a vague, handwave-y estimate */
ALWAYS_INLINE static void
check_oom_flush(struct zink_context *ctx, const struct zink_batch *batch)
{
@@ -634,87 +963,131 @@ check_oom_flush(struct zink_context *ctx, const struct zink_batch *batch)
}
}
+/* this adds a ref (batch tracking) */
void
zink_batch_reference_resource(struct zink_batch *batch, struct zink_resource *res)
{
- if (!batch_ptr_add_usage(batch, batch->state->resources, res->obj))
- return;
- pipe_reference(NULL, &res->obj->reference);
- batch->state->resource_size += res->obj->size;
- check_oom_flush(batch->state->ctx, batch);
- batch->has_work = true;
+ if (!zink_batch_reference_resource_move(batch, res))
+ zink_resource_object_reference(NULL, NULL, res->obj);
}
-void
+/* this adds batch usage */
+bool
zink_batch_reference_resource_move(struct zink_batch *batch, struct zink_resource *res)
{
- if (!batch_ptr_add_usage(batch, batch->state->resources, res->obj))
- return;
- batch->state->resource_size += res->obj->size;
- check_oom_flush(batch->state->ctx, batch);
- batch->has_work = true;
-}
+ struct zink_batch_state *bs = batch->state;
+
+ simple_mtx_lock(&batch->ref_lock);
+ /* swapchains are special */
+ if (zink_is_swapchain(res)) {
+ struct zink_resource_object **swapchains = bs->swapchain_obj.data;
+ unsigned count = util_dynarray_num_elements(&bs->swapchain_obj, struct zink_resource_object*);
+ for (unsigned i = 0; i < count; i++) {
+ if (swapchains[i] == res->obj) {
+ simple_mtx_unlock(&batch->ref_lock);
+ return true;
+ }
+ }
+ util_dynarray_append(&bs->swapchain_obj, struct zink_resource_object*, res->obj);
+ simple_mtx_unlock(&batch->ref_lock);
+ return false;
+ }
+ /* Fast exit for no-op calls.
+ * This is very effective with suballocators and linear uploaders that
+ * are outside of the winsys.
+ */
+ if (res->obj == bs->last_added_obj) {
+ simple_mtx_unlock(&batch->ref_lock);
+ return true;
+ }
-void
-zink_batch_reference_bufferview(struct zink_batch *batch, struct zink_buffer_view *buffer_view)
-{
- if (!batch_ptr_add_usage(batch, batch->state->bufferviews, buffer_view))
- return;
- pipe_reference(NULL, &buffer_view->reference);
- batch->has_work = true;
-}
+ struct zink_bo *bo = res->obj->bo;
+ struct zink_batch_obj_list *list;
+ if (!(res->base.b.flags & PIPE_RESOURCE_FLAG_SPARSE)) {
+ if (!bo->mem) {
+ list = &bs->slab_objs;
+ } else {
+ list = &bs->real_objs;
+ }
+ } else {
+ list = &bs->sparse_objs;
+ }
+ int idx = batch_find_resource(bs, res->obj, list);
+ if (idx >= 0) {
+ simple_mtx_unlock(&batch->ref_lock);
+ return true;
+ }
-void
-zink_batch_reference_surface(struct zink_batch *batch, struct zink_surface *surface)
-{
- if (!batch_ptr_add_usage(batch, batch->state->surfaces, surface))
- return;
- struct pipe_surface *surf = NULL;
- pipe_surface_reference(&surf, &surface->base);
+ if (list->num_buffers >= list->max_buffers) {
+ unsigned new_max = MAX2(list->max_buffers + 16, (unsigned)(list->max_buffers * 1.3));
+ struct zink_resource_object **objs = realloc(list->objs, new_max * sizeof(void*));
+ if (!objs) {
+ /* things are about to go dramatically wrong anyway */
+ mesa_loge("zink: buffer list realloc failed due to oom!\n");
+ abort();
+ }
+ list->objs = objs;
+ list->max_buffers = new_max;
+ }
+ idx = list->num_buffers++;
+ list->objs[idx] = res->obj;
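+      /* cache the index so batch_find_resource can skip the linear search on the next lookup of this BO */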
+ unsigned hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1);
+ bs->buffer_indices_hashlist[hash] = idx & 0x7fff;
+ bs->last_added_obj = res->obj;
+ if (!(res->base.b.flags & PIPE_RESOURCE_FLAG_SPARSE)) {
+ bs->resource_size += res->obj->size;
+ } else {
+ /* Sparse backing pages are not directly referenced by the batch as
+ * there can be a lot of them.
+ * Instead, they are kept referenced in one of two ways:
+ * - While they are committed, they are directly referenced from the
+ * resource's state.
+ * - Upon de-commit, they are added to the freed_sparse_backing_bos
+ * list, which will defer destroying the resource until the batch
+ * performing unbind finishes.
+ */
+ }
+ check_oom_flush(batch->state->ctx, batch);
batch->has_work = true;
+ simple_mtx_unlock(&batch->ref_lock);
+ return false;
}
-void
-zink_batch_reference_sampler_view(struct zink_batch *batch,
- struct zink_sampler_view *sv)
-{
- if (sv->base.target == PIPE_BUFFER)
- zink_batch_reference_bufferview(batch, sv->buffer_view);
- else
- zink_batch_reference_surface(batch, sv->image_view);
-}
-
+/* this is how programs achieve deferred deletion */
void
zink_batch_reference_program(struct zink_batch *batch,
struct zink_program *pg)
{
if (zink_batch_usage_matches(pg->batch_uses, batch->state) ||
- !batch_ptr_add_usage(batch, batch->state->programs, pg))
+ !batch_ptr_add_usage(batch, &batch->state->programs, pg))
return;
pipe_reference(NULL, &pg->reference);
zink_batch_usage_set(&pg->batch_uses, batch->state);
batch->has_work = true;
}
-void
-zink_batch_reference_image_view(struct zink_batch *batch,
- struct zink_image_view *image_view)
+/* a fast (hopefully) way to check whether a given batch has completed */
+bool
+zink_screen_usage_check_completion(struct zink_screen *screen, const struct zink_batch_usage *u)
{
- if (image_view->base.resource->target == PIPE_BUFFER)
- zink_batch_reference_bufferview(batch, image_view->buffer_view);
- else
- zink_batch_reference_surface(batch, image_view->surface);
+ if (!zink_batch_usage_exists(u))
+ return true;
+ if (zink_batch_usage_is_unflushed(u))
+ return false;
+
+ return zink_screen_timeline_wait(screen, u->usage, 0);
}
+/* an even faster check that doesn't ioctl */
bool
-zink_screen_usage_check_completion(struct zink_screen *screen, const struct zink_batch_usage *u)
+zink_screen_usage_check_completion_fast(struct zink_screen *screen, const struct zink_batch_usage *u)
{
if (!zink_batch_usage_exists(u))
return true;
if (zink_batch_usage_is_unflushed(u))
return false;
- return zink_screen_batch_id_wait(screen, u->usage, 0);
+ return zink_screen_check_last_finished(screen, u->usage);
}
bool
@@ -724,11 +1097,11 @@ zink_batch_usage_check_completion(struct zink_context *ctx, const struct zink_ba
return true;
if (zink_batch_usage_is_unflushed(u))
return false;
- return zink_check_batch_completion(ctx, u->usage, false);
+ return zink_check_batch_completion(ctx, u->usage);
}
-void
-zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u)
+static void
+batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u, bool trywait)
{
if (!zink_batch_usage_exists(u))
return;
@@ -737,9 +1110,25 @@ zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u)
ctx->base.flush(&ctx->base, NULL, PIPE_FLUSH_HINT_FINISH);
else { //multi-context
mtx_lock(&u->mtx);
- cnd_wait(&u->flush, &u->mtx);
+ if (trywait) {
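+         /* trywait: use a timed wait so the caller never blocks indefinitely on the flush cond */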
+ struct timespec ts = {0, 10000};
+ cnd_timedwait(&u->flush, &u->mtx, &ts);
+ } else
+ cnd_wait(&u->flush, &u->mtx);
mtx_unlock(&u->mtx);
}
}
zink_wait_on_batch(ctx, u->usage);
}
+
+void
+zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u)
+{
+ batch_usage_wait(ctx, u, false);
+}
+
+void
+zink_batch_usage_try_wait(struct zink_context *ctx, struct zink_batch_usage *u)
+{
+ batch_usage_wait(ctx, u, true);
+}
diff --git a/src/gallium/drivers/zink/zink_batch.h b/src/gallium/drivers/zink/zink_batch.h
index 4b077646369..67ffa9e0b04 100644
--- a/src/gallium/drivers/zink/zink_batch.h
+++ b/src/gallium/drivers/zink/zink_batch.h
@@ -24,7 +24,8 @@
#ifndef ZINK_BATCH_H
#define ZINK_BATCH_H
-#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_core.h>
+#include "zink_types.h"
#include "util/list.h"
#include "util/set.h"
@@ -36,90 +37,6 @@
extern "C" {
#endif
-struct pipe_reference;
-
-struct zink_buffer_view;
-struct zink_context;
-struct zink_descriptor_set;
-struct zink_image_view;
-struct zink_program;
-struct zink_render_pass;
-struct zink_resource;
-struct zink_sampler_view;
-struct zink_surface;
-
-struct zink_batch_usage {
- uint32_t usage;
- cnd_t flush;
- mtx_t mtx;
- bool unflushed;
-};
-
-/* not real api don't use */
-bool
-batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr);
-
-struct zink_batch_state {
- struct zink_fence fence;
-
- struct zink_batch_usage usage;
- struct zink_context *ctx;
- VkCommandPool cmdpool;
- VkCommandBuffer cmdbuf;
- VkCommandBuffer barrier_cmdbuf;
-
- VkQueue queue; //duplicated from batch for threading
- VkSemaphore sem;
-
- struct util_queue_fence flush_completed;
-
- struct pipe_resource *flush_res;
-
- struct set *programs;
-
- struct set *resources;
- struct set *surfaces;
- struct set *bufferviews;
-
- struct util_dynarray unref_resources;
-
- struct util_dynarray persistent_resources;
- struct util_dynarray zombie_samplers;
- struct util_dynarray dead_framebuffers;
-
- struct set *active_queries; /* zink_query objects which were active at some point in this batch */
-
- struct zink_batch_descriptor_data *dd;
-
- VkDeviceSize resource_size;
-
- /* this is a monotonic int used to disambiguate internal fences from their tc fence references */
- unsigned submit_count;
-
- bool is_device_lost;
- bool have_timelines;
- bool has_barriers;
- bool scanout_flush;
-};
-
-struct zink_batch {
- struct zink_batch_state *state;
-
- struct zink_batch_usage *last_batch_usage;
-
- unsigned work_count;
-
- bool has_work;
- bool last_was_compute;
- bool in_rp; //renderpass is currently active
-};
-
-
-static inline struct zink_batch_state *
-zink_batch_state(struct zink_fence *fence)
-{
- return (struct zink_batch_state *)fence;
-}
void
zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs);
@@ -145,7 +62,7 @@ void
zink_end_batch(struct zink_context *ctx, struct zink_batch *batch);
void
-zink_batch_resource_usage_set(struct zink_batch *batch, struct zink_resource *res, bool write);
+zink_batch_add_wait_semaphore(struct zink_batch *batch, VkSemaphore sem);
void
zink_batch_reference_resource_rw(struct zink_batch *batch,
@@ -154,54 +71,43 @@ zink_batch_reference_resource_rw(struct zink_batch *batch,
void
zink_batch_reference_resource(struct zink_batch *batch, struct zink_resource *res);
-void
+bool
zink_batch_reference_resource_move(struct zink_batch *batch, struct zink_resource *res);
void
-zink_batch_reference_sampler_view(struct zink_batch *batch,
- struct zink_sampler_view *sv);
-
-void
zink_batch_reference_program(struct zink_batch *batch,
struct zink_program *pg);
void
-zink_batch_reference_image_view(struct zink_batch *batch,
- struct zink_image_view *image_view);
-
-void
-zink_batch_reference_bufferview(struct zink_batch *batch, struct zink_buffer_view *buffer_view);
-void
-zink_batch_reference_surface(struct zink_batch *batch, struct zink_surface *surface);
-
+zink_batch_bind_db(struct zink_context *ctx);
void
debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr);
-static inline bool
+static ALWAYS_INLINE bool
zink_batch_usage_is_unflushed(const struct zink_batch_usage *u)
{
return u && u->unflushed;
}
-static inline void
+static ALWAYS_INLINE void
zink_batch_usage_unset(struct zink_batch_usage **u, struct zink_batch_state *bs)
{
(void)p_atomic_cmpxchg((uintptr_t *)u, (uintptr_t)&bs->usage, (uintptr_t)NULL);
}
-static inline void
+static ALWAYS_INLINE void
zink_batch_usage_set(struct zink_batch_usage **u, struct zink_batch_state *bs)
{
*u = &bs->usage;
}
-static inline bool
+static ALWAYS_INLINE bool
zink_batch_usage_matches(const struct zink_batch_usage *u, const struct zink_batch_state *bs)
{
return u == &bs->usage;
}
-static inline bool
+static ALWAYS_INLINE bool
zink_batch_usage_exists(const struct zink_batch_usage *u)
{
return u && (u->usage || u->unflushed);
@@ -209,6 +115,8 @@ zink_batch_usage_exists(const struct zink_batch_usage *u)
bool
zink_screen_usage_check_completion(struct zink_screen *screen, const struct zink_batch_usage *u);
+bool
+zink_screen_usage_check_completion_fast(struct zink_screen *screen, const struct zink_batch_usage *u);
bool
zink_batch_usage_check_completion(struct zink_context *ctx, const struct zink_batch_usage *u);
@@ -216,6 +124,9 @@ zink_batch_usage_check_completion(struct zink_context *ctx, const struct zink_ba
void
zink_batch_usage_wait(struct zink_context *ctx, struct zink_batch_usage *u);
+void
+zink_batch_usage_try_wait(struct zink_context *ctx, struct zink_batch_usage *u);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/drivers/zink/zink_blit.c b/src/gallium/drivers/zink/zink_blit.c
index 7085c8e033c..f74921a2a1a 100644
--- a/src/gallium/drivers/zink/zink_blit.c
+++ b/src/gallium/drivers/zink/zink_blit.c
@@ -1,4 +1,8 @@
+#include "zink_clear.h"
#include "zink_context.h"
+#include "zink_format.h"
+#include "zink_inlines.h"
+#include "zink_kopper.h"
#include "zink_helpers.h"
#include "zink_query.h"
#include "zink_resource.h"
@@ -21,7 +25,7 @@ apply_dst_clears(struct zink_context *ctx, const struct pipe_blit_info *info, bo
}
static bool
-blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info)
+blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info, bool *needs_present_readback)
{
if (util_format_get_mask(info->dst.format) != info->mask ||
util_format_get_mask(info->src.format) != info->mask ||
@@ -30,9 +34,17 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info)
info->alpha_blend)
return false;
- if (info->src.box.width != info->dst.box.width ||
- info->src.box.height != info->dst.box.height ||
- info->src.box.depth != info->dst.box.depth)
+ if (info->src.box.width < 0 ||
+ info->dst.box.width < 0 ||
+ info->src.box.height < 0 ||
+ info->dst.box.height < 0 ||
+ info->src.box.depth < 0 ||
+ info->dst.box.depth < 0)
+ return false;
+ /* vulkan resolves can't downscale */
+ if (info->src.box.width > info->dst.box.width ||
+ info->src.box.height > info->dst.box.height ||
+ info->src.box.depth > info->dst.box.depth)
return false;
if (info->render_condition_enable &&
@@ -40,26 +52,39 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info)
return false;
struct zink_resource *src = zink_resource(info->src.resource);
+ struct zink_resource *use_src = src;
struct zink_resource *dst = zink_resource(info->dst.resource);
struct zink_screen *screen = zink_screen(ctx->base.screen);
+ /* aliased/swizzled formats need u_blitter */
if (src->format != zink_get_format(screen, info->src.format) ||
dst->format != zink_get_format(screen, info->dst.format))
return false;
- if (info->dst.resource->target == PIPE_BUFFER)
- util_range_add(info->dst.resource, &dst->valid_buffer_range,
- info->dst.box.x, info->dst.box.x + info->dst.box.width);
+ if (src->format != dst->format)
+ return false;
+
apply_dst_clears(ctx, info, false);
zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box));
+ if (src->obj->dt)
+ *needs_present_readback = zink_kopper_acquire_readback(ctx, src, &use_src);
+
struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- zink_batch_reference_resource_rw(batch, src, false);
+ zink_resource_setup_transfer_layouts(ctx, use_src, dst);
+ VkCommandBuffer cmdbuf = *needs_present_readback ?
+ ctx->batch.state->cmdbuf :
+ zink_get_cmdbuf(ctx, src, dst);
+ if (cmdbuf == ctx->batch.state->cmdbuf)
+ zink_flush_dgc_if_enabled(ctx);
+ zink_batch_reference_resource_rw(batch, use_src, false);
zink_batch_reference_resource_rw(batch, dst, true);
- zink_resource_setup_transfer_layouts(ctx, src, dst);
-
+ bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "blit_resolve(%s->%s, %dx%d->%dx%d)",
+ util_format_short_name(info->src.format),
+                                             util_format_short_name(info->dst.format),
+ info->src.box.width, info->src.box.height,
+ info->dst.box.width, info->dst.box.height);
VkImageResolve region = {0};
region.srcSubresource.aspectMask = src->aspect;
@@ -97,23 +122,28 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info)
region.extent.width = info->dst.box.width;
region.extent.height = info->dst.box.height;
region.extent.depth = info->dst.box.depth;
- VKCTX(CmdResolveImage)(batch->state->cmdbuf, src->obj->image, src->layout,
+ if (region.srcOffset.x + region.extent.width >= u_minify(src->base.b.width0, region.srcSubresource.mipLevel))
+ region.extent.width = u_minify(src->base.b.width0, region.srcSubresource.mipLevel) - region.srcOffset.x;
+ if (region.dstOffset.x + region.extent.width >= u_minify(dst->base.b.width0, region.dstSubresource.mipLevel))
+ region.extent.width = u_minify(dst->base.b.width0, region.dstSubresource.mipLevel) - region.dstOffset.x;
+ if (region.srcOffset.y + region.extent.height >= u_minify(src->base.b.height0, region.srcSubresource.mipLevel))
+ region.extent.height = u_minify(src->base.b.height0, region.srcSubresource.mipLevel) - region.srcOffset.y;
+ if (region.dstOffset.y + region.extent.height >= u_minify(dst->base.b.height0, region.dstSubresource.mipLevel))
+ region.extent.height = u_minify(dst->base.b.height0, region.dstSubresource.mipLevel) - region.dstOffset.y;
+ if (region.srcOffset.z + region.extent.depth >= u_minify(src->base.b.depth0, region.srcSubresource.mipLevel))
+ region.extent.depth = u_minify(src->base.b.depth0, region.srcSubresource.mipLevel) - region.srcOffset.z;
+ if (region.dstOffset.z + region.extent.depth >= u_minify(dst->base.b.depth0, region.dstSubresource.mipLevel))
+ region.extent.depth = u_minify(dst->base.b.depth0, region.dstSubresource.mipLevel) - region.dstOffset.z;
+ VKCTX(CmdResolveImage)(cmdbuf, use_src->obj->image, src->layout,
dst->obj->image, dst->layout,
1, &region);
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
return true;
}
-static VkFormatFeatureFlags
-get_resource_features(struct zink_screen *screen, struct zink_resource *res)
-{
- VkFormatProperties props = screen->format_props[res->base.b.format];
- return res->optimal_tiling ? props.optimalTilingFeatures :
- props.linearTilingFeatures;
-}
-
static bool
-blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
+blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *needs_present_readback)
{
if (util_format_get_mask(info->dst.format) != info->mask ||
util_format_get_mask(info->src.format) != info->mask ||
@@ -126,7 +156,7 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
return false;
if (util_format_is_depth_or_stencil(info->dst.format) &&
- info->dst.format != info->src.format)
+ (info->dst.format != info->src.format || info->filter == PIPE_TEX_FILTER_LINEAR))
return false;
/* vkCmdBlitImage must not be used for multisampled source or destination images. */
@@ -134,15 +164,18 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
return false;
struct zink_resource *src = zink_resource(info->src.resource);
+ struct zink_resource *use_src = src;
struct zink_resource *dst = zink_resource(info->dst.resource);
struct zink_screen *screen = zink_screen(ctx->base.screen);
if (src->format != zink_get_format(screen, info->src.format) ||
dst->format != zink_get_format(screen, info->dst.format))
return false;
+ if (src->format != VK_FORMAT_A8_UNORM_KHR && zink_format_is_emulated_alpha(info->src.format))
+ return false;
- if (!(get_resource_features(screen, src) & VK_FORMAT_FEATURE_BLIT_SRC_BIT) ||
- !(get_resource_features(screen, dst) & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ if (!(src->obj->vkfeats & VK_FORMAT_FEATURE_BLIT_SRC_BIT) ||
+ !(dst->obj->vkfeats & VK_FORMAT_FEATURE_BLIT_DST_BIT))
return false;
if ((util_format_is_pure_sint(info->src.format) !=
@@ -152,22 +185,10 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
return false;
if (info->filter == PIPE_TEX_FILTER_LINEAR &&
- !(get_resource_features(screen, src) &
- VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT))
+ !(src->obj->vkfeats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT))
return false;
- apply_dst_clears(ctx, info, false);
- zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box));
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- zink_batch_reference_resource_rw(batch, src, false);
- zink_batch_reference_resource_rw(batch, dst, true);
-
- zink_resource_setup_transfer_layouts(ctx, src, dst);
- if (info->dst.resource->target == PIPE_BUFFER)
- util_range_add(info->dst.resource, &dst->valid_buffer_range,
- info->dst.box.x, info->dst.box.x + info->dst.box.width);
VkImageBlit region = {0};
region.srcSubresource.aspectMask = src->aspect;
region.srcSubresource.mipLevel = info->src.level;
@@ -176,13 +197,19 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
region.srcOffsets[1].x = info->src.box.x + info->src.box.width;
region.srcOffsets[1].y = info->src.box.y + info->src.box.height;
- switch (src->base.b.target) {
+ enum pipe_texture_target src_target = src->base.b.target;
+ if (src->need_2D)
+ src_target = src_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY;
+ switch (src_target) {
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_1D_ARRAY:
/* these use layer */
region.srcSubresource.baseArrayLayer = info->src.box.z;
+ /* VUID-vkCmdBlitImage-srcImage-00240 */
+ if (region.srcSubresource.baseArrayLayer && dst->base.b.target == PIPE_TEXTURE_3D)
+ return false;
region.srcSubresource.layerCount = info->src.box.depth;
region.srcOffsets[0].z = 0;
region.srcOffsets[1].z = 1;
@@ -211,13 +238,19 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
assert(region.dstOffsets[0].x != region.dstOffsets[1].x);
assert(region.dstOffsets[0].y != region.dstOffsets[1].y);
- switch (dst->base.b.target) {
+ enum pipe_texture_target dst_target = dst->base.b.target;
+ if (dst->need_2D)
+ dst_target = dst_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY;
+ switch (dst_target) {
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_1D_ARRAY:
/* these use layer */
region.dstSubresource.baseArrayLayer = info->dst.box.z;
+ /* VUID-vkCmdBlitImage-srcImage-00240 */
+ if (region.dstSubresource.baseArrayLayer && src->base.b.target == PIPE_TEXTURE_3D)
+ return false;
region.dstSubresource.layerCount = info->dst.box.depth;
region.dstOffsets[0].z = 0;
region.dstOffsets[1].z = 1;
@@ -238,14 +271,57 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
}
assert(region.dstOffsets[0].z != region.dstOffsets[1].z);
- VKCTX(CmdBlitImage)(batch->state->cmdbuf, src->obj->image, src->layout,
+ apply_dst_clears(ctx, info, false);
+ zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box));
+
+ if (src->obj->dt)
+ *needs_present_readback = zink_kopper_acquire_readback(ctx, src, &use_src);
+
+ struct zink_batch *batch = &ctx->batch;
+ zink_resource_setup_transfer_layouts(ctx, use_src, dst);
+ VkCommandBuffer cmdbuf = *needs_present_readback ?
+ ctx->batch.state->cmdbuf :
+ zink_get_cmdbuf(ctx, src, dst);
+ if (cmdbuf == ctx->batch.state->cmdbuf)
+ zink_flush_dgc_if_enabled(ctx);
+ zink_batch_reference_resource_rw(batch, use_src, false);
+ zink_batch_reference_resource_rw(batch, dst, true);
+
+ bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "blit_native(%s->%s, %dx%d->%dx%d)",
+ util_format_short_name(info->src.format),
+                                             util_format_short_name(info->dst.format),
+ info->src.box.width, info->src.box.height,
+ info->dst.box.width, info->dst.box.height);
+
+ VKCTX(CmdBlitImage)(cmdbuf, use_src->obj->image, src->layout,
dst->obj->image, dst->layout,
1, &region,
zink_filter(info->filter));
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+
return true;
}
+static bool
+try_copy_region(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_resource *src = zink_resource(info->src.resource);
+ struct zink_resource *dst = zink_resource(info->dst.resource);
+ /* if we're copying between resources with matching aspects then we can probably just copy_region */
+ if (src->aspect != dst->aspect)
+ return false;
+ struct pipe_blit_info new_info = *info;
+
+ if (src->aspect & VK_IMAGE_ASPECT_STENCIL_BIT &&
+ new_info.render_condition_enable &&
+ !ctx->render_condition_active)
+ new_info.render_condition_enable = false;
+
+ return util_try_blit_via_copy_region(pctx, &new_info, ctx->render_condition_active);
+}
+
void
zink_blit(struct pipe_context *pctx,
const struct pipe_blit_info *info)
@@ -254,9 +330,14 @@ zink_blit(struct pipe_context *pctx,
const struct util_format_description *src_desc = util_format_description(info->src.format);
const struct util_format_description *dst_desc = util_format_description(info->dst.format);
- if (info->render_condition_enable &&
- unlikely(!zink_screen(pctx->screen)->info.have_EXT_conditional_rendering && !zink_check_conditional_render(ctx)))
- return;
+ struct zink_resource *src = zink_resource(info->src.resource);
+ struct zink_resource *use_src = src;
+ struct zink_resource *dst = zink_resource(info->dst.resource);
+ bool needs_present_readback = false;
+ if (zink_is_swapchain(dst)) {
+ if (!zink_kopper_acquire(ctx, dst, UINT64_MAX))
+ return;
+ }
if (src_desc == dst_desc ||
src_desc->nr_channels != 4 || src_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
@@ -266,47 +347,158 @@ zink_blit(struct pipe_context *pctx,
*/
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1) {
- if (blit_resolve(ctx, info))
- return;
+ if (blit_resolve(ctx, info, &needs_present_readback))
+ goto end;
} else {
- if (blit_native(ctx, info))
- return;
+ if (try_copy_region(pctx, info))
+ goto end;
+ if (blit_native(ctx, info, &needs_present_readback))
+ goto end;
}
}
- struct zink_resource *src = zink_resource(info->src.resource);
- struct zink_resource *dst = zink_resource(info->dst.resource);
- /* if we're copying between resources with matching aspects then we can probably just copy_region */
- if (src->aspect == dst->aspect) {
- struct pipe_blit_info new_info = *info;
- if (src->aspect & VK_IMAGE_ASPECT_STENCIL_BIT &&
- new_info.render_condition_enable &&
- !ctx->render_condition_active)
- new_info.render_condition_enable = false;
- if (util_try_blit_via_copy_region(pctx, &new_info))
- return;
+ bool stencil_blit = false;
+ if (!util_blitter_is_blit_supported(ctx->blitter, info)) {
+ if (util_format_is_depth_or_stencil(info->src.resource->format)) {
+ if (info->mask & PIPE_MASK_Z) {
+ struct pipe_blit_info depth_blit = *info;
+ depth_blit.mask = PIPE_MASK_Z;
+ if (util_blitter_is_blit_supported(ctx->blitter, &depth_blit)) {
+ zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES);
+ util_blitter_blit(ctx->blitter, &depth_blit);
+ } else {
+ mesa_loge("ZINK: depth blit unsupported %s -> %s",
+ util_format_short_name(info->src.resource->format),
+ util_format_short_name(info->dst.resource->format));
+ }
+ }
+ if (info->mask & PIPE_MASK_S)
+ stencil_blit = true;
+ }
+ if (!stencil_blit) {
+ mesa_loge("ZINK: blit unsupported %s -> %s",
+ util_format_short_name(info->src.resource->format),
+ util_format_short_name(info->dst.resource->format));
+ goto end;
+ }
}
- if (!util_blitter_is_blit_supported(ctx->blitter, info)) {
- debug_printf("blit unsupported %s -> %s\n",
- util_format_short_name(info->src.resource->format),
- util_format_short_name(info->dst.resource->format));
- return;
+ if (src->obj->dt) {
+ zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box));
+ needs_present_readback = zink_kopper_acquire_readback(ctx, src, &use_src);
}
/* this is discard_only because we're about to start a renderpass that will
* flush all pending clears anyway
*/
apply_dst_clears(ctx, info, true);
+ zink_fb_clears_apply_region(ctx, info->src.resource, zink_rect_from_box(&info->src.box));
+ unsigned rp_clears_enabled = ctx->rp_clears_enabled;
+ unsigned clears_enabled = ctx->clears_enabled;
+ if (!dst->fb_bind_count) {
+ /* avoid applying clears from fb unbind by storing and re-setting them after the blit */
+ ctx->rp_clears_enabled = 0;
+ ctx->clears_enabled = 0;
+ } else {
+ unsigned bit;
+ /* convert to PIPE_CLEAR_XYZ */
+ if (dst->fb_binds & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS))
+ bit = PIPE_CLEAR_DEPTHSTENCIL;
+ else
+ bit = dst->fb_binds << 2;
+ rp_clears_enabled &= ~bit;
+ clears_enabled &= ~bit;
+ ctx->rp_clears_enabled &= bit;
+ ctx->clears_enabled &= bit;
+ }
- if (info->dst.resource->target == PIPE_BUFFER)
- util_range_add(info->dst.resource, &dst->valid_buffer_range,
- info->dst.box.x, info->dst.box.x + info->dst.box.width);
+ /* this will draw a full-resource quad, so ignore existing data */
+ bool whole = util_blit_covers_whole_resource(info);
+ if (whole)
+ pctx->invalidate_resource(pctx, info->dst.resource);
+
+ zink_flush_dgc_if_enabled(ctx);
+ ctx->unordered_blitting = !(info->render_condition_enable && ctx->render_condition_active) &&
+ zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering &&
+ !needs_present_readback &&
+ zink_get_cmdbuf(ctx, src, dst) == ctx->batch.state->reordered_cmdbuf;
+ VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
+ VkPipeline pipeline = ctx->gfx_pipeline_state.pipeline;
+ bool in_rp = ctx->batch.in_rp;
+ uint64_t tc_data = ctx->dynamic_fb.tc_info.data;
+ bool queries_disabled = ctx->queries_disabled;
+ bool rp_changed = ctx->rp_changed || (!ctx->fb_state.zsbuf && util_format_is_depth_or_stencil(info->dst.format));
+ unsigned ds3_states = ctx->ds3_states;
+ bool rp_tc_info_updated = ctx->rp_tc_info_updated;
+ if (ctx->unordered_blitting) {
+ /* for unordered blit, swap the unordered cmdbuf for the main one for the whole op to avoid conditional hell */
+ ctx->batch.state->cmdbuf = ctx->batch.state->reordered_cmdbuf;
+ ctx->batch.in_rp = false;
+ ctx->rp_changed = true;
+ ctx->queries_disabled = true;
+ ctx->batch.state->has_barriers = true;
+ ctx->pipeline_changed[0] = true;
+ zink_reset_ds3_states(ctx);
+ zink_select_draw_vbo(ctx);
+ }
zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES);
-
- util_blitter_blit(ctx->blitter, info);
+ if (zink_format_needs_mutable(info->src.format, info->src.resource->format))
+ zink_resource_object_init_mutable(ctx, src);
+ if (zink_format_needs_mutable(info->dst.format, info->dst.resource->format))
+ zink_resource_object_init_mutable(ctx, dst);
+ zink_blit_barriers(ctx, use_src, dst, whole);
+ ctx->blitting = true;
+
+ if (stencil_blit) {
+ struct pipe_surface *dst_view, dst_templ;
+ util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z);
+ dst_view = pctx->create_surface(pctx, info->dst.resource, &dst_templ);
+
+ util_blitter_clear_depth_stencil(ctx->blitter, dst_view, PIPE_CLEAR_STENCIL,
+ 0, 0, info->dst.box.x, info->dst.box.y,
+ info->dst.box.width, info->dst.box.height);
+ zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES | ZINK_BLIT_SAVE_FS_CONST_BUF);
+ util_blitter_stencil_fallback(ctx->blitter,
+ info->dst.resource,
+ info->dst.level,
+ &info->dst.box,
+ info->src.resource,
+ info->src.level,
+ &info->src.box,
+ info->scissor_enable ? &info->scissor : NULL);
+
+ pipe_surface_release(pctx, &dst_view);
+ } else {
+ struct pipe_blit_info new_info = *info;
+ new_info.src.resource = &use_src->base.b;
+ util_blitter_blit(ctx->blitter, &new_info);
+ }
+ ctx->blitting = false;
+ ctx->rp_clears_enabled = rp_clears_enabled;
+ ctx->clears_enabled = clears_enabled;
+ if (ctx->unordered_blitting) {
+ zink_batch_no_rp(ctx);
+ ctx->batch.in_rp = in_rp;
+ ctx->gfx_pipeline_state.rp_state = zink_update_rendering_info(ctx);
+ ctx->rp_changed = rp_changed;
+ ctx->rp_tc_info_updated |= rp_tc_info_updated;
+ ctx->queries_disabled = queries_disabled;
+ ctx->dynamic_fb.tc_info.data = tc_data;
+ ctx->batch.state->cmdbuf = cmdbuf;
+ ctx->gfx_pipeline_state.pipeline = pipeline;
+ ctx->pipeline_changed[0] = true;
+ ctx->ds3_states = ds3_states;
+ zink_select_draw_vbo(ctx);
+ }
+ ctx->unordered_blitting = false;
+end:
+ if (needs_present_readback) {
+ src->obj->unordered_read = false;
+ dst->obj->unordered_write = false;
+ zink_kopper_present_readback(ctx, src);
+ }
}
/* similar to radeonsi */
@@ -316,24 +508,27 @@ zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags)
util_blitter_save_vertex_elements(ctx->blitter, ctx->element_state);
util_blitter_save_viewport(ctx->blitter, ctx->vp_state.viewport_states);
- util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffers);
- util_blitter_save_vertex_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_VERTEX]);
- util_blitter_save_tessctrl_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]);
- util_blitter_save_tesseval_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]);
- util_blitter_save_geometry_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_GEOMETRY]);
+ util_blitter_save_vertex_buffers(ctx->blitter, ctx->vertex_buffers,
+ util_last_bit(ctx->gfx_pipeline_state.vertex_buffers_enabled_mask));
+ util_blitter_save_vertex_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_VERTEX]);
+ util_blitter_save_tessctrl_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_TESS_CTRL]);
+ util_blitter_save_tesseval_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
+ util_blitter_save_geometry_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_GEOMETRY]);
util_blitter_save_rasterizer(ctx->blitter, ctx->rast_state);
util_blitter_save_so_targets(ctx->blitter, ctx->num_so_targets, ctx->so_targets);
+ if (flags & ZINK_BLIT_SAVE_FS_CONST_BUF)
+ util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[MESA_SHADER_FRAGMENT]);
+
if (flags & ZINK_BLIT_SAVE_FS) {
- util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[PIPE_SHADER_FRAGMENT]);
util_blitter_save_blend(ctx->blitter, ctx->gfx_pipeline_state.blend_state);
util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->dsa_state);
util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
- util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask);
+ util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask, ctx->gfx_pipeline_state.min_samples + 1);
util_blitter_save_scissor(ctx->blitter, ctx->vp_state.scissor_states);
/* also util_blitter_save_window_rectangles when we have that? */
- util_blitter_save_fragment_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]);
+ util_blitter_save_fragment_shader(ctx->blitter, ctx->gfx_stages[MESA_SHADER_FRAGMENT]);
}
if (flags & ZINK_BLIT_SAVE_FB)
@@ -342,27 +537,83 @@ zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags)
if (flags & ZINK_BLIT_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(ctx->blitter,
- ctx->di.num_samplers[PIPE_SHADER_FRAGMENT],
- (void**)ctx->sampler_states[PIPE_SHADER_FRAGMENT]);
+ ctx->di.num_samplers[MESA_SHADER_FRAGMENT],
+ (void**)ctx->sampler_states[MESA_SHADER_FRAGMENT]);
util_blitter_save_fragment_sampler_views(ctx->blitter,
- ctx->di.num_sampler_views[PIPE_SHADER_FRAGMENT],
- ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
+ ctx->di.num_sampler_views[MESA_SHADER_FRAGMENT],
+ ctx->sampler_views[MESA_SHADER_FRAGMENT]);
}
if (flags & ZINK_BLIT_NO_COND_RENDER && ctx->render_condition_active)
zink_stop_conditional_render(ctx);
}
+void
+zink_blit_barriers(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst, bool whole_dst)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (src && zink_is_swapchain(src)) {
+ if (!zink_kopper_acquire(ctx, src, UINT64_MAX))
+ return;
+ } else if (dst && zink_is_swapchain(dst)) {
+ if (!zink_kopper_acquire(ctx, dst, UINT64_MAX))
+ return;
+ }
+
+ VkAccessFlagBits flags;
+ VkPipelineStageFlagBits pipeline;
+ if (util_format_is_depth_or_stencil(dst->base.b.format)) {
+ flags = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ if (!whole_dst)
+ flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ pipeline = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+ } else {
+ flags = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ if (!whole_dst)
+ flags |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+ pipeline = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ }
+ if (src == dst) {
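+      /* self-blit: the image is sampled and written in the same pass, which requires a feedback-loop (or GENERAL) layout */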
+ VkImageLayout layout = zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout ?
+ VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT :
+ VK_IMAGE_LAYOUT_GENERAL;
+ screen->image_barrier(ctx, src, layout, VK_ACCESS_SHADER_READ_BIT | flags, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | pipeline);
+ } else {
+ if (src) {
+ VkImageLayout layout = util_format_is_depth_or_stencil(src->base.b.format) &&
+ src->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL :
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ screen->image_barrier(ctx, src, layout,
+ VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+ if (!ctx->unordered_blitting)
+ src->obj->unordered_read = false;
+ }
+ VkImageLayout layout = util_format_is_depth_or_stencil(dst->base.b.format) ?
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL :
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ screen->image_barrier(ctx, dst, layout, flags, pipeline);
+ }
+ if (!ctx->unordered_blitting)
+ dst->obj->unordered_read = dst->obj->unordered_write = false;
+}
+
bool
zink_blit_region_fills(struct u_rect region, unsigned width, unsigned height)
{
struct u_rect intersect = {0, width, 0, height};
-
- if (!u_rect_test_intersection(&region, &intersect))
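+   /* regions from flipped blits may have x0 > x1 or y0 > y1; normalize before intersecting */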
+ struct u_rect r = {
+ MIN2(region.x0, region.x1),
+ MAX2(region.x0, region.x1),
+ MIN2(region.y0, region.y1),
+ MAX2(region.y0, region.y1),
+ };
+
+ if (!u_rect_test_intersection(&r, &intersect))
/* is this even a thing? */
return false;
- u_rect_find_intersection(&region, &intersect);
+ u_rect_find_intersection(&r, &intersect);
if (intersect.x0 != 0 || intersect.y0 != 0 ||
intersect.x1 != width || intersect.y1 != height)
return false;
@@ -373,11 +624,23 @@ zink_blit_region_fills(struct u_rect region, unsigned width, unsigned height)
bool
zink_blit_region_covers(struct u_rect region, struct u_rect covers)
{
+ struct u_rect r = {
+ MIN2(region.x0, region.x1),
+ MAX2(region.x0, region.x1),
+ MIN2(region.y0, region.y1),
+ MAX2(region.y0, region.y1),
+ };
+ struct u_rect c = {
+ MIN2(covers.x0, covers.x1),
+ MAX2(covers.x0, covers.x1),
+ MIN2(covers.y0, covers.y1),
+ MAX2(covers.y0, covers.y1),
+ };
struct u_rect intersect;
- if (!u_rect_test_intersection(&region, &covers))
+ if (!u_rect_test_intersection(&r, &c))
return false;
- u_rect_union(&intersect, &region, &covers);
- return intersect.x0 == covers.x0 && intersect.y0 == covers.y0 &&
- intersect.x1 == covers.x1 && intersect.y1 == covers.y1;
+ u_rect_union(&intersect, &r, &c);
+ return intersect.x0 == c.x0 && intersect.y0 == c.y0 &&
+ intersect.x1 == c.x1 && intersect.y1 == c.y1;
}
diff --git a/src/gallium/drivers/zink/zink_bo.c b/src/gallium/drivers/zink/zink_bo.c
index e673efefb3e..97fbae6de0d 100644
--- a/src/gallium/drivers/zink/zink_bo.c
+++ b/src/gallium/drivers/zink/zink_bo.c
@@ -29,11 +29,17 @@
* Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
*/
+#include "zink_context.h"
#include "zink_bo.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "util/u_hash_table.h"
+#if !defined(__APPLE__) && !defined(_WIN32)
+#define ZINK_USE_DMABUF
+#include <xf86drm.h>
+#endif
+
struct zink_bo;
struct zink_sparse_backing_chunk {
@@ -63,7 +69,6 @@ struct zink_sparse_commitment {
struct zink_slab {
struct pb_slab base;
- unsigned entry_size;
struct zink_bo *buffer;
struct zink_bo *entries;
};
@@ -120,9 +125,19 @@ bo_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
struct zink_bo *bo = zink_bo(pbuf);
- simple_mtx_lock(&screen->pb.bo_export_table_lock);
- _mesa_hash_table_remove_key(screen->pb.bo_export_table, bo);
- simple_mtx_unlock(&screen->pb.bo_export_table_lock);
+#ifdef ZINK_USE_DMABUF
+ if (bo->mem && !bo->u.real.use_reusable_pool) {
+ simple_mtx_lock(&bo->u.real.export_lock);
+ list_for_each_entry_safe(struct bo_export, export, &bo->u.real.exports, link) {
+ struct drm_gem_close args = { .handle = export->gem_handle };
+ drmIoctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &args);
+ list_del(&export->link);
+ free(export);
+ }
+ simple_mtx_unlock(&bo->u.real.export_lock);
+ simple_mtx_destroy(&bo->u.real.export_lock);
+ }
+#endif
if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) {
bo->u.real.map_count = 1;
@@ -141,7 +156,7 @@ bo_can_reclaim(struct zink_screen *screen, struct pb_buffer *pbuf)
{
struct zink_bo *bo = zink_bo(pbuf);
- return zink_screen_usage_check_completion(screen, bo->reads) && zink_screen_usage_check_completion(screen, bo->writes);
+ return zink_screen_usage_check_completion(screen, bo->reads.u) && zink_screen_usage_check_completion(screen, bo->writes.u);
}
static bool
@@ -156,9 +171,9 @@ static void
bo_slab_free(struct zink_screen *screen, struct pb_slab *pslab)
{
struct zink_slab *slab = zink_slab(pslab);
- ASSERTED unsigned slab_size = slab->buffer->base.size;
+ ASSERTED unsigned slab_size = slab->buffer->base.base.size;
- assert(slab->base.num_entries * slab->entry_size <= slab_size);
+ assert(slab->base.num_entries * slab->base.entry_size <= slab_size);
FREE(slab->entries);
zink_bo_unref(screen, slab->buffer);
FREE(slab);
@@ -174,19 +189,21 @@ bo_slab_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
//if (bo->base.usage & RADEON_FLAG_ENCRYPTED)
//pb_slab_free(get_slabs(screen, bo->base.size, RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);
//else
- pb_slab_free(get_slabs(screen, bo->base.size, 0), &bo->u.slab.entry);
+ pb_slab_free(get_slabs(screen, bo->base.base.size, 0), &bo->u.slab.entry);
}
-static void
+static bool
clean_up_buffer_managers(struct zink_screen *screen)
{
+ unsigned num_reclaims = 0;
for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
- pb_slabs_reclaim(&screen->pb.bo_slabs[i]);
+ num_reclaims += pb_slabs_reclaim(&screen->pb.bo_slabs[i]);
//if (screen->info.has_tmz_support)
//pb_slabs_reclaim(&screen->bo_slabs_encrypted[i]);
}
- pb_cache_release_all_buffers(&screen->pb.bo_cache);
+ num_reclaims += pb_cache_release_all_buffers(&screen->pb.bo_cache);
+ return !!num_reclaims;
}
static unsigned
@@ -211,11 +228,11 @@ bo_destroy_or_cache(struct zink_screen *screen, struct pb_buffer *pbuf)
struct zink_bo *bo = zink_bo(pbuf);
assert(bo->mem); /* slab buffers have a separate vtbl */
- bo->reads = NULL;
- bo->writes = NULL;
+ bo->reads.u = NULL;
+ bo->writes.u = NULL;
if (bo->u.real.use_reusable_pool)
- pb_cache_add_buffer(bo->cache_entry);
+ pb_cache_add_buffer(&screen->pb.bo_cache, bo->cache_entry);
else
bo_destroy(screen, pbuf);
}
@@ -231,52 +248,84 @@ bo_create_internal(struct zink_screen *screen,
uint64_t size,
unsigned alignment,
enum zink_heap heap,
+ unsigned mem_type_idx,
unsigned flags,
const void *pNext)
{
- struct zink_bo *bo;
+ struct zink_bo *bo = NULL;
bool init_pb_cache;
- /* too big for vk alloc */
- if (size > UINT32_MAX)
- return NULL;
-
alignment = get_optimal_alignment(screen, size, alignment);
- /* all non-suballocated bo can cache */
- init_pb_cache = true;
-
- bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));
- if (!bo) {
- return NULL;
- }
-
- if (init_pb_cache) {
- bo->u.real.use_reusable_pool = true;
- pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base, heap);
- }
+ VkMemoryAllocateFlagsInfo ai;
+ ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
+ ai.pNext = pNext;
+ ai.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
+ ai.deviceMask = 0;
+ if (screen->info.have_KHR_buffer_device_address)
+ pNext = &ai;
+
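+   /* dedicated (NO_SUBALLOC) allocations get higher residency priority than suballocated/cached memory */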
+ VkMemoryPriorityAllocateInfoEXT prio = {
+ VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT,
+ pNext,
+ (flags & ZINK_ALLOC_NO_SUBALLOC) ? 1.0 : 0.5,
+ };
+ if (screen->info.have_EXT_memory_priority)
+ pNext = &prio;
VkMemoryAllocateInfo mai;
mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
mai.pNext = pNext;
mai.allocationSize = size;
- mai.memoryTypeIndex = screen->heap_map[heap];
+ mai.memoryTypeIndex = mem_type_idx;
if (screen->info.mem_props.memoryTypes[mai.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
- mai.allocationSize = align(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment);
+ mai.allocationSize = align64(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment);
+ }
+ unsigned vk_heap_idx = screen->info.mem_props.memoryTypes[mem_type_idx].heapIndex;
+ if (mai.allocationSize > screen->info.mem_props.memoryHeaps[vk_heap_idx].size) {
+ mesa_loge("zink: can't allocate %"PRIu64" bytes from heap that's only %"PRIu64" bytes!\n", mai.allocationSize, screen->info.mem_props.memoryHeaps[vk_heap_idx].size);
+ return NULL;
}
+
+ /* all non-suballocated bo can cache */
+ init_pb_cache = !pNext;
+
+ if (!bo)
+ bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));
+ if (!bo) {
+ return NULL;
+ }
+
VkResult ret = VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &bo->mem);
- if (!zink_screen_handle_vkresult(screen, ret))
+ if (!zink_screen_handle_vkresult(screen, ret)) {
+ mesa_loge("zink: couldn't allocate memory: heap=%u size=%" PRIu64, heap, size);
+ if (zink_debug & ZINK_DEBUG_MEM) {
+ zink_debug_mem_print_stats(screen);
+ /* abort with mem debug to allow debugging */
+ abort();
+ }
goto fail;
+ }
+
+ if (init_pb_cache) {
+ bo->u.real.use_reusable_pool = true;
+ pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base.base, mem_type_idx);
+ } else {
+#ifdef ZINK_USE_DMABUF
+ list_inithead(&bo->u.real.exports);
+ simple_mtx_init(&bo->u.real.export_lock, mtx_plain);
+#endif
+ }
+
simple_mtx_init(&bo->lock, mtx_plain);
- pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment_log2 = util_logbase2(alignment);
- bo->base.size = size;
+ pipe_reference_init(&bo->base.base.reference, 1);
+ bo->base.base.alignment_log2 = util_logbase2(alignment);
+ bo->base.base.size = mai.allocationSize;
bo->base.vtbl = &bo_vtbl;
- bo->base.placement = vk_domain_from_heap(heap);
- bo->base.usage = flags;
- bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
+ bo->base.base.placement = mem_type_idx;
+ bo->base.base.usage = flags;
return bo;
@@ -333,15 +382,15 @@ sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo,
return NULL;
}
- assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE));
+ assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE));
- size = MIN3(bo->base.size / 16,
+ size = MIN3(bo->base.base.size / 16,
8 * 1024 * 1024,
- bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE);
+ bo->base.base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE);
size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE,
- bo->base.placement, ZINK_ALLOC_NO_SUBALLOC, NULL);
+ ZINK_HEAP_DEVICE_LOCAL, 0, screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][0], NULL);
if (!buf) {
FREE(best_backing->chunks);
FREE(best_backing);
@@ -349,7 +398,7 @@ sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo,
}
/* We might have gotten a bigger buffer than requested via caching. */
- pages = buf->size / ZINK_SPARSE_BUFFER_PAGE_SIZE;
+ pages = buf->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;
best_backing->bo = zink_bo(buf);
best_backing->num_chunks = 1;
@@ -380,7 +429,7 @@ static void
sparse_free_backing_buffer(struct zink_screen *screen, struct zink_bo *bo,
struct zink_sparse_backing *backing)
{
- bo->u.sparse.num_backing_pages -= backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;
+ bo->u.sparse.num_backing_pages -= backing->bo->base.base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;
list_del(&backing->list);
zink_bo_unref(screen, backing->bo);
@@ -447,7 +496,7 @@ sparse_backing_free(struct zink_screen *screen, struct zink_bo *bo,
}
if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
- backing->chunks[0].end == backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE)
+ backing->chunks[0].end == backing->bo->base.base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE)
sparse_free_backing_buffer(screen, bo, backing);
return true;
@@ -458,7 +507,7 @@ bo_sparse_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
struct zink_bo *bo = zink_bo(pbuf);
- assert(!bo->mem && bo->base.usage & ZINK_ALLOC_SPARSE);
+ assert(!bo->mem && bo->base.base.usage & ZINK_ALLOC_SPARSE);
while (!list_is_empty(&bo->u.sparse.backing)) {
sparse_free_backing_buffer(screen, bo,
@@ -494,13 +543,15 @@ bo_sparse_create(struct zink_screen *screen, uint64_t size)
return NULL;
simple_mtx_init(&bo->lock, mtx_plain);
- pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
- bo->base.size = size;
+ pipe_reference_init(&bo->base.base.reference, 1);
+ bo->base.base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
+ bo->base.base.size = size;
bo->base.vtbl = &bo_sparse_vtbl;
- bo->base.placement = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ unsigned placement = zink_mem_type_idx_from_types(screen, ZINK_HEAP_DEVICE_LOCAL_SPARSE, UINT32_MAX);
+ assert(placement != UINT32_MAX);
+ bo->base.base.placement = placement;
bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
- bo->base.usage = ZINK_ALLOC_SPARSE;
+ bo->base.base.usage = ZINK_ALLOC_SPARSE;
bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
@@ -519,7 +570,7 @@ error_alloc_commitments:
}
struct pb_buffer *
-zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext)
+zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, unsigned mem_type_idx, const void *pNext)
{
struct zink_bo *bo;
/* pull in sparse flag */
@@ -527,9 +578,9 @@ zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, en
//struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
//screen->bo_slabs_encrypted : screen->bo_slabs;
- struct pb_slabs *slabs = screen->pb.bo_slabs;
+ struct pb_slabs *bo_slabs = screen->pb.bo_slabs;
- struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
+ struct pb_slabs *last_slab = &bo_slabs[NUM_SLAB_ALLOCATORS - 1];
unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
/* Sub-allocate small buffers from slabs. */
@@ -563,20 +614,33 @@ zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, en
}
struct pb_slabs *slabs = get_slabs(screen, alloc_size, flags);
- entry = pb_slab_alloc(slabs, alloc_size, heap);
+ bool reclaim_all = false;
+ if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE && !screen->resizable_bar) {
+ unsigned low_bound = 128 * 1024 * 1024; //128MB is a very small BAR
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
+ low_bound *= 2; //nvidia has fat textures or something
+ unsigned vk_heap_idx = screen->info.mem_props.memoryTypes[mem_type_idx].heapIndex;
+ reclaim_all = screen->info.mem_props.memoryHeaps[vk_heap_idx].size <= low_bound;
+ if (reclaim_all)
+ reclaim_all = clean_up_buffer_managers(screen);
+ }
+ entry = pb_slab_alloc_reclaimed(slabs, alloc_size, mem_type_idx, reclaim_all);
if (!entry) {
/* Clean up buffer managers and try again. */
- clean_up_buffer_managers(screen);
-
- entry = pb_slab_alloc(slabs, alloc_size, heap);
+ if (clean_up_buffer_managers(screen))
+ entry = pb_slab_alloc_reclaimed(slabs, alloc_size, mem_type_idx, true);
}
if (!entry)
return NULL;
bo = container_of(entry, struct zink_bo, u.slab.entry);
- pipe_reference_init(&bo->base.reference, 1);
- bo->base.size = size;
- assert(alignment <= 1 << bo->base.alignment_log2);
+ assert(bo->base.base.placement == mem_type_idx);
+ pipe_reference_init(&bo->base.base.reference, 1);
+ bo->base.base.size = size;
+ memset(&bo->reads, 0, sizeof(bo->reads));
+ memset(&bo->writes, 0, sizeof(bo->writes));
+ bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
+ assert(alignment <= 1 << bo->base.base.alignment_log2);
return &bo->base;
}
@@ -602,21 +666,25 @@ no_slab:
if (use_reusable_pool) {
/* Get a buffer from the cache. */
bo = (struct zink_bo*)
- pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, heap);
- if (bo)
+ pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, mem_type_idx);
+ assert(!bo || bo->base.base.placement == mem_type_idx);
+ if (bo) {
+ memset(&bo->reads, 0, sizeof(bo->reads));
+ memset(&bo->writes, 0, sizeof(bo->writes));
return &bo->base;
+ }
}
/* Create a new one. */
- bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
+ bo = bo_create_internal(screen, size, alignment, heap, mem_type_idx, flags, pNext);
if (!bo) {
/* Clean up buffer managers and try again. */
- clean_up_buffer_managers(screen);
-
- bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
+ if (clean_up_buffer_managers(screen))
+ bo = bo_create_internal(screen, size, alignment, heap, mem_type_idx, flags, pNext);
if (!bo)
return NULL;
}
+ assert(bo->base.base.placement == mem_type_idx);
return &bo->base;
}
@@ -642,11 +710,16 @@ zink_bo_map(struct zink_screen *screen, struct zink_bo *bo)
* be atomic thanks to the lock. */
cpu = real->u.real.cpu_ptr;
if (!cpu) {
- VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.size, 0, &cpu);
+ VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.base.size, 0, &cpu);
if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkMapMemory failed (%s)", vk_Result_to_str(result));
simple_mtx_unlock(&real->lock);
return NULL;
}
+ if (unlikely(zink_debug & ZINK_DEBUG_MAP)) {
+ p_atomic_add(&screen->mapped_vram, real->base.base.size);
+ mesa_loge("NEW MAP(%"PRIu64") TOTAL(%"PRIu64")", real->base.base.size, screen->mapped_vram);
+ }
p_atomic_set(&real->u.real.cpu_ptr, cpu);
}
simple_mtx_unlock(&real->lock);
@@ -665,95 +738,73 @@ zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo)
if (p_atomic_dec_zero(&real->u.real.map_count)) {
p_atomic_set(&real->u.real.cpu_ptr, NULL);
+ if (unlikely(zink_debug & ZINK_DEBUG_MAP)) {
+ p_atomic_add(&screen->mapped_vram, -real->base.base.size);
+ mesa_loge("UNMAP(%"PRIu64") TOTAL(%"PRIu64")", real->base.base.size, screen->mapped_vram);
+ }
VKSCR(UnmapMemory)(screen->dev, real->mem);
}
}
-
-static inline struct zink_screen **
-get_screen_ptr_for_commit(uint8_t *mem)
-{
- return (struct zink_screen**)(mem + sizeof(VkBindSparseInfo) + sizeof(VkSparseBufferMemoryBindInfo) + sizeof(VkSparseMemoryBind));
-}
-
-static bool
-resource_commit(struct zink_screen *screen, VkBindSparseInfo *sparse)
-{
- VkQueue queue = screen->threaded ? screen->thread_queue : screen->queue;
-
- VkResult ret = VKSCR(QueueBindSparse)(queue, 1, sparse, VK_NULL_HANDLE);
- return zink_screen_handle_vkresult(screen, ret);
-}
-
+/* see comment in zink_batch_reference_resource_move for how references on sparse backing buffers are organized */
static void
-submit_resource_commit(void *data, void *gdata, int thread_index)
+track_freed_sparse_bo(struct zink_context *ctx, struct zink_sparse_backing *backing)
{
- struct zink_screen **screen = get_screen_ptr_for_commit(data);
- resource_commit(*screen, data);
- free(data);
+ pipe_reference(NULL, &backing->bo->base.base.reference);
+ util_dynarray_append(&ctx->batch.state->freed_sparse_backing_bos, struct zink_bo*, backing->bo);
}
-static bool
-do_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t offset, uint32_t size, bool commit)
+static VkSemaphore
+buffer_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, uint32_t size, bool commit, VkSemaphore wait)
{
-
- uint8_t *mem = malloc(sizeof(VkBindSparseInfo) + sizeof(VkSparseBufferMemoryBindInfo) + sizeof(VkSparseMemoryBind) + sizeof(void*));
- if (!mem)
- return false;
- VkBindSparseInfo *sparse = (void*)mem;
- sparse->sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
- sparse->pNext = NULL;
- sparse->waitSemaphoreCount = 0;
- sparse->bufferBindCount = 1;
- sparse->imageOpaqueBindCount = 0;
- sparse->imageBindCount = 0;
- sparse->signalSemaphoreCount = 0;
-
- VkSparseBufferMemoryBindInfo *sparse_bind = (void*)(mem + sizeof(VkBindSparseInfo));
- sparse_bind->buffer = res->obj->buffer;
- sparse_bind->bindCount = 1;
- sparse->pBufferBinds = sparse_bind;
-
- VkSparseMemoryBind *mem_bind = (void*)(mem + sizeof(VkBindSparseInfo) + sizeof(VkSparseBufferMemoryBindInfo));
- mem_bind->resourceOffset = offset;
- mem_bind->size = MIN2(res->base.b.width0 - offset, size);
- mem_bind->memory = commit ? bo->mem : VK_NULL_HANDLE;
- mem_bind->memoryOffset = 0;
- mem_bind->flags = 0;
- sparse_bind->pBinds = mem_bind;
-
- struct zink_screen **ptr = get_screen_ptr_for_commit(mem);
- *ptr = screen;
-
- if (screen->threaded) {
- /* this doesn't need any kind of fencing because any access to this resource
- * will be automagically synchronized by queue dispatch */
- util_queue_add_job(&screen->flush_queue, mem, NULL, submit_resource_commit, NULL, 0);
- } else {
- bool ret = resource_commit(screen, sparse);
- free(sparse);
- return ret;
- }
- return true;
+ VkSemaphore sem = zink_create_semaphore(screen);
+ VkBindSparseInfo sparse = {0};
+ sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+ sparse.bufferBindCount = res->obj->storage_buffer ? 2 : 1;
+ sparse.waitSemaphoreCount = !!wait;
+ sparse.pWaitSemaphores = &wait;
+ sparse.signalSemaphoreCount = 1;
+ sparse.pSignalSemaphores = &sem;
+
+ VkSparseBufferMemoryBindInfo sparse_bind[2];
+ sparse_bind[0].buffer = res->obj->buffer;
+ sparse_bind[1].buffer = res->obj->storage_buffer;
+ sparse_bind[0].bindCount = 1;
+ sparse_bind[1].bindCount = 1;
+ sparse.pBufferBinds = sparse_bind;
+
+ VkSparseMemoryBind mem_bind;
+ mem_bind.resourceOffset = offset;
+ mem_bind.size = MIN2(res->base.b.width0 - offset, size);
+ mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE;
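+   /* suballocated (slab) BOs bind their parent allocation at the slab entry's offset; standalone BOs bind at 0 */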
+ mem_bind.memoryOffset = bo_offset * ZINK_SPARSE_BUFFER_PAGE_SIZE + (commit ? (bo->mem ? 0 : bo->offset) : 0);
+ mem_bind.flags = 0;
+ sparse_bind[0].pBinds = &mem_bind;
+ sparse_bind[1].pBinds = &mem_bind;
+
+ VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE);
+ if (zink_screen_handle_vkresult(screen, ret))
+ return sem;
+ VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
+ return VK_NULL_HANDLE;
}
-bool
-zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit)
+static bool
+buffer_bo_commit(struct zink_context *ctx, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit, VkSemaphore *sem)
{
bool ok = true;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
struct zink_bo *bo = res->obj->bo;
assert(offset % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0);
- assert(offset <= bo->base.size);
- assert(size <= bo->base.size - offset);
- assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == bo->base.size);
+ assert(offset <= bo->base.base.size);
+ assert(size <= bo->base.base.size - offset);
+ assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == res->obj->size);
struct zink_sparse_commitment *comm = bo->u.sparse.commitments;
uint32_t va_page = offset / ZINK_SPARSE_BUFFER_PAGE_SIZE;
uint32_t end_va_page = va_page + DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
-
- simple_mtx_lock(&bo->lock);
-
+ VkSemaphore cur_sem = VK_NULL_HANDLE;
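+ /* chain the sparse binds: each QueueBindSparse waits on the semaphore signaled by the previous one */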
if (commit) {
while (va_page < end_va_page) {
uint32_t span_va_page;
@@ -780,10 +831,10 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o
ok = false;
goto out;
}
- if (!do_commit_single(screen, res, backing->bo,
- (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
- (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true)) {
-
+ cur_sem = buffer_commit_single(screen, res, backing->bo, backing_start,
+ (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
+ (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true, cur_sem);
+ if (!cur_sem) {
ok = sparse_backing_free(screen, bo, backing, backing_start, backing_size);
assert(ok && "sufficient memory should already be allocated");
@@ -801,13 +852,8 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o
}
}
} else {
- if (!do_commit_single(screen, res, NULL,
- (uint64_t)va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
- (uint64_t)(end_va_page - va_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false)) {
- ok = false;
- goto out;
- }
-
+ bool done = false;
+ uint32_t base_page = va_page;
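+ /* unbind the entire range with a single sparse bind before releasing any backing pages */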
while (va_page < end_va_page) {
struct zink_sparse_backing *backing;
uint32_t backing_start;
@@ -819,6 +865,17 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o
continue;
}
+ if (!done) {
+ cur_sem = buffer_commit_single(screen, res, NULL, 0,
+ (uint64_t)base_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
+ (uint64_t)(end_va_page - base_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false, cur_sem);
+ if (!cur_sem) {
+ ok = false;
+ goto out;
+ }
+ }
+ done = true;
+
/* Group contiguous spans of pages. */
backing = comm[va_page].backing;
backing_start = comm[va_page].page;
@@ -835,6 +892,7 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o
span_pages++;
}
+ track_freed_sparse_bo(ctx, backing);
if (!sparse_backing_free(screen, bo, backing, backing_start, span_pages)) {
/* Couldn't allocate tracking data structures, so we have to leak */
fprintf(stderr, "zink: leaking sparse backing memory\n");
@@ -843,11 +901,292 @@ zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t o
}
}
out:
+ *sem = cur_sem;
+ return ok;
+}
+
+static VkSemaphore
+texture_commit_single(struct zink_screen *screen, struct zink_resource *res, VkSparseImageMemoryBind *ibind, unsigned num_binds, bool commit, VkSemaphore wait)
+{
+ VkSemaphore sem = zink_create_semaphore(screen);
+ VkBindSparseInfo sparse = {0};
+ sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+ sparse.imageBindCount = 1;
+ sparse.waitSemaphoreCount = !!wait;
+ sparse.pWaitSemaphores = &wait;
+ sparse.signalSemaphoreCount = 1;
+ sparse.pSignalSemaphores = &sem;
+
+ VkSparseImageMemoryBindInfo sparse_ibind;
+ sparse_ibind.image = res->obj->image;
+ sparse_ibind.bindCount = num_binds;
+ sparse_ibind.pBinds = ibind;
+ sparse.pImageBinds = &sparse_ibind;
+
+ VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE);
+ if (zink_screen_handle_vkresult(screen, ret))
+ return sem;
+ VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
+ return VK_NULL_HANDLE;
+}
+
+static VkSemaphore
+texture_commit_miptail(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, bool commit, VkSemaphore wait)
+{
+ VkSemaphore sem = zink_create_semaphore(screen);
+ VkBindSparseInfo sparse = {0};
+ sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+ sparse.imageOpaqueBindCount = 1;
+ sparse.waitSemaphoreCount = !!wait;
+ sparse.pWaitSemaphores = &wait;
+ sparse.signalSemaphoreCount = 1;
+ sparse.pSignalSemaphores = &sem;
+
+ VkSparseImageOpaqueMemoryBindInfo sparse_bind;
+ sparse_bind.image = res->obj->image;
+ sparse_bind.bindCount = 1;
+ sparse.pImageOpaqueBinds = &sparse_bind;
+
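+ /* the mip tail is bound as one opaque region rather than as per-page image binds */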
+ VkSparseMemoryBind mem_bind;
+ mem_bind.resourceOffset = offset;
+ mem_bind.size = MIN2(ZINK_SPARSE_BUFFER_PAGE_SIZE, res->sparse.imageMipTailSize - offset);
+ mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE;
+ mem_bind.memoryOffset = bo_offset + (commit ? (bo->mem ? 0 : bo->offset) : 0);
+ mem_bind.flags = 0;
+ sparse_bind.pBinds = &mem_bind;
+
+ VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE);
+ if (zink_screen_handle_vkresult(screen, ret))
+ return sem;
+ VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
+ return VK_NULL_HANDLE;
+}
+
+bool
+zink_bo_commit(struct zink_context *ctx, struct zink_resource *res, unsigned level, struct pipe_box *box, bool commit, VkSemaphore *sem)
+{
+ bool ok = true;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_bo *bo = res->obj->bo;
+ VkSemaphore cur_sem = VK_NULL_HANDLE;
+
+ simple_mtx_lock(&screen->queue_lock);
+ simple_mtx_lock(&bo->lock);
+ if (res->base.b.target == PIPE_BUFFER) {
+ ok = buffer_bo_commit(ctx, res, box->x, box->width, commit, &cur_sem);
+ goto out;
+ }
+
+ int gwidth, gheight, gdepth;
+ gwidth = res->sparse.formatProperties.imageGranularity.width;
+ gheight = res->sparse.formatProperties.imageGranularity.height;
+ gdepth = res->sparse.formatProperties.imageGranularity.depth;
+ assert(gwidth && gheight && gdepth);
+
+ struct zink_sparse_commitment *comm = bo->u.sparse.commitments;
+ VkImageSubresource subresource = { res->aspect, level, 0 };
+ unsigned nwidth = DIV_ROUND_UP(box->width, gwidth);
+ unsigned nheight = DIV_ROUND_UP(box->height, gheight);
+ unsigned ndepth = DIV_ROUND_UP(box->depth, gdepth);
+ VkExtent3D lastBlockExtent = {
+ (box->width % gwidth) ? box->width % gwidth : gwidth,
+ (box->height % gheight) ? box->height % gheight : gheight,
+ (box->depth % gdepth) ? box->depth % gdepth : gdepth
+ };
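+ /* image binds are accumulated and flushed in batches to bound the size of each vkQueueBindSparse call */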
+#define NUM_BATCHED_BINDS 50
+ VkSparseImageMemoryBind ibind[NUM_BATCHED_BINDS];
+ uint32_t backing_start[NUM_BATCHED_BINDS], backing_size[NUM_BATCHED_BINDS];
+ struct zink_sparse_backing *backing[NUM_BATCHED_BINDS];
+ unsigned i = 0;
+ bool commits_pending = false;
+ uint32_t va_page_offset = 0;
+ for (unsigned l = 0; l < level; l++) {
+ unsigned mipwidth = DIV_ROUND_UP(MAX2(res->base.b.width0 >> l, 1), gwidth);
+ unsigned mipheight = DIV_ROUND_UP(MAX2(res->base.b.height0 >> l, 1), gheight);
+ unsigned mipdepth = DIV_ROUND_UP(res->base.b.array_size > 1 ? res->base.b.array_size : MAX2(res->base.b.depth0 >> l, 1), gdepth);
+ va_page_offset += mipwidth * mipheight * mipdepth;
+ }
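+ /* va_page_offset is now the index of the first commitment-tracking page for the requested mip level */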
+ for (unsigned d = 0; d < ndepth; d++) {
+ for (unsigned h = 0; h < nheight; h++) {
+ for (unsigned w = 0; w < nwidth; w++) {
+ ibind[i].subresource = subresource;
+ ibind[i].flags = 0;
+ // Offset
+ ibind[i].offset.x = w * gwidth;
+ ibind[i].offset.y = h * gheight;
+ if (res->base.b.array_size > 1) {
+ ibind[i].subresource.arrayLayer = d * gdepth;
+ ibind[i].offset.z = 0;
+ } else {
+ ibind[i].offset.z = d * gdepth;
+ }
+ // Size of the page
+ ibind[i].extent.width = (w == nwidth - 1) ? lastBlockExtent.width : gwidth;
+ ibind[i].extent.height = (h == nheight - 1) ? lastBlockExtent.height : gheight;
+ ibind[i].extent.depth = (d == ndepth - 1 && res->base.b.target != PIPE_TEXTURE_CUBE) ? lastBlockExtent.depth : gdepth;
+ uint32_t va_page = va_page_offset +
+ (d + (box->z / gdepth)) * ((MAX2(res->base.b.width0 >> level, 1) / gwidth) * (MAX2(res->base.b.height0 >> level, 1) / gheight)) +
+ (h + (box->y / gheight)) * (MAX2(res->base.b.width0 >> level, 1) / gwidth) +
+ (w + (box->x / gwidth));
+
+ uint32_t end_va_page = va_page + 1;
+
+ if (commit) {
+ while (va_page < end_va_page) {
+ uint32_t span_va_page;
+
+ /* Skip pages that are already committed. */
+ if (comm[va_page].backing) {
+ va_page++;
+ continue;
+ }
+
+ /* Determine length of uncommitted span. */
+ span_va_page = va_page;
+ while (va_page < end_va_page && !comm[va_page].backing)
+ va_page++;
+
+ /* Fill the uncommitted span with chunks of backing memory. */
+ while (span_va_page < va_page) {
+ backing_size[i] = va_page - span_va_page;
+ backing[i] = sparse_backing_alloc(screen, bo, &backing_start[i], &backing_size[i]);
+ if (!backing[i]) {
+ ok = false;
+ goto out;
+ }
+ if (level >= res->sparse.imageMipTailFirstLod) {
+ uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride;
+ cur_sem = texture_commit_miptail(screen, res, backing[i]->bo, backing_start[i], offset, commit, cur_sem);
+ if (!cur_sem)
+ goto out;
+ } else {
+ ibind[i].memory = backing[i]->bo->mem ? backing[i]->bo->mem : backing[i]->bo->u.slab.real->mem;
+ ibind[i].memoryOffset = backing_start[i] * ZINK_SPARSE_BUFFER_PAGE_SIZE +
+ (backing[i]->bo->mem ? 0 : backing[i]->bo->offset);
+ commits_pending = true;
+ }
+
+ while (backing_size[i]) {
+ comm[span_va_page].backing = backing[i];
+ comm[span_va_page].page = backing_start[i];
+ span_va_page++;
+ backing_start[i]++;
+ backing_size[i]--;
+ }
+ i++;
+ }
+ }
+ } else {
+ ibind[i].memory = VK_NULL_HANDLE;
+ ibind[i].memoryOffset = 0;
+
+ while (va_page < end_va_page) {
+ /* Skip pages that are already uncommitted. */
+ if (!comm[va_page].backing) {
+ va_page++;
+ continue;
+ }
+
+ /* Group contiguous spans of pages. */
+ backing[i] = comm[va_page].backing;
+ backing_start[i] = comm[va_page].page;
+ comm[va_page].backing = NULL;
+
+ backing_size[i] = 1;
+ va_page++;
+
+ while (va_page < end_va_page &&
+ comm[va_page].backing == backing[i] &&
+ comm[va_page].page == backing_start[i] + backing_size[i]) {
+ comm[va_page].backing = NULL;
+ va_page++;
+ backing_size[i]++;
+ }
+ if (level >= res->sparse.imageMipTailFirstLod) {
+ uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride;
+ cur_sem = texture_commit_miptail(screen, res, NULL, 0, offset, commit, cur_sem);
+ if (!cur_sem)
+ goto out;
+ } else {
+ commits_pending = true;
+ }
+ i++;
+ }
+ }
+ if (i == ARRAY_SIZE(ibind)) {
+ cur_sem = texture_commit_single(screen, res, ibind, ARRAY_SIZE(ibind), commit, cur_sem);
+ if (!cur_sem) {
+ for (unsigned s = 0; s < i; s++) {
+ ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]);
+ if (!ok) {
+ /* Couldn't allocate tracking data structures, so we have to leak */
+ fprintf(stderr, "zink: leaking sparse backing memory\n");
+ }
+ }
+ ok = false;
+ goto out;
+ }
+ commits_pending = false;
+ i = 0;
+ }
+ }
+ }
+ }
+ if (commits_pending) {
+ cur_sem = texture_commit_single(screen, res, ibind, i, commit, cur_sem);
+ if (!cur_sem) {
+ for (unsigned s = 0; s < i; s++) {
+ ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]);
+ if (!ok) {
+ /* Couldn't allocate tracking data structures, so we have to leak */
+ fprintf(stderr, "zink: leaking sparse backing memory\n");
+ }
+ }
+ ok = false;
+ }
+ }
+out:
simple_mtx_unlock(&bo->lock);
+ simple_mtx_unlock(&screen->queue_lock);
+ *sem = cur_sem;
return ok;
}
+bool
+zink_bo_get_kms_handle(struct zink_screen *screen, struct zink_bo *bo, int fd, uint32_t *handle)
+{
+#ifdef ZINK_USE_DMABUF
+ assert(bo->mem && !bo->u.real.use_reusable_pool);
+ simple_mtx_lock(&bo->u.real.export_lock);
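+ /* reuse a previously created gem handle for this fd if one is cached */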
+ list_for_each_entry(struct bo_export, export, &bo->u.real.exports, link) {
+ if (export->drm_fd == fd) {
+ simple_mtx_unlock(&bo->u.real.export_lock);
+ *handle = export->gem_handle;
+ return true;
+ }
+ }
+ struct bo_export *export = CALLOC_STRUCT(bo_export);
+ if (!export) {
+ simple_mtx_unlock(&bo->u.real.export_lock);
+ return false;
+ }
+ bool success = drmPrimeFDToHandle(screen->drm_fd, fd, handle) == 0;
+ if (success) {
+ list_addtail(&export->link, &bo->u.real.exports);
+ export->gem_handle = *handle;
+ export->drm_fd = screen->drm_fd;
+ } else {
+ mesa_loge("zink: failed drmPrimeFDToHandle %s", strerror(errno));
+ FREE(export);
+ }
+ simple_mtx_unlock(&bo->u.real.export_lock);
+ return success;
+#else
+ return false;
+#endif
+}
+
static const struct pb_vtbl bo_slab_vtbl = {
/* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
(void*)bo_slab_destroy
@@ -855,11 +1194,9 @@ static const struct pb_vtbl bo_slab_vtbl = {
};
static struct pb_slab *
-bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index, bool encrypted)
+bo_slab_alloc(void *priv, unsigned mem_type_idx, unsigned entry_size, unsigned group_index, bool encrypted)
{
struct zink_screen *screen = priv;
- VkMemoryPropertyFlags domains = vk_domain_from_heap(heap);
- uint32_t base_id;
unsigned slab_size = 0;
struct zink_slab *slab = CALLOC_STRUCT(zink_slab);
@@ -898,40 +1235,32 @@ bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_ind
}
assert(slab_size != 0);
- slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, heap, 0, NULL));
+ slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, zink_heap_from_domain_flags(screen->info.mem_props.memoryTypes[mem_type_idx].propertyFlags, 0),
+ 0, mem_type_idx, NULL));
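+ /* slabs are now keyed by Vulkan memory type index; derive the heap from the type's property flags */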
if (!slab->buffer)
goto fail;
- slab_size = slab->buffer->base.size;
+ slab_size = slab->buffer->base.base.size;
slab->base.num_entries = slab_size / entry_size;
slab->base.num_free = slab->base.num_entries;
- slab->entry_size = entry_size;
+ slab->base.group_index = group_index;
+ slab->base.entry_size = entry_size;
slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
if (!slab->entries)
goto fail_buffer;
list_inithead(&slab->base.free);
-#ifdef _MSC_VER
- /* C11 too hard for msvc, no __sync_fetch_and_add */
- base_id = p_atomic_add_return(&screen->pb.next_bo_unique_id, slab->base.num_entries) - slab->base.num_entries;
-#else
- base_id = __sync_fetch_and_add(&screen->pb.next_bo_unique_id, slab->base.num_entries);
-#endif
for (unsigned i = 0; i < slab->base.num_entries; ++i) {
struct zink_bo *bo = &slab->entries[i];
simple_mtx_init(&bo->lock, mtx_plain);
- bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size));
- bo->base.size = entry_size;
+ bo->base.base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size));
+ bo->base.base.size = entry_size;
bo->base.vtbl = &bo_slab_vtbl;
bo->offset = slab->buffer->offset + i * entry_size;
- bo->base.placement = domains;
- bo->unique_id = base_id + i;
bo->u.slab.entry.slab = &slab->base;
- bo->u.slab.entry.group_index = group_index;
- bo->u.slab.entry.entry_size = entry_size;
if (slab->buffer->mem) {
/* The slab is not suballocated. */
@@ -941,6 +1270,7 @@ bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_ind
bo->u.slab.real = slab->buffer->u.slab.real;
assert(bo->u.slab.real->mem);
}
+ bo->base.base.placement = bo->u.slab.real->base.base.placement;
list_addtail(&bo->u.slab.entry.head, &slab->base.free);
}
@@ -958,9 +1288,9 @@ fail:
}
static struct pb_slab *
-bo_slab_alloc_normal(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
+bo_slab_alloc_normal(void *priv, unsigned mem_type_idx, unsigned entry_size, unsigned group_index)
{
- return bo_slab_alloc(priv, heap, entry_size, group_index, false);
+ return bo_slab_alloc(priv, mem_type_idx, entry_size, group_index, false);
}
bool
@@ -970,12 +1300,12 @@ zink_bo_init(struct zink_screen *screen)
for (uint32_t i = 0; i < screen->info.mem_props.memoryHeapCount; ++i)
total_mem += screen->info.mem_props.memoryHeaps[i].size;
/* Create managers. */
- pb_cache_init(&screen->pb.bo_cache, ZINK_HEAP_MAX,
+ pb_cache_init(&screen->pb.bo_cache, screen->info.mem_props.memoryTypeCount,
500000, 2.0f, 0,
- total_mem / 8, screen,
+ total_mem / 8, offsetof(struct zink_bo, cache_entry), screen,
(void*)bo_destroy, (void*)bo_can_reclaim);
- unsigned min_slab_order = 8; /* 256 bytes */
+ unsigned min_slab_order = MIN_SLAB_ORDER; /* 256 bytes */
unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
NUM_SLAB_ALLOCATORS;
@@ -988,7 +1318,7 @@ zink_bo_init(struct zink_screen *screen)
if (!pb_slabs_init(&screen->pb.bo_slabs[i],
min_order, max_order,
- ZINK_HEAP_MAX, true,
+ screen->info.mem_props.memoryTypeCount, true,
screen,
bo_can_reclaim_slab,
bo_slab_alloc_normal,
@@ -998,8 +1328,6 @@ zink_bo_init(struct zink_screen *screen)
min_slab_order = max_order + 1;
}
screen->pb.min_alloc_size = 1 << screen->pb.bo_slabs[0].min_order;
- screen->pb.bo_export_table = util_hash_table_create_ptr_keys();
- simple_mtx_init(&screen->pb.bo_export_table_lock, mtx_plain);
return true;
}
@@ -1011,6 +1339,4 @@ zink_bo_deinit(struct zink_screen *screen)
pb_slabs_deinit(&screen->pb.bo_slabs[i]);
}
pb_cache_deinit(&screen->pb.bo_cache);
- _mesa_hash_table_destroy(screen->pb.bo_export_table, NULL);
- simple_mtx_destroy(&screen->pb.bo_export_table_lock);
}
diff --git a/src/gallium/drivers/zink/zink_bo.h b/src/gallium/drivers/zink/zink_bo.h
index 0fd74cb7923..cd7338aff5a 100644
--- a/src/gallium/drivers/zink/zink_bo.h
+++ b/src/gallium/drivers/zink/zink_bo.h
@@ -26,116 +26,48 @@
#ifndef ZINK_BO_H
#define ZINK_BO_H
-#include <vulkan/vulkan.h>
-#include "pipebuffer/pb_cache.h"
-#include "pipebuffer/pb_slab.h"
+#include "zink_types.h"
#include "zink_batch.h"
#define VK_VIS_VRAM (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-enum zink_resource_access {
- ZINK_RESOURCE_ACCESS_READ = 1,
- ZINK_RESOURCE_ACCESS_WRITE = 32,
- ZINK_RESOURCE_ACCESS_RW = ZINK_RESOURCE_ACCESS_READ | ZINK_RESOURCE_ACCESS_WRITE,
-};
-
-
-enum zink_heap {
- ZINK_HEAP_DEVICE_LOCAL,
- ZINK_HEAP_DEVICE_LOCAL_SPARSE,
- ZINK_HEAP_DEVICE_LOCAL_VISIBLE,
- ZINK_HEAP_HOST_VISIBLE_COHERENT,
- ZINK_HEAP_HOST_VISIBLE_CACHED,
- ZINK_HEAP_MAX,
-};
-
-enum zink_alloc_flag {
- ZINK_ALLOC_SPARSE = 1<<0,
- ZINK_ALLOC_NO_SUBALLOC = 1<<1,
-};
-
-
-struct zink_bo {
- struct pb_buffer base;
-
- union {
- struct {
- void *cpu_ptr; /* for user_ptr and permanent maps */
- int map_count;
-
- bool is_user_ptr;
- bool use_reusable_pool;
-
- /* Whether buffer_get_handle or buffer_from_handle has been called,
- * it can only transition from false to true. Protected by lock.
- */
- bool is_shared;
- } real;
- struct {
- struct pb_slab_entry entry;
- struct zink_bo *real;
- } slab;
- struct {
- uint32_t num_va_pages;
- uint32_t num_backing_pages;
-
- struct list_head backing;
-
- /* Commitment information for each page of the virtual memory area. */
- struct zink_sparse_commitment *commitments;
- } sparse;
- } u;
-
- VkDeviceMemory mem;
- uint64_t offset;
-
- uint32_t unique_id;
-
- simple_mtx_t lock;
-
- struct zink_batch_usage *reads;
- struct zink_batch_usage *writes;
-
- struct pb_cache_entry cache_entry[];
-};
-
-static inline struct zink_bo *
-zink_bo(struct pb_buffer *pbuf)
-{
- return (struct zink_bo*)pbuf;
-}
+#define VK_STAGING_RAM (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
+#define VK_LAZY_VRAM (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
+
-static inline enum zink_alloc_flag
+static ALWAYS_INLINE enum zink_alloc_flag
zink_alloc_flags_from_heap(enum zink_heap heap)
{
- enum zink_alloc_flag flags = 0;
switch (heap) {
case ZINK_HEAP_DEVICE_LOCAL_SPARSE:
- flags |= ZINK_ALLOC_SPARSE;
+ return ZINK_ALLOC_SPARSE;
break;
default:
break;
}
- return flags;
+ return (enum zink_alloc_flag)0;
}
-static inline VkMemoryPropertyFlags
+static ALWAYS_INLINE VkMemoryPropertyFlags
vk_domain_from_heap(enum zink_heap heap)
{
- VkMemoryPropertyFlags domains = 0;
+ VkMemoryPropertyFlags domains = (VkMemoryPropertyFlags)0;
switch (heap) {
case ZINK_HEAP_DEVICE_LOCAL:
case ZINK_HEAP_DEVICE_LOCAL_SPARSE:
domains = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
+ case ZINK_HEAP_DEVICE_LOCAL_LAZY:
+ domains = VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ break;
case ZINK_HEAP_DEVICE_LOCAL_VISIBLE:
domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
case ZINK_HEAP_HOST_VISIBLE_COHERENT:
domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
break;
- case ZINK_HEAP_HOST_VISIBLE_CACHED:
- domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ case ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED:
+ domains = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
break;
default:
break;
@@ -143,7 +75,7 @@ vk_domain_from_heap(enum zink_heap heap)
return domains;
}
-static inline enum zink_heap
+static ALWAYS_INLINE enum zink_heap
zink_heap_from_domain_flags(VkMemoryPropertyFlags domains, enum zink_alloc_flag flags)
{
if (flags & ZINK_ALLOC_SPARSE)
@@ -156,11 +88,22 @@ zink_heap_from_domain_flags(VkMemoryPropertyFlags domains, enum zink_alloc_flag
return ZINK_HEAP_DEVICE_LOCAL;
if (domains & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
- return ZINK_HEAP_HOST_VISIBLE_CACHED;
+ return ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED;
return ZINK_HEAP_HOST_VISIBLE_COHERENT;
}
+static ALWAYS_INLINE unsigned
+zink_mem_type_idx_from_types(struct zink_screen *screen, enum zink_heap heap, uint32_t types)
+{
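+ /* pick the first memory type in the heap's mapping that is allowed by the given type mask */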
+ for (unsigned i = 0; i < screen->heap_count[heap]; i++) {
+ if (types & BITFIELD_BIT(screen->heap_map[heap][i])) {
+ return screen->heap_map[heap][i];
+ }
+ }
+ return UINT32_MAX;
+}
+
bool
zink_bo_init(struct zink_screen *screen);
@@ -168,24 +111,27 @@ void
zink_bo_deinit(struct zink_screen *screen);
struct pb_buffer *
-zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext);
+zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, unsigned mem_type_idx, const void *pNext);
-static inline uint64_t
+bool
+zink_bo_get_kms_handle(struct zink_screen *screen, struct zink_bo *bo, int fd, uint32_t *handle);
+
+static ALWAYS_INLINE uint64_t
zink_bo_get_offset(const struct zink_bo *bo)
{
return bo->offset;
}
-static inline VkDeviceMemory
+static ALWAYS_INLINE VkDeviceMemory
zink_bo_get_mem(const struct zink_bo *bo)
{
return bo->mem ? bo->mem : bo->u.slab.real->mem;
}
-static inline VkDeviceSize
+static ALWAYS_INLINE VkDeviceSize
zink_bo_get_size(const struct zink_bo *bo)
{
- return bo->mem ? bo->base.size : bo->u.slab.real->base.size;
+ return bo->mem ? bo->base.base.size : bo->u.slab.real->base.base.size;
}
void *
@@ -194,67 +140,90 @@ void
zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo);
bool
-zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit);
+zink_bo_commit(struct zink_context *ctx, struct zink_resource *res, unsigned level, struct pipe_box *box, bool commit, VkSemaphore *sem);
-static inline bool
+static ALWAYS_INLINE bool
zink_bo_has_unflushed_usage(const struct zink_bo *bo)
{
- return zink_batch_usage_is_unflushed(bo->reads) ||
- zink_batch_usage_is_unflushed(bo->writes);
+ return zink_batch_usage_is_unflushed(bo->reads.u) ||
+ zink_batch_usage_is_unflushed(bo->writes.u);
}
-static inline bool
+static ALWAYS_INLINE bool
zink_bo_has_usage(const struct zink_bo *bo)
{
- return zink_batch_usage_exists(bo->reads) ||
- zink_batch_usage_exists(bo->writes);
+ return zink_bo_has_unflushed_usage(bo) ||
+ (zink_batch_usage_exists(bo->reads.u) && bo->reads.submit_count == bo->reads.u->submit_count) ||
+ (zink_batch_usage_exists(bo->writes.u) && bo->writes.submit_count == bo->writes.u->submit_count);
}
-static inline bool
+static ALWAYS_INLINE bool
zink_bo_usage_matches(const struct zink_bo *bo, const struct zink_batch_state *bs)
{
- return zink_batch_usage_matches(bo->reads, bs) ||
- zink_batch_usage_matches(bo->writes, bs);
+ return (zink_batch_usage_matches(bo->reads.u, bs) && bo->reads.submit_count == bo->reads.u->submit_count) ||
+ (zink_batch_usage_matches(bo->writes.u, bs) && bo->writes.submit_count == bo->writes.u->submit_count);
}
-static inline bool
+static ALWAYS_INLINE bool
zink_bo_usage_check_completion(struct zink_screen *screen, struct zink_bo *bo, enum zink_resource_access access)
{
- if (access & ZINK_RESOURCE_ACCESS_READ && !zink_screen_usage_check_completion(screen, bo->reads))
+ if (access & ZINK_RESOURCE_ACCESS_READ && !zink_screen_usage_check_completion(screen, bo->reads.u))
+ return false;
+ if (access & ZINK_RESOURCE_ACCESS_WRITE && !zink_screen_usage_check_completion(screen, bo->writes.u))
+ return false;
+ return true;
+}
+
+static ALWAYS_INLINE bool
+zink_bo_usage_check_completion_fast(struct zink_screen *screen, struct zink_bo *bo, enum zink_resource_access access)
+{
+ if (access & ZINK_RESOURCE_ACCESS_READ && !zink_screen_usage_check_completion_fast(screen, bo->reads.u))
return false;
- if (access & ZINK_RESOURCE_ACCESS_WRITE && !zink_screen_usage_check_completion(screen, bo->writes))
+ if (access & ZINK_RESOURCE_ACCESS_WRITE && !zink_screen_usage_check_completion_fast(screen, bo->writes.u))
return false;
return true;
}
-static inline void
+static ALWAYS_INLINE void
zink_bo_usage_wait(struct zink_context *ctx, struct zink_bo *bo, enum zink_resource_access access)
{
if (access & ZINK_RESOURCE_ACCESS_READ)
- zink_batch_usage_wait(ctx, bo->reads);
+ zink_batch_usage_wait(ctx, bo->reads.u);
if (access & ZINK_RESOURCE_ACCESS_WRITE)
- zink_batch_usage_wait(ctx, bo->writes);
+ zink_batch_usage_wait(ctx, bo->writes.u);
}
-static inline void
+static ALWAYS_INLINE void
+zink_bo_usage_try_wait(struct zink_context *ctx, struct zink_bo *bo, enum zink_resource_access access)
+{
+ if (access & ZINK_RESOURCE_ACCESS_READ)
+ zink_batch_usage_try_wait(ctx, bo->reads.u);
+ if (access & ZINK_RESOURCE_ACCESS_WRITE)
+ zink_batch_usage_try_wait(ctx, bo->writes.u);
+}
+
+static ALWAYS_INLINE void
zink_bo_usage_set(struct zink_bo *bo, struct zink_batch_state *bs, bool write)
{
- if (write)
- zink_batch_usage_set(&bo->writes, bs);
- else
- zink_batch_usage_set(&bo->reads, bs);
+ if (write) {
+ zink_batch_usage_set(&bo->writes.u, bs);
+ bo->writes.submit_count = bs->usage.submit_count;
+ } else {
+ zink_batch_usage_set(&bo->reads.u, bs);
+ bo->reads.submit_count = bs->usage.submit_count;
+ }
}
-static inline bool
+static ALWAYS_INLINE bool
zink_bo_usage_unset(struct zink_bo *bo, struct zink_batch_state *bs)
{
- zink_batch_usage_unset(&bo->reads, bs);
- zink_batch_usage_unset(&bo->writes, bs);
- return bo->reads || bo->writes;
+ zink_batch_usage_unset(&bo->reads.u, bs);
+ zink_batch_usage_unset(&bo->writes.u, bs);
+ return bo->reads.u || bo->writes.u;
}
-static inline void
+static ALWAYS_INLINE void
zink_bo_unref(struct zink_screen *screen, struct zink_bo *bo)
{
struct pb_buffer *pbuf = &bo->base;
diff --git a/src/gallium/drivers/zink/zink_clear.c b/src/gallium/drivers/zink/zink_clear.c
index 660203f7f07..236c67703ab 100644
--- a/src/gallium/drivers/zink/zink_clear.c
+++ b/src/gallium/drivers/zink/zink_clear.c
@@ -21,13 +21,14 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "zink_batch.h"
+#include "zink_clear.h"
#include "zink_context.h"
+#include "zink_format.h"
+#include "zink_inlines.h"
#include "zink_query.h"
-#include "zink_resource.h"
-#include "zink_screen.h"
#include "util/u_blitter.h"
-#include "util/u_dynarray.h"
#include "util/format/u_format.h"
#include "util/format_srgb.h"
#include "util/u_framebuffer.h"
@@ -37,24 +38,6 @@
#include "util/u_helpers.h"
static inline bool
-check_3d_layers(struct pipe_surface *psurf)
-{
- if (psurf->texture->target != PIPE_TEXTURE_3D)
- return true;
- /* SPEC PROBLEM:
- * though the vk spec doesn't seem to explicitly address this, currently drivers
- * are claiming that all 3D images have a single "3D" layer regardless of layercount,
- * so we can never clear them if we aren't trying to clear only layer 0
- */
- if (psurf->u.tex.first_layer)
- return false;
-
- if (psurf->u.tex.last_layer - psurf->u.tex.first_layer > 0)
- return false;
- return true;
-}
-
-static inline bool
scissor_states_equal(const struct pipe_scissor_state *a, const struct pipe_scissor_state *b)
{
return a->minx == b->minx && a->miny == b->miny && a->maxx == b->maxx && a->maxy == b->maxy;
@@ -70,15 +53,17 @@ clear_in_rp(struct pipe_context *pctx,
struct zink_context *ctx = zink_context(pctx);
struct pipe_framebuffer_state *fb = &ctx->fb_state;
+ zink_flush_dgc_if_enabled(ctx);
+
VkClearAttachment attachments[1 + PIPE_MAX_COLOR_BUFS];
int num_attachments = 0;
if (buffers & PIPE_CLEAR_COLOR) {
VkClearColorValue color;
- color.float32[0] = pcolor->f[0];
- color.float32[1] = pcolor->f[1];
- color.float32[2] = pcolor->f[2];
- color.float32[3] = pcolor->f[3];
+ color.uint32[0] = pcolor->ui[0];
+ color.uint32[1] = pcolor->ui[1];
+ color.uint32[2] = pcolor->ui[2];
+ color.uint32[3] = pcolor->ui[3];
for (unsigned i = 0; i < fb->nr_cbufs; i++) {
if (!(buffers & (PIPE_CLEAR_COLOR0 << i)) || !fb->cbufs[i])
@@ -98,6 +83,8 @@ clear_in_rp(struct pipe_context *pctx,
if (buffers & PIPE_CLEAR_STENCIL)
aspect |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ assert(zink_is_zsbuf_used(ctx));
+
attachments[num_attachments].aspectMask = aspect;
attachments[num_attachments].clearValue.depthStencil.depth = depth;
attachments[num_attachments].clearValue.depthStencil.stencil = stencil;
@@ -106,10 +93,13 @@ clear_in_rp(struct pipe_context *pctx,
VkClearRect cr = {0};
if (scissor_state) {
+ /* invalid clear */
+ if (scissor_state->minx > ctx->fb_state.width || scissor_state->miny > ctx->fb_state.height)
+ return;
cr.rect.offset.x = scissor_state->minx;
cr.rect.offset.y = scissor_state->miny;
- cr.rect.extent.width = MIN2(fb->width, scissor_state->maxx - scissor_state->minx);
- cr.rect.extent.height = MIN2(fb->height, scissor_state->maxy - scissor_state->miny);
+ cr.rect.extent.width = MIN2(fb->width - cr.rect.offset.x, scissor_state->maxx - scissor_state->minx);
+ cr.rect.extent.height = MIN2(fb->height - cr.rect.offset.y, scissor_state->maxy - scissor_state->miny);
} else {
cr.rect.extent.width = fb->width;
cr.rect.extent.height = fb->height;
@@ -117,75 +107,42 @@ clear_in_rp(struct pipe_context *pctx,
cr.baseArrayLayer = 0;
cr.layerCount = util_framebuffer_get_num_layers(fb);
struct zink_batch *batch = &ctx->batch;
- zink_batch_rp(ctx);
+ assert(batch->in_rp);
VKCTX(CmdClearAttachments)(batch->state->cmdbuf, num_attachments, attachments, 1, &cr);
+ /*
+ Rendering within a subpass containing a feedback loop creates a data race, except in the following
+ cases:
+ • If a memory dependency is inserted between when the attachment is written and when it is
+ subsequently read by later fragments. Pipeline barriers expressing a subpass self-dependency
+ are the only way to achieve this, and one must be inserted every time a fragment will read
+ values at a particular sample (x, y, layer, sample) coordinate, if those values have been written
+ since the most recent pipeline barrier
+
+ VK 1.3.211, Chapter 8: Render Pass
+ */
+ if (ctx->fbfetch_outputs)
+ ctx->base.texture_barrier(&ctx->base, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
}
-static void
-clear_color_no_rp(struct zink_context *ctx, struct zink_resource *res, const union pipe_color_union *pcolor, unsigned level, unsigned layer, unsigned layerCount)
-{
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- VkImageSubresourceRange range = {0};
- range.baseMipLevel = level;
- range.levelCount = 1;
- range.baseArrayLayer = layer;
- range.layerCount = layerCount;
- range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-
- VkClearColorValue color;
- color.float32[0] = pcolor->f[0];
- color.float32[1] = pcolor->f[1];
- color.float32[2] = pcolor->f[2];
- color.float32[3] = pcolor->f[3];
-
- if (zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_GENERAL, 0, 0) &&
- zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0))
- zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0);
- zink_batch_reference_resource_rw(batch, res, true);
- VKCTX(CmdClearColorImage)(batch->state->cmdbuf, res->obj->image, res->layout, &color, 1, &range);
-}
-
-static void
-clear_zs_no_rp(struct zink_context *ctx, struct zink_resource *res, VkImageAspectFlags aspects, double depth, unsigned stencil, unsigned level, unsigned layer, unsigned layerCount)
+static struct zink_framebuffer_clear_data *
+add_new_clear(struct zink_framebuffer_clear *fb_clear)
{
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- VkImageSubresourceRange range = {0};
- range.baseMipLevel = level;
- range.levelCount = 1;
- range.baseArrayLayer = layer;
- range.layerCount = layerCount;
- range.aspectMask = aspects;
-
- VkClearDepthStencilValue zs_value = {depth, stencil};
-
- if (zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_GENERAL, 0, 0) &&
- zink_resource_image_needs_barrier(res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0))
- zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0);
- zink_batch_reference_resource_rw(batch, res, true);
- VKCTX(CmdClearDepthStencilImage)(batch->state->cmdbuf, res->obj->image, res->layout, &zs_value, 1, &range);
+ struct zink_framebuffer_clear_data cd = {0};
+ util_dynarray_append(&fb_clear->clears, struct zink_framebuffer_clear_data, cd);
+ return zink_fb_clear_element(fb_clear, zink_fb_clear_count(fb_clear) - 1);
}
-
-
static struct zink_framebuffer_clear_data *
get_clear_data(struct zink_context *ctx, struct zink_framebuffer_clear *fb_clear, const struct pipe_scissor_state *scissor_state)
{
- struct zink_framebuffer_clear_data *clear = NULL;
unsigned num_clears = zink_fb_clear_count(fb_clear);
if (num_clears) {
struct zink_framebuffer_clear_data *last_clear = zink_fb_clear_element(fb_clear, num_clears - 1);
/* if we're completely overwriting the previous clear, merge this into the previous clear */
if (!scissor_state || (last_clear->has_scissor && scissor_states_equal(&last_clear->scissor, scissor_state)))
- clear = last_clear;
- }
- if (!clear) {
- struct zink_framebuffer_clear_data cd = {0};
- util_dynarray_append(&fb_clear->clears, struct zink_framebuffer_clear_data, cd);
- clear = zink_fb_clear_element(fb_clear, zink_fb_clear_count(fb_clear) - 1);
+ return last_clear;
}
- return clear;
+ return add_new_clear(fb_clear);
}
void
@@ -196,22 +153,98 @@ zink_clear(struct pipe_context *pctx,
double depth, unsigned stencil)
{
struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(pctx->screen);
struct pipe_framebuffer_state *fb = &ctx->fb_state;
struct zink_batch *batch = &ctx->batch;
bool needs_rp = false;
- if (unlikely(!zink_screen(pctx->screen)->info.have_EXT_conditional_rendering && !zink_check_conditional_render(ctx)))
- return;
-
if (scissor_state) {
struct u_rect scissor = {scissor_state->minx, scissor_state->maxx, scissor_state->miny, scissor_state->maxy};
needs_rp = !zink_blit_region_fills(scissor, fb->width, fb->height);
}
+ if (unlikely(ctx->fb_layer_mismatch)) {
+ /* this is a terrible scenario:
+ * at least one attachment has a layerCount greater than the others,
+ * so iterate over all the mismatched attachments and pre-clear them separately,
+ * then continue to flag them as needing (additional) clears
+ * to avoid loadOp=LOAD
+ */
+ unsigned x = 0;
+ unsigned y = 0;
+ unsigned w = ctx->fb_state.width;
+ unsigned h = ctx->fb_state.height;
+ if (scissor_state) {
+ x = scissor_state->minx;
+ y = scissor_state->miny;
+ w = scissor_state->minx + scissor_state->maxx;
+ h = scissor_state->miny + scissor_state->maxy;
+ }
+ unsigned clear_buffers = buffers >> 2;
+ for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ if (ctx->fb_state.cbufs[i] &&
+ (ctx->fb_layer_mismatch & clear_buffers & BITFIELD_BIT(i))) {
+ if (ctx->void_clears & (PIPE_CLEAR_COLOR0 << i)) {
+ union pipe_color_union color;
+ color.f[0] = color.f[1] = color.f[2] = 0;
+ color.f[3] = 1.0;
+ pctx->clear_render_target(pctx, ctx->fb_state.cbufs[i], &color,
+ 0, 0,
+ ctx->fb_state.cbufs[i]->width, ctx->fb_state.cbufs[i]->height,
+ ctx->render_condition_active);
+ }
+ pctx->clear_render_target(pctx, ctx->fb_state.cbufs[i], pcolor,
+ x, y, w, h, ctx->render_condition_active);
+ }
+ }
+ if (ctx->fb_state.zsbuf && (buffers & PIPE_CLEAR_DEPTHSTENCIL))
+ pctx->clear_depth_stencil(pctx, ctx->fb_state.zsbuf, buffers & PIPE_CLEAR_DEPTHSTENCIL, depth, stencil,
+ x, y, w, h, ctx->render_condition_active);
+ }
if (batch->in_rp) {
- clear_in_rp(pctx, buffers, scissor_state, pcolor, depth, stencil);
- return;
+ if (buffers & PIPE_CLEAR_DEPTHSTENCIL && (ctx->zsbuf_unused || ctx->zsbuf_readonly)) {
+ /* this will need a layout change */
+ assert(!ctx->track_renderpasses);
+ zink_batch_no_rp(ctx);
+ } else {
+ clear_in_rp(pctx, buffers, scissor_state, pcolor, depth, stencil);
+ return;
+ }
+ }
+
+ unsigned rp_clears_enabled = ctx->rp_clears_enabled;
+
+ if (ctx->void_clears & buffers) {
+ unsigned void_clears = ctx->void_clears & buffers;
+ ctx->void_clears &= ~buffers;
+ union pipe_color_union color;
+ color.f[0] = color.f[1] = color.f[2] = 0;
+ color.f[3] = 1.0;
+ for (unsigned i = 0; i < fb->nr_cbufs; i++) {
+ if ((void_clears & (PIPE_CLEAR_COLOR0 << i)) && fb->cbufs[i]) {
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[i];
+ unsigned num_clears = zink_fb_clear_count(fb_clear);
+ if (num_clears) {
+ if (zink_fb_clear_first_needs_explicit(fb_clear)) {
+ /* a scissored clear exists:
+ * - extend the clear array
+ * - shift existing clears back by one position
+ * - inject the void clear at the base of the array
+ */
+ add_new_clear(fb_clear);
+ struct zink_framebuffer_clear_data *clear = fb_clear->clears.data;
+ memmove(clear + 1, clear, num_clears);
+ memcpy(&clear->color, &color, sizeof(color));
+ } else {
+ /* no void clear needed */
+ }
+ void_clears &= ~(PIPE_CLEAR_COLOR0 << i);
+ }
+ }
+ }
+ if (void_clears)
+ pctx->clear(pctx, void_clears, NULL, &color, 0, 0);
}
if (buffers & PIPE_CLEAR_COLOR) {
@@ -224,11 +257,10 @@ zink_clear(struct pipe_context *pctx,
ctx->clears_enabled |= PIPE_CLEAR_COLOR0 << i;
clear->conditional = ctx->render_condition_active;
clear->has_scissor = needs_rp;
+ memcpy(&clear->color, pcolor, sizeof(union pipe_color_union));
+ zink_convert_color(screen, psurf->format, &clear->color, pcolor);
if (scissor_state && needs_rp)
clear->scissor = *scissor_state;
- clear->color.color = *pcolor;
- clear->color.srgb = psurf->format != psurf->texture->format &&
- !util_format_is_srgb(psurf->format) && util_format_is_srgb(psurf->texture->format);
if (zink_fb_clear_first_needs_explicit(fb_clear))
ctx->rp_clears_enabled &= ~(PIPE_CLEAR_COLOR0 << i);
else
@@ -250,11 +282,18 @@ zink_clear(struct pipe_context *pctx,
if (buffers & PIPE_CLEAR_STENCIL)
clear->zs.stencil = stencil;
clear->zs.bits |= (buffers & PIPE_CLEAR_DEPTHSTENCIL);
- if (zink_fb_clear_first_needs_explicit(fb_clear))
+ if (zink_fb_clear_first_needs_explicit(fb_clear)) {
ctx->rp_clears_enabled &= ~PIPE_CLEAR_DEPTHSTENCIL;
- else
+ if (!ctx->track_renderpasses)
+ ctx->dynamic_fb.tc_info.zsbuf_clear_partial = true;
+ } else {
ctx->rp_clears_enabled |= (buffers & PIPE_CLEAR_DEPTHSTENCIL);
+ if (!ctx->track_renderpasses)
+ ctx->dynamic_fb.tc_info.zsbuf_clear = true;
+ }
}
+ assert(!ctx->batch.in_rp);
+ ctx->rp_changed |= ctx->rp_clears_enabled != rp_clears_enabled;
}
static inline bool
@@ -297,7 +336,7 @@ zink_clear_framebuffer(struct zink_context *ctx, unsigned clear_buffers)
goto out;
/* colors don't match, fire this one off */
- if (!colors_equal(&a->color.color, &b->color.color))
+ if (!colors_equal(&a->color, &b->color))
goto out;
}
} else {
@@ -341,7 +380,7 @@ out:
}
zink_clear(&ctx->base, clear_bits,
clear->has_scissor ? &clear->scissor : NULL,
- &clear->color.color,
+ &clear->color,
zsclear ? zsclear->zs.depth : 0,
zsclear ? zsclear->zs.stencil : 0);
}
@@ -358,8 +397,10 @@ out:
}
to_clear = 0;
}
- for (int i = 0; i < ARRAY_SIZE(ctx->fb_clears); i++)
- zink_fb_clear_reset(ctx, i);
+ if (ctx->clears_enabled & PIPE_CLEAR_DEPTHSTENCIL)
+ zink_fb_clear_reset(ctx, PIPE_MAX_COLOR_BUFS);
+ u_foreach_bit(i, ctx->clears_enabled >> 2)
+ zink_fb_clear_reset(ctx, i);
}
static struct pipe_surface *
@@ -374,6 +415,101 @@ create_clear_surface(struct pipe_context *pctx, struct pipe_resource *pres, unsi
return pctx->create_surface(pctx, pres, &tmpl);
}
+static void
+set_clear_fb(struct pipe_context *pctx, struct pipe_surface *psurf, struct pipe_surface *zsurf)
+{
+ struct pipe_framebuffer_state fb_state = {0};
+ fb_state.width = psurf ? psurf->width : zsurf->width;
+ fb_state.height = psurf ? psurf->height : zsurf->height;
+ fb_state.nr_cbufs = !!psurf;
+ fb_state.cbufs[0] = psurf;
+ fb_state.zsbuf = zsurf;
+ pctx->set_framebuffer_state(pctx, &fb_state);
+}
+
+void
+zink_clear_texture_dynamic(struct pipe_context *pctx,
+ struct pipe_resource *pres,
+ unsigned level,
+ const struct pipe_box *box,
+ const void *data)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ struct zink_resource *res = zink_resource(pres);
+
+ bool full_clear = 0 <= box->x && u_minify(pres->width0, level) >= box->x + box->width &&
+ 0 <= box->y && u_minify(pres->height0, level) >= box->y + box->height &&
+ 0 <= box->z && u_minify(pres->target == PIPE_TEXTURE_3D ? pres->depth0 : pres->array_size, level) >= box->z + box->depth;
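+ /* a full clear can be folded into loadOp=CLEAR; partial clears load the attachment and use vkCmdClearAttachments below */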
+
+ struct pipe_surface *surf = create_clear_surface(pctx, pres, level, box);
+
+ VkRenderingAttachmentInfo att = {0};
+ att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
+ att.imageView = zink_csurface(surf)->image_view;
+ att.imageLayout = res->aspect & VK_IMAGE_ASPECT_COLOR_BIT ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+ att.loadOp = full_clear ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
+ att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+
+ VkRenderingInfo info = {0};
+ info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO;
+ info.renderArea.offset.x = box->x;
+ info.renderArea.offset.y = box->y;
+ info.renderArea.extent.width = box->width;
+ info.renderArea.extent.height = box->height;
+ info.layerCount = MAX2(box->depth, 1);
+
+ union pipe_color_union color, tmp;
+ float depth = 0.0;
+ uint8_t stencil = 0;
+ if (res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
+ util_format_unpack_rgba(pres->format, tmp.ui, data, 1);
+ zink_convert_color(screen, surf->format, &color, &tmp);
+ } else {
+ if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT)
+ util_format_unpack_z_float(pres->format, &depth, data, 1);
+
+ if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT)
+ util_format_unpack_s_8uint(pres->format, &stencil, data, 1);
+ }
+
+ zink_blit_barriers(ctx, NULL, res, full_clear);
+ VkCommandBuffer cmdbuf = zink_get_cmdbuf(ctx, NULL, res);
+ if (cmdbuf == ctx->batch.state->cmdbuf && ctx->batch.in_rp)
+ zink_batch_no_rp(ctx);
+
+ if (res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
+ memcpy(&att.clearValue, &color, sizeof(float) * 4);
+ info.colorAttachmentCount = 1;
+ info.pColorAttachments = &att;
+ } else {
+ att.clearValue.depthStencil.depth = depth;
+ att.clearValue.depthStencil.stencil = stencil;
+ if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT)
+ info.pDepthAttachment = &att;
+ if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT)
+ info.pStencilAttachment = &att;
+ }
+ VKCTX(CmdBeginRendering)(cmdbuf, &info);
+ if (!full_clear) {
+ VkClearRect rect;
+ rect.rect = info.renderArea;
+ rect.baseArrayLayer = box->z;
+ rect.layerCount = box->depth;
+
+ VkClearAttachment clear_att;
+ clear_att.aspectMask = res->aspect;
+ clear_att.colorAttachment = 0;
+ clear_att.clearValue = att.clearValue;
+
+ VKCTX(CmdClearAttachments)(cmdbuf, 1, &clear_att, 1, &rect);
+ }
+ VKCTX(CmdEndRendering)(cmdbuf);
+ zink_batch_reference_resource_rw(&ctx->batch, res, true);
+ /* this will never destroy the surface */
+ pipe_surface_reference(&surf, NULL);
+}
+
void
zink_clear_texture(struct pipe_context *pctx,
struct pipe_resource *pres,
@@ -383,27 +519,24 @@ zink_clear_texture(struct pipe_context *pctx,
{
struct zink_context *ctx = zink_context(pctx);
struct zink_resource *res = zink_resource(pres);
- struct pipe_screen *pscreen = pctx->screen;
- struct u_rect region = zink_rect_from_box(box);
- bool needs_rp = !zink_blit_region_fills(region, pres->width0, pres->height0) || ctx->render_condition_active;
struct pipe_surface *surf = NULL;
+ struct pipe_scissor_state scissor = {box->x, box->y, box->x + box->width, box->y + box->height};
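+ /* both the color and zs paths below route through ctx->clear on a temporary framebuffer containing only the target surface */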
if (res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
union pipe_color_union color;
util_format_unpack_rgba(pres->format, color.ui, data, 1);
- if (pscreen->is_format_supported(pscreen, pres->format, pres->target, 0, 0,
- PIPE_BIND_RENDER_TARGET) && !needs_rp) {
- zink_batch_no_rp(ctx);
- clear_color_no_rp(ctx, res, &color, level, box->z, box->depth);
- } else {
- surf = create_clear_surface(pctx, pres, level, box);
- zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS);
- util_blitter_clear_render_target(ctx->blitter, surf, &color, box->x, box->y, box->width, box->height);
- }
- if (res->base.b.target == PIPE_BUFFER)
- util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width);
+ surf = create_clear_surface(pctx, pres, level, box);
+ util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
+ set_clear_fb(pctx, surf, NULL);
+ zink_blit_barriers(ctx, NULL, res, false);
+ ctx->blitting = true;
+ ctx->queries_disabled = true;
+ pctx->clear(pctx, PIPE_CLEAR_COLOR0, &scissor, &color, 0, 0);
+ util_blitter_restore_fb_state(ctx->blitter);
+ ctx->queries_disabled = false;
+ ctx->blitting = false;
} else {
float depth = 0.0;
uint8_t stencil = 0;
@@ -414,19 +547,21 @@ zink_clear_texture(struct pipe_context *pctx,
if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT)
util_format_unpack_s_8uint(pres->format, &stencil, data, 1);
- if (!needs_rp) {
- zink_batch_no_rp(ctx);
- clear_zs_no_rp(ctx, res, res->aspect, depth, stencil, level, box->z, box->depth);
- } else {
- unsigned flags = 0;
- if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT)
- flags |= PIPE_CLEAR_DEPTH;
- if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT)
- flags |= PIPE_CLEAR_STENCIL;
- surf = create_clear_surface(pctx, pres, level, box);
- zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS);
- util_blitter_clear_depth_stencil(ctx->blitter, surf, flags, depth, stencil, box->x, box->y, box->width, box->height);
- }
+ unsigned flags = 0;
+ if (res->aspect & VK_IMAGE_ASPECT_DEPTH_BIT)
+ flags |= PIPE_CLEAR_DEPTH;
+ if (res->aspect & VK_IMAGE_ASPECT_STENCIL_BIT)
+ flags |= PIPE_CLEAR_STENCIL;
+ surf = create_clear_surface(pctx, pres, level, box);
+ util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
+ zink_blit_barriers(ctx, NULL, res, false);
+ ctx->blitting = true;
+ set_clear_fb(pctx, NULL, surf);
+ ctx->queries_disabled = true;
+ pctx->clear(pctx, flags, &scissor, NULL, depth, stencil);
+ util_blitter_restore_fb_state(ctx->blitter);
+ ctx->queries_disabled = false;
+ ctx->blitting = false;
}
/* this will never destroy the surface */
pipe_surface_reference(&surf, NULL);
@@ -454,11 +589,10 @@ zink_clear_buffer(struct pipe_context *pctx,
- size is the number of bytes to fill, and must be either a multiple of 4,
or VK_WHOLE_SIZE to fill the range from offset to the end of the buffer
*/
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- zink_batch_reference_resource_rw(batch, res, true);
- util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + size);
- VKCTX(CmdFillBuffer)(batch->state->cmdbuf, res->obj->buffer, offset, size, *(uint32_t*)clear_value);
+ zink_resource_buffer_transfer_dst_barrier(ctx, res, offset, size);
+ VkCommandBuffer cmdbuf = zink_get_cmdbuf(ctx, NULL, res);
+ zink_batch_reference_resource_rw(&ctx->batch, res, true);
+ VKCTX(CmdFillBuffer)(cmdbuf, res->obj->buffer, offset, size, *(uint32_t*)clear_value);
return;
}
struct pipe_transfer *xfer;
@@ -484,10 +618,23 @@ zink_clear_render_target(struct pipe_context *pctx, struct pipe_surface *dst,
bool render_condition_enabled)
{
struct zink_context *ctx = zink_context(pctx);
- zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | (render_condition_enabled ? 0 : ZINK_BLIT_NO_COND_RENDER));
- util_blitter_clear_render_target(ctx->blitter, dst, color, dstx, dsty, width, height);
- if (!render_condition_enabled && ctx->render_condition_active)
+ zink_flush_dgc_if_enabled(ctx);
+ bool render_condition_active = ctx->render_condition_active;
+ if (!render_condition_enabled && render_condition_active) {
+ zink_stop_conditional_render(ctx);
+ ctx->render_condition_active = false;
+ }
+ util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
+ set_clear_fb(pctx, dst, NULL);
+ struct pipe_scissor_state scissor = {dstx, dsty, dstx + width, dsty + height};
+ zink_blit_barriers(ctx, NULL, zink_resource(dst->texture), false);
+ ctx->blitting = true;
+ pctx->clear(pctx, PIPE_CLEAR_COLOR0, &scissor, color, 0, 0);
+ util_blitter_restore_fb_state(ctx->blitter);
+ ctx->blitting = false;
+ if (!render_condition_enabled && render_condition_active)
zink_start_conditional_render(ctx);
+ ctx->render_condition_active = render_condition_active;
}
void
@@ -497,10 +644,36 @@ zink_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *dst,
bool render_condition_enabled)
{
struct zink_context *ctx = zink_context(pctx);
- zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | (render_condition_enabled ? 0 : ZINK_BLIT_NO_COND_RENDER));
- util_blitter_clear_depth_stencil(ctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height);
- if (!render_condition_enabled && ctx->render_condition_active)
+ /* check for stencil fallback */
+ bool blitting = ctx->blitting;
+ zink_flush_dgc_if_enabled(ctx);
+ bool render_condition_active = ctx->render_condition_active;
+ if (!render_condition_enabled && render_condition_active) {
+ zink_stop_conditional_render(ctx);
+ ctx->render_condition_active = false;
+ }
+ bool cur_attachment = zink_csurface(ctx->fb_state.zsbuf) == zink_csurface(dst);
+ if (dstx > ctx->fb_state.width || dsty > ctx->fb_state.height ||
+ dstx + width > ctx->fb_state.width ||
+ dsty + height > ctx->fb_state.height)
+ cur_attachment = false;
+ if (!cur_attachment) {
+ if (!blitting) {
+ util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
+ set_clear_fb(pctx, NULL, dst);
+ zink_blit_barriers(ctx, NULL, zink_resource(dst->texture), false);
+ ctx->blitting = true;
+ }
+ }
+ struct pipe_scissor_state scissor = {dstx, dsty, dstx + width, dsty + height};
+ pctx->clear(pctx, clear_flags, &scissor, NULL, depth, stencil);
+ if (!cur_attachment && !blitting) {
+ util_blitter_restore_fb_state(ctx->blitter);
+ ctx->blitting = false;
+ }
+ if (!render_condition_enabled && render_condition_active)
zink_start_conditional_render(ctx);
+ ctx->render_condition_active = render_condition_active;
}
bool
@@ -519,65 +692,43 @@ zink_fb_clear_first_needs_explicit(struct zink_framebuffer_clear *fb_clear)
return zink_fb_clear_element_needs_explicit(zink_fb_clear_element(fb_clear, 0));
}
-void
-zink_fb_clear_util_unpack_clear_color(struct zink_framebuffer_clear_data *clear, enum pipe_format format, union pipe_color_union *color)
-{
- const struct util_format_description *desc = util_format_description(format);
- if (clear->color.srgb) {
- /* if SRGB mode is disabled for the fb with a backing srgb image then we have to
- * convert this to srgb color
- */
- for (unsigned j = 0; j < MIN2(3, desc->nr_channels); j++) {
- assert(desc->channel[j].normalized);
- color->f[j] = util_format_srgb_to_linear_float(clear->color.color.f[j]);
- }
- color->f[3] = clear->color.color.f[3];
- } else {
- for (unsigned i = 0; i < 4; i++)
- color->f[i] = clear->color.color.f[i];
- }
-}
-
static void
fb_clears_apply_internal(struct zink_context *ctx, struct pipe_resource *pres, int i)
{
- struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[i];
-
if (!zink_fb_clear_enabled(ctx, i))
return;
if (ctx->batch.in_rp)
zink_clear_framebuffer(ctx, BITFIELD_BIT(i));
- else if (zink_resource(pres)->aspect == VK_IMAGE_ASPECT_COLOR_BIT) {
- if (zink_fb_clear_needs_explicit(fb_clear) || !check_3d_layers(ctx->fb_state.cbufs[i]))
- /* this will automatically trigger all the clears */
- zink_batch_rp(ctx);
- else {
- struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
- struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
- union pipe_color_union color;
- zink_fb_clear_util_unpack_clear_color(clear, psurf->format, &color);
-
- clear_color_no_rp(ctx, zink_resource(pres), &color,
- psurf->u.tex.level, psurf->u.tex.first_layer,
- psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1);
+ else {
+ struct zink_resource *res = zink_resource(pres);
+ bool queries_disabled = ctx->queries_disabled;
+ VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
+ /* slightly different from the u_blitter handling:
+ * this can be called recursively while unordered_blitting=true
+ */
+ bool can_reorder = zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering &&
+ !ctx->render_condition_active &&
+ !ctx->unordered_blitting &&
+ zink_get_cmdbuf(ctx, NULL, res) == ctx->batch.state->reordered_cmdbuf;
+ if (can_reorder) {
+ /* set unordered_blitting but NOT blitting:
+ * let begin_rendering handle layouts
+ */
+ ctx->unordered_blitting = true;
+ /* for unordered clears, swap the unordered cmdbuf for the main one for the whole op to avoid conditional hell */
+ ctx->batch.state->cmdbuf = ctx->batch.state->reordered_cmdbuf;
+ ctx->rp_changed = true;
+ ctx->queries_disabled = true;
+ ctx->batch.state->has_barriers = true;
}
- zink_fb_clear_reset(ctx, i);
- return;
- } else {
- if (zink_fb_clear_needs_explicit(fb_clear) || !check_3d_layers(ctx->fb_state.zsbuf))
- /* this will automatically trigger all the clears */
- zink_batch_rp(ctx);
- else {
- struct pipe_surface *psurf = ctx->fb_state.zsbuf;
- struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
- VkImageAspectFlags aspects = 0;
- if (clear->zs.bits & PIPE_CLEAR_DEPTH)
- aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
- if (clear->zs.bits & PIPE_CLEAR_STENCIL)
- aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
- clear_zs_no_rp(ctx, zink_resource(pres), aspects, clear->zs.depth, clear->zs.stencil,
- psurf->u.tex.level, psurf->u.tex.first_layer,
- psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1);
+ /* this will automatically trigger all the clears */
+ zink_batch_rp(ctx);
+ if (can_reorder) {
+ zink_batch_no_rp(ctx);
+ ctx->unordered_blitting = false;
+ ctx->rp_changed = true;
+ ctx->queries_disabled = queries_disabled;
+ ctx->batch.state->cmdbuf = cmdbuf;
}
}
zink_fb_clear_reset(ctx, i);
@@ -586,6 +737,7 @@ fb_clears_apply_internal(struct zink_context *ctx, struct pipe_resource *pres, i
void
zink_fb_clear_reset(struct zink_context *ctx, unsigned i)
{
+ unsigned rp_clears_enabled = ctx->clears_enabled;
util_dynarray_clear(&ctx->fb_clears[i].clears);
if (i == PIPE_MAX_COLOR_BUFS) {
ctx->clears_enabled &= ~PIPE_CLEAR_DEPTHSTENCIL;
@@ -594,6 +746,8 @@ zink_fb_clear_reset(struct zink_context *ctx, unsigned i)
ctx->clears_enabled &= ~(PIPE_CLEAR_COLOR0 << i);
ctx->rp_clears_enabled &= ~(PIPE_CLEAR_COLOR0 << i);
}
+ if (ctx->rp_clears_enabled != rp_clears_enabled)
+ ctx->rp_loadop_changed = true;
}
void
@@ -603,7 +757,6 @@ zink_fb_clears_apply(struct zink_context *ctx, struct pipe_resource *pres)
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) {
fb_clears_apply_internal(ctx, pres, i);
- return;
}
}
} else {
@@ -621,7 +774,6 @@ zink_fb_clears_discard(struct zink_context *ctx, struct pipe_resource *pres)
if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) {
if (zink_fb_clear_enabled(ctx, i)) {
zink_fb_clear_reset(ctx, i);
- return;
}
}
}
@@ -695,7 +847,6 @@ zink_fb_clears_apply_or_discard(struct zink_context *ctx, struct pipe_resource *
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) {
fb_clears_apply_or_discard_internal(ctx, pres, region, discard_only, false, i);
- return;
}
}
} else {
@@ -712,7 +863,6 @@ zink_fb_clears_apply_region(struct zink_context *ctx, struct pipe_resource *pres
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
if (ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture == pres) {
fb_clears_apply_or_discard_internal(ctx, pres, region, false, true, i);
- return;
}
}
} else {
@@ -721,3 +871,32 @@ zink_fb_clears_apply_region(struct zink_context *ctx, struct pipe_resource *pres
}
}
}
+
+void
+zink_fb_clear_rewrite(struct zink_context *ctx, unsigned idx, enum pipe_format before, enum pipe_format after)
+{
+ /* if the values for the clear color are incompatible, they must be rewritten;
+ * this occurs if:
+ * - the formats' srgb-ness does not match
+ * - the formats' signedness does not match
+ */
+ const struct util_format_description *bdesc = util_format_description(before);
+ const struct util_format_description *adesc = util_format_description(after);
+ int bfirst_non_void_chan = util_format_get_first_non_void_channel(before);
+ int afirst_non_void_chan = util_format_get_first_non_void_channel(after);
+ bool bsigned = false, asigned = false;
+ if (bfirst_non_void_chan > -1)
+ bsigned = bdesc->channel[bfirst_non_void_chan].type == UTIL_FORMAT_TYPE_SIGNED;
+ if (afirst_non_void_chan > -1)
+ asigned = adesc->channel[afirst_non_void_chan].type == UTIL_FORMAT_TYPE_SIGNED;
+ if (util_format_is_srgb(before) == util_format_is_srgb(after) &&
+ bsigned == asigned)
+ return;
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[idx];
+ for (int j = 0; j < zink_fb_clear_count(fb_clear); j++) {
+ struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, j);
+ uint32_t data[4];
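+ /* round-trip the stored clear color through the old format's packed encoding
+ * so it unpacks to the equivalent value for the new format
+ */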
+ util_format_pack_rgba(before, data, clear->color.ui, 1);
+ util_format_unpack_rgba(after, clear->color.ui, data, 1);
+ }
+}
diff --git a/src/gallium/drivers/zink/zink_clear.h b/src/gallium/drivers/zink/zink_clear.h
index 5f6492a17a7..8df6c70aa42 100644
--- a/src/gallium/drivers/zink/zink_clear.h
+++ b/src/gallium/drivers/zink/zink_clear.h
@@ -24,34 +24,9 @@
* Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
*/
-#include "util/u_dynarray.h"
-#include "pipe/p_state.h"
-#include <vulkan/vulkan.h>
#include "util/u_rect.h"
-
-struct zink_context;
-struct zink_resource;
-
-struct zink_framebuffer_clear_data {
- union {
- struct {
- union pipe_color_union color;
- bool srgb;
- } color;
- struct {
- float depth;
- unsigned stencil;
- uint8_t bits : 2; // PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL
- } zs;
- };
- struct pipe_scissor_state scissor;
- bool has_scissor;
- bool conditional;
-};
-
-struct zink_framebuffer_clear {
- struct util_dynarray clears;
-};
+#include "zink_types.h"
+#include "zink_screen.h"
void
zink_clear(struct pipe_context *pctx,
@@ -66,6 +41,12 @@ zink_clear_texture(struct pipe_context *ctx,
const struct pipe_box *box,
const void *data);
void
+zink_clear_texture_dynamic(struct pipe_context *ctx,
+ struct pipe_resource *p_res,
+ unsigned level,
+ const struct pipe_box *box,
+ const void *data);
+void
zink_clear_buffer(struct pipe_context *pctx,
struct pipe_resource *pres,
unsigned offset,
@@ -115,6 +96,13 @@ zink_fb_clear_element_needs_explicit(struct zink_framebuffer_clear_data *clear)
return clear->has_scissor || clear->conditional;
}
+static inline bool
+zink_fb_clear_full_exists(struct zink_context *ctx, unsigned clear_buffer)
+{
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[clear_buffer];
+ return zink_fb_clear_count(fb_clear) && !zink_fb_clear_first_needs_explicit(fb_clear);
+}
+
void
zink_clear_apply_conditionals(struct zink_context *ctx);
@@ -131,4 +119,4 @@ void
zink_fb_clears_apply_region(struct zink_context *ctx, struct pipe_resource *pres, struct u_rect region);
void
-zink_fb_clear_util_unpack_clear_color(struct zink_framebuffer_clear_data *clear, enum pipe_format format, union pipe_color_union *color);
+zink_fb_clear_rewrite(struct zink_context *ctx, unsigned idx, enum pipe_format before, enum pipe_format after);
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index dd8c5311a1c..63cc33c3e4c 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -21,8 +21,10 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
+#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"
@@ -30,313 +32,992 @@
#include "pipe/p_state.h"
#include "nir.h"
+#include "nir_xfb_info.h"
+#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_serialize.h"
+#include "compiler/nir/nir_builtin_builder.h"
#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_from_mesa.h"
#include "util/u_memory.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "vk_util.h"
+
+bool
+zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
+
+
static void
-create_vs_pushconst(nir_shader *nir)
+copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
- nir_variable *vs_pushconst;
- /* create compatible layout for the ntv push constant loader */
- struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
- fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
- fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
- fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
- fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
- fields[1].name = ralloc_asprintf(nir, "draw_id");
- fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
- vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
- glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
- vs_pushconst->data.location = INT_MAX; //doesn't really matter
+ assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
+ if (glsl_type_is_struct_or_ifc(dst->type)) {
+ for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
+ copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
+ }
+ } else if (glsl_type_is_array_or_matrix(dst->type)) {
+ unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
+ for (unsigned i = 0; i < count; i++) {
+ copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
+ }
+ } else {
+ nir_def *load = nir_load_deref(b, src);
+ nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
+ }
+}
+
+static bool
+is_clipcull_dist(int location)
+{
+ switch (location) {
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CULL_DIST0:
+ case VARYING_SLOT_CULL_DIST1:
+ return true;
+ default: break;
+ }
+ return false;
}
+#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
+
static void
-create_cs_pushconst(nir_shader *nir)
+create_gfx_pushconst(nir_shader *nir)
{
- nir_variable *cs_pushconst;
+#define PUSHCONST_MEMBER(member_idx, field) \
+fields[member_idx].type = \
+ glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
+fields[member_idx].name = ralloc_asprintf(nir, #field); \
+fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
+
+ nir_variable *pushconst;
/* create compatible layout for the ntv push constant loader */
- struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
- fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
- fields[0].name = ralloc_asprintf(nir, "work_dim");
- fields[0].offset = 0;
- cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
- glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
- cs_pushconst->data.location = INT_MAX; //doesn't really matter
+ struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
+ PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
+
+ pushconst = nir_variable_create(nir, nir_var_mem_push_const,
+ glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
+ "gfx_pushconst");
+ pushconst->data.location = INT_MAX; //doesn't really matter
+
+#undef PUSHCONST_MEMBER
}
static bool
-reads_work_dim(nir_shader *shader)
+lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
{
- return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
+ if (instr->intrinsic != nir_intrinsic_load_base_vertex)
+ return false;
+
+ b->cursor = nir_after_instr(&instr->instr);
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
+ load->num_components = 1;
+ nir_def_init(&load->instr, &load->def, 1, 32);
+ nir_builder_instr_insert(b, &load->instr);
+
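+ /* GL expects gl_BaseVertex to read as 0 for non-indexed draws, so select on the
+ * draw_mode_is_indexed push constant
+ */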
+ nir_def *composite = nir_build_alu(b, nir_op_bcsel,
+ nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
+ &instr->def,
+ nir_imm_int(b, 0),
+ NULL);
+
+ nir_def_rewrite_uses_after(&instr->def, composite,
+ composite->parent_instr);
+ return true;
}
static bool
-lower_discard_if_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
+lower_basevertex(nir_shader *shader)
{
- if (instr_->type != nir_instr_type_intrinsic)
+ if (shader->info.stage != MESA_SHADER_VERTEX)
return false;
- nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);
+ if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
+ return false;
+
+ return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
+ nir_metadata_dominance, NULL);
+}
- if (instr->intrinsic == nir_intrinsic_discard_if) {
- b->cursor = nir_before_instr(&instr->instr);
- nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
- nir_discard(b);
- nir_pop_if(b, if_stmt);
- nir_instr_remove(&instr->instr);
+static bool
+lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+ if (instr->intrinsic != nir_intrinsic_load_draw_id)
+ return false;
+
+ b->cursor = nir_before_instr(&instr->instr);
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
+ load->num_components = 1;
+ nir_def_init(&load->instr, &load->def, 1, 32);
+ nir_builder_instr_insert(b, &load->instr);
+
+ nir_def_rewrite_uses(&instr->def, &load->def);
+
+ return true;
+}
+
+static bool
+lower_drawid(nir_shader *shader)
+{
+ if (shader->info.stage != MESA_SHADER_VERTEX)
+ return false;
+
+ if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
+ return false;
+
+ return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
+ nir_metadata_dominance, NULL);
+}
+
+struct lower_gl_point_state {
+ nir_variable *gl_pos_out;
+ nir_variable *gl_point_size;
+};
+
+static bool
+lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct lower_gl_point_state *state = data;
+ nir_def *vp_scale, *pos;
+
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
+ intrin->intrinsic != nir_intrinsic_emit_vertex)
+ return false;
+
+ if (nir_intrinsic_stream_id(intrin) != 0)
+ return false;
+
+ if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
+ intrin->intrinsic == nir_intrinsic_end_primitive) {
+ nir_instr_remove(&intrin->instr);
return true;
}
- /* a shader like this (shaders@glsl-fs-discard-04):
- uniform int j, k;
+ b->cursor = nir_before_instr(instr);
+
+ // load the viewport scale from the push constants
+ nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
+ vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
+
+ // Load point info values
+ nir_def *point_size = nir_load_var(b, state->gl_point_size);
+ nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
+
+ // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
+ nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
+ w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
+ // half_w_delta = w_delta / 2
+ nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
+
+ // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
+ nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
+ h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
+ // half_h_delta = h_delta / 2
+ nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
+
+ nir_def *point_dir[4][2] = {
+ { nir_imm_float(b, -1), nir_imm_float(b, -1) },
+ { nir_imm_float(b, -1), nir_imm_float(b, 1) },
+ { nir_imm_float(b, 1), nir_imm_float(b, -1) },
+ { nir_imm_float(b, 1), nir_imm_float(b, 1) }
+ };
- void main()
- {
- for (int i = 0; i < j; i++) {
- if (i > k)
- continue;
- discard;
- }
- gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
- }
+ nir_def *point_pos_x = nir_channel(b, point_pos, 0);
+ nir_def *point_pos_y = nir_channel(b, point_pos, 1);
+ for (size_t i = 0; i < 4; i++) {
+ pos = nir_vec4(b,
+ nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
+ nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
+ nir_channel(b, point_pos, 2),
+ nir_channel(b, point_pos, 3));
+ nir_store_var(b, state->gl_pos_out, pos, 0xf);
- will generate nir like:
+ nir_emit_vertex(b);
+ }
- loop {
- //snip
- if ssa_11 {
- block block_5:
- / preds: block_4 /
- vec1 32 ssa_17 = iadd ssa_50, ssa_31
- / succs: block_7 /
- } else {
- block block_6:
- / preds: block_4 /
- intrinsic discard () () <-- not last instruction
- vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment
- / succs: block_7 /
- }
- //snip
- }
+ nir_end_primitive(b);
- which means that we can't assert like this:
+ nir_instr_remove(&intrin->instr);
- assert(instr->intrinsic != nir_intrinsic_discard ||
- nir_block_last_instr(instr->instr.block) == &instr->instr);
+ return true;
+}
+static bool
+lower_gl_point_gs(nir_shader *shader)
+{
+ struct lower_gl_point_state state;
- and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard
- */
+ shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
+ shader->info.gs.vertices_out *= 4;
+
+ // find the gl_Position and gl_PointSize outputs
+ state.gl_pos_out =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ VARYING_SLOT_POS);
+ state.gl_point_size =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ VARYING_SLOT_PSIZ);
+
+ // if gl_Position or gl_PointSize isn't written, we have nothing to do
+ if (!state.gl_pos_out || !state.gl_point_size)
+ return false;
+
+ return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
+ nir_metadata_dominance, &state);
+}
+
+struct lower_pv_mode_state {
+ nir_variable *varyings[VARYING_SLOT_MAX][4];
+ nir_variable *pos_counter;
+ nir_variable *out_pos_counter;
+ nir_variable *ring_offset;
+ unsigned ring_size;
+ unsigned primitive_vert_count;
+ unsigned prim;
+};
+
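+/* wrap a vertex index into the varying ring buffer, offset by the current ring start */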
+static nir_def*
+lower_pv_mode_gs_ring_index(nir_builder *b,
+ struct lower_pv_mode_state *state,
+ nir_def *index)
+{
+ nir_def *ring_offset = nir_load_var(b, state->ring_offset);
+ return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
+ state->ring_size);
+}
+
+/* Given the final deref of a chain of derefs, this function walks up the chain
+ * until it finds a var deref.
+ *
+ * It then recreates an identical chain that ends with the provided deref.
+ */
+static nir_deref_instr*
+replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
+{
+ nir_deref_instr *parent = nir_deref_instr_parent(old);
+ if (!parent)
+ return new;
+ switch(old->deref_type) {
+ case nir_deref_type_var:
+ return new;
+ case nir_deref_type_array:
+ return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
+ case nir_deref_type_struct:
+ return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
+ case nir_deref_type_array_wildcard:
+ case nir_deref_type_ptr_as_array:
+ case nir_deref_type_cast:
+ unreachable("unexpected deref type");
+ }
+ unreachable("impossible deref type");
+}
+
+static bool
+lower_pv_mode_gs_store(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ struct lower_pv_mode_state *state)
+{
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ if (nir_deref_mode_is(deref, nir_var_shader_out)) {
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ assert(state->varyings[location][location_frac]);
+ nir_def *pos_counter = nir_load_var(b, state->pos_counter);
+ nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
+ nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
+ nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
+ // recreate the chain of derefs that led to the store.
+ nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
+ nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
+ nir_instr_remove(&intrin->instr);
+ return true;
+ }
return false;
}
+static void
+lower_pv_mode_emit_rotated_prim(nir_builder *b,
+ struct lower_pv_mode_state *state,
+ nir_def *current_vertex)
+{
+ nir_def *two = nir_imm_int(b, 2);
+ nir_def *three = nir_imm_int(b, 3);
+ bool is_triangle = state->primitive_vert_count == 3;
+ /* This shader will always see the last three vertices emitted by the user gs.
+ * The following table is used to rotate primitives within a strip generated
+ * by the user gs such that the last vertex becomes the first.
+ *
+ * [lines, tris][even/odd index][vertex mod 3]
+ */
+ static const unsigned vert_maps[2][2][3] = {
+ {{1, 0, 0}, {1, 0, 0}},
+ {{2, 0, 1}, {2, 1, 0}}
+ };
+ /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
+ * is either the last or the second, depending on whether the triangle is at an odd
+ * or even position within the strip.
+ *
+ * odd or even primitive within draw
+ */
+ nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
+ for (unsigned i = 0; i < state->primitive_vert_count; i++) {
+ /* odd or even triangle within strip emitted by user GS
+ * this is handled using the table
+ */
+ nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
+ unsigned offset_even = vert_maps[is_triangle][0][i];
+ unsigned offset_odd = vert_maps[is_triangle][1][i];
+ nir_def *offset_even_value = nir_imm_int(b, offset_even);
+ nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
+ nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
+ offset_odd_value, offset_even_value);
+ /* Here we account for how triangles are provided to the gs from a strip.
+ * For even primitives we rotate by 3, meaning we do nothing.
+ * For odd primitives we rotate by 2, combined with the previous rotation this
+ * means the second vertex becomes the last.
+ */
+ if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
+ rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
+ nir_isub(b, three,
+ odd_prim)),
+ three);
+ /* Triangles that come from fans are provided to the gs the same way as
+ * odd triangles from a strip so always rotate by 2.
+ */
+ else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
+ rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
+ three);
+ rotated_i = nir_iadd(b, rotated_i, current_vertex);
+ nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ if (state->varyings[location][location_frac]) {
+ nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
+ nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
+ copy_vars(b, nir_build_deref_var(b, var), value);
+ }
+ }
+ nir_emit_vertex(b);
+ }
+}
+
static bool
-lower_discard_if(nir_shader *shader)
+lower_pv_mode_gs_emit_vertex(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ struct lower_pv_mode_state *state)
{
- return nir_shader_instructions_pass(shader,
- lower_discard_if_instr,
- nir_metadata_dominance,
- NULL);
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ // increment pos_counter
+ nir_def *pos_counter = nir_load_var(b, state->pos_counter);
+ nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
+
+ nir_instr_remove(&intrin->instr);
+ return true;
}
static bool
-lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
+lower_pv_mode_gs_end_primitive(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ struct lower_pv_mode_state *state)
{
- if (in->type != nir_instr_type_intrinsic)
- return false;
- nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
- if (instr->intrinsic != nir_intrinsic_load_work_dim)
- return false;
+ b->cursor = nir_before_instr(&intrin->instr);
- if (instr->intrinsic == nir_intrinsic_load_work_dim) {
- b->cursor = nir_after_instr(&instr->instr);
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
- load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
- load->num_components = 1;
- nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
- nir_builder_instr_insert(b, &load->instr);
+ nir_def *pos_counter = nir_load_var(b, state->pos_counter);
+ nir_push_loop(b);
+ {
+ nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
+ nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
+ nir_imm_int(b, state->primitive_vert_count)));
+ nir_jump(b, nir_jump_break);
+ nir_pop_if(b, NULL);
- nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+ lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
+ nir_end_primitive(b);
+
+ nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
}
+ nir_pop_loop(b, NULL);
+ /* Set the ring offset such that when position 0 is
+ * read we get the last value written
+ */
+ nir_store_var(b, state->ring_offset, pos_counter, 1);
+ nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
+ nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
+ nir_instr_remove(&intrin->instr);
return true;
}
static bool
-lower_work_dim(nir_shader *shader)
+lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
- if (shader->info.stage != MESA_SHADER_KERNEL)
+ if (instr->type != nir_instr_type_intrinsic)
return false;
- if (!reads_work_dim(shader))
+ struct lower_pv_mode_state *state = data;
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_store_deref:
+ return lower_pv_mode_gs_store(b, intrin, state);
+ case nir_intrinsic_copy_deref:
+ unreachable("should be lowered");
+ case nir_intrinsic_emit_vertex_with_counter:
+ case nir_intrinsic_emit_vertex:
+ return lower_pv_mode_gs_emit_vertex(b, intrin, state);
+ case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ return lower_pv_mode_gs_end_primitive(b, intrin, state);
+ default:
return false;
+ }
+}
+
+static bool
+lower_pv_mode_gs(nir_shader *shader, unsigned prim)
+{
+ nir_builder b;
+ struct lower_pv_mode_state state;
+ memset(state.varyings, 0, sizeof(state.varyings));
+
+ nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+ b = nir_builder_at(nir_before_impl(entry));
+
+ state.primitive_vert_count =
+ mesa_vertices_per_prim(shader->info.gs.output_primitive);
+ state.ring_size = shader->info.gs.vertices_out;
+
+ nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+
+ char name[100];
+ snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
+ state.varyings[location][location_frac] =
+ nir_local_variable_create(entry,
+ glsl_array_type(var->type,
+ state.ring_size,
+ false),
+ name);
+ }
+
+ state.pos_counter = nir_local_variable_create(entry,
+ glsl_uint_type(),
+ "__pos_counter");
+
+ state.out_pos_counter = nir_local_variable_create(entry,
+ glsl_uint_type(),
+ "__out_pos_counter");
- return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
+ state.ring_offset = nir_local_variable_create(entry,
+ glsl_uint_type(),
+ "__ring_offset");
+
+ state.prim = prim;
+
+ // initialize pos_counter and out_pos_counter
+ nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
+ nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
+ nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
+
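+ /* every vertex emitted past the first (primitive_vert_count - 1) yields one full rotated primitive */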
+ shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
+ (state.primitive_vert_count - 1)) *
+ state.primitive_vert_count;
+ return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
+ nir_metadata_dominance, &state);
+}
+
+struct lower_line_stipple_state {
+ nir_variable *pos_out;
+ nir_variable *stipple_out;
+ nir_variable *prev_pos;
+ nir_variable *pos_counter;
+ nir_variable *stipple_counter;
+ bool line_rectangular;
+};
+
+static nir_def *
+viewport_map(nir_builder *b, nir_def *vert,
+ nir_def *scale)
+{
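+ /* perspective-divide to NDC, then scale into viewport units */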
+ nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
+ nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
+ w_recip);
+ return nir_fmul(b, ndc_point, scale);
}
static bool
-lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
+lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
- if (instr->type != nir_instr_type_deref)
- return false;
- nir_deref_instr *deref = nir_instr_as_deref(instr);
- if (deref->deref_type != nir_deref_type_var)
+ struct lower_line_stipple_state *state = data;
+ if (instr->type != nir_instr_type_intrinsic)
return false;
- nir_variable *var = nir_deref_instr_get_variable(deref);
- if (var->data.mode != nir_var_shader_in)
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
+ intrin->intrinsic != nir_intrinsic_emit_vertex)
return false;
- if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
+
+ b->cursor = nir_before_instr(instr);
+
+ nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
+ // viewport-map endpoints
+ nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
+ nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
+ nir_def *prev = nir_load_var(b, state->prev_pos);
+ nir_def *curr = nir_load_var(b, state->pos_out);
+ prev = viewport_map(b, prev, vp_scale);
+ curr = viewport_map(b, curr, vp_scale);
+
+ // calculate length of line
+ nir_def *len;
+ if (state->line_rectangular)
+ len = nir_fast_distance(b, prev, curr);
+ else {
+ nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
+ len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
+ }
+ // update stipple_counter
+ nir_store_var(b, state->stipple_counter,
+ nir_fadd(b, nir_load_var(b, state->stipple_counter),
+ len), 1);
+ nir_pop_if(b, NULL);
+ // emit stipple out
+ nir_copy_var(b, state->stipple_out, state->stipple_counter);
+ nir_copy_var(b, state->prev_pos, state->pos_out);
+
+ // update prev_pos and pos_counter for next vertex
+ b->cursor = nir_after_instr(instr);
+ nir_store_var(b, state->pos_counter,
+ nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
+ 1), 1);
+
+ return true;
+}
+
+static bool
+lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
+{
+ nir_builder b;
+ struct lower_line_stipple_state state;
+
+ state.pos_out =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ VARYING_SLOT_POS);
+
+ // if position isn't written, we have nothing to do
+ if (!state.pos_out)
return false;
- /* create second variable for the split */
- nir_variable *var2 = nir_variable_clone(var, b->shader);
- /* split new variable into second slot */
- var2->data.driver_location++;
- nir_shader_add_variable(b->shader, var2);
+ state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
+ glsl_float_type(),
+ "__stipple");
+ state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+ state.stipple_out->data.driver_location = shader->num_outputs++;
+ state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
+ shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
+
+ // create temp variables
+ state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
+ glsl_vec4_type(),
+ "__prev_pos");
+ state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
+ glsl_uint_type(),
+ "__pos_counter");
+ state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
+ glsl_float_type(),
+ "__stipple_counter");
+
+ state.line_rectangular = line_rectangular;
+ // initialize pos_counter and stipple_counter
+ nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+ b = nir_builder_at(nir_before_impl(entry));
+ nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
+ nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
+
+ return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
+ nir_metadata_dominance, &state);
+}
- unsigned total_num_components = glsl_get_vector_elements(var->type);
- /* new variable is the second half of the dvec */
- var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
- /* clamp original variable to a dvec2 */
- deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
+static bool
+lower_line_stipple_fs(nir_shader *shader)
+{
+ nir_builder b;
+ nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+ b = nir_builder_at(nir_after_impl(entry));
+
+ // create stipple counter
+ nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
+ glsl_float_type(),
+ "__stipple");
+ stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+ stipple->data.driver_location = shader->num_inputs++;
+ stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
+ shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
+
+ nir_variable *sample_mask_out =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ FRAG_RESULT_SAMPLE_MASK);
+ if (!sample_mask_out) {
+ sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
+ glsl_uint_type(), "sample_mask");
+ sample_mask_out->data.driver_location = shader->num_outputs++;
+ sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
+ }
- /* create deref instr for new variable */
- b->cursor = nir_after_instr(instr);
- nir_deref_instr *deref2 = nir_build_deref_var(b, var2);
-
- nir_foreach_use_safe(use_src, &deref->dest.ssa) {
- nir_instr *use_instr = use_src->parent_instr;
- assert(use_instr->type == nir_instr_type_intrinsic &&
- nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref);
-
- /* this is a load instruction for the deref, and we need to split it into two instructions that we can
- * then zip back into a single ssa def */
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
- /* clamp the first load to 2 64bit components */
- intr->num_components = intr->dest.ssa.num_components = 2;
- b->cursor = nir_after_instr(use_instr);
- /* this is the second load instruction for the second half of the dvec3/4 components */
- nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
- intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa);
- intr2->num_components = total_num_components - 2;
- nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL);
- nir_builder_instr_insert(b, &intr2->instr);
-
- nir_ssa_def *def[4];
- /* create a new dvec3/4 comprised of all the loaded components from both variables */
- def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0));
- def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1));
- def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0));
- if (total_num_components == 4)
- def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1));
- nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
- /* use the assembled dvec3/4 for all other uses of the load */
- nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
- new_vec->parent_instr);
+ nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
+ nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
+ nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
+ pattern = nir_iand_imm(&b, pattern, 0xffff);
+
+ nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
+ nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
+ nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
+ nir_store_var(&b, v, sample_mask_in, 1);
+ nir_store_var(&b, sample_mask, sample_mask_in, 1);
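+ /* walk each set bit of the incoming sample mask: interpolate the stipple
+ * counter at that sample and clear the bit when the pattern masks it off
+ */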
+ nir_push_loop(&b);
+ {
+ nir_def *value = nir_load_var(&b, v);
+ nir_def *index = nir_ufind_msb(&b, value);
+ nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
+ nir_def *new_value = nir_ixor(&b, value, index_mask);
+ nir_store_var(&b, v, new_value, 1);
+ nir_push_if(&b, nir_ieq_imm(&b, value, 0));
+ nir_jump(&b, nir_jump_break);
+ nir_pop_if(&b, NULL);
+
+ nir_def *stipple_pos =
+ nir_interp_deref_at_sample(&b, 1, 32,
+ &nir_build_deref_var(&b, stipple)->def, index);
+ stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
+ nir_imm_float(&b, 16.0));
+ stipple_pos = nir_f2i32(&b, stipple_pos);
+ nir_def *bit =
+ nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
+ nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
+ {
+ nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
+ sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
+ nir_store_var(&b, sample_mask, sample_mask_value, 1);
+ }
+ nir_pop_if(&b, NULL);
}
+ nir_pop_loop(&b, NULL);
+ nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
return true;
}
-/* "64-bit three- and four-component vectors consume two consecutive locations."
- * - 14.1.4. Location Assignment
- *
- * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
- * are assigned to consecutive locations, loaded separately, and then assembled back into a
- * composite value that's used in place of the original loaded ssa src
- */
+struct lower_line_smooth_state {
+ nir_variable *pos_out;
+ nir_variable *line_coord_out;
+ nir_variable *prev_pos;
+ nir_variable *pos_counter;
+ nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
+ *varyings[VARYING_SLOT_MAX][4]; // location_frac
+};
+
static bool
-lower_64bit_vertex_attribs(nir_shader *shader)
+lower_line_smooth_gs_store(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ struct lower_line_smooth_state *state)
{
- if (shader->info.stage != MESA_SHADER_VERTEX)
- return false;
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ if (nir_deref_mode_is(deref, nir_var_shader_out)) {
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ // we take care of position elsewhere
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ if (location != VARYING_SLOT_POS) {
+ assert(state->varyings[location]);
+ nir_store_var(b, state->varyings[location][location_frac],
+ intrin->src[1].ssa,
+ nir_intrinsic_write_mask(intrin));
+ nir_instr_remove(&intrin->instr);
+ return true;
+ }
+ }
- return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
+ return false;
}
static bool
-lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
+lower_line_smooth_gs_emit_vertex(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ struct lower_line_smooth_state *state)
{
- if (in->type != nir_instr_type_intrinsic)
- return false;
- nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
- if (instr->intrinsic != nir_intrinsic_load_base_vertex)
- return false;
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
+ nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
+ nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
+ nir_def *prev = nir_load_var(b, state->prev_pos);
+ nir_def *curr = nir_load_var(b, state->pos_out);
+ nir_def *prev_vp = viewport_map(b, prev, vp_scale);
+ nir_def *curr_vp = viewport_map(b, curr, vp_scale);
+
+ nir_def *width = nir_load_push_constant_zink(b, 1, 32,
+ nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
+ nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
+
+ const unsigned yx[2] = { 1, 0 };
+ nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
+ nir_def *len = nir_fast_length(b, vec);
+ nir_def *dir = nir_normalize(b, vec);
+ nir_def *half_length = nir_fmul_imm(b, len, 0.5);
+ half_length = nir_fadd_imm(b, half_length, 0.5);
+
+ nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
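+ /* perpendicular to the line direction in screen space, scaled to half the
+ * line width and mapped back into NDC units
+ */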
+ nir_def *tangent =
+ nir_fmul(b,
+ nir_fmul(b,
+ nir_swizzle(b, dir, yx, 2),
+ nir_imm_vec2(b, 1.0, -1.0)),
+ vp_scale_rcp);
+ tangent = nir_fmul(b, tangent, half_width);
+ tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
+ dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
+
+ nir_def *line_offets[8] = {
+ nir_fadd(b, tangent, nir_fneg(b, dir)),
+ nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
+ tangent,
+ nir_fneg(b, tangent),
+ tangent,
+ nir_fneg(b, tangent),
+ nir_fadd(b, tangent, dir),
+ nir_fadd(b, nir_fneg(b, tangent), dir),
+ };
+ nir_def *line_coord =
+ nir_vec4(b, half_width, half_width, half_length, half_length);
+ nir_def *line_coords[8] = {
+ nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
+ nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
+ };
- b->cursor = nir_after_instr(&instr->instr);
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
- load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_intrinsic_set_range(load, 4);
- load->num_components = 1;
- nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
- nir_builder_instr_insert(b, &load->instr);
+ /* emit first end-cap, and start line */
+ for (int i = 0; i < 4; ++i) {
+ nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ if (state->prev_varyings[location][location_frac])
+ nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
+ }
+ nir_store_var(b, state->pos_out,
+ nir_fadd(b, prev, nir_fmul(b, line_offets[i],
+ nir_channel(b, prev, 3))), 0xf);
+ nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
+ nir_emit_vertex(b);
+ }
- nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
- nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
- &instr->dest.ssa,
- nir_imm_int(b, 0),
- NULL);
+ /* finish line and emit last end-cap */
+ for (int i = 4; i < 8; ++i) {
+ nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ if (state->varyings[location][location_frac])
+ nir_copy_var(b, var, state->varyings[location][location_frac]);
+ }
+ nir_store_var(b, state->pos_out,
+ nir_fadd(b, curr, nir_fmul(b, line_offets[i],
+ nir_channel(b, curr, 3))), 0xf);
+ nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
+ nir_emit_vertex(b);
+ }
+ nir_end_primitive(b);
- nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
- composite->parent_instr);
+ nir_pop_if(b, NULL);
+
+ nir_copy_var(b, state->prev_pos, state->pos_out);
+ nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ if (state->varyings[location][location_frac])
+ nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
+ }
+
+ // update prev_pos and pos_counter for next vertex
+ b->cursor = nir_after_instr(&intrin->instr);
+ nir_store_var(b, state->pos_counter,
+ nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
+ 1), 1);
+
+ nir_instr_remove(&intrin->instr);
return true;
}
static bool
-lower_basevertex(nir_shader *shader)
+lower_line_smooth_gs_end_primitive(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ struct lower_line_smooth_state *state)
{
- if (shader->info.stage != MESA_SHADER_VERTEX)
- return false;
+ b->cursor = nir_before_instr(&intrin->instr);
- if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
- return false;
+ // reset line counter
+ nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
- return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
+ nir_instr_remove(&intrin->instr);
+ return true;
}
-
static bool
-lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
+lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
- if (in->type != nir_instr_type_intrinsic)
+ if (instr->type != nir_instr_type_intrinsic)
return false;
- nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
- if (instr->intrinsic != nir_intrinsic_load_draw_id)
+
+ struct lower_line_smooth_state *state = data;
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_store_deref:
+ return lower_line_smooth_gs_store(b, intrin, state);
+ case nir_intrinsic_copy_deref:
+ unreachable("should be lowered");
+ case nir_intrinsic_emit_vertex_with_counter:
+ case nir_intrinsic_emit_vertex:
+ return lower_line_smooth_gs_emit_vertex(b, intrin, state);
+ case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ return lower_line_smooth_gs_end_primitive(b, intrin, state);
+ default:
return false;
+ }
+}
- b->cursor = nir_before_instr(&instr->instr);
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
- load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
- nir_intrinsic_set_range(load, 4);
- load->num_components = 1;
- nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
- nir_builder_instr_insert(b, &load->instr);
+static bool
+lower_line_smooth_gs(nir_shader *shader)
+{
+ nir_builder b;
+ struct lower_line_smooth_state state;
+
+ memset(state.varyings, 0, sizeof(state.varyings));
+ memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
+ nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
+ gl_varying_slot location = var->data.location;
+ unsigned location_frac = var->data.location_frac;
+ if (location == VARYING_SLOT_POS)
+ continue;
- nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+ char name[100];
+ snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
+ state.varyings[location][location_frac] =
+ nir_variable_create(shader, nir_var_shader_temp,
+ var->type, name);
- return true;
+ snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
+ state.prev_varyings[location][location_frac] =
+ nir_variable_create(shader, nir_var_shader_temp,
+ var->type, name);
+ }
+
+ state.pos_out =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ VARYING_SLOT_POS);
+
+ // if position isn't written, we have nothing to do
+ if (!state.pos_out)
+ return false;
+
+ unsigned location = 0;
+ nir_foreach_shader_in_variable(var, shader) {
+ if (var->data.driver_location >= location)
+ location = var->data.driver_location + 1;
+ }
+
+ state.line_coord_out =
+ nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
+ "__line_coord");
+ state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+ state.line_coord_out->data.driver_location = location;
+ state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
+ shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
+ shader->num_outputs++;
+
+ // create temp variables
+ state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
+ glsl_vec4_type(),
+ "__prev_pos");
+ state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
+ glsl_uint_type(),
+ "__pos_counter");
+
+ // initialize pos_counter
+ nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+ b = nir_builder_at(nir_before_impl(entry));
+ nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
+
+ shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
+ shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
+
+ return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
+ nir_metadata_dominance, &state);
}
static bool
-lower_drawid(nir_shader *shader)
+lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
- if (shader->info.stage != MESA_SHADER_VERTEX)
- return false;
+ int dummy;
+ nir_builder b;
- if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
- return false;
+ nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
+ if (lower_stipple) {
+ stipple_counter = nir_variable_create(shader, nir_var_shader_in,
+ glsl_float_type(),
+ "__stipple");
+ stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+ stipple_counter->data.driver_location = shader->num_inputs++;
+ stipple_counter->data.location =
+ MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
+ shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
+
+ stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
+ glsl_uint_type(),
+ "stipple_pattern");
+
+ // initialize stipple_pattern
+ nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+ b = nir_builder_at(nir_before_impl(entry));
+ nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
+ nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
+ nir_store_var(&b, stipple_pattern, pattern, 1);
+ }
- return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
+ nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
+ return true;
}
static bool
@@ -353,11 +1034,314 @@ lower_dual_blend(nir_shader *shader)
return progress;
}
+static bool
+lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ if (instr->type != nir_instr_type_alu)
+ return false;
+ nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
+ if (alu_instr->op != nir_op_pack_64_2x32 &&
+ alu_instr->op != nir_op_unpack_64_2x32)
+ return false;
+ b->cursor = nir_before_instr(&alu_instr->instr);
+ nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
+ nir_def *dest;
+ switch (alu_instr->op) {
+ case nir_op_pack_64_2x32:
+ dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
+ break;
+ case nir_op_unpack_64_2x32:
+ dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
+ break;
+ default:
+ unreachable("Impossible opcode");
+ }
+ nir_def_rewrite_uses(&alu_instr->def, dest);
+ nir_instr_remove(&alu_instr->instr);
+ return true;
+}
+
+static bool
+lower_64bit_pack(nir_shader *shader)
+{
+ return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
+}
+
+nir_shader *
+zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
+ const nir_shader *prev_stage)
+{
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
+ options,
+ "filled quad gs");
+
+ nir_shader *nir = b.shader;
+ nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
+ nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
+ nir->info.gs.vertices_in = 4;
+ nir->info.gs.vertices_out = 6;
+ nir->info.gs.invocations = 1;
+ nir->info.gs.active_stream_mask = 1;
+
+ nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
+ memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
+ if (prev_stage->xfb_info) {
+ size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
+ nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
+ }
+
+ nir_variable *in_vars[VARYING_SLOT_MAX];
+ nir_variable *out_vars[VARYING_SLOT_MAX];
+ unsigned num_vars = 0;
+
+ /* Create input/output variables. */
+ nir_foreach_shader_out_variable(var, prev_stage) {
+ assert(!var->data.patch);
+
+ /* input vars can't be created for those */
+ if (var->data.location == VARYING_SLOT_LAYER ||
+ var->data.location == VARYING_SLOT_VIEW_INDEX ||
+ /* psiz not needed for quads */
+ var->data.location == VARYING_SLOT_PSIZ)
+ continue;
+
+ char name[100];
+ if (var->name)
+ snprintf(name, sizeof(name), "in_%s", var->name);
+ else
+ snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
+
+ nir_variable *in = nir_variable_clone(var, nir);
+ ralloc_free(in->name);
+ in->name = ralloc_strdup(in, name);
+ in->type = glsl_array_type(var->type, 4, false);
+ in->data.mode = nir_var_shader_in;
+ nir_shader_add_variable(nir, in);
+
+ if (var->name)
+ snprintf(name, sizeof(name), "out_%s", var->name);
+ else
+ snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
+
+ nir_variable *out = nir_variable_clone(var, nir);
+ ralloc_free(out->name);
+ out->name = ralloc_strdup(out, name);
+ out->data.mode = nir_var_shader_out;
+ nir_shader_add_variable(nir, out);
+
+ in_vars[num_vars] = in;
+ out_vars[num_vars++] = out;
+ }
+
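+ /* a quad (v0..v3) is split into two triangles; the index order depends on
+ * whether the first or the last vertex is the provoking vertex
+ */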
+ int mapping_first[] = {0, 1, 2, 0, 2, 3};
+ int mapping_last[] = {0, 1, 3, 1, 2, 3};
+ nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
+ last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
+ for (unsigned i = 0; i < 6; ++i) {
+ /* swap indices 2 and 3 */
+ nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
+ nir_imm_int(&b, mapping_last[i]),
+ nir_imm_int(&b, mapping_first[i]));
+ /* Copy inputs to outputs. */
+ for (unsigned j = 0; j < num_vars; ++j) {
+ if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
+ continue;
+ }
+ nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
+ copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
+ }
+ nir_emit_vertex(&b, 0);
+ if (i == 2)
+ nir_end_primitive(&b, 0);
+ }
+
+ nir_end_primitive(&b, 0);
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
+ return nir;
+}
+
+static bool
+lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ void *data)
+{
+ int inlined_uniform_offset;
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_flat_mask:
+ inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
+ break;
+ case nir_intrinsic_load_provoking_last:
+ inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
+ break;
+ default:
+ return false;
+ }
+
+ b->cursor = nir_before_instr(&intrin->instr);
+ assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
+ /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
+ * anything with a different bit_size) so we need to split the load. */
+ int num_dwords = intrin->def.bit_size / 32;
+ nir_def *dwords[2] = {NULL};
+ for (unsigned i = 0; i < num_dwords; i++)
+ dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
+ nir_imm_int(b, inlined_uniform_offset + i),
+ .align_mul = intrin->def.bit_size / 8,
+ .align_offset = 0,
+ .range_base = 0, .range = ~0);
+ nir_def *new_dest_def;
+ if (intrin->def.bit_size == 32)
+ new_dest_def = dwords[0];
+ else
+ new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
+ nir_def_rewrite_uses(&intrin->def, new_dest_def);
+ nir_instr_remove(&intrin->instr);
+ return true;
+}
+
+bool
+zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
+{
+ return nir_shader_intrinsics_pass(nir,
+ lower_system_values_to_inlined_uniforms_instr,
+ nir_metadata_dominance, NULL);
+}
+
+/* from radeonsi */
+static unsigned
+amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
+{
+ /* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */
+
+ switch (consumer->info.stage) {
+ case MESA_SHADER_TESS_CTRL: /* VS->TCS */
+ /* Non-amplifying shaders can always have their varying expressions
+ * moved into later shaders.
+ */
+ return UINT_MAX;
+
+ case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
+ return consumer->info.gs.vertices_in == 1 ? UINT_MAX :
+ consumer->info.gs.vertices_in == 2 ? 20 : 14;
+
+ case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
+ case MESA_SHADER_FRAGMENT:
+ /* Up to 3 uniforms and 5 ALUs. */
+ return 14;
+
+ default:
+ unreachable("unexpected shader stage");
+ }
+}
+
+/* from radeonsi */
+static unsigned
+amd_varying_estimate_instr_cost(nir_instr *instr)
+{
+ unsigned dst_bit_size, src_bit_size, num_dst_dwords;
+ nir_op alu_op;
+
+ /* This is a very loose approximation based on gfx10. */
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
+ src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
+ alu_op = nir_instr_as_alu(instr)->op;
+ num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
+
+ switch (alu_op) {
+ case nir_op_mov:
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ case nir_op_vec5:
+ case nir_op_vec8:
+ case nir_op_vec16:
+ case nir_op_fabs:
+ case nir_op_fneg:
+ case nir_op_fsat:
+ return 0;
+
+ case nir_op_imul:
+ case nir_op_umul_low:
+ return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
+
+ case nir_op_imul_high:
+ case nir_op_umul_high:
+ case nir_op_imul_2x32_64:
+ case nir_op_umul_2x32_64:
+ return 4;
+
+ case nir_op_fexp2:
+ case nir_op_flog2:
+ case nir_op_frcp:
+ case nir_op_frsq:
+ case nir_op_fsqrt:
+ case nir_op_fsin:
+ case nir_op_fcos:
+ case nir_op_fsin_amd:
+ case nir_op_fcos_amd:
+ return 4; /* FP16 & FP32. */
+
+ case nir_op_fpow:
+ return 4 + 1 + 4; /* log2 + mul + exp2 */
+
+ case nir_op_fsign:
+ return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
+
+ case nir_op_idiv:
+ case nir_op_udiv:
+ case nir_op_imod:
+ case nir_op_umod:
+ case nir_op_irem:
+ return dst_bit_size == 64 ? 80 : 40;
+
+ case nir_op_fdiv:
+ return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
+
+ case nir_op_fmod:
+ case nir_op_frem:
+ return dst_bit_size == 64 ? 80 : 8;
+
+ default:
+ /* Double opcodes. Comparisons always have full performance. */
+ if ((dst_bit_size == 64 &&
+ nir_op_infos[alu_op].output_type & nir_type_float) ||
+ (dst_bit_size >= 8 && src_bit_size == 64 &&
+ nir_op_infos[alu_op].input_types[0] & nir_type_float))
+ return 16;
+
+ return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
+ }
+
+ case nir_instr_type_intrinsic:
+ dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
+ num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
+
+ switch (nir_instr_as_intrinsic(instr)->intrinsic) {
+ case nir_intrinsic_load_deref:
+ /* Uniform or UBO load.
+ * Set a low cost to balance the number of scalar loads and ALUs.
+ */
+ return 3 * num_dst_dwords;
+
+ default:
+ unreachable("unexpected intrinsic");
+ }
+
+ default:
+ unreachable("unexpected instr type");
+ }
+}
+
void
zink_screen_init_compiler(struct zink_screen *screen)
{
static const struct nir_shader_compiler_options
default_options = {
+ .io_options = nir_io_glsl_lower_derefs,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
@@ -366,25 +1350,41 @@ zink_screen_init_compiler(struct zink_screen *screen)
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsat = true,
+ .lower_hadd = true,
+ .lower_iadd_sat = true,
+ .lower_fisnormal = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
+
+ /* We can only support 32-bit ldexp, but NIR doesn't have a flag
+ * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
+ * ldexp, so we don't just always lower it in NIR). Given that ldexp is
+ * effectively unused (no instances in shader-db), it's not worth the
+ * effort to add such a flag.
+ */
+ .lower_ldexp = true,
+
.lower_mul_high = true,
- .lower_rotate = true,
+ .lower_to_scalar = true,
.lower_uadd_carry = true,
- .lower_pack_64_2x32_split = true,
- .lower_unpack_64_2x32_split = true,
- .lower_pack_32_2x16_split = true,
- .lower_unpack_32_2x16_split = true,
+ .compact_arrays = true,
+ .lower_usub_borrow = true,
+ .lower_uadd_sat = true,
+ .lower_usub_sat = true,
.lower_vector_cmp = true,
.lower_int64_options = 0,
- .lower_doubles_options = ~nir_lower_fp64_full_software,
+ .lower_doubles_options = nir_lower_dround_even,
.lower_uniforms_to_ubo = true,
.has_fsub = true,
.has_isub = true,
.lower_mul_2x32_64 = true,
.support_16bit_alu = true, /* not quite what it sounds like */
+ .support_indirect_inputs = BITFIELD_MASK(MESA_SHADER_COMPUTE),
+ .support_indirect_outputs = BITFIELD_MASK(MESA_SHADER_COMPUTE),
+ .max_unroll_iterations = 0,
+ .use_interpolated_input_intrinsics = true,
};
screen->nir_options = default_options;
@@ -396,13 +1396,49 @@ zink_screen_init_compiler(struct zink_screen *screen)
screen->nir_options.lower_doubles_options = ~0;
screen->nir_options.lower_flrp64 = true;
screen->nir_options.lower_ffma64 = true;
+ /* soft fp64 function inlining will blow up loop bodies and effectively
+ * stop Vulkan drivers from unrolling the loops.
+ */
+ screen->nir_options.max_unroll_iterations_fp64 = 32;
+ }
+
+ if (screen->driver_workarounds.io_opt) {
+ screen->nir_options.io_options |= nir_io_glsl_opt_varyings;
+
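+ /* pick per-driver cost callbacks for NIR varying optimization; only the AMD heuristics exist so far */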
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_RADV:
+ case VK_DRIVER_ID_AMD_OPEN_SOURCE:
+ case VK_DRIVER_ID_AMD_PROPRIETARY:
+ screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
+ screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
+ break;
+ default:
+ mesa_logw("zink: instruction costs not implemented for this driver!");
+ screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
+ screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
+ }
}
+
+ /*
+ The OpFRem and OpFMod instructions use cheap approximations of remainder,
+ and the error can be large due to the discontinuity in trunc() and floor().
+ This can produce mathematically unexpected results in some cases, such as
+ FMod(x,x) computing x rather than 0, and can also cause the result to have
+ a different sign than the infinitely precise result.
+
+ -Table 84. Precision of core SPIR-V Instructions
+ * for drivers that are known to have imprecise fmod for doubles, lower dmod
+ */
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
+ screen->nir_options.lower_doubles_options = nir_lower_dmod;
}
const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir,
- enum pipe_shader_type shader)
+ gl_shader_stage shader)
{
assert(ir == PIPE_SHADER_IR_NIR);
return &zink_screen(pscreen)->nir_options;
@@ -420,23 +1456,201 @@ zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
return tgsi_to_nir(tokens, screen, false);
}
+
+static bool
+def_is_64bit(nir_def *def, void *state)
+{
+ bool *lower = (bool *)state;
+ if (def && (def->bit_size == 64)) {
+ *lower = true;
+ return false;
+ }
+ return true;
+}
+
+static bool
+src_is_64bit(nir_src *src, void *state)
+{
+ bool *lower = (bool *)state;
+ if (src && (nir_src_bit_size(*src) == 64)) {
+ *lower = true;
+ return false;
+ }
+ return true;
+}
+
+static bool
+filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
+{
+ bool lower = false;
+ /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
+ * doesn't have const variants, so do the ugly const_cast here. */
+ nir_instr *instr = (nir_instr *)const_instr;
+
+ nir_foreach_def(instr, def_is_64bit, &lower);
+ if (lower)
+ return true;
+ nir_foreach_src(instr, src_is_64bit, &lower);
+ return lower;
+}
+
+static bool
+filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
+{
+ nir_instr *instr = (nir_instr *)const_instr;
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_pack_64_2x32_split:
+ case nir_op_pack_32_2x16_split:
+ case nir_op_unpack_32_2x16_split_x:
+ case nir_op_unpack_32_2x16_split_y:
+ case nir_op_unpack_64_2x32_split_x:
+ case nir_op_unpack_64_2x32_split_y:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+
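+ /* block variables indexed by bit_size >> 4: 8bit->[0], 16bit->[1], 32bit->[2], 64bit->[4] */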
+struct bo_vars {
+ nir_variable *uniforms[5];
+ nir_variable *ubo[5];
+ nir_variable *ssbo[5];
+ uint32_t first_ubo;
+ uint32_t first_ssbo;
+};
+
+static struct bo_vars
+get_bo_vars(struct zink_shader *zs, nir_shader *shader)
+{
+ struct bo_vars bo;
+ memset(&bo, 0, sizeof(bo));
+ if (zs->ubos_used)
+ bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
+ assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
+ if (zs->ssbos_used)
+ bo.first_ssbo = ffs(zs->ssbos_used) - 1;
+ assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
+ nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
+ unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
+ if (var->data.mode == nir_var_mem_ssbo) {
+ assert(!bo.ssbo[idx]);
+ bo.ssbo[idx] = var;
+ } else {
+ if (var->data.driver_location) {
+ assert(!bo.ubo[idx]);
+ bo.ubo[idx] = var;
+ } else {
+ assert(!bo.uniforms[idx]);
+ bo.uniforms[idx] = var;
+ }
+ }
+ }
+ return bo;
+}
+
+static bool
+bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct bo_vars *bo = data;
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_variable *var = NULL;
+ nir_def *offset = NULL;
+ bool is_load = true;
+ b->cursor = nir_before_instr(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_ssbo:
+ var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4];
+ offset = intr->src[2].ssa;
+ is_load = false;
+ break;
+ case nir_intrinsic_load_ssbo:
+ var = bo->ssbo[intr->def.bit_size >> 4];
+ offset = intr->src[1].ssa;
+ break;
+ case nir_intrinsic_load_ubo:
+ if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
+ var = bo->uniforms[intr->def.bit_size >> 4];
+ else
+ var = bo->ubo[intr->def.bit_size >> 4];
+ offset = intr->src[1].ssa;
+ break;
+ default:
+ return false;
+ }
+ nir_src offset_src = nir_src_for_ssa(offset);
+ if (!nir_src_is_const(offset_src))
+ return false;
+
+ unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
+ const struct glsl_type *strct_type = glsl_get_array_element(var->type);
+ unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
+ bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
+ if (has_unsized || offset_bytes + intr->num_components - 1 < size)
+ return false;
+
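+ /* the constant offset lies entirely past the sized array: loads return zero and the access is deleted */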
+ unsigned rewrites = 0;
+ nir_def *result[2];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ if (offset_bytes + i >= size) {
+ rewrites++;
+ if (is_load)
+ result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
+ }
+ }
+ assert(rewrites == intr->num_components);
+ if (is_load) {
+ nir_def *load = nir_vec(b, result, intr->num_components);
+ nir_def_rewrite_uses(&intr->def, load);
+ }
+ nir_instr_remove(instr);
+ return true;
+}
+
+static bool
+bound_bo_access(nir_shader *shader, struct zink_shader *zs)
+{
+ struct bo_vars bo = get_bo_vars(zs, shader);
+ return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
+}
+
static void
-optimize_nir(struct nir_shader *s)
+optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
{
bool progress;
do {
progress = false;
+ if (s->options->lower_int64_options)
+ NIR_PASS_V(s, nir_lower_int64);
+ if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
+ NIR_PASS_V(s, lower_64bit_pack);
NIR_PASS_V(s, nir_lower_vars_to_ssa);
+ NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
+ NIR_PASS(progress, s, nir_opt_copy_prop_vars);
NIR_PASS(progress, s, nir_copy_prop);
NIR_PASS(progress, s, nir_opt_remove_phis);
+ if (s->options->lower_int64_options) {
+ NIR_PASS(progress, s, nir_lower_64bit_phis);
+ NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
+ }
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
+ NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, zink_nir_lower_b2b);
+ if (zs)
+ NIR_PASS(progress, s, bound_bo_access, zs);
+ if (can_shrink)
+ NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
} while (progress);
do {
@@ -458,16 +1672,17 @@ optimize_nir(struct nir_shader *s)
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
+ bool ms = data != NULL;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_deref)
return false;
- nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
- if (var != data)
+ nir_variable *var = nir_intrinsic_get_var(intr, 0);
+ if (!var->data.fb_fetch_output)
return false;
b->cursor = nir_after_instr(instr);
- nir_variable *fbfetch = nir_variable_clone(data, b->shader);
+ nir_variable *fbfetch = nir_variable_clone(var, b->shader);
/* If Dim is SubpassData, ... Image Format must be Unknown
* - SPIRV OpTypeImage specification
*/
@@ -475,18 +1690,20 @@ lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
fbfetch->data.mode = nir_var_uniform;
fbfetch->data.binding = ZINK_FBFETCH_BINDING;
- fbfetch->type = glsl_image_type(GLSL_SAMPLER_DIM_SUBPASS, false, GLSL_TYPE_FLOAT);
+ fbfetch->data.sample = ms;
+ enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
+ fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
nir_shader_add_variable(b->shader, fbfetch);
- nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
- nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), nir_ssa_undef(b, 1, 32), nir_imm_int(b, 0));
- unsigned swiz[4] = {2, 1, 0, 3};
- nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4);
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle);
+ nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
+ nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
+ nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
+ nir_def_rewrite_uses(&intr->def, load);
return true;
}
static bool
-lower_fbfetch(nir_shader *shader, nir_variable **fbfetch)
+lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
nir_foreach_shader_out_variable(var, shader) {
if (var->data.fb_fetch_output) {
@@ -497,71 +1714,375 @@ lower_fbfetch(nir_shader *shader, nir_variable **fbfetch)
assert(*fbfetch);
if (!*fbfetch)
return false;
- return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, *fbfetch);
+ return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
+}
+
+/*
+ * Add a check for out of bounds LOD for every texel fetch op
+ * It boils down to:
+ * - if (lod < query_levels(tex))
+ * - res = txf(tex)
+ * - else
+ * - res = (0, 0, 0, 1)
+ */
+static bool
+lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
+{
+ if (in->type != nir_instr_type_tex)
+ return false;
+ nir_tex_instr *txf = nir_instr_as_tex(in);
+ if (txf->op != nir_texop_txf)
+ return false;
+
+ b->cursor = nir_before_instr(in);
+ int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
+ assert(lod_idx >= 0);
+ nir_src lod_src = txf->src[lod_idx].src;
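+ /* a constant LOD of 0 is always in bounds, so no robustness check is needed */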
+ if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
+ return false;
+
+ nir_def *lod = lod_src.ssa;
+
+ int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
+ int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
+ nir_tex_instr *levels = nir_tex_instr_create(b->shader,
+ !!(offset_idx >= 0) + !!(handle_idx >= 0));
+ levels->op = nir_texop_query_levels;
+ levels->texture_index = txf->texture_index;
+ levels->dest_type = nir_type_int | lod->bit_size;
+ if (offset_idx >= 0) {
+ levels->src[0].src_type = nir_tex_src_texture_offset;
+ levels->src[0].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
+ }
+ if (handle_idx >= 0) {
+ levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
+ levels->src[!!(offset_idx >= 0)].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
+ }
+ nir_def_init(&levels->instr, &levels->def,
+ nir_tex_instr_dest_size(levels), 32);
+ nir_builder_instr_insert(b, &levels->instr);
+
+ nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
+ nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
+ nir_builder_instr_insert(b, &new_txf->instr);
+
+ nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
+ nir_const_value oob_values[4] = {0};
+ unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
+ oob_values[3] = (txf->dest_type & nir_type_float) ?
+ nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
+ nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
+
+ nir_pop_if(b, lod_oob_else);
+ nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
+
+ nir_def_rewrite_uses(&txf->def, robust_txf);
+ nir_instr_remove_v(in);
+ return true;
+}
+
+/* This pass is used to workaround the lack of out of bounds LOD robustness
+ * for texel fetch ops in VK_EXT_image_robustness.
+ */
+static bool
+lower_txf_lod_robustness(nir_shader *shader)
+{
+ return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
}
/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
+ bool have_psiz = false;
nir_foreach_shader_out_variable(var, s) {
if (var->data.location == VARYING_SLOT_PSIZ) {
/* genuine PSIZ outputs will have this set */
- return !!var->data.explicit_location;
+ have_psiz |= !!var->data.explicit_location;
}
}
- return false;
+ return have_psiz;
+}
+
+static nir_variable *
+find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
+{
+ assert((int)location >= 0);
+
+ nir_foreach_variable_with_modes(var, nir, mode) {
+ if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
+ unsigned num_components = glsl_get_vector_elements(var->type);
+ if (glsl_type_is_64bit(glsl_without_array(var->type)))
+ num_components *= 2;
+ if (is_clipcull_dist(var->data.location))
+ num_components = glsl_get_aoa_size(var->type);
+ if (var->data.location_frac <= location_frac &&
+ var->data.location_frac + num_components > location_frac)
+ return var;
+ }
+ }
+ return NULL;
+}
+
+static bool
+is_inlined(const bool *inlined, const nir_xfb_output_info *output)
+{
+ unsigned num_components = util_bitcount(output->component_mask);
+ for (unsigned i = 0; i < num_components; i++)
+ if (!inlined[output->component_offset + i])
+ return false;
+ return true;
}
static void
-update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
- uint64_t outputs_written, bool have_psiz)
-{
- uint8_t reverse_map[64] = {0};
- unsigned slot = 0;
- /* semi-copied from iris */
- while (outputs_written) {
- int bit = u_bit_scan64(&outputs_written);
- /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
- if (bit == VARYING_SLOT_PSIZ && !have_psiz)
- continue;
- reverse_map[slot++] = bit;
+update_psiz_location(nir_shader *nir, nir_variable *psiz)
+{
+ uint32_t last_output = util_last_bit64(nir->info.outputs_written);
+ if (last_output < VARYING_SLOT_VAR0)
+ last_output = VARYING_SLOT_VAR0;
+ else
+ last_output++;
+ /* this should get fixed up by slot remapping */
+ psiz->data.location = last_output;
+}
+
+static const struct glsl_type *
+clamp_slot_type(const struct glsl_type *type, unsigned slot)
+{
+ /* could be dvec/dmat/mat: each member is the same */
+ const struct glsl_type *plain = glsl_without_array_or_matrix(type);
+ /* determine size of each member type */
+ unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
+ /* normalize slot idx to current type's size */
+ slot %= slot_count;
+ unsigned slot_components = glsl_get_components(plain);
+ if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
+ slot_components *= 2;
+ /* create a vec4 mask of the selected slot's components out of all the components */
+ uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
+ /* return a vecN of the selected components */
+ slot_components = util_bitcount(mask);
+ return glsl_vec_type(slot_components);
+}
+
+static const struct glsl_type *
+unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
+{
+ const struct glsl_type *type = slot_type;
+ unsigned slot_count = 0;
+ unsigned cur_slot = 0;
+ /* iterate over all the members in the struct, stopping once the slot idx is reached */
+ for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
+ /* use array type for slot counting but return array member type for unroll */
+ const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
+ type = glsl_without_array(arraytype);
+ slot_count = glsl_count_vec4_slots(arraytype, false, false);
}
+ *slot_idx -= (cur_slot - slot_count);
+ if (!glsl_type_is_struct_or_ifc(type))
+ /* this is a fully unrolled struct: find the number of vec components to output */
+ type = clamp_slot_type(type, *slot_idx);
+ return type;
+}
+
+static unsigned
+get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
+{
+ assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
+ const struct glsl_type *orig_type = var->type;
+ const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned slot_idx = slot - so_slot;
+ if (type != orig_type)
+ slot_idx %= glsl_count_vec4_slots(type, false, false);
+ /* need to find the vec4 that's being exported by this slot */
+ while (glsl_type_is_struct_or_ifc(type))
+ type = unroll_struct_type(type, &slot_idx);
+
+ /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
+ unsigned num_components = glsl_get_components(glsl_without_array(type));
+ /* special handling: clip/cull distance are arrays with vector semantics */
+ if (is_clipcull_dist(var->data.location)) {
+ num_components = glsl_array_size(type);
+ if (slot_idx)
+ /* this is the second vec4 */
+ num_components %= 4;
+ else
+ /* this is the first vec4 */
+ num_components = MIN2(num_components, 4);
+ }
+ assert(num_components);
+ /* gallium handles xfb in terms of 32bit units */
+ if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
+ num_components *= 2;
+ return num_components;
+}
+
+static unsigned
+get_var_slot_count(nir_shader *nir, nir_variable *var)
+{
+ assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
+ const struct glsl_type *type = var->type;
+ if (nir_is_arrayed_io(var, nir->info.stage))
+ type = glsl_get_array_element(type);
+ unsigned slot_count = 0;
+ if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && var->data.location >= VERT_ATTRIB_GENERIC0) ||
+ var->data.location >= VARYING_SLOT_VAR0)
+ slot_count = glsl_count_vec4_slots(type, false, false);
+ else if (glsl_type_is_array(type))
+ slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
+ else
+ slot_count = 1;
+ return slot_count;
+}
- nir_foreach_shader_out_variable(var, zs->nir)
- var->data.explicit_xfb_buffer = 0;
- bool inlined[64] = {0};
- for (unsigned i = 0; i < so_info->num_outputs; i++) {
- const struct pipe_stream_output *output = &so_info->output[i];
- unsigned slot = reverse_map[output->register_index];
+static const nir_xfb_output_info *
+find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
+{
+ for (unsigned i = 0; i < xfb_info->output_count; i++) {
+ const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
+ if (packed_output->location == slot)
+ return packed_output;
+ }
+ return NULL;
+}
+
+static void
+update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
+{
+ bool inlined[VARYING_SLOT_MAX][4] = {0};
+ uint64_t packed = 0;
+ uint8_t packed_components[VARYING_SLOT_MAX] = {0};
+ uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
+ uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
+ uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
+ for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+ const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+ unsigned xfb_components = util_bitcount(output->component_mask);
/* always set stride to be used during draw */
- zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
- if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) &&
- !output->start_component) {
+ zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
+ if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
+ for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
+ unsigned slot = output->location;
+ if (inlined[slot][output->component_offset + c])
+ continue;
+ nir_variable *var = NULL;
+ while (!var && slot < VARYING_SLOT_TESS_MAX)
+ var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
+ slot = output->location;
+ unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
+ if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
+ /* if no variable is found for the xfb output, no output exists */
+ inlined[slot][c + output->component_offset] = true;
+ continue;
+ }
+ if (var->data.explicit_xfb_buffer) {
+ /* handle dvec3 where gallium splits streamout over 2 registers */
+ for (unsigned j = 0; j < xfb_components; j++)
+ inlined[slot][c + output->component_offset + j] = true;
+ }
+ if (is_inlined(inlined[slot], output))
+ continue;
+ assert(!glsl_type_is_array(var->type) || is_clipcull_dist(var->data.location));
+ assert(!glsl_type_is_struct_or_ifc(var->type));
+ unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
+ if (glsl_type_is_64bit(glsl_without_array(var->type)))
+ num_components *= 2;
+ /* if this is the entire variable, try to blast it out during the initial declaration;
+ * structs must be handled later to ensure accurate analysis
+ */
+ if ((num_components == xfb_components ||
+ num_components < xfb_components ||
+ (num_components > xfb_components && xfb_components == 4))) {
+ var->data.explicit_xfb_buffer = 1;
+ var->data.xfb.buffer = output->buffer;
+ var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+ var->data.offset = (output->offset + c * sizeof(uint32_t));
+ var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
+ for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
+ inlined[slot][c + output->component_offset + j] = true;
+ } else {
+ /* otherwise store some metadata for later */
+ packed |= BITFIELD64_BIT(slot);
+ packed_components[slot] += xfb_components;
+ packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
+ packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
+ for (unsigned j = 0; j < xfb_components; j++)
+ packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
+ }
+ }
+ }
+ }
+
+ /* if this was flagged as a packed output before, and if all the components are
+ * being output with the same stream on the same buffer with increasing offsets, this entire variable
+ * can be consolidated into a single output to conserve locations
+ */
+ for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+ const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+ unsigned slot = output->location;
+ if (is_inlined(inlined[slot], output))
+ continue;
+ if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
nir_variable *var = NULL;
while (!var)
- var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--);
- slot++;
- if (inlined[slot])
- continue;
- assert(var && var->data.location == slot);
- /* if this is the entire variable, try to blast it out during the initial declaration */
- if (glsl_get_components(var->type) == output->num_components) {
- var->data.explicit_xfb_buffer = 1;
- var->data.xfb.buffer = output->output_buffer;
- var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
- var->data.offset = output->dst_offset * 4;
- var->data.stream = output->stream;
- inlined[slot] = true;
+ var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
+ slot = output->location;
+ unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
+ if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
continue;
+ /* this is a lowered 64bit variable that can't be exported due to packing */
+ if (var->data.is_xfb)
+ goto out;
+
+ unsigned num_slots = is_clipcull_dist(var->data.location) ?
+ glsl_array_size(var->type) / 4 :
+ glsl_count_vec4_slots(var->type, false, false);
+ /* for each variable, iterate over all the variable's slots and inline the outputs */
+ for (unsigned j = 0; j < num_slots; j++) {
+ slot = var->data.location + j;
+ const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
+ if (!packed_output)
+ goto out;
+
+ /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
+ if (!(packed & BITFIELD64_BIT(slot)) ||
+ util_bitcount(packed_streams[slot]) != 1 ||
+ util_bitcount(packed_buffers[slot]) != 1)
+ goto out;
+
+ /* if all the components the variable exports to this slot aren't captured, skip consolidation */
+ unsigned num_components = get_slot_components(var, slot, var->data.location);
+ if (num_components != packed_components[slot])
+ goto out;
+
+ /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
+ uint32_t prev_offset = packed_offsets[packed_output->location][0];
+ for (unsigned k = 1; k < num_components; k++) {
+ /* if the offsets are not incrementing as expected, skip consolidation */
+ if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
+ goto out;
+ prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
+ }
}
+ /* this output can be consolidated: blast out all the data inlined */
+ var->data.explicit_xfb_buffer = 1;
+ var->data.xfb.buffer = output->buffer;
+ var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+ var->data.offset = output->offset;
+ var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
+ /* mark all slot components inlined to skip subsequent loop iterations */
+ for (unsigned j = 0; j < num_slots; j++) {
+ slot = var->data.location + j;
+ for (unsigned k = 0; k < packed_components[slot]; k++)
+ inlined[slot][k] = true;
+ packed &= ~BITFIELD64_BIT(slot);
+ }
+ continue;
}
- zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output;
- /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
- zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index];
+out:
+ unreachable("xfb should be inlined by now!");
}
- zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}
struct decompose_state {
@@ -585,7 +2106,7 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data)
return false;
unsigned num_components = glsl_get_vector_elements(split[0]->type);
b->cursor = nir_after_instr(instr);
- nir_ssa_def *loads[4];
+ nir_def *loads[4];
for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
if (state->needs_w) {
@@ -593,8 +2114,8 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data)
loads[3] = nir_channel(b, loads[0], 3);
loads[0] = nir_channel(b, loads[0], 0);
}
- nir_ssa_def *new_load = nir_vec(b, loads, num_components);
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
+ nir_def *new_load = nir_vec(b, loads, num_components);
+ nir_def_rewrite_uses(&intr->def, new_load);
nir_instr_remove_v(instr);
return true;
}
@@ -633,7 +2154,489 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
}
nir_fixup_deref_modes(nir);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
- optimize_nir(nir);
+ optimize_nir(nir, NULL, true);
+ return true;
+}
+
+static bool
+rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct zink_screen *screen = data;
+ const bool has_int64 = screen->info.feats.features.shaderInt64;
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ b->cursor = nir_before_instr(instr);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_ssbo_atomic:
+ case nir_intrinsic_ssbo_atomic_swap: {
+ /* convert offset to uintN_t[idx] */
+ nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
+ nir_src_rewrite(&intr->src[1], offset);
+ return true;
+ }
+ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_ubo: {
+ /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
+ bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
+ nir_src_is_const(intr->src[0]) &&
+ nir_src_as_uint(intr->src[0]) == 0 &&
+ intr->def.bit_size == 64 &&
+ nir_intrinsic_align_offset(intr) % 8 != 0;
+ force_2x32 |= intr->def.bit_size == 64 && !has_int64;
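+ /* convert the byte offset to an index into the uintN_t array (32bit units when force-splitting) */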
+ nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
+ nir_src_rewrite(&intr->src[1], offset);
+ /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+ if (force_2x32) {
+ /* this is always scalarized */
+ assert(intr->def.num_components == 1);
+ /* rewrite as 2x32 */
+ nir_def *load[2];
+ for (unsigned i = 0; i < 2; i++) {
+ if (intr->intrinsic == nir_intrinsic_load_ssbo)
+ load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
+ else
+ load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
+ nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
+ }
+ /* cast back to 64bit */
+ nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
+ nir_def_rewrite_uses(&intr->def, casted);
+ nir_instr_remove(instr);
+ }
+ return true;
+ }
+ case nir_intrinsic_load_scratch:
+ case nir_intrinsic_load_shared: {
+ b->cursor = nir_before_instr(instr);
+ bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
+ nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
+ nir_src_rewrite(&intr->src[0], offset);
+ /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+ if (force_2x32) {
+ /* this is always scalarized */
+ assert(intr->def.num_components == 1);
+ /* rewrite as 2x32 */
+ nir_def *load[2];
+ for (unsigned i = 0; i < 2; i++)
+ load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
+ /* cast back to 64bit */
+ nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
+ nir_def_rewrite_uses(&intr->def, casted);
+ nir_instr_remove(instr);
+ return true;
+ }
+ break;
+ }
+ case nir_intrinsic_store_ssbo: {
+ b->cursor = nir_before_instr(instr);
+ bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
+ nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
+ nir_src_rewrite(&intr->src[2], offset);
+ /* if 64bit isn't supported, 64bit stores definitely aren't supported either, so split into 2x32 and pray */
+ if (force_2x32) {
+ /* this is always scalarized */
+ assert(intr->src[0].ssa->num_components == 1);
+ nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
+ for (unsigned i = 0; i < 2; i++)
+ nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
+ nir_instr_remove(instr);
+ }
+ return true;
+ }
+ case nir_intrinsic_store_scratch:
+ case nir_intrinsic_store_shared: {
+ b->cursor = nir_before_instr(instr);
+ bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
+ nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
+ nir_src_rewrite(&intr->src[1], offset);
+ /* if 64bit isn't supported, 64bit stores definitely aren't supported either, so split into 2x32 and pray */
+ if (force_2x32) {
+ /* this is always scalarized */
+ assert(intr->src[0].ssa->num_components == 1);
+ nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
+ for (unsigned i = 0; i < 2; i++)
+ nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
+ nir_instr_remove(instr);
+ }
+ return true;
+ }
+ default:
+ break;
+ }
+ return false;
+}
+
+static bool
+rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
+{
+ return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
+}
+
+static nir_variable *
+get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
+{
+ nir_variable *var, **ptr;
+ unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
+
+ if (ssbo)
+ ptr = &bo->ssbo[bit_size >> 4];
+ else {
+ if (!idx) {
+ ptr = &bo->uniforms[bit_size >> 4];
+ } else
+ ptr = &bo->ubo[bit_size >> 4];
+ }
+ var = *ptr;
+ if (!var) {
+ if (ssbo)
+ var = bo->ssbo[32 >> 4];
+ else {
+ if (!idx)
+ var = bo->uniforms[32 >> 4];
+ else
+ var = bo->ubo[32 >> 4];
+ }
+ var = nir_variable_clone(var, shader);
+ if (ssbo)
+ var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
+ else
+ var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
+ *ptr = var;
+ nir_shader_add_variable(shader, var);
+
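+ /* retype the block as struct { uintN_t base[]; uintN_t unsized[]; } so accesses become plain array derefs */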
+ struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
+ fields[0].name = ralloc_strdup(shader, "base");
+ fields[1].name = ralloc_strdup(shader, "unsized");
+ unsigned array_size = glsl_get_length(var->type);
+ const struct glsl_type *bare_type = glsl_without_array(var->type);
+ const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
+ unsigned length = glsl_get_length(array_type);
+ const struct glsl_type *type;
+ const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
+ if (bit_size > 32) {
+ assert(bit_size == 64);
+ type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
+ } else {
+ type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
+ }
+ fields[0].type = type;
+ fields[1].type = unsized;
+ var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
+ var->data.driver_location = idx;
+ }
+ return var;
+}
+
+static void
+rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
+{
+ nir_intrinsic_op op;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
+ op = nir_intrinsic_deref_atomic;
+ else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
+ op = nir_intrinsic_deref_atomic_swap;
+ else
+ unreachable("unknown intrinsic");
+ nir_def *offset = intr->src[1].ssa;
+ nir_src *src = &intr->src[0];
+ nir_variable *var = get_bo_var(b->shader, bo, true, src,
+ intr->def.bit_size);
+ nir_deref_instr *deref_var = nir_build_deref_var(b, var);
+ nir_def *idx = src->ssa;
+ if (bo->first_ssbo)
+ idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
+ nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
+ nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
+
+ /* generate new atomic deref ops for every component */
+ nir_def *result[4];
+ unsigned num_components = intr->def.num_components;
+ for (unsigned i = 0; i < num_components; i++) {
+ nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
+ nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
+ nir_def_init(&new_instr->instr, &new_instr->def, 1,
+ intr->def.bit_size);
+ nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
+ new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
+ /* deref ops have no offset src, so copy the srcs after it */
+ for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
+ new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
+ nir_builder_instr_insert(b, &new_instr->instr);
+
+ result[i] = &new_instr->def;
+ offset = nir_iadd_imm(b, offset, 1);
+ }
+
+ nir_def *load = nir_vec(b, result, num_components);
+ nir_def_rewrite_uses(&intr->def, load);
+ nir_instr_remove(instr);
+}
+
+static bool
+remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct bo_vars *bo = data;
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_variable *var = NULL;
+ nir_def *offset = NULL;
+ bool is_load = true;
+ b->cursor = nir_before_instr(instr);
+ nir_src *src;
+ bool ssbo = true;
+ switch (intr->intrinsic) {
+ case nir_intrinsic_ssbo_atomic:
+ case nir_intrinsic_ssbo_atomic_swap:
+ rewrite_atomic_ssbo_instr(b, instr, bo);
+ return true;
+ case nir_intrinsic_store_ssbo:
+ src = &intr->src[1];
+ var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
+ offset = intr->src[2].ssa;
+ is_load = false;
+ break;
+ case nir_intrinsic_load_ssbo:
+ src = &intr->src[0];
+ var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
+ offset = intr->src[1].ssa;
+ break;
+ case nir_intrinsic_load_ubo:
+ src = &intr->src[0];
+ var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
+ offset = intr->src[1].ssa;
+ ssbo = false;
+ break;
+ default:
+ return false;
+ }
+ assert(var);
+ assert(offset);
+ nir_deref_instr *deref_var = nir_build_deref_var(b, var);
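+ /* rebase the binding index: UBOs skip the uniform block at index 0, and both sets skip unused leading slots */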
+ nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
+ if (!ssbo && bo->first_ubo && var->data.driver_location)
+ idx = nir_iadd_imm(b, idx, -bo->first_ubo);
+ else if (ssbo && bo->first_ssbo)
+ idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
+ nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
+ nir_i2iN(b, idx, deref_var->def.bit_size));
+ nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
+ assert(intr->num_components <= 2);
+ if (is_load) {
+ nir_def *result[2];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
+ nir_i2iN(b, offset, deref_struct->def.bit_size));
+ result[i] = nir_load_deref(b, deref_arr);
+ if (intr->intrinsic == nir_intrinsic_load_ssbo)
+ nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
+ offset = nir_iadd_imm(b, offset, 1);
+ }
+ nir_def *load = nir_vec(b, result, intr->num_components);
+ nir_def_rewrite_uses(&intr->def, load);
+ } else {
+ nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
+ nir_i2iN(b, offset, deref_struct->def.bit_size));
+ nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
+ }
+ nir_instr_remove(instr);
+ return true;
+}
+
+static bool
+remove_bo_access(nir_shader *shader, struct zink_shader *zs)
+{
+ struct bo_vars bo = get_bo_vars(zs, shader);
+ return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
+}
+
+static bool
+filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
+{
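+ /* classify IO intrinsics: the fallthroughs accumulate interp -> input -> load flags */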
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_interpolated_input:
+ *is_interp = true;
+ FALLTHROUGH;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_per_vertex_input:
+ *is_input = true;
+ FALLTHROUGH;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_load_per_primitive_output:
+ *is_load = true;
+ FALLTHROUGH;
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_primitive_output:
+ case nir_intrinsic_store_per_vertex_output:
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+static bool
+io_instr_is_arrayed(nir_intrinsic_instr *intr)
+{
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_load_per_primitive_output:
+ case nir_intrinsic_store_per_primitive_output:
+ case nir_intrinsic_store_per_vertex_output:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static bool
+find_var_deref(nir_shader *nir, nir_variable *var)
+{
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_deref)
+ continue;
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ if (deref->deref_type == nir_deref_type_var && deref->var == var)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static bool
+find_var_io(nir_shader *nir, nir_variable *var)
+{
+ nir_foreach_function(function, nir) {
+ if (!function->impl)
+ continue;
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ continue;
+ if (var->data.mode == nir_var_shader_in && !is_input)
+ continue;
+ if (var->data.mode == nir_var_shader_out && is_input)
+ continue;
+ unsigned slot_offset = 0;
+ if (var->data.fb_fetch_output && !is_load)
+ continue;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
+ continue;
+ nir_src *src_offset = nir_get_io_offset_src(intr);
+ if (src_offset && nir_src_is_const(*src_offset))
+ slot_offset = nir_src_as_uint(*src_offset);
+ unsigned slot_count = get_var_slot_count(nir, var);
+ if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
+ var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
+ var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
+ var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+struct clamp_layer_output_state {
+ nir_variable *original;
+ nir_variable *clamped;
+};
+
+static void
+clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
+{
+ nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
+ nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
+ nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
+ nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
+ nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
+ nir_load_deref(b, original_deref),
+ nir_imm_int(b, 0));
+ nir_store_deref(b, clamped_deref, layer, 0);
+}
+
+static bool
+clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct clamp_layer_output_state *state = data;
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
+ intr->intrinsic != nir_intrinsic_emit_vertex)
+ return false;
+ b->cursor = nir_before_instr(instr);
+ clamp_layer_output_emit(b, state);
+ return true;
+ }
+ default: return false;
+ }
+}
+
+static bool
+clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
+{
+ switch (vs->info.stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_GEOMETRY:
+ case MESA_SHADER_TESS_EVAL:
+ break;
+ default:
+ unreachable("invalid last vertex stage!");
+ }
+ struct clamp_layer_output_state state = {0};
+ state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
+ if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
+ return false;
+ state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
+ state.clamped->data.location = VARYING_SLOT_LAYER;
+ nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
+ if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
+ state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
+ state.original->data.driver_location = (*next_location)++;
+ if (fs_var) {
+ fs_var->data.location = state.original->data.location;
+ fs_var->data.driver_location = state.original->data.driver_location;
+ }
+ } else {
+ if (state.original->data.explicit_xfb_buffer) {
+ /* Will xfb the clamped output but still better than nothing */
+ state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
+ state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
+ state.clamped->data.xfb.stride = state.original->data.xfb.stride;
+ state.clamped->data.offset = state.original->data.offset;
+ state.clamped->data.stream = state.original->data.stream;
+ }
+ state.original->data.mode = nir_var_shader_temp;
+ nir_fixup_deref_modes(vs);
+ }
+ if (vs->info.stage == MESA_SHADER_GEOMETRY) {
+ nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
+ } else {
+ nir_builder b;
+ nir_function_impl *impl = nir_shader_get_entrypoint(vs);
+ b = nir_builder_at(nir_after_impl(impl));
+ assert(impl->end_block->predecessors->entries == 1);
+ clamp_layer_output_emit(&b, &state);
+ nir_metadata_preserve(impl, nir_metadata_dominance);
+ }
+ optimize_nir(vs, NULL, true);
+ NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
return true;
}
@@ -641,9 +2644,9 @@ static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
unsigned slot = var->data.location;
- switch (var->data.location) {
+ switch (slot) {
+ case -1:
case VARYING_SLOT_POS:
- case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_PRIMITIVE_ID:
@@ -659,21 +2662,22 @@ assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser
default:
if (var->data.patch) {
- assert(var->data.location >= VARYING_SLOT_PATCH0);
- slot = var->data.location - VARYING_SLOT_PATCH0;
- } else if (var->data.location >= VARYING_SLOT_VAR0 &&
- var->data.mode == nir_var_shader_in &&
- stage == MESA_SHADER_TESS_EVAL) {
- slot = var->data.location - VARYING_SLOT_VAR0;
- } else {
- if (slot_map[var->data.location] == 0xff) {
- assert(*reserved < MAX_VARYING);
- slot_map[var->data.location] = *reserved;
- *reserved += glsl_count_vec4_slots(var->type, false, false);
- }
- slot = slot_map[var->data.location];
- assert(slot < MAX_VARYING);
+ assert(slot >= VARYING_SLOT_PATCH0);
+ slot -= VARYING_SLOT_PATCH0;
}
+ if (slot_map[slot] == 0xff) {
+ assert(*reserved < MAX_VARYING);
+ unsigned num_slots;
+ if (nir_is_arrayed_io(var, stage))
+ num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
+ else
+ num_slots = glsl_count_vec4_slots(var->type, false, false);
+ assert(*reserved + num_slots <= MAX_VARYING);
+ for (unsigned i = 0; i < num_slots; i++)
+ slot_map[slot + i] = (*reserved)++;
+ }
+ slot = slot_map[slot];
+ assert(slot < MAX_VARYING);
var->data.driver_location = slot;
}
}
@@ -690,9 +2694,9 @@ is_texcoord(gl_shader_stage stage, const nir_variable *var)
static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
- switch (var->data.location) {
+ unsigned slot = var->data.location;
+ switch (slot) {
case VARYING_SLOT_POS:
- case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_PRIMITIVE_ID:
@@ -707,87 +2711,1298 @@ assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser
break;
default:
if (var->data.patch) {
- assert(var->data.location >= VARYING_SLOT_PATCH0);
- var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
- } else if (var->data.location >= VARYING_SLOT_VAR0 &&
- stage == MESA_SHADER_TESS_CTRL &&
- var->data.mode == nir_var_shader_out)
- var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
- else {
- if (slot_map[var->data.location] == (unsigned char)-1) {
- if (!is_texcoord(stage, var))
- /* dead io */
- return false;
- /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
- slot_map[var->data.location] = (*reserved)++;
+ assert(slot >= VARYING_SLOT_PATCH0);
+ slot -= VARYING_SLOT_PATCH0;
+ }
+ if (slot_map[slot] == (unsigned char)-1) {
+ /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
+ * so keep for now and eliminate later
+ */
+ if (is_texcoord(stage, var)) {
+ var->data.driver_location = -1;
+ return true;
}
- var->data.driver_location = slot_map[var->data.location];
+ /* patch variables may be read in the workgroup */
+ if (stage != MESA_SHADER_TESS_CTRL)
+ /* dead io */
+ return false;
+ unsigned num_slots;
+ if (nir_is_arrayed_io(var, stage))
+ num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
+ else
+ num_slots = glsl_count_vec4_slots(var->type, false, false);
+ assert(*reserved + num_slots <= MAX_VARYING);
+ for (unsigned i = 0; i < num_slots; i++)
+ slot_map[slot + i] = (*reserved)++;
}
+ var->data.driver_location = slot_map[slot];
}
return true;
}
static bool
-rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
+rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
{
nir_variable *var = data;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_load_deref)
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ return false;
+ if (!is_load)
return false;
- nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
- if (deref_var != var)
+ unsigned location = nir_intrinsic_io_semantics(intr).location;
+ if (location != var->data.location)
return false;
- nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
+ b->cursor = nir_before_instr(instr);
+ nir_def *zero = nir_imm_zero(b, intr->def.num_components,
+ intr->def.bit_size);
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
+ switch (location) {
+ case VARYING_SLOT_COL0:
+ case VARYING_SLOT_COL1:
+ case VARYING_SLOT_BFC0:
+ case VARYING_SLOT_BFC1:
+ /* default color is 0,0,0,1 */
+ if (intr->def.num_components == 4)
+ zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
+ break;
+ default:
+ break;
+ }
+ }
+ nir_def_rewrite_uses(&intr->def, zero);
+ nir_instr_remove(instr);
return true;
}
+
+
+static bool
+delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_primitive_output:
+ case nir_intrinsic_store_per_vertex_output:
+ break;
+ default:
+ return false;
+ }
+ if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
+ return false;
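+ /* non-NULL data restricts removal to stores of a constant 1.0 (the guaranteed default); NULL removes every PSIZ store */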
+ if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
+ nir_instr_remove(&intr->instr);
+ return true;
+ }
+ return false;
+}
+
+static bool
+delete_psiz_store(nir_shader *nir, bool one)
+{
+ bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
+ nir_metadata_dominance, one ? nir : NULL);
+ if (progress)
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ return progress;
+}
+
+struct write_components {
+ unsigned slot;
+ uint32_t component_mask;
+};
+
+static bool
+fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ struct write_components *wc = data;
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ return false;
+ if (!is_input)
+ return false;
+ nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+ if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
+ return false;
+ unsigned num_components = intr->num_components;
+ unsigned c = nir_intrinsic_component(intr);
+ if (intr->def.bit_size == 64)
+ num_components *= 2;
+ nir_src *src_offset = nir_get_io_offset_src(intr);
+ if (!nir_src_is_const(*src_offset))
+ return false;
+ unsigned slot_offset = nir_src_as_uint(*src_offset);
+ if (s.location + slot_offset != wc->slot)
+ return false;
+ uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
+ if (intr->def.bit_size == 64)
+ readmask |= readmask << (intr->num_components + c);
+ /* handle dvec3/dvec4 */
+ if (num_components + c > 4)
+ readmask >>= 4;
+ if ((wc->component_mask & readmask) == readmask)
+ return false;
+ uint32_t rewrite_mask = readmask & ~wc->component_mask;
+ if (!rewrite_mask)
+ return false;
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
+ switch (wc->slot) {
+ case VARYING_SLOT_COL0:
+ case VARYING_SLOT_COL1:
+ case VARYING_SLOT_BFC0:
+ case VARYING_SLOT_BFC1:
+ /* default color is 0,0,0,1 */
+ if (intr->def.num_components == 4)
+ zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
+ break;
+ default:
+ break;
+ }
+ }
+ rewrite_mask >>= c;
+ nir_def *dest = &intr->def;
+ u_foreach_bit(component, rewrite_mask)
+ dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
+ nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
+ return true;
+}
+
+static bool
+find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ struct write_components *wc = data;
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ return false;
+ if (is_input || is_load)
+ return false;
+ nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+ if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
+ return false;
+ unsigned location = s.location;
+ unsigned c = nir_intrinsic_component(intr);
+ uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
+ if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
+ unsigned num_components = intr->num_components * 2;
+ nir_src *src_offset = nir_get_io_offset_src(intr);
+ if (nir_src_is_const(*src_offset)) {
+ if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
+ return false;
+ }
+ wrmask |= wrmask << intr->num_components;
+ /* handle dvec3/dvec4 */
+ if (num_components + c > 4)
+ wrmask >>= 4;
+ }
+ wc->component_mask |= wrmask;
+ return false;
+}
+
void
-zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
+zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
{
unsigned reserved = 0;
unsigned char slot_map[VARYING_SLOT_MAX];
memset(slot_map, -1, sizeof(slot_map));
bool do_fixup = false;
nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
+ nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
+ if (var) {
+ bool can_remove = false;
+ if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
+ /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
+ if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
+ can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
+ else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
+ can_remove = !var->data.explicit_location;
+ }
+ /* remove injected pointsize from all but the last vertex stage */
+ if (can_remove) {
+ var->data.mode = nir_var_shader_temp;
+ nir_fixup_deref_modes(producer);
+ delete_psiz_store(producer, false);
+ NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+ optimize_nir(producer, NULL, true);
+ }
+ }
+ if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
+ producer->info.has_transform_feedback_varyings = false;
+ nir_foreach_shader_out_variable(var_out, producer)
+ var_out->data.explicit_xfb_buffer = false;
+ }
if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
/* never assign from tcs -> tes, always invert */
- nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
- assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
- nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
- if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
+ nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
+ assign_producer_var_io(consumer->info.stage, var_in, &reserved, slot_map);
+ nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
+ if (!assign_consumer_var_io(producer->info.stage, var_out, &reserved, slot_map))
/* this is an output, nothing more needs to be done for it to be dropped */
do_fixup = true;
}
} else {
- nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
- assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
- nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
- if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
+ nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
+ assign_producer_var_io(producer->info.stage, var_out, &reserved, slot_map);
+ nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
+ if (!assign_consumer_var_io(consumer->info.stage, var_in, &reserved, slot_map)) {
do_fixup = true;
- /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
- nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
+ /* input needs to be rewritten */
+ nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
}
}
+ if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
+ do_fixup |= clamp_layer_output(producer, consumer, &reserved);
+ }
+ nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
+ if (producer->info.io_lowered && consumer->info.io_lowered) {
+ u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
+ struct write_components wc = {slot, 0};
+ nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
+ assert(wc.component_mask);
+ if (wc.component_mask != BITFIELD_MASK(4))
+ do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
+ }
}
if (!do_fixup)
return;
nir_fixup_deref_modes(nir);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
- optimize_nir(nir);
+ optimize_nir(nir, NULL, true);
+}
+
+/* all types that hit this function contain something that is 64bit */
+static const struct glsl_type *
+rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
+{
+ if (glsl_type_is_array(type)) {
+ const struct glsl_type *child = glsl_get_array_element(type);
+ unsigned elements = glsl_array_size(type);
+ unsigned stride = glsl_get_explicit_stride(type);
+ return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
+ }
+ /* rewrite structs recursively */
+ if (glsl_type_is_struct_or_ifc(type)) {
+ unsigned nmembers = glsl_get_length(type);
+ struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
+ unsigned xfb_offset = 0;
+ for (unsigned i = 0; i < nmembers; i++) {
+ const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
+ fields[i] = *f;
+ xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
+ if (i < nmembers - 1 && xfb_offset % 8 &&
+ (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
+ (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
+ var->data.is_xfb = true;
+ }
+ fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
+ }
+ return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
+ }
+ if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
+ return type;
+ if (doubles_only && glsl_type_is_vector_or_scalar(type))
+ return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
+ enum glsl_base_type base_type;
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_UINT64:
+ base_type = GLSL_TYPE_UINT;
+ break;
+ case GLSL_TYPE_INT64:
+ base_type = GLSL_TYPE_INT;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ base_type = GLSL_TYPE_FLOAT;
+ break;
+ default:
+ unreachable("unknown 64-bit vertex attribute format!");
+ }
+ if (glsl_type_is_scalar(type))
+ return glsl_vector_type(base_type, 2);
+ unsigned num_components;
+ if (glsl_type_is_matrix(type)) {
+ /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
+ unsigned vec_components = glsl_get_vector_elements(type);
+ if (vec_components == 3)
+ vec_components = 4;
+ num_components = vec_components * 2 * glsl_get_matrix_columns(type);
+ } else {
+ num_components = glsl_get_vector_elements(type) * 2;
+ if (num_components <= 4)
+ return glsl_vector_type(base_type, num_components);
+ }
+ /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
+ struct glsl_struct_field fields[8] = {0};
+ unsigned remaining = num_components;
+ unsigned nfields = 0;
+ for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
+ assert(i < ARRAY_SIZE(fields));
+ fields[i].name = "";
+ fields[i].offset = i * 16;
+ fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
+ }
+ char buf[64];
+ snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
+ return glsl_struct_type(fields, nfields, buf, true);
}
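
Editor's note: anything wider than two 64-bit components is rewritten above as a struct of 32-bit vec4 members with an optional trailing vec2. A minimal standalone sketch of that splitting arithmetic (not driver code; the dvec3 example and offsets are illustrative):

/* Standalone sketch: split N 32-bit components (2 per 64-bit component)
 * into vec4 members plus an optional trailing vec2. */
#include <stdio.h>

int main(void)
{
   unsigned num_64bit = 3;               /* e.g. a dvec3 */
   unsigned remaining = num_64bit * 2;   /* 6 32-bit components */
   for (unsigned member = 0; remaining; member++) {
      unsigned width = remaining < 4 ? remaining : 4;
      printf("member %u: vec%u at offset %u\n", member, width, member * 16);
      remaining -= width;
   }
   return 0;
}
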
-VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
+static const struct glsl_type *
+deref_is_matrix(nir_deref_instr *deref)
{
- VkShaderModule mod = VK_NULL_HANDLE;
- void *streamout = NULL;
- nir_shader *nir = nir_shader_clone(NULL, base_nir);
+ if (glsl_type_is_matrix(deref->type))
+ return deref->type;
+ nir_deref_instr *parent = nir_deref_instr_parent(deref);
+ if (parent)
+ return deref_is_matrix(parent);
+ return NULL;
+}
+static bool
+lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
+ struct hash_table *derefs, struct set *deletes, bool doubles_only)
+{
+ bool func_progress = false;
+ nir_builder b = nir_builder_create(impl);
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_deref: {
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ if (!(deref->modes & var->data.mode))
+ continue;
+ if (nir_deref_instr_get_variable(deref) != var)
+ continue;
+
+ /* matrix types are special: store the original deref type for later use */
+ const struct glsl_type *matrix = deref_is_matrix(deref);
+ nir_deref_instr *parent = nir_deref_instr_parent(deref);
+ if (!matrix) {
+ /* if this isn't a direct matrix deref, it may be a matrix row deref */

+ hash_table_foreach(derefs, he) {
+ /* propagate parent matrix type to row deref */
+ if (he->key == parent)
+ matrix = he->data;
+ }
+ }
+ if (matrix)
+ _mesa_hash_table_insert(derefs, deref, (void*)matrix);
+ if (deref->deref_type == nir_deref_type_var)
+ deref->type = var->type;
+ else
+ deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
+ }
+ break;
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_deref &&
+ intr->intrinsic != nir_intrinsic_load_deref)
+ break;
+ if (nir_intrinsic_get_var(intr, 0) != var)
+ break;
+ if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
+ (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
+ break;
+ b.cursor = nir_before_instr(instr);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ unsigned num_components = intr->num_components * 2;
+ nir_def *comp[NIR_MAX_VEC_COMPONENTS];
+ /* this is the stored matrix type from the deref */
+ struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
+ const struct glsl_type *matrix = he ? he->data : NULL;
+ if (doubles_only && !matrix)
+ break;
+ func_progress = true;
+ if (intr->intrinsic == nir_intrinsic_store_deref) {
+ /* first, unpack the src data to 32bit vec2 components */
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
+ comp[i * 2] = nir_channel(&b, ssa, 0);
+ comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
+ }
+ unsigned wrmask = nir_intrinsic_write_mask(intr);
+ unsigned mask = 0;
+ /* expand writemask for doubled components */
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ if (wrmask & BITFIELD_BIT(i))
+ mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
+ }
+ if (matrix) {
+ /* matrix types always come from array (row) derefs */
+ assert(deref->deref_type == nir_deref_type_array);
+ nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
+ /* let optimization clean up consts later */
+ nir_def *index = deref->arr.index.ssa;
+ /* this might be an indirect array index:
+ * - iterate over matrix columns
+ * - add if blocks for each column
+ * - perform the store in the block
+ */
+ for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
+ nir_push_if(&b, nir_ieq_imm(&b, index, idx));
+ unsigned vec_components = glsl_get_vector_elements(matrix);
+ /* always clamp dvec3 to 4 components */
+ if (vec_components == 3)
+ vec_components = 4;
+ unsigned start_component = idx * vec_components * 2;
+ /* struct member */
+ unsigned member = start_component / 4;
+ /* number of components remaining */
+ unsigned remaining = num_components;
+ for (unsigned i = 0; i < num_components; member++) {
+ if (!(mask & BITFIELD_BIT(i)))
+ continue;
+ assert(member < glsl_get_length(var_deref->type));
+ /* deref the rewritten struct to the appropriate vec4/vec2 */
+ nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
+ unsigned incr = MIN2(remaining, 4);
+ /* assemble the write component vec */
+ nir_def *val = nir_vec(&b, &comp[i], incr);
+ /* use the number of components being written as the writemask */
+ if (glsl_get_vector_elements(strct->type) > val->num_components)
+ val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
+ nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
+ remaining -= incr;
+ i += incr;
+ }
+ nir_pop_if(&b, NULL);
+ }
+ _mesa_set_add(deletes, &deref->instr);
+ } else if (num_components <= 4) {
+ /* simple store case: just write out the components */
+ nir_def *dest = nir_vec(&b, comp, num_components);
+ nir_store_deref(&b, deref, dest, mask);
+ } else {
+ /* writing > 4 components: access the struct and write to the appropriate vec4 members */
+ for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
+ if (!(mask & BITFIELD_MASK(4)))
+ continue;
+ nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
+ nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
+ if (glsl_get_vector_elements(strct->type) > dest->num_components)
+ dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
+ nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
+ mask >>= 4;
+ }
+ }
+ } else {
+ nir_def *dest = NULL;
+ if (matrix) {
+ /* matrix types always come from array (row) derefs */
+ assert(deref->deref_type == nir_deref_type_array);
+ nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
+ /* let optimization clean up consts later */
+ nir_def *index = deref->arr.index.ssa;
+ /* this might be an indirect array index:
+ * - iterate over matrix columns
+ * - add if blocks for each column
+ * - phi the loads using the array index
+ */
+ unsigned cols = glsl_get_matrix_columns(matrix);
+ nir_def *dests[4];
+ for (unsigned idx = 0; idx < cols; idx++) {
+ /* don't add an if for the final row: this will be handled in the else */
+ if (idx < cols - 1)
+ nir_push_if(&b, nir_ieq_imm(&b, index, idx));
+ unsigned vec_components = glsl_get_vector_elements(matrix);
+ /* always clamp dvec3 to 4 components */
+ if (vec_components == 3)
+ vec_components = 4;
+ unsigned start_component = idx * vec_components * 2;
+ /* struct member */
+ unsigned member = start_component / 4;
+ /* number of components remaining */
+ unsigned remaining = num_components;
+ /* component index */
+ unsigned comp_idx = 0;
+ for (unsigned i = 0; i < num_components; member++) {
+ assert(member < glsl_get_length(var_deref->type));
+ nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
+ nir_def *load = nir_load_deref(&b, strct);
+ unsigned incr = MIN2(remaining, 4);
+ /* repack the loads to 64bit */
+ for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
+ comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
+ remaining -= incr;
+ i += incr;
+ }
+ dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
+ if (idx < cols - 1)
+ nir_push_else(&b, NULL);
+ }
+ /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
+ for (unsigned idx = cols - 1; idx >= 1; idx--) {
+ nir_pop_if(&b, NULL);
+ dest = nir_if_phi(&b, dests[idx - 1], dest);
+ }
+ _mesa_set_add(deletes, &deref->instr);
+ } else if (num_components <= 4) {
+ /* simple load case */
+ nir_def *load = nir_load_deref(&b, deref);
+ /* pack 32bit loads into 64bit: this will automagically get optimized out later */
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
+ }
+ dest = nir_vec(&b, comp, intr->num_components);
+ } else {
+ /* loading > 4 components: access the struct and load the appropriate vec4 members */
+ for (unsigned i = 0; i < 2; i++, num_components -= 4) {
+ nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
+ nir_def *load = nir_load_deref(&b, strct);
+ comp[i * 2] = nir_pack_64_2x32(&b,
+ nir_trim_vector(&b, load, 2));
+ if (num_components > 2)
+ comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
+ }
+ dest = nir_vec(&b, comp, intr->num_components);
+ }
+ nir_def_rewrite_uses_after(&intr->def, dest, instr);
+ }
+ _mesa_set_add(deletes, instr);
+ break;
+ }
+ break;
+ default: break;
+ }
+ }
+ }
+ if (func_progress)
+ nir_metadata_preserve(impl, nir_metadata_none);
+ /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
+ set_foreach_remove(deletes, he)
+ nir_instr_remove((void*)he->key);
+ return func_progress;
+}
+
+static bool
+lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
+ struct set *deletes, bool doubles_only)
+{
+ if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
+ return false;
+ var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
+ /* once type is rewritten, rewrite all loads and stores */
+ nir_foreach_function_impl(impl, shader)
+ lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
+ return true;
+}
+
+/* rewrite all input/output variables using 32bit types and load/stores */
+static bool
+lower_64bit_vars(nir_shader *shader, bool doubles_only)
+{
+ bool progress = false;
+ struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ nir_foreach_function_impl(impl, shader) {
+ nir_foreach_function_temp_variable(var, impl) {
+ if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
+ continue;
+ var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
+ progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
+ }
+ }
+ ralloc_free(deletes);
+ ralloc_free(derefs);
+ if (progress) {
+ nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
+ nir_lower_phis_to_scalar(shader, false);
+ optimize_nir(shader, NULL, true);
+ }
+ return progress;
+}
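
Editor's note: when a 64-bit store is lowered to 32-bit components, each bit of the original writemask must cover the two 32-bit halves of its component. A minimal standalone sketch of that expansion (not driver code; the mask values are made up):

/* Standalone sketch: expand a 64-bit writemask so each original component
 * covers its two 32-bit halves. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   unsigned num_components = 3;   /* e.g. storing a dvec3 */
   uint32_t wrmask = 0x5;         /* components 0 and 2 */
   uint32_t mask = 0;
   for (unsigned i = 0; i < num_components; i++) {
      if (wrmask & (1u << i))
         mask |= (1u << (i * 2)) | (1u << (i * 2 + 1));
   }
   printf("expanded mask: 0x%x\n", (unsigned)mask);   /* 0x33 */
   return 0;
}
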
+
+static void
+zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
+{
+ FILE *fp = fopen(file, "wb");
+ if (fp) {
+ fwrite(words, 1, size, fp);
+ fclose(fp);
+ fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
+ }
+}
+
+static VkShaderStageFlagBits
+zink_get_next_stage(gl_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
+ VK_SHADER_STAGE_GEOMETRY_BIT |
+ VK_SHADER_STAGE_FRAGMENT_BIT;
+ case MESA_SHADER_TESS_CTRL:
+ return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+ case MESA_SHADER_TESS_EVAL:
+ return VK_SHADER_STAGE_GEOMETRY_BIT |
+ VK_SHADER_STAGE_FRAGMENT_BIT;
+ case MESA_SHADER_GEOMETRY:
+ return VK_SHADER_STAGE_FRAGMENT_BIT;
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
+ return 0;
+ default:
+ unreachable("invalid shader stage");
+ }
+}
+
+struct zink_shader_object
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
+{
+ VkShaderModuleCreateInfo smci = {0};
+ VkShaderCreateInfoEXT sci = {0};
+
+ if (!spirv)
+ spirv = zs->spirv;
+
+ if (zink_debug & ZINK_DEBUG_SPIRV) {
+ char buf[256];
+ static int i;
+ snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
+ zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
+ }
+
+ sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
+ sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
+ sci.nextStage = zink_get_next_stage(zs->info.stage);
+ sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
+ sci.codeSize = spirv->num_words * sizeof(uint32_t);
+ sci.pCode = spirv->words;
+ sci.pName = "main";
+ VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
+ if (pg) {
+ sci.setLayoutCount = pg->num_dsl;
+ sci.pSetLayouts = pg->dsl;
+ } else {
+ sci.setLayoutCount = zs->info.stage + 1;
+ dsl[zs->info.stage] = zs->precompile.dsl;
+ sci.pSetLayouts = dsl;
+ }
+ VkPushConstantRange pcr;
+ pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
+ pcr.offset = 0;
+ pcr.size = sizeof(struct zink_gfx_push_constant);
+ sci.pushConstantRangeCount = 1;
+ sci.pPushConstantRanges = &pcr;
+
+ smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+ smci.codeSize = spirv->num_words * sizeof(uint32_t);
+ smci.pCode = spirv->words;
+
+#ifndef NDEBUG
+ if (zink_debug & ZINK_DEBUG_VALIDATION) {
+ static const struct spirv_to_nir_options spirv_options = {
+ .environment = NIR_SPIRV_VULKAN,
+ .caps = {
+ .float64 = true,
+ .int16 = true,
+ .int64 = true,
+ .tessellation = true,
+ .float_controls = true,
+ .image_ms_array = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .storage_image_ms = true,
+ .geometry_streams = true,
+ .storage_8bit = true,
+ .storage_16bit = true,
+ .variable_pointers = true,
+ .stencil_export = true,
+ .post_depth_coverage = true,
+ .transform_feedback = true,
+ .device_group = true,
+ .draw_parameters = true,
+ .shader_viewport_index_layer = true,
+ .multiview = true,
+ .physical_storage_buffer_address = true,
+ .int64_atomics = true,
+ .subgroup_arithmetic = true,
+ .subgroup_basic = true,
+ .subgroup_ballot = true,
+ .subgroup_quad = true,
+ .subgroup_shuffle = true,
+ .subgroup_vote = true,
+ .vk_memory_model = true,
+ .vk_memory_model_device_scope = true,
+ .int8 = true,
+ .float16 = true,
+ .demote_to_helper_invocation = true,
+ .sparse_residency = true,
+ .min_lod = true,
+ .workgroup_memory_explicit_layout = true,
+ },
+ .ubo_addr_format = nir_address_format_32bit_index_offset,
+ .ssbo_addr_format = nir_address_format_32bit_index_offset,
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ .push_const_addr_format = nir_address_format_logical,
+ .shared_addr_format = nir_address_format_32bit_offset,
+ };
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ VkSpecializationInfo sinfo = {0};
+ VkSpecializationMapEntry me[3];
+ uint32_t size[3] = {1,1,1};
+ if (!zs->info.workgroup_size[0]) {
+ sinfo.mapEntryCount = 3;
+ sinfo.pMapEntries = &me[0];
+ sinfo.dataSize = sizeof(uint32_t) * 3;
+ sinfo.pData = size;
+ uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
+ for (int i = 0; i < 3; i++) {
+ me[i].size = sizeof(uint32_t);
+ me[i].constantID = ids[i];
+ me[i].offset = i * sizeof(uint32_t);
+ }
+ spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
+ }
+ nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
+ spec_entries, num_spec_entries,
+ clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
+ assert(nir);
+ ralloc_free(nir);
+ free(spec_entries);
+ }
+#endif
+
+ VkResult ret;
+ struct zink_shader_object obj = {0};
+ if (!can_shobj || !screen->info.have_EXT_shader_object)
+ ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
+ else
+ ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
+ ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
+ assert(success);
+ return obj;
+}
+
+static void
+prune_io(nir_shader *nir)
+{
+ nir_foreach_shader_in_variable_safe(var, nir) {
+ if (!find_var_deref(nir, var) && !find_var_io(nir, var))
+ var->data.mode = nir_var_shader_temp;
+ }
+ nir_foreach_shader_out_variable_safe(var, nir) {
+ if (!find_var_deref(nir, var) && !find_var_io(nir, var))
+ var->data.mode = nir_var_shader_temp;
+ }
+ NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+}
+
+static void
+flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
+{
+ /* unconvert from zink_binding() */
+ uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
+ assert(sampler_id < 32); //bitfield size for tracking
+ zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
+}
+
+static nir_def *
+rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
+{
+ assert(var);
+ const struct glsl_type *type = glsl_without_array(var->type);
+ enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
+ bool is_int = glsl_base_type_is_integer(ret_type);
+ unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
+ unsigned dest_size = tex->def.bit_size;
+ b->cursor = nir_after_instr(&tex->instr);
+ unsigned num_components = tex->def.num_components;
+ bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
+ if (bit_size == dest_size && !rewrite_depth)
+ return NULL;
+ nir_def *dest = &tex->def;
+ if (rewrite_depth && zs) {
+ if (nir_def_components_read(dest) & ~1) {
+ /* this needs recompiles */
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
+ flag_shadow_tex(var, zs);
+ else
+ mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
+ return NULL;
+ }
+ /* If only .x is used in the NIR, then it's effectively not a legacy depth
+ * sample anyway and we don't want to ask for shader recompiles. This is
+ * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
+ * LUMINANCE, so apps just use the first channel.
+ */
+ tex->def.num_components = 1;
+ tex->is_new_style_shadow = true;
+ }
+ if (bit_size != dest_size) {
+ tex->def.bit_size = bit_size;
+ tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
+
+ if (is_int) {
+ if (glsl_unsigned_base_type_of(ret_type) == ret_type)
+ dest = nir_u2uN(b, &tex->def, dest_size);
+ else
+ dest = nir_i2iN(b, &tex->def, dest_size);
+ } else {
+ dest = nir_f2fN(b, &tex->def, dest_size);
+ }
+ if (rewrite_depth)
+ return dest;
+ nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
+ } else if (rewrite_depth) {
+ return dest;
+ }
+ return dest;
+}
+
+struct lower_zs_swizzle_state {
+ bool shadow_only;
+ unsigned base_sampler_id;
+ const struct zink_zs_swizzle_key *swizzle;
+};
+
+static bool
+lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct lower_zs_swizzle_state *state = data;
+ const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
+ assert(state->shadow_only || swizzle_key);
+ if (instr->type != nir_instr_type_tex)
+ return false;
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
+ (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
+ return false;
+ if (tex->is_shadow && tex->op == nir_texop_tg4)
+ /* Will not even try to emulate the shadow comparison */
+ return false;
+ int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
+ nir_variable *var = NULL;
+ if (handle != -1)
+ /* gtfo bindless depth texture mode */
+ return false;
+ nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
+ if (glsl_type_is_sampler(glsl_without_array(img->type))) {
+ unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
+ if (tex->texture_index >= img->data.driver_location &&
+ tex->texture_index < img->data.driver_location + size) {
+ var = img;
+ break;
+ }
+ }
+ }
+ assert(var);
+ uint32_t sampler_id = var->data.binding - state->base_sampler_id;
+ const struct glsl_type *type = glsl_without_array(var->type);
+ enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
+ bool is_int = glsl_base_type_is_integer(ret_type);
+ unsigned num_components = tex->def.num_components;
+ if (tex->is_shadow)
+ tex->is_new_style_shadow = true;
+ nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
+ assert(dest || !state->shadow_only);
+ if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
+ return false;
+ else if (!dest)
+ dest = &tex->def;
+ else
+ tex->def.num_components = 1;
+ if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
+ /* these require manual swizzles */
+ if (tex->op == nir_texop_tg4) {
+ assert(!tex->is_shadow);
+ nir_def *swizzle;
+ switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
+ case PIPE_SWIZZLE_0:
+ swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
+ break;
+ case PIPE_SWIZZLE_1:
+ if (is_int)
+ swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
+ else
+ swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
+ break;
+ default:
+ if (!tex->component)
+ return false;
+ tex->component = 0;
+ return true;
+ }
+ nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
+ return true;
+ }
+ nir_def *vec[4];
+ for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
+ switch (swizzle_key->swizzle[sampler_id].s[i]) {
+ case PIPE_SWIZZLE_0:
+ vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
+ break;
+ case PIPE_SWIZZLE_1:
+ if (is_int)
+ vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
+ else
+ vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
+ break;
+ default:
+ vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
+ break;
+ }
+ }
+ nir_def *swizzle = nir_vec(b, vec, num_components);
+ nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
+ } else {
+ assert(tex->is_shadow);
+ nir_def *vec[4] = {dest, dest, dest, dest};
+ nir_def *splat = nir_vec(b, vec, num_components);
+ nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
+ }
+ return true;
+}
+
+/* Applies in-shader swizzles when necessary for depth/shadow sampling.
+ *
+ * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
+ * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
+ * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
+ * shader to expand out to vec4. Since this depends on sampler state, it's a
+ * draw-time shader recompile to do so.
+ *
+ * We may also need to apply shader swizzles for
+ * driver_workarounds.needs_zs_shader_swizzle.
+ */
+static bool
+lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
+{
+ /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
+ unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
+ struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
+ return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
+}
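
Editor's note: the comment above describes emulating a legacy vec4-result shadow sample by performing a scalar new-style sample and applying the GL_DEPTH_TEXTURE_MODE swizzle in the shader. A minimal standalone sketch of expanding a scalar depth result through such a swizzle (not driver code; the enum and the "RED" mode values are hypothetical):

/* Standalone sketch: expand a scalar depth-compare result to vec4 per a
 * depth-texture-mode style swizzle. */
#include <stdio.h>

enum swz { SWZ_X, SWZ_ZERO, SWZ_ONE };

int main(void)
{
   float depth = 0.75f;   /* scalar sample result */
   enum swz mode[4] = { SWZ_X, SWZ_ZERO, SWZ_ZERO, SWZ_ONE };   /* "RED" */
   float out[4];
   for (int i = 0; i < 4; i++)
      out[i] = mode[i] == SWZ_X ? depth : (mode[i] == SWZ_ONE ? 1.0f : 0.0f);
   printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
   return 0;
}
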
+
+static bool
+invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
+ void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_load_point_coord)
+ return false;
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
+ nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
+ nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
+ return true;
+}
+
+static bool
+invert_point_coord(nir_shader *nir)
+{
+ if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
+ return false;
+ return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
+ nir_metadata_dominance, NULL);
+}
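
Editor's note: invert_point_coord only flips the Y coordinate (y' = 1 - y). A minimal standalone check of that arithmetic (not driver code; the sample coordinate is arbitrary):

/* Standalone sketch: the point-coord Y flip applied by invert_point_coord_instr. */
#include <stdio.h>

int main(void)
{
   float pntc[2] = { 0.25f, 0.1f };
   float flipped[2] = { pntc[0], 1.0f - pntc[1] };
   printf("(%f, %f) -> (%f, %f)\n", pntc[0], pntc[1], flipped[0], flipped[1]);
   return 0;
}
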
+
+static bool
+lower_sparse_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ b->cursor = nir_after_instr(instr);
+
+ switch (instr->type) {
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (!tex->is_sparse)
+ return false;
+
+ nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &tex->def));
+ nir_def *vec = nir_vector_insert_imm(b, &tex->def, res,
+ tex->def.num_components - 1);
+ nir_def_rewrite_uses_after(&tex->def, vec, vec->parent_instr);
+ return true;
+ }
+
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_image_deref_sparse_load: {
+ nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &intrin->def));
+ nir_def *vec = nir_vector_insert_imm(b, &intrin->def, res, 4);
+ nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
+ return true;
+ }
+
+ case nir_intrinsic_sparse_residency_code_and: {
+ nir_def *res = nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa);
+ nir_def_rewrite_uses(&intrin->def, res);
+ return true;
+ }
+
+ case nir_intrinsic_is_sparse_texels_resident: {
+ nir_def *res = nir_i2b(b, intrin->src[0].ssa);
+ nir_def_rewrite_uses(&intrin->def, res);
+ return true;
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ default:
+ return false;
+ }
+}
+
+static bool
+lower_sparse(nir_shader *shader)
+{
+ return nir_shader_instructions_pass(shader, lower_sparse_instr,
+ nir_metadata_dominance, NULL);
+}
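
Editor's note: lower_sparse turns residency handling into plain integer ops: each fetch's residency becomes an i32 appended to the result vector, codes are combined with iand, and is_sparse_texels_resident becomes i2b. A minimal standalone sketch of that encoding with plain integers (not driver code; the two fetches are hypothetical):

/* Standalone sketch: residency codes as plain integers, mirroring the
 * b2i32 / iand / i2b lowering above. */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
   bool fetch_a_resident = true;
   bool fetch_b_resident = false;
   int code_a = fetch_a_resident ? 1 : 0;   /* b2i32 */
   int code_b = fetch_b_resident ? 1 : 0;
   int combined = code_a & code_b;          /* sparse_residency_code_and */
   bool all_resident = combined != 0;       /* is_sparse_texels_resident */
   printf("all resident: %d\n", (int)all_resident);
   return 0;
}
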
+
+static bool
+add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ return false;
+ bool is_special_io = (b->shader->info.stage == MESA_SHADER_VERTEX && is_input) ||
+ (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_input);
+ unsigned loc = nir_intrinsic_io_semantics(intr).location;
+ nir_src *src_offset = nir_get_io_offset_src(intr);
+ const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
+ unsigned location = loc + slot_offset;
+ unsigned frac = nir_intrinsic_component(intr);
+ unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
+ /* set c aligned/rounded down to dword */
+ unsigned c = frac;
+ if (frac && bit_size < 32)
+ c = frac * bit_size / 32;
+ /* loop over all the variables and rewrite corresponding access */
+ nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
+ const struct glsl_type *type = var->type;
+ if (nir_is_arrayed_io(var, b->shader->info.stage))
+ type = glsl_get_array_element(type);
+ unsigned slot_count = get_var_slot_count(b->shader, var);
+ /* filter access that isn't specific to this variable */
+ if (var->data.location > location || var->data.location + slot_count <= location)
+ continue;
+ if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
+ continue;
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
+ continue;
+
+ unsigned size = 0;
+ bool is_struct = glsl_type_is_struct(glsl_without_array(type));
+ if (is_struct)
+ size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
+ else if (!is_special_io && var->data.compact)
+ size = glsl_get_aoa_size(type);
+ else
+ size = glsl_get_vector_elements(glsl_without_array(type));
+ assert(size);
+ if (glsl_type_is_64bit(glsl_without_array(var->type)))
+ size *= 2;
+ if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
+ /* adjust for dvec3-type slot overflow */
+ assert(location > var->data.location);
+ size -= (location - var->data.location) * 4;
+ }
+ assert(size);
+ if (var->data.location_frac + size <= c || var->data.location_frac > c)
+ continue;
+
+ b->cursor = nir_before_instr(&intr->instr);
+ nir_deref_instr *deref = nir_build_deref_var(b, var);
+ if (nir_is_arrayed_io(var, b->shader->info.stage)) {
+ assert(intr->intrinsic != nir_intrinsic_store_output);
+ deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
+ }
+ if (glsl_type_is_array(type)) {
+ /* unroll array derefs */
+ unsigned idx = var->data.compact ? (frac - var->data.location_frac) : 0;
+ assert(src_offset);
+ if (var->data.location < VARYING_SLOT_VAR0) {
+ if (src_offset) {
+ /* clip/cull dist and tess levels use different array offset semantics */
+ bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
+ is_clipcull_dist(var->data.location);
+ bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
+ (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER);
+ bool is_builtin_array = is_clipdist || is_tess_level;
+ /* this is explicit for ease of debugging but could be collapsed at some point in the future */
+ if (nir_src_is_const(*src_offset)) {
+ unsigned offset = slot_offset;
+ if (is_builtin_array)
+ offset *= 4;
+ if (is_clipdist) {
+ if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1)
+ offset += 4;
+ }
+ deref = nir_build_deref_array_imm(b, deref, offset + idx);
+ } else {
+ nir_def *offset = src_offset->ssa;
+ if (is_builtin_array)
+ offset = nir_imul_imm(b, offset, 4);
+ deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : offset);
+ }
+ } else {
+ deref = nir_build_deref_array_imm(b, deref, idx);
+ }
+ type = glsl_get_array_element(type);
+ } else {
+ idx += location - var->data.location;
+ /* need to convert possible N*M to [N][M] */
+ nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
+ while (glsl_type_is_array(type)) {
+ const struct glsl_type *elem = glsl_get_array_element(type);
+ unsigned type_size = glsl_count_vec4_slots(elem, false, false);
+ nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
+ if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
+ n = nir_udiv_imm(b, n, 2);
+ deref = nir_build_deref_array(b, deref, n);
+ nm = nir_umod_imm(b, nm, type_size);
+ type = glsl_get_array_element(type);
+ }
+ }
+ } else if (glsl_type_is_struct(type)) {
+ deref = nir_build_deref_struct(b, deref, slot_offset);
+ }
+ assert(!glsl_type_is_array(type));
+ unsigned num_components = glsl_get_vector_elements(type);
+ if (is_load) {
+ nir_def *load;
+ if (is_interp) {
+ nir_def *interp = intr->src[0].ssa;
+ nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
+ assert(interp_intr);
+ var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
+ switch (interp_intr->intrinsic) {
+ case nir_intrinsic_load_barycentric_centroid:
+ load = nir_interp_deref_at_centroid(b, num_components, bit_size, &deref->def);
+ break;
+ case nir_intrinsic_load_barycentric_sample:
+ var->data.sample = 1;
+ load = nir_load_deref(b, deref);
+ break;
+ case nir_intrinsic_load_barycentric_pixel:
+ load = nir_load_deref(b, deref);
+ break;
+ case nir_intrinsic_load_barycentric_at_sample:
+ load = nir_interp_deref_at_sample(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
+ break;
+ case nir_intrinsic_load_barycentric_at_offset:
+ load = nir_interp_deref_at_offset(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
+ break;
+ default:
+ unreachable("unhandled interp!");
+ }
+ } else {
+ load = nir_load_deref(b, deref);
+ }
+ /* filter needed components */
+ if (intr->num_components < load->num_components)
+ load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
+ nir_def_rewrite_uses(&intr->def, load);
+ } else {
+ nir_def *store = intr->src[0].ssa;
+ /* pad/filter components to match deref type */
+ if (intr->num_components < num_components) {
+ nir_def *zero = nir_imm_zero(b, 1, bit_size);
+ nir_def *vec[4] = {zero, zero, zero, zero};
+ u_foreach_bit(i, nir_intrinsic_write_mask(intr))
+ vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
+ store = nir_vec(b, vec, num_components);
+ }
+ if (store->num_components > num_components) {
+ store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
+ }
+ if (store->bit_size != glsl_get_bit_size(type)) {
+ /* this should be some weird bindless io conversion */
+ assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
+ assert(num_components != store->num_components);
+ store = nir_unpack_64_2x32(b, store);
+ }
+ nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
+ }
+ nir_instr_remove(&intr->instr);
+ return true;
+ }
+ unreachable("failed to find variable for explicit io!");
+ return true;
+}
+
+static bool
+add_derefs(nir_shader *nir)
+{
+ return nir_shader_intrinsics_pass(nir, add_derefs_instr,
+ nir_metadata_dominance, NULL);
+}
+
+static struct zink_shader_object
+compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
+{
+ struct zink_shader_info *sinfo = &zs->sinfo;
+ prune_io(nir);
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ NIR_PASS_V(nir, nir_divergence_analysis);
+ break;
+ default: break;
+ }
+ NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+ if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
+ nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
+ if (zink_debug & ZINK_DEBUG_NIR) {
+ fprintf(stderr, "NIR shader:\n---8<---\n");
+ nir_print_shader(nir, stderr);
+ fprintf(stderr, "---8<---\n");
+ }
+
+ struct zink_shader_object obj = {0};
+ struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
+ if (spirv)
+ obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
+
+ /* TODO: determine if there's any reason to cache spirv output? */
+ if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
+ zs->spirv = spirv;
+ else
+ obj.spirv = spirv;
+ return obj;
+}
+
+static bool
+remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, void *data)
+{
+ if (interp->intrinsic != nir_intrinsic_interp_deref_at_sample)
+ return false;
+
+ b->cursor = nir_before_instr(&interp->instr);
+ nir_def *res = nir_load_deref(b, nir_src_as_deref(interp->src[0]));
+ nir_def_rewrite_uses(&interp->def, res);
+
+ return true;
+}
+
+struct zink_shader_object
+zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
+ nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
+{
+ bool need_optimize = true;
+ bool inlined_uniforms = false;
+
+ NIR_PASS_V(nir, add_derefs);
+ NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
if (key) {
if (key->inline_uniforms) {
NIR_PASS_V(nir, nir_inline_uniforms,
@@ -795,54 +4010,101 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
key->base.inlined_uniform_values,
nir->info.inlinable_uniform_dw_offsets);
- optimize_nir(nir);
-
- /* This must be done again. */
- NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
- nir_var_shader_out);
+ inlined_uniforms = true;
}
/* TODO: use a separate mem ctx here for ralloc */
- switch (zs->nir->info.stage) {
- case MESA_SHADER_VERTEX: {
- uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
- const struct zink_vs_key *vs_key = zink_vs_key(key);
- switch (vs_key->size) {
- case 4:
- decomposed_attrs = vs_key->u32.decomposed_attrs;
- decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
+
+ if (!screen->optimal_keys) {
+ switch (zs->info.stage) {
+ case MESA_SHADER_VERTEX: {
+ uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
+ const struct zink_vs_key *vs_key = zink_vs_key(key);
+ switch (vs_key->size) {
+ case 4:
+ decomposed_attrs = vs_key->u32.decomposed_attrs;
+ decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
+ break;
+ case 2:
+ decomposed_attrs = vs_key->u16.decomposed_attrs;
+ decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
+ break;
+ case 1:
+ decomposed_attrs = vs_key->u8.decomposed_attrs;
+ decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
+ break;
+ default: break;
+ }
+ if (decomposed_attrs || decomposed_attrs_without_w)
+ NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
break;
- case 2:
- decomposed_attrs = vs_key->u16.decomposed_attrs;
- decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
+ }
+
+ case MESA_SHADER_GEOMETRY:
+ if (zink_gs_key(key)->lower_line_stipple) {
+ NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
+ NIR_PASS_V(nir, nir_lower_var_copies);
+ need_optimize = true;
+ }
+
+ if (zink_gs_key(key)->lower_line_smooth) {
+ NIR_PASS_V(nir, lower_line_smooth_gs);
+ NIR_PASS_V(nir, nir_lower_var_copies);
+ need_optimize = true;
+ }
+
+ if (zink_gs_key(key)->lower_gl_point) {
+ NIR_PASS_V(nir, lower_gl_point_gs);
+ need_optimize = true;
+ }
+
+ if (zink_gs_key(key)->lower_pv_mode) {
+ NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
+ need_optimize = true; //TODO verify that this is required
+ }
break;
- case 1:
- decomposed_attrs = vs_key->u8.decomposed_attrs;
- decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
+
+ default:
break;
- default: break;
}
- if (decomposed_attrs || decomposed_attrs_without_w)
- NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
- FALLTHROUGH;
}
+
+ switch (zs->info.stage) {
+ case MESA_SHADER_VERTEX:
case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
if (zink_vs_key_base(key)->last_vertex_stage) {
- if (zs->streamout.have_xfb)
- streamout = &zs->streamout;
-
- if (!zink_vs_key_base(key)->clip_halfz) {
+ if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
NIR_PASS_V(nir, nir_lower_clip_halfz);
}
if (zink_vs_key_base(key)->push_drawid) {
NIR_PASS_V(nir, lower_drawid);
}
+ } else {
+ nir->xfb_info = NULL;
}
+ if (zink_vs_key_base(key)->robust_access)
+ NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
break;
case MESA_SHADER_FRAGMENT:
- if (!zink_fs_key(key)->samples &&
- nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
+ if (zink_fs_key(key)->lower_line_smooth) {
+ NIR_PASS_V(nir, lower_line_smooth_fs,
+ zink_fs_key(key)->lower_line_stipple);
+ need_optimize = true;
+ } else if (zink_fs_key(key)->lower_line_stipple)
+ NIR_PASS_V(nir, lower_line_stipple_fs);
+
+ if (zink_fs_key(key)->lower_point_smooth) {
+ NIR_PASS_V(nir, nir_lower_point_smooth);
+ NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
+ nir->info.fs.uses_discard = true;
+ need_optimize = true;
+ }
+
+ if (zink_fs_key(key)->robust_access)
+ NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
+
+ if (!zink_fs_key_base(key)->samples && zink_shader_uses_samples(zs)) {
/* VK will always use gl_SampleMask[] values even if sample count is 0,
* so we need to skip this write here to mimic GL's behavior of ignoring it
*/
@@ -852,73 +4114,158 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
}
nir_fixup_deref_modes(nir);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
- optimize_nir(nir);
+ NIR_PASS_V(nir, nir_shader_intrinsics_pass, remove_interpolate_at_sample,
+ nir_metadata_dominance | nir_metadata_block_index, NULL);
+
+ need_optimize = true;
}
- if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
+ if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
NIR_PASS_V(nir, lower_dual_blend);
}
- if (zink_fs_key(key)->coord_replace_bits) {
- NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
- false, zink_fs_key(key)->coord_replace_yinvert);
+ if (zink_fs_key_base(key)->coord_replace_bits)
+ NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
+ if (zink_fs_key_base(key)->point_coord_yinvert)
+ NIR_PASS_V(nir, invert_point_coord);
+ if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
+ nir_foreach_shader_in_variable(var, nir)
+ var->data.sample = true;
+ nir->info.fs.uses_sample_qualifier = true;
+ nir->info.fs.uses_sample_shading = true;
}
+ if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
+ NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
if (nir->info.fs.uses_fbfetch_output) {
nir_variable *fbfetch = NULL;
- NIR_PASS_V(nir, lower_fbfetch, &fbfetch);
+ NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
/* old variable must be deleted to avoid spirv errors */
fbfetch->data.mode = nir_var_shader_temp;
nir_fixup_deref_modes(nir);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
- optimize_nir(nir);
+ need_optimize = true;
}
+ nir_foreach_shader_in_variable_safe(var, nir) {
+ if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
+ continue;
+ nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
+ var->data.mode = nir_var_shader_temp;
+ nir_fixup_deref_modes(nir);
+ NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+ need_optimize = true;
+ }
+ break;
+ case MESA_SHADER_COMPUTE:
+ if (zink_cs_key(key)->robust_access)
+ NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
break;
default: break;
}
+ if (key->base.needs_zs_shader_swizzle) {
+ assert(extra_data);
+ NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
+ }
+ if (key->base.nonseamless_cube_mask) {
+ NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
+ need_optimize = true;
+ }
}
- NIR_PASS_V(nir, nir_convert_from_ssa, true);
-
- struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
- if (!spirv)
- goto done;
+ if (screen->driconf.inline_uniforms) {
+ NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
+ NIR_PASS_V(nir, rewrite_bo_access, screen);
+ NIR_PASS_V(nir, remove_bo_access, zs);
+ need_optimize = true;
+ }
+ if (inlined_uniforms) {
+ optimize_nir(nir, zs, true);
+
+ /* This must be done again. */
+ NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
+ nir_var_shader_out);
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
+ zs->can_inline = false;
+ } else if (need_optimize)
+ optimize_nir(nir, zs, true);
+ bool has_sparse = false;
+ NIR_PASS(has_sparse, nir, lower_sparse);
+ if (has_sparse)
+ optimize_nir(nir, zs, false);
+
+ struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
+ ralloc_free(nir);
+ return obj;
+}
- if (zink_debug & ZINK_DEBUG_SPIRV) {
- char buf[256];
- static int i;
- snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
- FILE *fp = fopen(buf, "wb");
- if (fp) {
- fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
- fclose(fp);
- fprintf(stderr, "wrote '%s'...\n", buf);
+struct zink_shader_object
+zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
+{
+ nir_shader *nir = zink_shader_deserialize(screen, zs);
+ /* TODO: maybe compile multiple variants for different set counts for compact mode? */
+ int set = zs->info.stage == MESA_SHADER_FRAGMENT;
+ if (screen->info.have_EXT_shader_object)
+ set = zs->info.stage;
+ unsigned offsets[4];
+ zink_descriptor_shader_get_binding_offsets(zs, offsets);
+ nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
+ if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
+ continue;
+ var->data.descriptor_set = set;
+ switch (var->data.mode) {
+ case nir_var_mem_ubo:
+ var->data.binding = !!var->data.driver_location;
+ break;
+ case nir_var_uniform:
+ if (glsl_type_is_sampler(glsl_without_array(var->type)))
+ var->data.binding += offsets[1];
+ break;
+ case nir_var_mem_ssbo:
+ var->data.binding += offsets[2];
+ break;
+ case nir_var_image:
+ var->data.binding += offsets[3];
+ break;
+ default: break;
+ }
+ }
+ NIR_PASS_V(nir, add_derefs);
+ NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
+ if (screen->driconf.inline_uniforms) {
+ NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
+ NIR_PASS_V(nir, rewrite_bo_access, screen);
+ NIR_PASS_V(nir, remove_bo_access, zs);
+ }
+ optimize_nir(nir, zs, true);
+ zink_descriptor_shader_init(screen, zs);
+ nir_shader *nir_clone = NULL;
+ if (screen->info.have_EXT_shader_object)
+ nir_clone = nir_shader_clone(nir, nir);
+ struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
+ if (screen->info.have_EXT_shader_object && !zs->info.internal) {
+ /* always try to pre-generate a tcs in case it's needed */
+ if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
+ nir_shader *nir_tcs = NULL;
+ /* use max pcp for compat */
+ zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
+ nir_tcs->info.separate_shader = true;
+ zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
+ ralloc_free(nir_tcs);
}
}
-
- VkShaderModuleCreateInfo smci = {0};
- smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- smci.codeSize = spirv->num_words * sizeof(uint32_t);
- smci.pCode = spirv->words;
-
- if (VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
- mod = VK_NULL_HANDLE;
-
-done:
ralloc_free(nir);
-
- /* TODO: determine if there's any reason to cache spirv output? */
- ralloc_free(spirv);
- return mod;
+ spirv_shader_delete(obj.spirv);
+ obj.spirv = NULL;
+ return obj;
}
static bool
-lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
+lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
+ void *data)
{
- if (instr->type != nir_instr_type_intrinsic)
- return false;
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_instance_id)
return false;
- b->cursor = nir_after_instr(instr);
- nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
- nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
+ nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
return true;
}
@@ -927,146 +4274,379 @@ lower_baseinstance(nir_shader *shader)
{
if (shader->info.stage != MESA_SHADER_VERTEX)
return false;
- return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
+ return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
+ nir_metadata_dominance, NULL);
}
-bool nir_lower_dynamic_bo_access(nir_shader *shader);
-
/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
* so instead we delete all those broken variables and just make new ones
*/
static bool
-unbreak_bos(nir_shader *shader)
+unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
{
- uint32_t ssbo_used = 0;
- uint32_t ubo_used = 0;
uint64_t max_ssbo_size = 0;
uint64_t max_ubo_size = 0;
- bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};
+ uint64_t max_uniform_size = 0;
- if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
+ if (!shader->info.num_ssbos && !shader->info.num_ubos)
return false;
+
+ nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ if (type_is_counter(type))
+ continue;
+ /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
+ unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
+ const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
+ if (interface_type) {
+ unsigned block_size = glsl_get_explicit_size(interface_type, true);
+ if (glsl_get_length(interface_type) == 1) {
+ /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
+ const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
+ if (glsl_type_is_array(f) && !glsl_array_size(f))
+ block_size = 0;
+ }
+ if (block_size) {
+ block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
+ size = MAX2(size, block_size);
+ }
+ }
+ if (var->data.mode == nir_var_mem_ubo) {
+ if (var->data.driver_location)
+ max_ubo_size = MAX2(max_ubo_size, size);
+ else
+ max_uniform_size = MAX2(max_uniform_size, size);
+ } else {
+ max_ssbo_size = MAX2(max_ssbo_size, size);
+ if (interface_type) {
+ if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
+ needs_size = true;
+ }
+ }
+ var->data.mode = nir_var_shader_temp;
+ }
+ nir_fixup_deref_modes(shader);
+ NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+ optimize_nir(shader, NULL, true);
+
+ struct glsl_struct_field field = {0};
+ field.name = ralloc_strdup(shader, "base");
+ if (shader->info.num_ubos) {
+ if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
+ field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
+ nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
+ glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
+ "uniform_0@32");
+ var->interface_type = var->type;
+ var->data.mode = nir_var_mem_ubo;
+ var->data.driver_location = 0;
+ }
+
+ unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
+ uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
+ if (num_ubos && ubos_used) {
+ field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
+ /* shrink array as much as possible */
+ unsigned first_ubo = ffs(ubos_used) - 2;
+ assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
+ num_ubos -= first_ubo;
+ assert(num_ubos);
+ nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
+ glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
+ "ubos@32");
+ var->interface_type = var->type;
+ var->data.mode = nir_var_mem_ubo;
+ var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
+ }
+ }
+ if (shader->info.num_ssbos && zs->ssbos_used) {
+ /* shrink array as much as possible */
+ unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
+ assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
+ unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
+ assert(num_ssbos);
+ const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
+ field.type = ssbo_type;
+ nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
+ glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
+ "ssbos@32");
+ var->interface_type = var->type;
+ var->data.mode = nir_var_mem_ssbo;
+ var->data.driver_location = first_ssbo;
+ }
+ return true;
+}
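+
+/* After unbreak_bos() a shader carries at most three buffer variables: a
+ * "uniform_0" block for the default uniform block, one "ubos" array covering
+ * every other used UBO slot, and one "ssbos" array covering the used SSBO
+ * slots, each typed as an array of uint-backed blocks sized to the largest
+ * block seen (or unsized when needs_size is set).  All buffer access then
+ * indexes into these instead of the per-slot variables deleted above.
+ */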
+
+static uint32_t
+get_src_mask_ssbo(unsigned total, nir_src src)
+{
+ if (nir_src_is_const(src))
+ return BITFIELD_BIT(nir_src_as_uint(src));
+ return BITFIELD_MASK(total);
+}
+
+static uint32_t
+get_src_mask_ubo(unsigned total, nir_src src)
+{
+ if (nir_src_is_const(src))
+ return BITFIELD_BIT(nir_src_as_uint(src));
+ return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
+}
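+
+/* Illustration only: with num_ssbos == 4, a constant slot src of 2 yields
+ * mask 0b0100 while a non-constant src conservatively yields 0b1111.  The
+ * UBO variant additionally clears bit 0, presumably because the default
+ * uniform block is only ever addressed with a constant index of 0 and so
+ * can never be reached indirectly.
+ */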
+
+static bool
+analyze_io(struct zink_shader *zs, nir_shader *shader)
+{
+ bool ret = false;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block) {
+ if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
+ /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
+ if (glsl_type_is_sampler(glsl_without_array(img->type))) {
+ unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
+ if (tex->texture_index >= img->data.driver_location &&
+ tex->texture_index < img->data.driver_location + size) {
+ BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
+ break;
+ }
+ }
+ }
+ continue;
+ }
if (instr->type != nir_instr_type_intrinsic)
continue;
-
+
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_store_ssbo:
- ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
+ zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
break;
-
+
case nir_intrinsic_get_ssbo_size: {
- uint32_t slot = nir_src_as_uint(intrin->src[0]);
- ssbo_used |= BITFIELD_BIT(slot);
- ssbo_sizes[slot] = true;
+ zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
+ ret = true;
break;
}
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- case nir_intrinsic_ssbo_atomic_fmin:
- case nir_intrinsic_ssbo_atomic_fmax:
- case nir_intrinsic_ssbo_atomic_fcomp_swap:
+ case nir_intrinsic_ssbo_atomic:
+ case nir_intrinsic_ssbo_atomic_swap:
case nir_intrinsic_load_ssbo:
- ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
+ zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_vec4:
- ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
+ zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
break;
default:
break;
}
}
}
+ return ret;
+}
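+
+/* Presumably run before unbreak_bos(): analyze_io() fills zs->ubos_used and
+ * zs->ssbos_used with the slots the shader can actually reach (conservatively,
+ * via the mask helpers above), and its return value flags get_ssbo_size usage,
+ * which is what makes unbreak_bos() emit unsized SSBO arrays.
+ */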
- nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
- const struct glsl_type *type = glsl_without_array(var->type);
- if (type_is_counter(type))
- continue;
- unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
- if (var->data.mode == nir_var_mem_ubo)
- max_ubo_size = MAX2(max_ubo_size, size);
- else
- max_ssbo_size = MAX2(max_ssbo_size, size);
- var->data.mode = nir_var_shader_temp;
+struct zink_bindless_info {
+ nir_variable *bindless[4];
+ unsigned bindless_set;
+};
+
+/* this is a "default" bindless texture used if the shader has no texture variables */
+static nir_variable *
+create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
+{
+ unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
+ nir_variable *var;
+
+ const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
+ var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
+ var->data.descriptor_set = descriptor_set;
+ var->data.driver_location = var->data.binding = binding;
+ return var;
+}
+
+/* this is a "default" bindless image used if the shader has no image variables */
+static nir_variable *
+create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
+{
+ unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
+ nir_variable *var;
+
+ const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
+ var->data.descriptor_set = descriptor_set;
+ var->data.driver_location = var->data.binding = binding;
+ var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ return var;
+}
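+
+/* The bindless descriptor set uses a fixed binding layout (matching
+ * handle_bindless_var() below): 0 = combined image+samplers, 1 = uniform
+ * texel buffers, 2 = storage images, 3 = storage texel buffers, each an
+ * array of ZINK_MAX_BINDLESS_HANDLES entries indexed by the handle value.
+ */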
+
+/* rewrite bindless instructions as array deref instructions */
+static bool
+lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
+{
+ struct zink_bindless_info *bindless = data;
+
+ if (in->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(in);
+ int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
+ if (idx == -1)
+ return false;
+
+ nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
+ if (!var) {
+ var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ bindless->bindless[1] = var;
+ else
+ bindless->bindless[0] = var;
+ }
+ b->cursor = nir_before_instr(in);
+ nir_deref_instr *deref = nir_build_deref_var(b, var);
+ if (glsl_type_is_array(var->type))
+ deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
+ nir_src_rewrite(&tex->src[idx].src, &deref->def);
+
+ /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
+ * match up with it in contrast to normal sampler ops where things are a bit more flexible;
+ * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
+ * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
+ *
+ * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
+ * - Warhammer 40k: Dawn of War III
+ */
+ unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
+ unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ unsigned coord_components = nir_src_num_components(tex->src[c].src);
+ if (coord_components < needed_components) {
+ nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
+ nir_src_rewrite(&tex->src[c].src, def);
+ tex->coord_components = needed_components;
+ }
+ return true;
}
+ if (in->type != nir_instr_type_intrinsic)
+ return false;
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
+
+ nir_intrinsic_op op;
+#define OP_SWAP(OP) \
+ case nir_intrinsic_bindless_image_##OP: \
+ op = nir_intrinsic_image_deref_##OP; \
+ break;
+
+
+ /* convert bindless intrinsics to deref intrinsics */
+ switch (instr->intrinsic) {
+ OP_SWAP(atomic)
+ OP_SWAP(atomic_swap)
+ OP_SWAP(format)
+ OP_SWAP(load)
+ OP_SWAP(order)
+ OP_SWAP(samples)
+ OP_SWAP(size)
+ OP_SWAP(store)
+ default:
+ return false;
+ }
+
+ enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
+ nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
+ if (!var)
+ var = create_bindless_image(b->shader, dim, bindless->bindless_set);
+ instr->intrinsic = op;
+ b->cursor = nir_before_instr(in);
+ nir_deref_instr *deref = nir_build_deref_var(b, var);
+ if (glsl_type_is_array(var->type))
+ deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
+ nir_src_rewrite(&instr->src[0], &deref->def);
+ return true;
+}
+
+static bool
+lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
+{
+ if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
+ return false;
nir_fixup_deref_modes(shader);
NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
- optimize_nir(shader);
+ optimize_nir(shader, NULL, true);
+ return true;
+}
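+
+/* e.g. (illustrative): bindless_image_load(handle, coord, ...) is rewritten
+ * above into image_deref_load(&bindless_image[u2u32(handle)], coord, ...),
+ * and texture handles likewise become array derefs of the bindless sampler
+ * variable, so ntv only ever sees ordinary deref-based access.
+ */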
- if (!ssbo_used && !ubo_used)
+/* convert shader image/texture io variables to int64 handles for bindless indexing */
+static bool
+lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
+ void *data)
+{
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
return false;
- struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
- fields[0].name = ralloc_strdup(shader, "base");
- fields[1].name = ralloc_strdup(shader, "unsized");
- if (ubo_used) {
- const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
- fields[0].type = ubo_type;
- u_foreach_bit(slot, ubo_used) {
- char buf[64];
- snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
- nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
- var->interface_type = var->type;
- var->data.driver_location = slot;
- }
- }
- if (ssbo_used) {
- const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
- const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
- fields[0].type = ssbo_type;
- u_foreach_bit(slot, ssbo_used) {
- char buf[64];
- snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
- if (ssbo_sizes[slot])
- fields[1].type = unsized;
- else
- fields[1].type = NULL;
- nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
- glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf);
- var->interface_type = var->type;
- var->data.driver_location = slot;
- }
- }
+ nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
+ if (var->data.bindless)
+ return false;
+ if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
+ return false;
+ if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
+ return false;
+
+ var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
+ var->data.bindless = 1;
return true;
}
+static bool
+lower_bindless_io(nir_shader *shader)
+{
+ return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
+ nir_metadata_dominance, NULL);
+}
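+
+/* e.g. (illustrative): a bindless sampler2D passed between stages is retyped
+ * above to an ivec2, i.e. the 64-bit handle carried as two 32-bit halves
+ * through the regular varying paths, with data.bindless marking it for the
+ * later bindless lowering.
+ */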
+
static uint32_t
-zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
+zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
{
if (stage == MESA_SHADER_NONE) {
unreachable("not supported");
} else {
+ unsigned base = stage;
+ /* clamp compute bindings for better driver efficiency */
+ if (gl_shader_stage_is_compute(stage))
+ base = 0;
switch (type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- assert(index < PIPE_MAX_CONSTANT_BUFFERS);
- return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;
+ return base * 2 + !!index;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ assert(stage == MESA_SHADER_KERNEL);
+ FALLTHROUGH;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ if (stage == MESA_SHADER_KERNEL) {
+ assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+ return index + PIPE_MAX_SAMPLERS;
+ }
+ FALLTHROUGH;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ assert(index < PIPE_MAX_SAMPLERS);
+ assert(stage != MESA_SHADER_KERNEL);
+ return (base * PIPE_MAX_SAMPLERS) + index;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
assert(index < PIPE_MAX_SAMPLERS);
- return (stage * PIPE_MAX_SAMPLERS) + index;
+ assert(stage == MESA_SHADER_KERNEL);
+ return index;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- assert(index < PIPE_MAX_SHADER_BUFFERS);
- return (stage * PIPE_MAX_SHADER_BUFFERS) + index;
+ return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- assert(index < PIPE_MAX_SHADER_IMAGES);
- return (stage * PIPE_MAX_SHADER_IMAGES) + index;
+ assert(index < ZINK_MAX_SHADER_IMAGES);
+ if (stage == MESA_SHADER_KERNEL)
+ return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
+ return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
default:
unreachable("unexpected type");
@@ -1074,40 +4654,1542 @@ zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
}
}
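+
+/* Worked example for zink_binding(), assuming PIPE_MAX_SAMPLERS == 32 and
+ * ZINK_GFX_SHADER_COUNT == 5: for the fragment stage (base == 4), UBO 0 maps
+ * to binding 8 and every other UBO to binding 9, sampler view N maps to
+ * binding 128 + N, and with compact descriptors all SSBOs share binding
+ * 4 + 10 == 14.  Compute clamps base to 0 and so gets its own numbering.
+ */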
+static void
+handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
+{
+ if (glsl_type_is_struct(type)) {
+ for (unsigned i = 0; i < glsl_get_length(type); i++)
+ handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
+ return;
+ }
+
+ /* just a random scalar in a struct */
+ if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
+ return;
+
+ VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
+ unsigned binding;
+ switch (vktype) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ binding = 0;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ binding = 1;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ binding = 2;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ binding = 3;
+ break;
+ default:
+ unreachable("unknown");
+ }
+ if (!bindless->bindless[binding]) {
+ bindless->bindless[binding] = nir_variable_clone(var, nir);
+ bindless->bindless[binding]->data.bindless = 0;
+ bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
+ bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
+ bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
+ if (!bindless->bindless[binding]->data.image.format)
+ bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ nir_shader_add_variable(nir, bindless->bindless[binding]);
+ } else {
+ assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
+ }
+ var->data.mode = nir_var_shader_temp;
+}
+
+static bool
+convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
+{
+ struct zink_screen *screen = data;
+ if (instr->type != nir_instr_type_tex)
+ return false;
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
+ return false;
+ if (tex->is_sparse && screen->need_2D_sparse) {
+ /* no known case of this exists: only nvidia can hit it, and nothing uses it */
+ mesa_loge("unhandled/unsupported 1D sparse texture!");
+ abort();
+ }
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ b->cursor = nir_before_instr(instr);
+ tex->coord_components++;
+ unsigned srcs[] = {
+ nir_tex_src_coord,
+ nir_tex_src_offset,
+ nir_tex_src_ddx,
+ nir_tex_src_ddy,
+ };
+ for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
+ unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
+ if (c == -1)
+ continue;
+ if (tex->src[c].src.ssa->num_components == tex->coord_components)
+ continue;
+ nir_def *def;
+ nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
+ if (tex->src[c].src.ssa->num_components == 1)
+ def = nir_vec2(b, tex->src[c].src.ssa, zero);
+ else
+ def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
+ nir_src_rewrite(&tex->src[c].src, def);
+ }
+ b->cursor = nir_after_instr(instr);
+ unsigned needed_components = nir_tex_instr_dest_size(tex);
+ unsigned num_components = tex->def.num_components;
+ if (needed_components > num_components) {
+ tex->def.num_components = needed_components;
+ assert(num_components < 3);
+ /* take either xz or just x since this is promoted to 2D from 1D */
+ uint32_t mask = num_components == 2 ? (1|4) : 1;
+ nir_def *dst = nir_channels(b, &tex->def, mask);
+ nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
+ }
+ return true;
+}
+
+static bool
+lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
+{
+ bool found = false;
+ nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned length = glsl_get_length(var->type);
+ if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
+ continue;
+ const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
+ var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
+
+ found = true;
+ }
+ if (found)
+ nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
+ return found;
+}
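+
+/* e.g. (illustrative): a shadow1D lookup with a scalar x coordinate becomes a
+ * 2D lookup with coord vec2(x, 0); offsets and gradients get the same zero
+ * padding, and if the promoted instruction returns extra channels only the
+ * originally needed ones (.x, or .x plus the layer in .z) are kept afterwards.
+ */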
+
+static void
+scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
+{
+ nir_foreach_function_impl(impl, shader) {
+ nir_foreach_block_safe(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ zs->sinfo.have_sparse |= tex->is_sparse;
+ }
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic == nir_intrinsic_image_deref_load ||
+ intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
+ intr->intrinsic == nir_intrinsic_image_deref_store ||
+ intr->intrinsic == nir_intrinsic_image_deref_atomic ||
+ intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
+ intr->intrinsic == nir_intrinsic_image_deref_size ||
+ intr->intrinsic == nir_intrinsic_image_deref_samples ||
+ intr->intrinsic == nir_intrinsic_image_deref_format ||
+ intr->intrinsic == nir_intrinsic_image_deref_order) {
+
+ nir_variable *var = nir_intrinsic_get_var(intr, 0);
+
+ /* Structs have been lowered already, so get_aoa_size is sufficient. */
+ const unsigned size =
+ glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
+ BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
+ var->data.binding + (MAX2(size, 1) - 1));
+ }
+ if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
+ zs->uses_sample = true;
+ if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
+ intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
+ zs->sinfo.have_sparse = true;
+
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (filter_io_instr(intr, &is_load, &is_input, &is_interp)) {
+ nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+ if (io_instr_is_arrayed(intr) && s.location < VARYING_SLOT_PATCH0) {
+ if (is_input)
+ zs->arrayed_inputs |= BITFIELD64_BIT(s.location);
+ else
+ zs->arrayed_outputs |= BITFIELD64_BIT(s.location);
+ }
+ /* TODO: delete this once #10826 is fixed */
+ if (!(is_input && shader->info.stage == MESA_SHADER_VERTEX)) {
+ if (is_clipcull_dist(s.location)) {
+ unsigned frac = nir_intrinsic_component(intr) + 1;
+ if (s.location < VARYING_SLOT_CULL_DIST0) {
+ if (s.location == VARYING_SLOT_CLIP_DIST1)
+ frac += 4;
+ shader->info.clip_distance_array_size = MAX3(shader->info.clip_distance_array_size, frac, s.num_slots);
+ } else {
+ if (s.location == VARYING_SLOT_CULL_DIST1)
+ frac += 4;
+ shader->info.cull_distance_array_size = MAX3(shader->info.cull_distance_array_size, frac, s.num_slots);
+ }
+ }
+ }
+ }
+
+ static bool warned = false;
+ if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
+ switch (intr->intrinsic) {
+ case nir_intrinsic_image_deref_atomic: {
+ nir_variable *var = nir_intrinsic_get_var(intr, 0);
+ if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
+ util_format_is_float(var->data.image.format))
+ fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+static bool
+match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
+{
+ if (in->type != nir_instr_type_tex)
+ return false;
+ nir_tex_instr *tex = nir_instr_as_tex(in);
+ if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
+ return false;
+ int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
+ nir_variable *var = NULL;
+ if (handle != -1) {
+ var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
+ } else {
+ nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
+ if (glsl_type_is_sampler(glsl_without_array(img->type))) {
+ unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
+ if (tex->texture_index >= img->data.driver_location &&
+ tex->texture_index < img->data.driver_location + size) {
+ var = img;
+ break;
+ }
+ }
+ }
+ }
+ return !!rewrite_tex_dest(b, tex, var, data);
+}
+
+static bool
+match_tex_dests(nir_shader *shader, struct zink_shader *zs)
+{
+ return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
+}
+
+static bool
+split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
+{
+ if (in->type != nir_instr_type_alu)
+ return false;
+ nir_alu_instr *alu = nir_instr_as_alu(in);
+ switch (alu->op) {
+ case nir_op_ubitfield_extract:
+ case nir_op_ibitfield_extract:
+ case nir_op_bitfield_insert:
+ break;
+ default:
+ return false;
+ }
+ unsigned num_components = alu->def.num_components;
+ if (num_components == 1)
+ return false;
+ b->cursor = nir_before_instr(in);
+ nir_def *dests[NIR_MAX_VEC_COMPONENTS];
+ for (unsigned i = 0; i < num_components; i++) {
+ if (alu->op == nir_op_bitfield_insert)
+ dests[i] = nir_bitfield_insert(b,
+ nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
+ nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
+ nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
+ nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
+ else if (alu->op == nir_op_ubitfield_extract)
+ dests[i] = nir_ubitfield_extract(b,
+ nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
+ nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
+ nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
+ else
+ dests[i] = nir_ibitfield_extract(b,
+ nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
+ nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
+ nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
+ }
+ nir_def *dest = nir_vec(b, dests, num_components);
+ nir_def_rewrite_uses_after(&alu->def, dest, in);
+ nir_instr_remove(in);
+ return true;
+}
+
+
+static bool
+split_bitfields(nir_shader *shader)
+{
+ return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
+}
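+
+/* e.g. (illustrative): a vec2 ubitfield_extract is split above into two
+ * scalar ubitfield_extract ops (one per channel, with the per-source swizzles
+ * applied) and revectorized, presumably because the SPIR-V OpBitField*
+ * instructions take scalar Offset/Count operands and cannot express
+ * per-component offsets on a vectorized op.
+ */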
+
+static bool
+strip_tex_ms_instr(nir_builder *b, nir_instr *in, void *data)
+{
+ if (in->type != nir_instr_type_intrinsic)
+ return false;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(in);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_image_deref_samples:
+ b->cursor = nir_before_instr(in);
+ nir_def_rewrite_uses_after(&intr->def, nir_imm_zero(b, 1, intr->def.bit_size), in);
+ nir_instr_remove(in);
+ break;
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_load:
+ break;
+ default:
+ return false;
+ }
+ enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
+ if (dim != GLSL_SAMPLER_DIM_MS)
+ return false;
+
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ nir_deref_instr *parent = nir_deref_instr_parent(deref);
+ if (parent) {
+ parent->type = var->type;
+ deref->type = glsl_without_array(var->type);
+ } else {
+ deref->type = var->type;
+ }
+ nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
+ return true;
+}
+
+
+static bool
+strip_tex_ms(nir_shader *shader)
+{
+ bool progress = false;
+ nir_foreach_image_variable(var, shader) {
+ const struct glsl_type *bare_type = glsl_without_array(var->type);
+ if (glsl_get_sampler_dim(bare_type) != GLSL_SAMPLER_DIM_MS)
+ continue;
+ unsigned array_size = 0;
+ if (glsl_type_is_array(var->type))
+ array_size = glsl_array_size(var->type);
+
+ const struct glsl_type *new_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, glsl_sampler_type_is_array(bare_type), glsl_get_sampler_result_type(bare_type));
+ if (array_size)
+ new_type = glsl_array_type(new_type, array_size, glsl_get_explicit_stride(var->type));
+ var->type = new_type;
+ progress = true;
+ }
+ if (!progress)
+ return false;
+ return nir_shader_instructions_pass(shader, strip_tex_ms_instr, nir_metadata_all, NULL);
+}
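+
+/* e.g. (illustrative): an image2DMS variable is retyped above to a plain
+ * image2D, any image_deref_samples query on it is folded to an immediate 0,
+ * and the dim on the remaining loads/stores is rewritten to 2D so later
+ * passes never see a multisampled storage image.
+ */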
+
+static void
+rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
+{
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_deref)
+ continue;
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ nir_variable *img = nir_deref_instr_get_variable(deref);
+ if (img != var)
+ continue;
+ if (glsl_type_is_array(var->type)) {
+ if (deref->deref_type == nir_deref_type_array)
+ deref->type = glsl_without_array(var->type);
+ else
+ deref->type = var->type;
+ } else {
+ deref->type = var->type;
+ }
+ }
+ }
+ }
+}
+
+static void
+type_image(nir_shader *nir, nir_variable *var)
+{
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic == nir_intrinsic_image_deref_load ||
+ intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
+ intr->intrinsic == nir_intrinsic_image_deref_store ||
+ intr->intrinsic == nir_intrinsic_image_deref_atomic ||
+ intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
+ intr->intrinsic == nir_intrinsic_image_deref_samples ||
+ intr->intrinsic == nir_intrinsic_image_deref_format ||
+ intr->intrinsic == nir_intrinsic_image_deref_order) {
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *img = nir_deref_instr_get_variable(deref);
+ if (img != var)
+ continue;
+ nir_alu_type alu_type = nir_intrinsic_src_type(intr);
+ const struct glsl_type *type = glsl_without_array(var->type);
+ if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
+ assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
+ continue;
+ }
+ const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
+ if (glsl_type_is_array(var->type))
+ img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
+ var->type = img_type;
+ rewrite_cl_derefs(nir, var);
+ return;
+ }
+ }
+ }
+ }
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_image_deref_size)
+ continue;
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ nir_variable *img = nir_deref_instr_get_variable(deref);
+ if (img != var)
+ continue;
+ nir_alu_type alu_type = nir_type_uint32;
+ const struct glsl_type *type = glsl_without_array(var->type);
+ if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
+ continue;
+ }
+ const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
+ if (glsl_type_is_array(var->type))
+ img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
+ var->type = img_type;
+ rewrite_cl_derefs(nir, var);
+ return;
+ }
+ }
+ }
+ var->data.mode = nir_var_shader_temp;
+}
+
+static bool
+type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
+{
+ bool progress = false;
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (nir_tex_instr_need_sampler(tex))
+ *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
+ nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
+ assert(var);
+ if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
+ nir_tex_instr_is_query(tex))
+ continue;
+ const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
+ unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
+ if (size > 1)
+ img_type = glsl_array_type(img_type, size, 0);
+ var->type = img_type;
+ progress = true;
+ }
+ }
+ }
+ return progress;
+}
+
+static bool
+delete_samplers(nir_shader *nir)
+{
+ bool progress = false;
+ nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
+ if (glsl_type_is_sampler(glsl_without_array(var->type))) {
+ var->data.mode = nir_var_shader_temp;
+ progress = true;
+ }
+ }
+ return progress;
+}
+
+static bool
+type_images(nir_shader *nir, unsigned *sampler_mask)
+{
+ bool progress = false;
+ progress |= delete_samplers(nir);
+ progress |= type_sampler_vars(nir, sampler_mask);
+ nir_foreach_variable_with_modes(var, nir, nir_var_image) {
+ type_image(nir, var);
+ progress = true;
+ }
+ return progress;
+}
+
+/* attempt to assign io for separate shaders */
+static bool
+fixup_io_locations(nir_shader *nir)
+{
+ nir_variable_mode modes;
+ if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
+ modes = nir_var_shader_in | nir_var_shader_out;
+ else
+ modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
+ u_foreach_bit(mode, modes) {
+ nir_variable_mode m = BITFIELD_BIT(mode);
+ if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
+ (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
+ /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
+ * - i/o interface blocks don't need to match
+ * - any location can be present or not
+ * - it just has to work
+ *
+ * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
+ * since it's a builtin and yolo it with all the other legacy crap
+ */
+ nir_foreach_variable_with_modes(var, nir, m) {
+ if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
+ continue;
+ if (var->data.location == VARYING_SLOT_VAR0)
+ var->data.driver_location = 0;
+ else if (var->data.patch)
+ var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+ else
+ var->data.driver_location = var->data.location;
+ }
+ continue;
+ }
+ /* i/o interface blocks are required to be EXACT matches between stages:
+ * iterate over all locations and set locations incrementally
+ */
+ unsigned slot = 0;
+ for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++) {
+ if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
+ continue;
+ bool found = false;
+ unsigned size = 0;
+ nir_foreach_variable_with_modes(var, nir, m) {
+ if (var->data.location != i)
+ continue;
+ /* only add slots for non-component vars or first-time component vars */
+ if (!var->data.location_frac || !size) {
+ /* ensure variable is given enough slots */
+ if (nir_is_arrayed_io(var, nir->info.stage))
+ size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
+ else
+ size += glsl_count_vec4_slots(var->type, false, false);
+ }
+ if (var->data.patch)
+ var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+ else
+ var->data.driver_location = slot;
+ found = true;
+ }
+ slot += size;
+ if (found) {
+ /* ensure the consumed slots aren't double iterated */
+ i += size - 1;
+ } else {
+ /* locations used between stages are not required to be contiguous */
+ if (i >= VARYING_SLOT_VAR0)
+ slot++;
+ }
+ }
+ }
+ return true;
+}
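+
+/* In the exact-match path every potential slot advances the running counter,
+ * so e.g. an unused VAR1 between VAR0 and VAR2 still consumes a slot and both
+ * sides of a separate-shader interface compute the same driver_location for
+ * VAR2 even when the consumer only reads a subset of the outputs.
+ */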
+
+static uint64_t
+zink_flat_flags(struct nir_shader *shader)
+{
+ uint64_t flat_flags = 0;
+ nir_foreach_shader_in_variable(var, shader) {
+ if (var->data.interpolation == INTERP_MODE_FLAT)
+ flat_flags |= BITFIELD64_BIT(var->data.location);
+ }
+
+ return flat_flags;
+}
+
+struct rework_io_state {
+ /* these are search criteria */
+ bool indirect_only;
+ unsigned location;
+ nir_variable_mode mode;
+ gl_shader_stage stage;
+ nir_shader *nir;
+ const char *name;
+
+ /* these are found by scanning */
+ bool arrayed_io;
+ bool medium_precision;
+ bool fb_fetch_output;
+ bool dual_source_blend_index;
+ uint32_t component_mask;
+ uint32_t ignored_component_mask;
+ unsigned array_size;
+ unsigned bit_size;
+ unsigned base;
+ nir_alu_type type;
+ /* must be last */
+ char *newname;
+};
+
+/* match an existing variable against the rework state */
+static nir_variable *
+find_rework_var(nir_shader *nir, struct rework_io_state *ris)
+{
+ nir_foreach_variable_with_modes(var, nir, ris->mode) {
+ const struct glsl_type *type = var->type;
+ if (nir_is_arrayed_io(var, nir->info.stage))
+ type = glsl_get_array_element(type);
+ if (var->data.fb_fetch_output != ris->fb_fetch_output)
+ continue;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index)
+ continue;
+ unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
+ if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location)
+ continue;
+ unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
+ assert(!glsl_type_contains_64bit(type));
+ uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4);
+ if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask)
+ return var;
+ }
+ return NULL;
+}
+
+static void
+update_io_var_name(struct rework_io_state *ris, const char *name)
+{
+ if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)))
+ return;
+ if (!name)
+ return;
+ if (ris->name && !strcmp(ris->name, name))
+ return;
+ if (ris->newname && !strcmp(ris->newname, name))
+ return;
+ if (ris->newname) {
+ ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name);
+ } else if (ris->name) {
+ ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name);
+ } else {
+ ris->newname = ralloc_strdup(ris->nir, name);
+ }
+}
+
+/* check/update tracking state for variable info */
+static void
+update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris)
+{
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ filter_io_instr(intr, &is_load, &is_input, &is_interp);
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+ unsigned frac = nir_intrinsic_component(intr);
+ /* the mask of components for the instruction */
+ uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac);
+
+ /* always check for existing variables first */
+ struct rework_io_state test = {
+ .location = ris->location,
+ .mode = ris->mode,
+ .stage = ris->stage,
+ .arrayed_io = io_instr_is_arrayed(intr),
+ .medium_precision = sem.medium_precision,
+ .fb_fetch_output = sem.fb_fetch_output,
+ .dual_source_blend_index = sem.dual_source_blend_index,
+ .component_mask = cmask,
+ .array_size = sem.num_slots > 1 ? sem.num_slots : 0,
+ };
+ if (find_rework_var(ris->nir, &test))
+ return;
+
+ /* filter ignored components to scan later:
+ * - ignore no-overlapping-components case
+ * - always match fbfetch and dual src blend
+ */
+ if (ris->component_mask &&
+ (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) {
+ ris->ignored_component_mask |= cmask;
+ return;
+ }
+
+ assert(!ris->indirect_only || sem.num_slots > 1);
+ if (sem.num_slots > 1)
+ ris->array_size = MAX2(ris->array_size, sem.num_slots);
+
+ assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr));
+ ris->arrayed_io = io_instr_is_arrayed(intr);
+
+ ris->component_mask |= cmask;
+
+ unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
+ assert(!ris->bit_size || ris->bit_size == bit_size);
+ ris->bit_size = bit_size;
+
+ nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
+ if (ris->type) {
+ /* in the case of clashing types, this heuristic guarantees some semblance of a match */
+ if (ris->type & nir_type_float || type & nir_type_float) {
+ ris->type = nir_type_float | bit_size;
+ } else if (ris->type & nir_type_int || type & nir_type_int) {
+ ris->type = nir_type_int | bit_size;
+ } else if (ris->type & nir_type_uint || type & nir_type_uint) {
+ ris->type = nir_type_uint | bit_size;
+ } else {
+ assert(bit_size == 1);
+ ris->type = nir_type_bool;
+ }
+ } else {
+ ris->type = type;
+ }
+
+ update_io_var_name(ris, intr->name);
+
+ ris->medium_precision |= sem.medium_precision;
+ ris->fb_fetch_output |= sem.fb_fetch_output;
+ ris->dual_source_blend_index |= sem.dual_source_blend_index;
+ if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
+ ris->base = nir_intrinsic_base(intr);
+}
+
+/* instruction-level scanning for variable data */
+static bool
+scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ struct rework_io_state *ris = data;
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ /* mode-based filtering */
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ return false;
+ if (ris->mode == nir_var_shader_in) {
+ if (!is_input)
+ return false;
+ } else {
+ if (is_input)
+ return false;
+ }
+ /* location-based filtering */
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+ if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location))
+ return false;
+
+ /* only scan indirect i/o when indirect_only is set */
+ nir_src *src_offset = nir_get_io_offset_src(intr);
+ if (!nir_src_is_const(*src_offset)) {
+ if (!ris->indirect_only)
+ return false;
+ update_io_var_state(intr, ris);
+ return false;
+ }
+
+ /* don't scan direct i/o when indirect_only is set */
+ if (ris->indirect_only)
+ return false;
+
+ update_io_var_state(intr, ris);
+ return false;
+}
+
+/* scan a given i/o slot for state info */
+static struct rework_io_state
+scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects)
+{
+ struct rework_io_state ris = {
+ .location = location,
+ .mode = mode,
+ .stage = nir->info.stage,
+ .nir = nir,
+ };
+
+ struct rework_io_state test;
+ do {
+ update_io_var_name(&test, ris.newname ? ris.newname : ris.name);
+ test = ris;
+ /* always run indirect scan first to detect potential overlaps */
+ if (scan_indirects) {
+ ris.indirect_only = true;
+ nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
+ }
+ ris.indirect_only = false;
+ nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
+ /* keep scanning until no changes found */
+ } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname)));
+ return ris;
+}
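+
+/* The fixed-point loop above rescans a slot until no new state turns up:
+ * overlapping accesses (say a .xy store and a .yzw store) merge into a single
+ * component_mask, while non-overlapping ones are parked in
+ * ignored_component_mask and picked up as a second variable by
+ * loop_io_var_mask() below.
+ */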
+
+/* create a variable using explicit/scan info */
+static void
+create_io_var(nir_shader *nir, struct rework_io_state *ris)
+{
+ char name[1024];
+ assert(ris->component_mask);
+ if (ris->newname || ris->name) {
+ snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name);
+ /* always use builtin name where possible */
+ } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) {
+ snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location));
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) {
+ snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location));
+ } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) {
+ snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage));
+ } else {
+ int c = ffs(ris->component_mask) - 1;
+ if (c)
+ snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c);
+ else
+ snprintf(name, sizeof(name), "slot_%u", ris->location);
+ }
+ /* calculate vec/array type */
+ int frac = ffs(ris->component_mask) - 1;
+ int num_components = util_last_bit(ris->component_mask) - frac;
+ assert(ris->component_mask == BITFIELD_RANGE(frac, num_components));
+ const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components);
+ if (ris->array_size)
+ vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type));
+ if (ris->arrayed_io) {
+ /* tess size may be unknown with generated tcs */
+ unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ?
+ nir->info.gs.vertices_in : 32 /* MAX_PATCH_VERTICES */;
+ vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type));
+ }
+ nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name);
+ var->data.location_frac = frac;
+ var->data.location = ris->location;
+ /* gallium vertex inputs use intrinsic 'base' indexing */
+ if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
+ var->data.driver_location = ris->base;
+ var->data.patch = ris->location >= VARYING_SLOT_PATCH0 ||
+ ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+ (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER));
+ /* set flat by default: add_derefs will fill this in later after more shader passes */
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
+ var->data.interpolation = INTERP_MODE_FLAT;
+ var->data.fb_fetch_output = ris->fb_fetch_output;
+ var->data.index = ris->dual_source_blend_index;
+ var->data.precision = ris->medium_precision;
+ /* only clip/cull dist and tess levels are compact */
+ if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in)
+ var->data.compact = is_clipcull_dist(ris->location) || (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER);
+}
+
+/* loop the i/o mask and generate variables for specified locations */
+static void
+loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask)
+{
+ bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in;
+ u_foreach_bit64(slot, mask) {
+ if (patch)
+ slot += VARYING_SLOT_PATCH0;
+
+ /* this should've been handled explicitly */
+ assert(is_vertex_input || !is_clipcull_dist(slot));
+
+ unsigned remaining = 0;
+ do {
+ /* scan the slot for usage */
+ struct rework_io_state ris = scan_io_var_slot(nir, mode, slot, indirect);
+ /* one of these must be true or things have gone very wrong */
+ assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining);
+         /* release builds only: the assert above is compiled out */
+ if (!ris.component_mask)
+ break;
+
+ /* whatever reaches this point is either enough info to create a variable or an existing variable */
+ if (!find_rework_var(nir, &ris))
+ create_io_var(nir, &ris);
+ /* scanning may detect multiple potential variables per location at component offsets: process again */
+ remaining = ris.ignored_component_mask;
+ } while (remaining);
+ }
+}
+
+/* for a given mode, generate variables */
+static void
+rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs)
+{
+ assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
+ assert(util_bitcount(mode) == 1);
+ bool found = false;
+ /* if no i/o, skip */
+ if (mode == nir_var_shader_out)
+ found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read;
+ else
+ found = nir->info.inputs_read || nir->info.patch_inputs_read;
+ if (!found)
+ return;
+
+ /* use local copies to enable incremental processing */
+ uint64_t inputs_read = nir->info.inputs_read;
+ uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly;
+ uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read;
+ uint64_t outputs_accessed_indirectly = nir->info.outputs_accessed_indirectly;
+
+ /* fragment outputs are special: handle separately */
+ if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
+ assert(!outputs_accessed_indirectly);
+ u_foreach_bit64(slot, outputs_accessed) {
+ struct rework_io_state ris = {
+ .location = slot,
+ .mode = mode,
+ .stage = nir->info.stage,
+ };
+ /* explicitly handle builtins */
+ switch (slot) {
+ case FRAG_RESULT_DEPTH:
+ case FRAG_RESULT_STENCIL:
+ case FRAG_RESULT_SAMPLE_MASK:
+ ris.bit_size = 32;
+ ris.component_mask = 0x1;
+ ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32;
+ create_io_var(nir, &ris);
+ outputs_accessed &= ~BITFIELD64_BIT(slot);
+ break;
+ default:
+ break;
+ }
+ }
+ /* the rest of the outputs can be generated normally */
+ loop_io_var_mask(nir, mode, false, false, outputs_accessed);
+ return;
+ }
+
+ /* vertex inputs are special: handle separately */
+ if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
+ assert(!inputs_read_indirectly);
+ u_foreach_bit64(slot, inputs_read) {
+ /* explicitly handle builtins */
+ if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE)
+ continue;
+
+ uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf;
+ struct rework_io_state ris = {
+ .location = slot,
+ .mode = mode,
+ .stage = nir->info.stage,
+ .bit_size = 32,
+ .component_mask = component_mask,
+ .type = nir_type_float32,
+ .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname,
+ };
+ create_io_var(nir, &ris);
+ inputs_read &= ~BITFIELD64_BIT(slot);
+ }
+ /* the rest of the inputs can be generated normally */
+ loop_io_var_mask(nir, mode, false, false, inputs_read);
+ return;
+ }
+
+ /* these are the masks to process based on the mode: nothing "special" as above */
+ uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed;
+ uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly;
+ u_foreach_bit64(slot, mask) {
+ struct rework_io_state ris = {
+ .location = slot,
+ .mode = mode,
+ .stage = nir->info.stage,
+ .arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot),
+ };
+ /* explicitly handle builtins */
+ unsigned max_components = 0;
+ switch (slot) {
+ case VARYING_SLOT_FOGC:
+ /* use intr components */
+ break;
+ case VARYING_SLOT_POS:
+ case VARYING_SLOT_CLIP_VERTEX:
+ case VARYING_SLOT_PNTC:
+ case VARYING_SLOT_BOUNDING_BOX0:
+ case VARYING_SLOT_BOUNDING_BOX1:
+ max_components = 4;
+ ris.type = nir_type_float32;
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ max_components = nir->info.clip_distance_array_size;
+ assert(max_components);
+ ris.type = nir_type_float32;
+ break;
+ case VARYING_SLOT_CULL_DIST0:
+ max_components = nir->info.cull_distance_array_size;
+ assert(max_components);
+ ris.type = nir_type_float32;
+ break;
+ case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CULL_DIST1:
+ mask &= ~BITFIELD64_BIT(slot);
+ indirect_mask &= ~BITFIELD64_BIT(slot);
+ continue;
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ max_components = 4;
+ ris.type = nir_type_float32;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ max_components = 2;
+ ris.type = nir_type_float32;
+ break;
+ case VARYING_SLOT_PRIMITIVE_ID:
+ case VARYING_SLOT_LAYER:
+ case VARYING_SLOT_VIEWPORT:
+ case VARYING_SLOT_FACE:
+ case VARYING_SLOT_VIEW_INDEX:
+ case VARYING_SLOT_VIEWPORT_MASK:
+ ris.type = nir_type_int32;
+ max_components = 1;
+ break;
+ case VARYING_SLOT_PSIZ:
+ max_components = 1;
+ ris.type = nir_type_float32;
+ break;
+ default:
+ break;
+ }
+ if (!max_components)
+ continue;
+ switch (slot) {
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CULL_DIST0:
+ case VARYING_SLOT_CULL_DIST1:
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ /* compact arrays */
+ ris.component_mask = 0x1;
+ ris.array_size = max_components;
+ break;
+ default:
+ ris.component_mask = BITFIELD_MASK(max_components);
+ break;
+ }
+ ris.bit_size = 32;
+ create_io_var(nir, &ris);
+ mask &= ~BITFIELD64_BIT(slot);
+ /* eliminate clip/cull distance scanning early */
+ indirect_mask &= ~BITFIELD64_BIT(slot);
+ }
+
+ /* patch i/o */
+ if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) ||
+ (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) {
+ uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written;
+ uint64_t indirect_patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly : nir->info.patch_outputs_accessed_indirectly;
+ uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed;
+
+ loop_io_var_mask(nir, mode, true, true, indirect_patch_mask);
+ loop_io_var_mask(nir, mode, false, true, patch_mask);
+ }
+
+ /* regular i/o */
+ loop_io_var_mask(nir, mode, true, false, indirect_mask);
+ loop_io_var_mask(nir, mode, false, false, mask);
+}
+
+static int
+zink_type_size(const struct glsl_type *type, bool bindless)
+{
+ return glsl_count_attribute_slots(type, false);
+}
+
+static nir_mem_access_size_align
+mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
+ uint8_t bit_size, uint32_t align,
+ uint32_t align_offset, bool offset_is_const,
+ const void *cb_data)
+{
+ align = nir_combined_align(align, align_offset);
+
+ assert(util_is_power_of_two_nonzero(align));
+
+ /* simply drop the bit_size for unaligned load/stores */
+ if (align < (bit_size / 8)) {
+ return (nir_mem_access_size_align){
+ .num_components = MIN2(bytes / align, 4),
+ .bit_size = align * 8,
+ .align = align,
+ };
+ } else {
+ return (nir_mem_access_size_align){
+ .num_components = MIN2(bytes / (bit_size / 8), 4),
+ .bit_size = bit_size,
+ .align = bit_size / 8,
+ };
+ }
+}
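+
+/* Worked example (illustrative): a 12-byte load with bit_size 32 but only
+ * 2-byte alignment comes back as num_components = MIN2(12 / 2, 4) = 4 with
+ * bit_size 16, i.e. the access is split into aligned 16-bit pieces instead of
+ * being emitted as an unaligned 32-bit load.
+ */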
+
+static nir_mem_access_size_align
+mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
+ uint8_t bit_size, uint32_t align,
+ uint32_t align_offset, bool offset_is_const,
+ const void *cb_data)
+{
+ bit_size = *(const uint8_t *)cb_data;
+ align = nir_combined_align(align, align_offset);
+
+ assert(util_is_power_of_two_nonzero(align));
+
+ return (nir_mem_access_size_align){
+ .num_components = MIN2(bytes / (bit_size / 8), 4),
+ .bit_size = bit_size,
+ .align = bit_size / 8,
+ };
+}
+
+static bool
+alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+ uint8_t *bit_size = data;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_scratch:
+ *bit_size = MIN2(*bit_size, instr->def.bit_size);
+ return false;
+ case nir_intrinsic_store_scratch:
+ *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
+ return false;
+ default:
+ return false;
+ }
+}
+
+static bool
+alias_scratch_memory(nir_shader *nir)
+{
+ uint8_t bit_size = 64;
+
+ nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
+ nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
+ .modes = nir_var_function_temp,
+ .may_lower_unaligned_stores_to_atomics = true,
+ .callback = mem_access_scratch_size_align_cb,
+ .cb_data = &bit_size,
+ };
+ return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
+}
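+
+/* The scan picks the smallest bit size any scratch load/store uses and the
+ * callback then forces every scratch access to that size, presumably so the
+ * whole scratch area can be declared as a single uniformly-typed array.
+ */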
+
+static uint8_t
+lower_vec816_alu(const nir_instr *instr, const void *cb_data)
+{
+ return 4;
+}
+
+static bool
+fix_vertex_input_locations_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp) || !is_input)
+ return false;
+
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+ if (sem.location < VERT_ATTRIB_GENERIC0)
+ return false;
+ sem.location = VERT_ATTRIB_GENERIC0 + nir_intrinsic_base(intr);
+ nir_intrinsic_set_io_semantics(intr, sem);
+ return true;
+}
+
+static bool
+fix_vertex_input_locations(nir_shader *nir)
+{
+ if (nir->info.stage != MESA_SHADER_VERTEX)
+ return false;
+
+ return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL);
+}
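+
+/* e.g. (illustrative): a vertex input whose gallium 'base' is 2 but whose io
+ * semantic still says GENERIC5 is rewritten so the semantic reads GENERIC2,
+ * keeping semantic locations in sync with the driver-side attribute slots.
+ */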
+
+struct trivial_revectorize_state {
+ bool has_xfb;
+ uint32_t component_mask;
+ nir_intrinsic_instr *base;
+ nir_intrinsic_instr *next_emit_vertex;
+ nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS];
+ struct set *deletions;
+};
+
+/* always skip xfb; scalarized xfb is preferred */
+static bool
+intr_has_xfb(nir_intrinsic_instr *intr)
+{
+ if (!nir_intrinsic_has_io_xfb(intr))
+ return false;
+ for (unsigned i = 0; i < 2; i++) {
+ if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/* helper to avoid vectorizing i/o for different vertices */
+static nir_intrinsic_instr *
+find_next_emit_vertex(nir_intrinsic_instr *intr)
+{
+ bool found = false;
+ nir_foreach_instr_safe(instr, intr->instr.block) {
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
+ if (!found && test_intr != intr)
+ continue;
+ if (!found) {
+ assert(intr == test_intr);
+ found = true;
+ continue;
+ }
+ if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
+ return test_intr;
+ }
+ }
+ return NULL;
+}
+
+/* scan for vectorizable instrs on a given location */
+static bool
+trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state)
+{
+ nir_intrinsic_instr *base = state->base;
+
+ if (intr == base)
+ return false;
+
+ if (intr->intrinsic != base->intrinsic)
+ return false;
+
+ if (_mesa_set_search(state->deletions, intr))
+ return false;
+
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ filter_io_instr(intr, &is_load, &is_input, &is_interp);
+
+ nir_io_semantics base_sem = nir_intrinsic_io_semantics(base);
+ nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr);
+ nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base);
+ nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
+ int c = nir_intrinsic_component(intr);
+ /* already detected */
+ if (state->component_mask & BITFIELD_BIT(c))
+ return false;
+ /* not a match */
+ if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type)
+ return false;
+ /* only vectorize when all srcs match */
+ for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) {
+ if (!nir_srcs_equal(intr->src[i], base->src[i]))
+ return false;
+ }
+ /* never match xfb */
+ state->has_xfb |= intr_has_xfb(intr);
+ if (state->has_xfb)
+ return false;
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ /* only match same vertex */
+ if (state->next_emit_vertex != find_next_emit_vertex(intr))
+ return false;
+ }
+ uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c);
+ state->component_mask |= mask;
+ u_foreach_bit(component, mask)
+ state->merge[component] = intr;
+
+ return true;
+}
+
+static bool
+trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ bool is_load = false;
+ bool is_input = false;
+ bool is_interp = false;
+ if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+ return false;
+ if (intr->num_components != 1)
+ return false;
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+ if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) {
+ /* always ignore compact arrays */
+ switch (sem.location) {
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CULL_DIST0:
+ case VARYING_SLOT_CULL_DIST1:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ return false;
+ default: break;
+ }
+ }
+ /* always ignore to-be-deleted instrs */
+ if (_mesa_set_search(data, intr))
+ return false;
+
+ /* never vectorize xfb */
+ if (intr_has_xfb(intr))
+ return false;
+
+ int ic = nir_intrinsic_component(intr);
+ uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic);
+ /* already vectorized */
+ if (util_bitcount(mask) == 4)
+ return false;
+ struct trivial_revectorize_state state = {
+ .component_mask = mask,
+ .base = intr,
+ /* avoid clobbering i/o for different vertices */
+ .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL,
+ .deletions = data,
+ };
+ u_foreach_bit(bit, mask)
+ state.merge[bit] = intr;
+ bool progress = false;
+ nir_foreach_instr(instr, intr->instr.block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
+ /* no matching across vertex emission */
+ if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
+ break;
+ progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state);
+ }
+ if (!progress || state.has_xfb)
+ return false;
+
+ /* verify nothing crazy happened */
+ assert(state.component_mask);
+ for (unsigned i = 0; i < 4; i++) {
+ assert(!state.merge[i] || !intr_has_xfb(state.merge[i]));
+ }
+
+ unsigned first_component = ffs(state.component_mask) - 1;
+ unsigned num_components = util_bitcount(state.component_mask);
+ unsigned num_contiguous = 0;
+ uint32_t contiguous_mask = 0;
+ for (unsigned i = 0; i < num_components; i++) {
+ unsigned c = i + first_component;
+ /* calc mask of contiguous components to vectorize */
+ if (state.component_mask & BITFIELD_BIT(c)) {
+ num_contiguous++;
+ contiguous_mask |= BITFIELD_BIT(c);
+ }
+ /* on the first gap or the last component, vectorize */
+ if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) {
+ if (num_contiguous > 1) {
+ /* reindex to enable easy src/dest index comparison */
+ nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader));
+ /* determine the first/last instr to use for the base (vectorized) load/store */
+ unsigned first_c = ffs(contiguous_mask) - 1;
+ nir_intrinsic_instr *base = NULL;
+ unsigned test_idx = is_load ? UINT32_MAX : 0;
+ for (unsigned j = 0; j < num_contiguous; j++) {
+ unsigned merge_c = j + first_c;
+ nir_intrinsic_instr *merge_intr = state.merge[merge_c];
+ /* avoid breaking ssa ordering by using:
+ * - first instr for vectorized load
+ * - last instr for vectorized store
+ * this guarantees all srcs have been seen
+ */
+ if ((is_load && merge_intr->def.index < test_idx) ||
+ (!is_load && merge_intr->src[0].ssa->index >= test_idx)) {
+ test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index;
+ base = merge_intr;
+ }
+ }
+ assert(base);
+ /* update instr components */
+ nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c]));
+ unsigned orig_components = base->num_components;
+ base->num_components = num_contiguous;
+ /* do rewrites after loads and before stores */
+ b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr);
+ if (is_load) {
+ base->def.num_components = num_contiguous;
+ /* iterate the contiguous loaded components and rewrite merged dests */
+ for (unsigned j = 0; j < num_contiguous; j++) {
+ unsigned merge_c = j + first_c;
+ nir_intrinsic_instr *merge_intr = state.merge[merge_c];
+ /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */
+ unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components;
+ nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components));
+ nir_def_rewrite_uses_after(&merge_intr->def, swiz, merge_intr == base ? swiz->parent_instr : &merge_intr->instr);
+ j += use_components - 1;
+ }
+ } else {
+ nir_def *comp[NIR_MAX_VEC_COMPONENTS];
+ /* generate swizzled vec of store components and rewrite store src */
+ for (unsigned j = 0; j < num_contiguous; j++) {
+ unsigned merge_c = j + first_c;
+ nir_intrinsic_instr *merge_intr = state.merge[merge_c];
+ /* detect if the merged instr stored multiple components and extract them for rewrite */
+ unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components;
+ for (unsigned k = 0; k < use_components; k++)
+ comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k);
+ j += use_components - 1;
+ }
+ nir_def *val = nir_vec(b, comp, num_contiguous);
+ nir_src_rewrite(&base->src[0], val);
+ nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous));
+ }
+ /* deleting instructions during a foreach explodes the compiler, so delete later */
+ for (unsigned j = 0; j < num_contiguous; j++) {
+ unsigned merge_c = j + first_c;
+ nir_intrinsic_instr *merge_intr = state.merge[merge_c];
+ if (merge_intr != base)
+ _mesa_set_add(data, &merge_intr->instr);
+ }
+ }
+ contiguous_mask = 0;
+ num_contiguous = 0;
+ }
+ }
+
+ return true;
+}
+
+/* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */
+static bool
+trivial_revectorize(nir_shader *nir)
+{
+ struct set deletions;
+
+ if (nir->info.stage > MESA_SHADER_FRAGMENT)
+ return false;
+
+ _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_dominance, &deletions);
+ /* now it's safe to delete */
+ set_foreach_remove(&deletions, entry) {
+ nir_instr *instr = (void*)entry->key;
+ nir_instr_remove(instr);
+ }
+ ralloc_free(deletions.table);
+ return progress;
+}
+
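(Editorial aside, not part of the patch.) The grouping that trivial_revectorize_scan() performs over the component mask can be sketched in isolation. The helper below is an illustrative, self-contained C sketch — the function name and the fixed vec4 assumption are mine — that walks a component mask and reports every contiguous run longer than one component, mirroring how the scan decides when a merged load/store is worth emitting.

   #include <stdio.h>

   /* illustrative sketch: group contiguous set bits of a vec4 component mask,
    * the same grouping trivial_revectorize_scan() applies before merging i/o */
   static void
   print_vectorizable_runs(unsigned component_mask)
   {
      unsigned run_start = 0, run_len = 0;
      for (unsigned c = 0; c < 4; c++) {
         if (component_mask & (1u << c)) {
            if (!run_len)
               run_start = c;
            run_len++;
         }
         /* on the first gap or after the last component, flush the current run */
         if (!(component_mask & (1u << c)) || c == 3) {
            if (run_len > 1)
               printf("vectorize components %u..%u\n",
                      run_start, run_start + run_len - 1);
            run_len = 0;
         }
      }
   }

For a mask of 0xb (components x, y and w written by separate scalar stores) this reports a single x..y run, so only those two stores are merged and w stays scalar — the same behavior as the contiguous_mask loop above.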
struct zink_shader *
-zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
- const struct pipe_stream_output_info *so_info)
+zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
{
- struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
+ struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
bool have_psiz = false;
+ ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
+ nir->info.outputs_written & VARYING_BIT_EDGE;
+
+ ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
+ ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
+ if (screen->info.have_KHR_shader_float_controls) {
+ if (screen->info.props12.shaderDenormFlushToZeroFloat16)
+ ret->sinfo.float_controls.flush_denorms |= 0x1;
+ if (screen->info.props12.shaderDenormFlushToZeroFloat32)
+ ret->sinfo.float_controls.flush_denorms |= 0x2;
+ if (screen->info.props12.shaderDenormFlushToZeroFloat64)
+ ret->sinfo.float_controls.flush_denorms |= 0x4;
+
+ if (screen->info.props12.shaderDenormPreserveFloat16)
+ ret->sinfo.float_controls.preserve_denorms |= 0x1;
+ if (screen->info.props12.shaderDenormPreserveFloat32)
+ ret->sinfo.float_controls.preserve_denorms |= 0x2;
+ if (screen->info.props12.shaderDenormPreserveFloat64)
+ ret->sinfo.float_controls.preserve_denorms |= 0x4;
+
+ ret->sinfo.float_controls.denorms_all_independence =
+ screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
+
+ ret->sinfo.float_controls.denorms_32_bit_independence =
+ ret->sinfo.float_controls.denorms_all_independence ||
+ screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
+ }
+ ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
+
+ util_queue_fence_init(&ret->precompile.fence);
+ util_dynarray_init(&ret->pipeline_libs, ret);
ret->hash = _mesa_hash_pointer(ret);
ret->programs = _mesa_pointer_set_create(NULL);
simple_mtx_init(&ret->lock, mtx_plain);
- nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
- if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
- nir->info.stage == MESA_SHADER_TESS_EVAL)
- indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
+ if (nir->info.stage == MESA_SHADER_KERNEL) {
+ nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
+ .modes = nir_var_all ^ nir_var_function_temp,
+ .may_lower_unaligned_stores_to_atomics = true,
+ .callback = mem_access_size_align_cb,
+ .cb_data = screen,
+ };
+ NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
+ NIR_PASS_V(nir, alias_scratch_memory);
+ NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
+ NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
+ }
- NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
- UINT32_MAX);
+ NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL);
+ optimize_nir(nir, NULL, true);
+ nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
+ if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
+ NIR_PASS_V(nir, lower_bindless_io);
+ break;
+ }
+ }
+ if (nir->info.stage < MESA_SHADER_FRAGMENT)
+ nir_gather_xfb_info_from_intrinsics(nir);
+ NIR_PASS_V(nir, fix_vertex_input_locations);
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ scan_nir(screen, nir, ret);
+ NIR_PASS_V(nir, nir_opt_vectorize, NULL, NULL);
+ NIR_PASS_V(nir, trivial_revectorize);
+ if (nir->info.io_lowered) {
+ rework_io_vars(nir, nir_var_shader_in, ret);
+ rework_io_vars(nir, nir_var_shader_out, ret);
+ nir_sort_variables_by_location(nir, nir_var_shader_in);
+ nir_sort_variables_by_location(nir, nir_var_shader_out);
+ }
- if (nir->info.stage == MESA_SHADER_VERTEX)
- create_vs_pushconst(nir);
- else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
+ if (nir->info.stage < MESA_SHADER_COMPUTE)
+ create_gfx_pushconst(nir);
+
+ if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
nir->info.stage == MESA_SHADER_TESS_EVAL)
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
- else if (nir->info.stage == MESA_SHADER_KERNEL)
- create_cs_pushconst(nir);
if (nir->info.stage < MESA_SHADER_FRAGMENT)
have_psiz = check_psiz(nir);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ ret->flat_flags = zink_flat_flags(nir);
+
+ if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
+ NIR_PASS_V(nir, fixup_io_locations);
+
NIR_PASS_V(nir, lower_basevertex);
- NIR_PASS_V(nir, lower_work_dim);
- NIR_PASS_V(nir, nir_lower_regs_to_ssa);
NIR_PASS_V(nir, lower_baseinstance);
+ NIR_PASS_V(nir, split_bitfields);
+ if (!screen->info.feats.features.shaderStorageImageMultisample)
+ NIR_PASS_V(nir, strip_tex_ms);
+ NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
+
+ if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
+ NIR_PASS_V(nir, nir_lower_discard_or_demote, true);
+ }
+
+ if (screen->need_2D_zs)
+ NIR_PASS_V(nir, lower_1d_shadow, screen);
{
nir_lower_subgroups_options subgroup_options = {0};
@@ -1116,25 +6198,57 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
subgroup_options.ballot_bit_size = 32;
subgroup_options.ballot_components = 4;
subgroup_options.lower_subgroup_masks = true;
+ if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
+ subgroup_options.subgroup_size = 1;
+ subgroup_options.lower_vote_trivial = true;
+ }
+ subgroup_options.lower_inverse_ballot = true;
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
}
- optimize_nir(nir);
+ optimize_nir(nir, NULL, true);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
- NIR_PASS_V(nir, lower_discard_if);
- NIR_PASS_V(nir, nir_lower_fragcolor,
- nir->info.fs.color_is_dual_source ? 1 : 8);
- NIR_PASS_V(nir, lower_64bit_vertex_attribs);
- NIR_PASS_V(nir, unbreak_bos);
+ NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
+ nir_lower_demote_if_to_cf |
+ nir_lower_terminate_if_to_cf));
+
+ bool needs_size = analyze_io(ret, nir);
+ NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
+ /* these run later at compile time if there could be inlined uniforms */
+ if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
+ NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
+ NIR_PASS_V(nir, rewrite_bo_access, screen);
+ NIR_PASS_V(nir, remove_bo_access, ret);
+ }
- if (zink_debug & ZINK_DEBUG_NIR) {
- fprintf(stderr, "NIR shader:\n---8<---\n");
- nir_print_shader(nir, stderr);
- fprintf(stderr, "---8<---\n");
+ struct zink_bindless_info bindless = {0};
+ bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
+ nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
+ var->data.is_xfb = false;
+
+ optimize_nir(nir, NULL, true);
+ prune_io(nir);
+
+ unsigned sampler_mask = 0;
+ if (nir->info.stage == MESA_SHADER_KERNEL) {
+ NIR_PASS_V(nir, type_images, &sampler_mask);
+ enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
+ VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
+ u_foreach_bit(s, sampler_mask) {
+ ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
+ ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
+ ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
+ ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
+ ret->num_bindings[ztype]++;
+ }
+ ret->sinfo.sampler_mask = sampler_mask;
}
- foreach_list_typed_reverse(nir_variable, var, node, &nir->variables) {
+ unsigned ubo_binding_mask = 0;
+ unsigned ssbo_binding_mask = 0;
+ foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
if (_nir_shader_variable_has_mode(var, nir_var_uniform |
+ nir_var_image |
nir_var_mem_ubo |
nir_var_mem_ssbo)) {
enum zink_descriptor_type ztype;
@@ -1143,42 +6257,56 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
ztype = ZINK_DESCRIPTOR_TYPE_UBO;
/* buffer 0 is a push descriptor */
var->data.descriptor_set = !!var->data.driver_location;
- var->data.binding = !var->data.driver_location ? nir->info.stage :
+ var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
zink_binding(nir->info.stage,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- var->data.driver_location);
+ var->data.driver_location,
+ screen->compact_descriptors);
assert(var->data.driver_location || var->data.binding < 10);
VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
int binding = var->data.binding;
- ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
- ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
- ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
- ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
- ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
- ret->num_bindings[ztype]++;
+ if (!var->data.driver_location) {
+ ret->has_uniforms = true;
+ } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
+ ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
+ ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
+ ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
+ ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
+ assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
+ ret->num_bindings[ztype]++;
+ ubo_binding_mask |= BITFIELD_BIT(binding);
+ }
} else if (var->data.mode == nir_var_mem_ssbo) {
ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
- var->data.descriptor_set = ztype + 1;
- var->data.binding = zink_binding(nir->info.stage,
+ var->data.descriptor_set = screen->desc_set_id[ztype];
+ var->data.binding = zink_binding(clamp_stage(&nir->info),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- var->data.driver_location);
- ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
- ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
- ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
- ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
- ret->num_bindings[ztype]++;
+ var->data.driver_location,
+ screen->compact_descriptors);
+ if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
+ ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
+ ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
+ ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
+ assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
+ ret->num_bindings[ztype]++;
+ ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
+ }
} else {
- assert(var->data.mode == nir_var_uniform);
- if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
+ assert(var->data.mode == nir_var_uniform ||
+ var->data.mode == nir_var_image);
+ if (var->data.bindless) {
+ ret->bindless = true;
+ handle_bindless_var(nir, var, type, &bindless);
+ } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
- if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
- ret->num_texel_buffers++;
+ if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
ztype = zink_desc_type_from_vktype(vktype);
var->data.driver_location = var->data.binding;
- var->data.descriptor_set = ztype + 1;
- var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
+ var->data.descriptor_set = screen->desc_set_id[ztype];
+ var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
@@ -1187,14 +6315,33 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
else
ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
ret->num_bindings[ztype]++;
+ } else if (var->data.mode == nir_var_uniform) {
+ /* this is a dead uniform */
+ var->data.mode = 0;
+ exec_node_remove(&var->node);
}
}
}
}
-
- ret->nir = nir;
- if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
- update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
+ bool bindless_lowered = false;
+ NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
+ ret->bindless |= bindless_lowered;
+
+ if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
+ NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
+ if (nir->info.stage != MESA_SHADER_KERNEL)
+ NIR_PASS_V(nir, match_tex_dests, ret);
+
+ if (!nir->info.internal)
+ nir_foreach_shader_out_variable(var, nir)
+ var->data.explicit_xfb_buffer = 0;
+ if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
+ update_so_info(ret, nir, nir->info.outputs_written, have_psiz);
+ zink_shader_serialize_blob(nir, &ret->blob);
+ memcpy(&ret->info, &nir->info, sizeof(nir->info));
+ ret->info.name = ralloc_strdup(ret, nir->info.name);
+
+ ret->can_inline = true;
return ret;
}
@@ -1205,18 +6352,24 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
struct zink_screen *screen = zink_screen(pscreen);
nir_shader *nir = nirptr;
- if (!screen->info.feats.features.shaderImageGatherExtended) {
- nir_lower_tex_options tex_opts = {0};
+ nir_lower_tex_options tex_opts = {
+ .lower_invalid_implicit_lod = true,
+ };
+ /*
+ Sampled Image must be an object whose type is OpTypeSampledImage.
+ The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
+ or Rect, and the Arrayed and MS operands must be 0.
+ - SPIRV, OpImageSampleProj* opcodes
+ */
+ tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
+ BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
+ tex_opts.lower_txp_array = true;
+ if (!screen->info.feats.features.shaderImageGatherExtended)
tex_opts.lower_tg4_offsets = true;
- NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
- }
- NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
- if (nir->info.stage == MESA_SHADER_GEOMETRY)
- NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
- optimize_nir(nir);
- if (nir->info.num_ubos || nir->info.num_ssbos)
- NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
+ optimize_nir(nir, NULL, false);
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
if (screen->driconf.inline_uniforms)
nir_find_inlinable_uniforms(nir);
@@ -1224,39 +6377,133 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
}
void
-zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- set_foreach(shader->programs, entry) {
- if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
- struct zink_compute_program *comp = (void*)entry->key;
- if (!comp->base.removed) {
- _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
- comp->base.removed = true;
+zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
+{
+ _mesa_set_destroy(shader->programs, NULL);
+ util_queue_fence_wait(&shader->precompile.fence);
+ util_queue_fence_destroy(&shader->precompile.fence);
+ zink_descriptor_shader_deinit(screen, shader);
+ if (screen->info.have_EXT_shader_object) {
+ VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
+ } else {
+ if (shader->precompile.obj.mod)
+ VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
+ if (shader->precompile.gpl)
+ VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
+ }
+ blob_finish(&shader->blob);
+ ralloc_free(shader->spirv);
+ free(shader->precompile.bindings);
+ ralloc_free(shader);
+}
+
+static bool
+gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader)
+{
+ /* this shader may still be precompiling, so access here must be locked and singular */
+ simple_mtx_lock(&shader->lock);
+ struct set_entry *entry = _mesa_set_next_entry(shader->programs, NULL);
+ struct zink_gfx_program *prog = (void*)(entry ? entry->key : NULL);
+ if (entry)
+ _mesa_set_remove(shader->programs, entry);
+ simple_mtx_unlock(&shader->lock);
+ if (!prog)
+ return false;
+ gl_shader_stage stage = shader->info.stage;
+ assert(stage < ZINK_GFX_SHADER_COUNT);
+ unsigned stages_present = prog->stages_present;
+ if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
+ prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
+ stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ unsigned idx = zink_program_cache_stages(stages_present);
+ if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
+ (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
+ struct hash_table *ht = &prog->base.ctx->program_cache[idx];
+ simple_mtx_lock(&prog->base.ctx->program_lock[idx]);
+ struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
+ assert(he && he->data == prog);
+ _mesa_hash_table_remove(ht, he);
+ prog->base.removed = true;
+ simple_mtx_unlock(&prog->base.ctx->program_lock[idx]);
+ util_queue_fence_wait(&prog->base.cache_fence);
+
+ for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
+ for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
+ hash_table_foreach(&prog->pipelines[r][i], table_entry) {
+ struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;
+
+ util_queue_fence_wait(&pc_entry->fence);
+ }
}
- comp->shader = NULL;
- zink_compute_program_reference(screen, &comp, NULL);
- } else {
- struct zink_gfx_program *prog = (void*)entry->key;
- enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
- assert(pstage < ZINK_SHADER_COUNT);
- if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
- _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
- prog->base.removed = true;
+ }
+ }
+ if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
+ prog->shaders[stage] = NULL;
+ prog->stages_remaining &= ~BITFIELD_BIT(stage);
+ }
+ /* only remove generated tcs during parent tes destruction */
+ if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
+ prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
+ if (stage != MESA_SHADER_FRAGMENT &&
+ prog->shaders[MESA_SHADER_GEOMETRY] &&
+ prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
+ shader) {
+ prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
+ }
+ zink_gfx_program_reference(screen, &prog, NULL);
+ return true;
+}
+
+void
+zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
+{
+ assert(shader->info.stage != MESA_SHADER_COMPUTE);
+ util_queue_fence_wait(&shader->precompile.fence);
+
+ /* if the shader is still precompiling, the program set must be pruned under lock */
+ while (gfx_shader_prune(screen, shader));
+
+ while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
+ struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
+ if (!libs->removed) {
+ libs->removed = true;
+ unsigned idx = zink_program_cache_stages(libs->stages_present);
+ simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
+ _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
+ simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
+ }
+ zink_gfx_lib_cache_unref(screen, libs);
+ }
+ if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
+ shader->non_fs.generated_tcs) {
+ /* automatically destroy generated tcs shaders when tes is destroyed */
+ zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
+ shader->non_fs.generated_tcs = NULL;
+ }
+ if (shader->info.stage != MESA_SHADER_FRAGMENT) {
+ for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
+ for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
+ if (shader->non_fs.generated_gs[i][j]) {
+ /* automatically destroy generated gs shaders when owner is destroyed */
+ zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
+ shader->non_fs.generated_gs[i][j] = NULL;
+ }
}
- prog->shaders[pstage] = NULL;
- if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
- /* automatically destroy generated tcs shaders when tes is destroyed */
- zink_shader_free(ctx, shader->generated);
- zink_gfx_program_reference(screen, &prog, NULL);
}
}
- _mesa_set_destroy(shader->programs, NULL);
- ralloc_free(shader->nir);
- FREE(shader);
+ zink_shader_free(screen, shader);
}
+struct zink_shader_object
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
+{
+ assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
+ /* shortcut all the nir passes since we just have to change this one word */
+ zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
+ return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
+}
+
/* creating a passthrough tcs shader that's roughly:
#version 150
@@ -1279,9 +6526,10 @@ void main()
*/
struct zink_shader *
-zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
+zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
{
- struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
+ struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
+ util_queue_fence_init(&ret->precompile.fence);
ret->hash = _mesa_hash_pointer(ret);
ret->programs = _mesa_pointer_set_create(NULL);
simple_mtx_init(&ret->lock, mtx_plain);
@@ -1291,20 +6539,22 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig
fn->is_entrypoint = true;
nir_function_impl *impl = nir_function_impl_create(fn);
- nir_builder b;
- nir_builder_init(&b, impl);
- b.cursor = nir_before_block(nir_start_block(impl));
+ nir_builder b = nir_builder_at(nir_before_impl(impl));
- nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
+ nir_def *invocation_id = nir_load_invocation_id(&b);
- nir_foreach_shader_out_variable(var, vs->nir) {
- const struct glsl_type *type = var->type;
+ nir_foreach_shader_in_variable(var, tes) {
+ if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ continue;
const struct glsl_type *in_type = var->type;
const struct glsl_type *out_type = var->type;
char buf[1024];
snprintf(buf, sizeof(buf), "%s_out", var->name);
- in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
- out_type = glsl_array_type(type, vertices_per_patch, 0);
+ if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
+ const struct glsl_type *type = var->type;
+ in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
+ out_type = glsl_array_type(type, vertices_per_patch, 0);
+ }
nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
@@ -1318,15 +6568,10 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig
implementation-dependent maximum patch size (gl_MaxPatchVertices).
- ARB_tessellation_shader
*/
- for (unsigned i = 0; i < vertices_per_patch; i++) {
- /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
- nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i)));
- nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
- nir_ssa_def *load = nir_load_deref(&b, in_array_var);
- nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
- nir_store_deref(&b, out_array_var, load, 0xff);
- nir_pop_if(&b, start_block);
- }
+ /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
+ nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
+ nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
+ copy_vars(&b, out_value, in_value);
}
nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
@@ -1335,24 +6580,12 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig
gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
gl_TessLevelOuter->data.patch = 1;
- /* hacks so we can size these right for now */
- struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
- /* just use a single blob for padding here because it's easier */
- fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
- fields[0].name = ralloc_asprintf(nir, "padding");
- fields[0].offset = 0;
- fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
- fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
- fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
- fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
- fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
- fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
- nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
- glsl_struct_type(fields, 3, "struct", false), "pushconst");
- pushconst->data.location = VARYING_SLOT_VAR0;
-
- nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
- nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);
+ create_gfx_pushconst(nir);
+
+ nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
+ nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
+ nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
+ nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));
for (unsigned i = 0; i < 2; i++) {
nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
@@ -1366,13 +6599,58 @@ zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsig
nir->info.tess.tcs_vertices_out = vertices_per_patch;
nir_validate_shader(nir, "created");
- NIR_PASS_V(nir, nir_lower_regs_to_ssa);
- optimize_nir(nir);
+ optimize_nir(nir, NULL, true);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
- NIR_PASS_V(nir, lower_discard_if);
NIR_PASS_V(nir, nir_convert_from_ssa, true);
- ret->nir = nir;
- ret->is_generated = true;
+ *nir_ret = nir;
+ zink_shader_serialize_blob(nir, &ret->blob);
+ memcpy(&ret->info, &nir->info, sizeof(nir->info));
+ ret->non_fs.is_generated = true;
return ret;
}
+
+bool
+zink_shader_has_cubes(nir_shader *nir)
+{
+ nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
+ return true;
+ }
+ return false;
+}
+
+nir_shader *
+zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
+{
+ struct blob_reader blob_reader;
+ blob_reader_init(&blob_reader, blob->data, blob->size);
+ return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
+}
+
+nir_shader *
+zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
+{
+ return zink_shader_blob_deserialize(screen, &zs->blob);
+}
+
+void
+zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
+{
+ blob_init(blob);
+#ifndef NDEBUG
+ bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
+#else
+ bool strip = false;
+#endif
+ nir_serialize(blob, nir, strip);
+}
+
+void
+zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
+{
+ nir_shader *nir = zink_shader_deserialize(screen, zs);
+ nir_print_shader(nir, fp);
+ ralloc_free(nir);
+}
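(Editorial aside, not part of the patch.) With zink_shader::nir removed, consumers now round-trip shaders through the serialized blob. A minimal usage sketch, assuming a valid `screen` and a freshly built `nir` shader; it only uses the two helpers added in this file plus blob_finish()/ralloc_free():

   struct blob blob;
   zink_shader_serialize_blob(nir, &blob);        /* NIR -> serialized bytes */
   nir_shader *copy = zink_shader_blob_deserialize(screen, &blob); /* bytes -> fresh NIR */
   /* ... run per-compile lowering on `copy` without touching the original ... */
   ralloc_free(copy);
   blob_finish(&blob);

The payoff is that zink_shader only keeps the blob (and a copy of shader_info) resident; anything that still needs real NIR, such as zink_print_shader() above, deserializes a throwaway copy.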
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 270bf12c54b..e901ee45f7b 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -24,95 +24,72 @@
#ifndef ZINK_COMPILER_H
#define ZINK_COMPILER_H
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-
-#include "compiler/nir/nir.h"
-#include "compiler/shader_info.h"
-#include "util/u_live_shader_cache.h"
-
-#include <vulkan/vulkan.h>
-#include "zink_descriptors.h"
+#include "zink_types.h"
#define ZINK_WORKGROUP_SIZE_X 1
#define ZINK_WORKGROUP_SIZE_Y 2
#define ZINK_WORKGROUP_SIZE_Z 3
+#define ZINK_VARIABLE_SHARED_MEM 4
+#define ZINK_INLINE_VAL_FLAT_MASK 0
+#define ZINK_INLINE_VAL_PV_LAST_VERT 2
-struct pipe_screen;
-struct zink_context;
-struct zink_screen;
-struct zink_shader_key;
-struct zink_shader_module;
-struct zink_gfx_program;
-
-struct nir_shader_compiler_options;
-struct nir_shader;
+/* stop inlining shaders if they have >limit ssa vals after inlining:
+ * recompile time isn't worth the inline
+ */
+#define ZINK_ALWAYS_INLINE_LIMIT 1500
-struct set;
+struct zink_shader_key;
+struct spirv_shader;
struct tgsi_token;
-struct zink_so_info {
- struct pipe_stream_output_info so_info;
- unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS];
- bool have_xfb;
-};
+static inline gl_shader_stage
+clamp_stage(const shader_info *info)
+{
+ return info->stage == MESA_SHADER_KERNEL ? MESA_SHADER_COMPUTE : info->stage;
+}
const void *
zink_get_compiler_options(struct pipe_screen *screen,
enum pipe_shader_ir ir,
- enum pipe_shader_type shader);
+ gl_shader_stage shader);
struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens);
-struct zink_shader {
- struct util_live_shader base;
- uint32_t hash;
- struct nir_shader *nir;
-
- struct zink_so_info streamout;
-
- struct {
- int index;
- int binding;
- VkDescriptorType type;
- unsigned char size;
- } bindings[ZINK_DESCRIPTOR_TYPES][ZINK_MAX_DESCRIPTORS_PER_TYPE];
- size_t num_bindings[ZINK_DESCRIPTOR_TYPES];
- unsigned num_texel_buffers;
- uint32_t ubos_used; // bitfield of which ubo indices are used
- uint32_t ssbos_used; // bitfield of which ssbo indices are used
-
- simple_mtx_t lock;
- struct set *programs;
-
- union {
- struct zink_shader *generated; // a generated shader that this shader "owns"
- bool is_generated; // if this is a driver-created shader (e.g., tcs)
- nir_variable *fbfetch; //for fs output
- };
-};
+nir_shader*
+zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
+ const nir_shader *prev_stage);
+
+bool
+zink_lower_system_values_to_inlined_uniforms(nir_shader *nir);
void
zink_screen_init_compiler(struct zink_screen *screen);
void
-zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer);
-VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key);
-
+zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer);
+/* pass very large shader key data with extra_data */
+struct zink_shader_object
+zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg);
+struct zink_shader_object
+zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs);
struct zink_shader *
-zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
- const struct pipe_stream_output_info *so_info);
+zink_shader_create(struct zink_screen *screen, struct nir_shader *nir);
char *
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr);
void
-zink_shader_free(struct zink_context *ctx, struct zink_shader *shader);
+zink_shader_free(struct zink_screen *screen, struct zink_shader *shader);
+void
+zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader);
+struct zink_shader_object
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg);
+struct zink_shader_object
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg);
struct zink_shader *
-zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch);
+zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret);
static inline bool
zink_shader_descriptor_is_buffer(struct zink_shader *zs, enum zink_descriptor_type type, unsigned i)
@@ -121,4 +98,14 @@ zink_shader_descriptor_is_buffer(struct zink_shader *zs, enum zink_descriptor_ty
zs->bindings[type][i].type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
}
+bool
+zink_shader_has_cubes(nir_shader *nir);
+nir_shader *
+zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob);
+nir_shader *
+zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs);
+void
+zink_shader_serialize_blob(nir_shader *nir, struct blob *blob);
+void
+zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp);
#endif
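(Editorial aside, not part of the patch.) clamp_stage() exists because MESA_SHADER_KERNEL has no Vulkan stage of its own, so kernels are treated as compute wherever a Vulkan stage bit or a per-stage descriptor binding is needed. A hedged usage sketch, assuming `nir`, `var` and `screen` are in scope as in zink_shader_create():

   gl_shader_stage stage = clamp_stage(&nir->info);
   VkShaderStageFlagBits vk_stage = mesa_to_vk_shader_stage(stage);
   unsigned binding = zink_binding(stage, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                   var->data.driver_location,
                                   screen->compact_descriptors);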
diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c
index 1d98bcbed86..77b60828366 100644
--- a/src/gallium/drivers/zink/zink_context.c
+++ b/src/gallium/drivers/zink/zink_context.c
@@ -21,34 +21,45 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "zink_clear.h"
#include "zink_context.h"
-
-#include "zink_batch.h"
-#include "zink_compiler.h"
+#include "zink_descriptors.h"
#include "zink_fence.h"
#include "zink_format.h"
#include "zink_framebuffer.h"
#include "zink_helpers.h"
-#include "zink_program.h"
+#include "zink_inlines.h"
+#include "zink_kopper.h"
#include "zink_pipeline.h"
+#include "zink_program.h"
#include "zink_query.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_surface.h"
-#include "zink_inlines.h"
+
+#include "nir/pipe_nir.h"
#include "util/u_blitter.h"
#include "util/u_debug.h"
#include "util/format_srgb.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
+#include "util/u_sample_positions.h"
+#include "util/u_string.h"
#include "util/u_thread.h"
+#include "util/perf/u_trace.h"
#include "util/u_cpu_detect.h"
+#include "util/thread_sched.h"
#include "util/strndup.h"
#include "nir.h"
+#include "nir_builder.h"
+
+#include "vk_format.h"
+
+#include "driver_trace/tr_context.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
@@ -57,11 +68,27 @@
#include "util/xxhash.h"
static void
-calc_descriptor_hash_sampler_state(struct zink_sampler_state *sampler_state)
+update_tc_info(struct zink_context *ctx)
{
- void *hash_data = &sampler_state->sampler;
- size_t data_size = sizeof(VkSampler);
- sampler_state->hash = XXH32(hash_data, data_size, 0);
+ if (ctx->track_renderpasses) {
+ const struct tc_renderpass_info *info = threaded_context_get_renderpass_info(ctx->tc);
+ ctx->rp_changed |= ctx->dynamic_fb.tc_info.data != info->data;
+ ctx->dynamic_fb.tc_info.data = info->data;
+ } else {
+ struct tc_renderpass_info info = ctx->dynamic_fb.tc_info;
+ bool zsbuf_used = !ctx->zsbuf_unused;
+ bool zsbuf_write = zink_is_zsbuf_write(ctx);
+ ctx->dynamic_fb.tc_info.data32[0] = 0;
+ if (ctx->clears_enabled & PIPE_CLEAR_DEPTHSTENCIL)
+ ctx->dynamic_fb.tc_info.zsbuf_clear_partial = true;
+ if (ctx->rp_clears_enabled & PIPE_CLEAR_DEPTHSTENCIL)
+ ctx->dynamic_fb.tc_info.zsbuf_clear = true;
+ if (ctx->dynamic_fb.tc_info.zsbuf_clear != info.zsbuf_clear)
+ ctx->rp_loadop_changed = true;
+ if (zink_is_zsbuf_write(ctx) != zsbuf_write)
+ ctx->rp_layout_changed = true;
+ ctx->rp_changed |= zink_is_zsbuf_used(ctx) != zsbuf_used;
+ }
}
void
@@ -73,8 +100,18 @@ debug_describe_zink_buffer_view(char *buf, const struct zink_buffer_view *ptr)
ALWAYS_INLINE static void
check_resource_for_batch_ref(struct zink_context *ctx, struct zink_resource *res)
{
- if (!zink_resource_has_binds(res))
- zink_batch_reference_resource(&ctx->batch, res);
+ if (!zink_resource_has_binds(res)) {
+ /* avoid desync between usage and tracking:
+ * - if usage exists, it must be removed before the context is destroyed
+ * - having usage does not imply having tracking
+ * - if tracking will be added here, also reapply usage to avoid dangling usage once tracking is removed
+ * TODO: somehow fix this for perf because it's an extra hash lookup
+ */
+ if (!res->obj->dt && zink_resource_has_usage(res))
+ zink_batch_reference_resource_rw(&ctx->batch, res, !!res->obj->bo->writes.u);
+ else
+ zink_batch_reference_resource(&ctx->batch, res);
+ }
}
static void
@@ -83,10 +120,32 @@ zink_context_destroy(struct pipe_context *pctx)
struct zink_context *ctx = zink_context(pctx);
struct zink_screen *screen = zink_screen(pctx->screen);
- if (screen->queue && !screen->device_lost && VKSCR(QueueWaitIdle)(screen->queue) != VK_SUCCESS)
- debug_printf("vkQueueWaitIdle failed\n");
+ struct pipe_framebuffer_state fb = {0};
+ pctx->set_framebuffer_state(pctx, &fb);
+
+ if (util_queue_is_initialized(&screen->flush_queue))
+ util_queue_finish(&screen->flush_queue);
+ if (ctx->batch.state && !screen->device_lost) {
+ simple_mtx_lock(&screen->queue_lock);
+ VkResult result = VKSCR(QueueWaitIdle)(screen->queue);
+ simple_mtx_unlock(&screen->queue_lock);
+
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(result));
+ }
+
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_cache); i++) {
+ simple_mtx_lock((&ctx->program_lock[i]));
+ hash_table_foreach(&ctx->program_cache[i], entry) {
+ struct zink_program *pg = entry->data;
+ zink_program_finish(ctx, pg);
+ pg->removed = true;
+ }
+ simple_mtx_unlock((&ctx->program_lock[i]));
+ }
- util_blitter_destroy(ctx->blitter);
+ if (ctx->blitter)
+ util_blitter_destroy(ctx->blitter);
for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++)
pipe_surface_release(&ctx->base, &ctx->fb_state.cbufs[i]);
pipe_surface_release(&ctx->base, &ctx->fb_state.zsbuf);
@@ -98,47 +157,124 @@ zink_context_destroy(struct pipe_context *pctx)
pipe_surface_release(&ctx->base, &ctx->dummy_surface[i]);
zink_buffer_view_reference(screen, &ctx->dummy_bufferview, NULL);
- simple_mtx_destroy(&ctx->batch_mtx);
- zink_clear_batch_state(ctx, ctx->batch.state);
- zink_batch_state_destroy(screen, ctx->batch.state);
- hash_table_foreach(&ctx->batch_states, entry) {
- struct zink_batch_state *bs = entry->data;
+ zink_descriptors_deinit_bindless(ctx);
+
+ struct zink_batch_state *bs = ctx->batch_states;
+ while (bs) {
+ struct zink_batch_state *bs_next = bs->next;
zink_clear_batch_state(ctx, bs);
- zink_batch_state_destroy(screen, bs);
+ /* restore link as we insert them into the screen's free_batch_states
+ * list below
+ */
+ bs->next = bs_next;
+ bs = bs_next;
}
- util_dynarray_foreach(&ctx->free_batch_states, struct zink_batch_state*, bs) {
- zink_clear_batch_state(ctx, *bs);
- zink_batch_state_destroy(screen, *bs);
+ bs = ctx->free_batch_states;
+ while (bs) {
+ struct zink_batch_state *bs_next = bs->next;
+ zink_clear_batch_state(ctx, bs);
+ bs->ctx = NULL;
+ /* restore link as we insert them into the screen's free_batch_states
+ * list below
+ */
+ bs->next = bs_next;
+ bs = bs_next;
}
-
- if (screen->info.have_KHR_imageless_framebuffer) {
- hash_table_foreach(&ctx->framebuffer_cache, he)
- zink_destroy_framebuffer(screen, he->data);
- } else if (ctx->framebuffer) {
- simple_mtx_lock(&screen->framebuffer_mtx);
- struct hash_entry *entry = _mesa_hash_table_search(&screen->framebuffer_cache, &ctx->framebuffer->state);
- if (zink_framebuffer_reference(screen, &ctx->framebuffer, NULL))
- _mesa_hash_table_remove(&screen->framebuffer_cache, entry);
- simple_mtx_unlock(&screen->framebuffer_mtx);
+ simple_mtx_lock(&screen->free_batch_states_lock);
+ if (ctx->batch_states) {
+ if (screen->free_batch_states)
+ screen->last_free_batch_state->next = ctx->batch_states;
+ else {
+ screen->free_batch_states = ctx->batch_states;
+ screen->last_free_batch_state = screen->free_batch_states;
+ }
+ }
+ while (screen->last_free_batch_state && screen->last_free_batch_state->next)
+ screen->last_free_batch_state = screen->last_free_batch_state->next;
+ if (ctx->free_batch_states) {
+ if (screen->free_batch_states)
+ screen->last_free_batch_state->next = ctx->free_batch_states;
+ else {
+ screen->free_batch_states = ctx->free_batch_states;
+ screen->last_free_batch_state = ctx->last_free_batch_state;
+ }
+ }
+ while (screen->last_free_batch_state && screen->last_free_batch_state->next)
+ screen->last_free_batch_state = screen->last_free_batch_state->next;
+ if (ctx->batch.state) {
+ zink_clear_batch_state(ctx, ctx->batch.state);
+ if (screen->free_batch_states)
+ screen->last_free_batch_state->next = ctx->batch.state;
+ else {
+ screen->free_batch_states = ctx->batch.state;
+ screen->last_free_batch_state = screen->free_batch_states;
+ }
+ }
+ while (screen->last_free_batch_state && screen->last_free_batch_state->next)
+ screen->last_free_batch_state = screen->last_free_batch_state->next;
+ simple_mtx_unlock(&screen->free_batch_states_lock);
+
+ for (unsigned i = 0; i < 2; i++) {
+ util_idalloc_fini(&ctx->di.bindless[i].tex_slots);
+ util_idalloc_fini(&ctx->di.bindless[i].img_slots);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ free(ctx->di.bindless[i].db.buffer_infos);
+ else
+ free(ctx->di.bindless[i].t.buffer_infos);
+ free(ctx->di.bindless[i].img_infos);
+ util_dynarray_fini(&ctx->di.bindless[i].updates);
+ util_dynarray_fini(&ctx->di.bindless[i].resident);
}
+ if (ctx->null_fs)
+ pctx->delete_fs_state(pctx, ctx->null_fs);
+
+ hash_table_foreach(&ctx->framebuffer_cache, he)
+ zink_destroy_framebuffer(screen, he->data);
+
hash_table_foreach(ctx->render_pass_cache, he)
zink_destroy_render_pass(screen, he->data);
+ zink_context_destroy_query_pools(ctx);
+ set_foreach(&ctx->gfx_inputs, he) {
+ struct zink_gfx_input_key *ikey = (void*)he->key;
+ VKSCR(DestroyPipeline)(screen->dev, ikey->pipeline, NULL);
+ }
+ set_foreach(&ctx->gfx_outputs, he) {
+ struct zink_gfx_output_key *okey = (void*)he->key;
+ VKSCR(DestroyPipeline)(screen->dev, okey->pipeline, NULL);
+ }
u_upload_destroy(pctx->stream_uploader);
u_upload_destroy(pctx->const_uploader);
slab_destroy_child(&ctx->transfer_pool);
for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_cache); i++)
_mesa_hash_table_clear(&ctx->program_cache[i], NULL);
- _mesa_hash_table_clear(&ctx->compute_program_cache, NULL);
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++)
+ simple_mtx_destroy(&ctx->program_lock[i]);
_mesa_hash_table_destroy(ctx->render_pass_cache, NULL);
slab_destroy_child(&ctx->transfer_pool_unsync);
- screen->descriptors_deinit(ctx);
+ if (zink_debug & ZINK_DEBUG_DGC) {
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.upload); i++)
+ u_upload_destroy(ctx->dgc.upload[i]);
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.buffers); i++) {
+ if (!ctx->dgc.buffers[i])
+ continue;
+ struct pipe_resource *pres = &ctx->dgc.buffers[i]->base.b;
+ pipe_resource_reference(&pres, NULL);
+ }
+ util_dynarray_fini(&ctx->dgc.pipelines);
+ }
+
+ zink_descriptors_deinit(ctx);
- zink_descriptor_layouts_deinit(ctx);
+ if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY))
+ p_atomic_dec(&screen->base.num_contexts);
- p_atomic_dec(&screen->base.num_contexts);
+ util_dynarray_foreach(&ctx->di.global_bindings, struct pipe_resource *, res) {
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&ctx->di.global_bindings);
ralloc_free(ctx);
}
@@ -180,11 +316,20 @@ zink_set_device_reset_callback(struct pipe_context *pctx,
const struct pipe_device_reset_callback *cb)
{
struct zink_context *ctx = zink_context(pctx);
+ bool had_reset = !!ctx->reset.reset;
if (cb)
ctx->reset = *cb;
else
memset(&ctx->reset, 0, sizeof(ctx->reset));
+
+ bool have_reset = !!ctx->reset.reset;
+ if (had_reset != have_reset) {
+ if (have_reset)
+ p_atomic_inc(&zink_screen(pctx->screen)->robust_ctx_count);
+ else
+ p_atomic_dec(&zink_screen(pctx->screen)->robust_ctx_count);
+ }
}
static void
@@ -192,18 +337,30 @@ zink_set_context_param(struct pipe_context *pctx, enum pipe_context_param param,
unsigned value)
{
struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
switch (param) {
- case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE:
- util_set_thread_affinity(zink_screen(ctx->base.screen)->flush_queue.threads[0],
- util_get_cpu_caps()->L3_affinity_mask[value],
- NULL, util_get_cpu_caps()->num_cpu_mask_bits);
+ case PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING:
+ if (screen->threaded_submit)
+ util_thread_sched_apply_policy(screen->flush_queue.threads[0],
+ UTIL_THREAD_DRIVER_SUBMIT, value, NULL);
break;
default:
break;
}
}
+static void
+zink_set_debug_callback(struct pipe_context *pctx, const struct util_debug_callback *cb)
+{
+ struct zink_context *ctx = zink_context(pctx);
+
+ if (cb)
+ ctx->dbg = *cb;
+ else
+ memset(&ctx->dbg, 0, sizeof(ctx->dbg));
+}
+
static VkSamplerMipmapMode
sampler_mipmap_mode(enum pipe_tex_mipfilter filter)
{
@@ -231,6 +388,18 @@ sampler_address_mode(enum pipe_tex_wrap filter)
unreachable("unexpected wrap");
}
+/* unnormalizedCoordinates only support CLAMP_TO_EDGE or CLAMP_TO_BORDER */
+static VkSamplerAddressMode
+sampler_address_mode_unnormalized(enum pipe_tex_wrap filter)
+{
+ switch (filter) {
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+ default: break;
+ }
+ unreachable("unexpected wrap");
+}
+
static VkCompareOp
compare_op(enum pipe_compare_func op)
{
@@ -281,13 +450,24 @@ zink_create_sampler_state(struct pipe_context *pctx,
const struct pipe_sampler_state *state)
{
struct zink_screen *screen = zink_screen(pctx->screen);
+ ASSERTED struct zink_context *zink = zink_context(pctx);
bool need_custom = false;
-
+ bool need_clamped_border_color = false;
VkSamplerCreateInfo sci = {0};
VkSamplerCustomBorderColorCreateInfoEXT cbci = {0};
+ VkSamplerCustomBorderColorCreateInfoEXT cbci_clamped = {0};
sci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
+ if (screen->info.have_EXT_non_seamless_cube_map && !state->seamless_cube_map)
+ sci.flags |= VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
+ if (state->unnormalized_coords) {
+ assert(zink->flags & PIPE_CONTEXT_COMPUTE_ONLY);
+ sci.unnormalizedCoordinates = state->unnormalized_coords;
+ }
sci.magFilter = zink_filter(state->mag_img_filter);
- sci.minFilter = zink_filter(state->min_img_filter);
+ if (sci.unnormalizedCoordinates)
+ sci.minFilter = sci.magFilter;
+ else
+ sci.minFilter = zink_filter(state->min_img_filter);
VkSamplerReductionModeCreateInfo rci;
rci.sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO;
@@ -306,20 +486,31 @@ zink_create_sampler_state(struct pipe_context *pctx,
if (state->reduction_mode)
sci.pNext = &rci;
- if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ if (sci.unnormalizedCoordinates) {
+ sci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
+ } else if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
sci.mipmapMode = sampler_mipmap_mode(state->min_mip_filter);
sci.minLod = state->min_lod;
- sci.maxLod = state->max_lod;
+ sci.maxLod = MAX2(state->max_lod, state->min_lod);
} else {
sci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
- sci.minLod = 0;
- sci.maxLod = 0.25f;
+ sci.minLod = CLAMP(state->min_lod, 0.0f, 0.25f);
+ sci.maxLod = CLAMP(state->max_lod, 0.0f, 0.25f);
+ }
+
+ if (!sci.unnormalizedCoordinates) {
+ sci.addressModeU = sampler_address_mode(state->wrap_s);
+ sci.addressModeV = sampler_address_mode(state->wrap_t);
+ sci.addressModeW = sampler_address_mode(state->wrap_r);
+ } else {
+ sci.addressModeU = sampler_address_mode_unnormalized(state->wrap_s);
+ sci.addressModeV = sampler_address_mode_unnormalized(state->wrap_t);
+ sci.addressModeW = sampler_address_mode_unnormalized(state->wrap_r);
}
- sci.addressModeU = sampler_address_mode(state->wrap_s);
- sci.addressModeV = sampler_address_mode(state->wrap_t);
- sci.addressModeW = sampler_address_mode(state->wrap_r);
- sci.mipLodBias = state->lod_bias;
+ sci.mipLodBias = CLAMP(state->lod_bias,
+ -screen->info.props.limits.maxSamplerLodBias,
+ screen->info.props.limits.maxSamplerLodBias);
need_custom |= wrap_needs_border_color(state->wrap_s);
need_custom |= wrap_needs_border_color(state->wrap_t);
@@ -336,12 +527,58 @@ zink_create_sampler_state(struct pipe_context *pctx,
sci.borderColor = get_border_color(&state->border_color, is_integer, need_custom);
if (sci.borderColor > VK_BORDER_COLOR_INT_OPAQUE_WHITE && need_custom) {
+ if (!screen->info.border_color_feats.customBorderColorWithoutFormat &&
+ screen->info.driver_props.driverID != VK_DRIVER_ID_MESA_TURNIP) {
+ static bool warned = false;
+ warn_missing_feature(warned, "customBorderColorWithoutFormat");
+ }
if (screen->info.have_EXT_custom_border_color &&
- screen->info.border_color_feats.customBorderColorWithoutFormat) {
+ (screen->info.border_color_feats.customBorderColorWithoutFormat || state->border_color_format)) {
+ if (!screen->info.have_EXT_border_color_swizzle) {
+ static bool warned = false;
+ warn_missing_feature(warned, "VK_EXT_border_color_swizzle");
+ }
+
+ if (!is_integer && !screen->have_D24_UNORM_S8_UINT) {
+ union pipe_color_union clamped_border_color;
+ for (unsigned i = 0; i < 4; ++i) {
+ /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE
+ * when the border color is 1.0. */
+ clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);
+ }
+ if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) != 0) {
+ need_clamped_border_color = true;
+ cbci_clamped.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
+ cbci_clamped.format = VK_FORMAT_UNDEFINED;
+ /* these are identical unions */
+ memcpy(&cbci_clamped.customBorderColor, &clamped_border_color, sizeof(union pipe_color_union));
+ }
+ }
cbci.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
- cbci.format = VK_FORMAT_UNDEFINED;
- /* these are identical unions */
- memcpy(&cbci.customBorderColor, &state->border_color, sizeof(union pipe_color_union));
+ if (screen->info.border_color_feats.customBorderColorWithoutFormat) {
+ cbci.format = VK_FORMAT_UNDEFINED;
+ /* these are identical unions */
+ memcpy(&cbci.customBorderColor, &state->border_color, sizeof(union pipe_color_union));
+ } else {
+ if (util_format_is_depth_or_stencil(state->border_color_format)) {
+ if (is_integer) {
+ cbci.format = VK_FORMAT_S8_UINT;
+ for (unsigned i = 0; i < 4; i++)
+ cbci.customBorderColor.uint32[i] = CLAMP(state->border_color.ui[i], 0, 255);
+ } else {
+ cbci.format = zink_get_format(screen, util_format_get_depth_only(state->border_color_format));
+ /* these are identical unions */
+ memcpy(&cbci.customBorderColor, &state->border_color, sizeof(union pipe_color_union));
+ }
+ } else {
+ cbci.format = zink_get_format(screen, state->border_color_format);
+ union pipe_color_union color;
+ for (unsigned i = 0; i < 4; i++) {
+ zink_format_clamp_channel_srgb(util_format_description(state->border_color_format), &color, &state->border_color, i);
+ }
+ zink_convert_color(screen, state->border_color_format, (void*)&cbci.customBorderColor, &color);
+ }
+ }
cbci.pNext = sci.pNext;
sci.pNext = &cbci;
UNUSED uint32_t check = p_atomic_inc_return(&screen->cur_custom_border_color_samplers);
@@ -350,8 +587,6 @@ zink_create_sampler_state(struct pipe_context *pctx,
sci.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; // TODO with custom shader if we're super interested?
}
- sci.unnormalizedCoordinates = !state->normalized_coords;
-
if (state->max_anisotropy > 1) {
sci.maxAnisotropy = state->max_anisotropy;
sci.anisotropyEnable = VK_TRUE;
@@ -361,34 +596,37 @@ zink_create_sampler_state(struct pipe_context *pctx,
if (!sampler)
return NULL;
- if (VKSCR(CreateSampler)(screen->dev, &sci, NULL, &sampler->sampler) != VK_SUCCESS) {
+ VkResult result = VKSCR(CreateSampler)(screen->dev, &sci, NULL, &sampler->sampler);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateSampler failed (%s)", vk_Result_to_str(result));
FREE(sampler);
return NULL;
}
- util_dynarray_init(&sampler->desc_set_refs.refs, NULL);
- calc_descriptor_hash_sampler_state(sampler);
+ if (need_clamped_border_color) {
+ sci.pNext = &cbci_clamped;
+ result = VKSCR(CreateSampler)(screen->dev, &sci, NULL, &sampler->sampler_clamped);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateSampler failed (%s)", vk_Result_to_str(result));
+ VKSCR(DestroySampler)(screen->dev, sampler->sampler, NULL);
+ FREE(sampler);
+ return NULL;
+ }
+ }
sampler->custom_border_color = need_custom;
+ if (!screen->info.have_EXT_non_seamless_cube_map)
+ sampler->emulate_nonseamless = !state->seamless_cube_map;
return sampler;
}
ALWAYS_INLINE static VkImageLayout
-get_layout_for_binding(struct zink_resource *res, enum zink_descriptor_type type, bool is_compute)
+get_layout_for_binding(const struct zink_context *ctx, struct zink_resource *res, enum zink_descriptor_type type, bool is_compute)
{
if (res->obj->is_buffer)
return 0;
switch (type) {
case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- return res->image_bind_count[is_compute] ?
- VK_IMAGE_LAYOUT_GENERAL :
- res->aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ?
- //Vulkan-Docs#1490
- //(res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL :
- //res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL :
- (res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
- res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
- VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) :
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ return zink_descriptor_util_image_layout_eval(ctx, res, is_compute);
case ZINK_DESCRIPTOR_TYPE_IMAGE:
return VK_IMAGE_LAYOUT_GENERAL;
default:
@@ -398,12 +636,23 @@ get_layout_for_binding(struct zink_resource *res, enum zink_descriptor_type type
}
ALWAYS_INLINE static struct zink_surface *
-get_imageview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned idx)
+get_imageview_for_binding(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned idx)
{
switch (type) {
case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: {
struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[stage][idx]);
- return sampler_view->base.texture ? sampler_view->image_view : NULL;
+ if (!sampler_view || !sampler_view->base.texture)
+ return NULL;
+ /* if this is a non-seamless cube sampler, return the cube array view */
+ if (ctx->di.emulate_nonseamless[stage] & ctx->di.cubes[stage] & BITFIELD_BIT(idx))
+ return sampler_view->cube_array;
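+      /* zs shader-swizzle workarounds and legacy shadow sampling read through the red-only zs_view */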
+ bool needs_zs_shader_swizzle = (ctx->di.zs_swizzle[stage].mask & BITFIELD_BIT(idx)) &&
+ zink_screen(ctx->base.screen)->driver_workarounds.needs_zs_shader_swizzle;
+ bool needs_shadow_shader_swizzle = (stage == MESA_SHADER_FRAGMENT) && ctx->gfx_stages[MESA_SHADER_FRAGMENT] &&
+ (ctx->di.zs_swizzle[MESA_SHADER_FRAGMENT].mask & ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask & BITFIELD_BIT(idx));
+ if (sampler_view->zs_view && (needs_zs_shader_swizzle || needs_shadow_shader_swizzle))
+ return sampler_view->zs_view;
+ return sampler_view->image_view;
}
case ZINK_DESCRIPTOR_TYPE_IMAGE: {
struct zink_image_view *image_view = &ctx->image_views[stage][idx];
@@ -417,7 +666,7 @@ get_imageview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage,
}
ALWAYS_INLINE static struct zink_buffer_view *
-get_bufferview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned idx)
+get_bufferview_for_binding(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned idx)
{
switch (type) {
case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: {
@@ -436,52 +685,65 @@ get_bufferview_for_binding(struct zink_context *ctx, enum pipe_shader_type stage
}
ALWAYS_INLINE static struct zink_resource *
-update_descriptor_state_ubo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res)
+update_descriptor_state_ubo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor;
const enum zink_descriptor_type type = ZINK_DESCRIPTOR_TYPE_UBO;
ctx->di.descriptor_res[type][shader][slot] = res;
- ctx->di.ubos[shader][slot].offset = ctx->ubos[shader][slot].buffer_offset;
- if (res) {
- ctx->di.ubos[shader][slot].buffer = res->obj->buffer;
- ctx->di.ubos[shader][slot].range = ctx->ubos[shader][slot].buffer_size;
- assert(ctx->di.ubos[shader][slot].range <= screen->info.props.limits.maxUniformBufferRange);
- } else {
- VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
- ctx->di.ubos[shader][slot].buffer = have_null_descriptors ? VK_NULL_HANDLE : null_buffer;
- ctx->di.ubos[shader][slot].range = VK_WHOLE_SIZE;
- }
- if (!slot) {
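+   /* descriptor-buffer (DB) mode stores buffer device addresses directly;
+    * otherwise fill the VkDescriptorBufferInfo used for descriptor writes */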
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
if (res)
- ctx->di.push_valid |= BITFIELD64_BIT(shader);
+ ctx->di.db.ubos[shader][slot].address = res->obj->bda + ctx->ubos[shader][slot].buffer_offset;
else
- ctx->di.push_valid &= ~BITFIELD64_BIT(shader);
+ ctx->di.db.ubos[shader][slot].address = 0;
+ ctx->di.db.ubos[shader][slot].range = res ? ctx->ubos[shader][slot].buffer_size : VK_WHOLE_SIZE;
+ assert(ctx->di.db.ubos[shader][slot].range == VK_WHOLE_SIZE ||
+ ctx->di.db.ubos[shader][slot].range <= screen->info.props.limits.maxUniformBufferRange);
+ } else {
+ ctx->di.t.ubos[shader][slot].offset = ctx->ubos[shader][slot].buffer_offset;
+ if (res) {
+ ctx->di.t.ubos[shader][slot].buffer = res->obj->buffer;
+ ctx->di.t.ubos[shader][slot].range = ctx->ubos[shader][slot].buffer_size;
+ assert(ctx->di.t.ubos[shader][slot].range <= screen->info.props.limits.maxUniformBufferRange);
+ } else {
+ VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
+ ctx->di.t.ubos[shader][slot].buffer = have_null_descriptors ? VK_NULL_HANDLE : null_buffer;
+ ctx->di.t.ubos[shader][slot].range = VK_WHOLE_SIZE;
+ }
}
+
return res;
}
ALWAYS_INLINE static struct zink_resource *
-update_descriptor_state_ssbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res)
+update_descriptor_state_ssbo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor;
const enum zink_descriptor_type type = ZINK_DESCRIPTOR_TYPE_SSBO;
ctx->di.descriptor_res[type][shader][slot] = res;
- ctx->di.ssbos[shader][slot].offset = ctx->ssbos[shader][slot].buffer_offset;
- if (res) {
- ctx->di.ssbos[shader][slot].buffer = res->obj->buffer;
- ctx->di.ssbos[shader][slot].range = ctx->ssbos[shader][slot].buffer_size;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (res)
+ ctx->di.db.ssbos[shader][slot].address = res->obj->bda + ctx->ssbos[shader][slot].buffer_offset;
+ else
+ ctx->di.db.ssbos[shader][slot].address = 0;
+ ctx->di.db.ssbos[shader][slot].range = res ? ctx->ssbos[shader][slot].buffer_size : VK_WHOLE_SIZE;
} else {
- VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
- ctx->di.ssbos[shader][slot].buffer = have_null_descriptors ? VK_NULL_HANDLE : null_buffer;
- ctx->di.ssbos[shader][slot].range = VK_WHOLE_SIZE;
+ ctx->di.t.ssbos[shader][slot].offset = ctx->ssbos[shader][slot].buffer_offset;
+ if (res) {
+ ctx->di.t.ssbos[shader][slot].buffer = res->obj->buffer;
+ ctx->di.t.ssbos[shader][slot].range = ctx->ssbos[shader][slot].buffer_size;
+ } else {
+ VkBuffer null_buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
+ ctx->di.t.ssbos[shader][slot].buffer = have_null_descriptors ? VK_NULL_HANDLE : null_buffer;
+ ctx->di.t.ssbos[shader][slot].range = VK_WHOLE_SIZE;
+ }
}
return res;
}
ALWAYS_INLINE static struct zink_resource *
-update_descriptor_state_sampler(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res)
+update_descriptor_state_sampler(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor;
@@ -489,36 +751,62 @@ update_descriptor_state_sampler(struct zink_context *ctx, enum pipe_shader_type
ctx->di.descriptor_res[type][shader][slot] = res;
if (res) {
if (res->obj->is_buffer) {
- struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot);
- ctx->di.tbos[shader][slot] = bv->buffer_view;
- ctx->di.sampler_surfaces[shader][slot].bufferview = bv;
- ctx->di.sampler_surfaces[shader][slot].is_buffer = true;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.db.tbos[shader][slot].address = res->obj->bda + ctx->sampler_views[shader][slot]->u.buf.offset;
+ ctx->di.db.tbos[shader][slot].range = zink_sampler_view(ctx->sampler_views[shader][slot])->tbo_size;
+ ctx->di.db.tbos[shader][slot].format = zink_get_format(screen, ctx->sampler_views[shader][slot]->format);
+ } else {
+ struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot);
+ ctx->di.t.tbos[shader][slot] = bv->buffer_view;
+ }
} else {
struct zink_surface *surface = get_imageview_for_binding(ctx, shader, type, slot);
- ctx->di.textures[shader][slot].imageLayout = get_layout_for_binding(res, type, shader == PIPE_SHADER_COMPUTE);
+ ctx->di.textures[shader][slot].imageLayout = ctx->blitting ? res->layout : get_layout_for_binding(ctx, res, type, shader == MESA_SHADER_COMPUTE);
ctx->di.textures[shader][slot].imageView = surface->image_view;
- ctx->di.sampler_surfaces[shader][slot].surface = surface;
- ctx->di.sampler_surfaces[shader][slot].is_buffer = false;
+ if (!screen->have_D24_UNORM_S8_UINT &&
+ ctx->sampler_states[shader][slot] && ctx->sampler_states[shader][slot]->sampler_clamped) {
+ struct zink_sampler_state *state = ctx->sampler_states[shader][slot];
+ VkSampler sampler = (surface->base.format == PIPE_FORMAT_Z24X8_UNORM && surface->ivci.format == VK_FORMAT_D32_SFLOAT) ||
+ (surface->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT && surface->ivci.format == VK_FORMAT_D32_SFLOAT_S8_UINT) ?
+ state->sampler_clamped :
+ state->sampler;
+ if (ctx->di.textures[shader][slot].sampler != sampler) {
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
+ ctx->di.textures[shader][slot].sampler = sampler;
+ }
+ }
}
} else {
if (likely(have_null_descriptors)) {
ctx->di.textures[shader][slot].imageView = VK_NULL_HANDLE;
ctx->di.textures[shader][slot].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- ctx->di.tbos[shader][slot] = VK_NULL_HANDLE;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.db.tbos[shader][slot].address = 0;
+ ctx->di.db.tbos[shader][slot].range = VK_WHOLE_SIZE;
+ } else {
+ ctx->di.t.tbos[shader][slot] = VK_NULL_HANDLE;
+ }
} else {
- struct zink_surface *null_surface = zink_csurface(ctx->dummy_surface[0]);
+ assert(zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB);
+ struct zink_surface *null_surface = zink_get_dummy_surface(ctx, 0);
struct zink_buffer_view *null_bufferview = ctx->dummy_bufferview;
ctx->di.textures[shader][slot].imageView = null_surface->image_view;
ctx->di.textures[shader][slot].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- ctx->di.tbos[shader][slot] = null_bufferview->buffer_view;
+ ctx->di.t.tbos[shader][slot] = null_bufferview->buffer_view;
}
- memset(&ctx->di.sampler_surfaces[shader][slot], 0, sizeof(ctx->di.sampler_surfaces[shader][slot]));
}
return res;
}
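+/* re-run sampler descriptor updates for fragment-shader slots whose shadow/zs swizzle handling changed */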
+void
+zink_update_shadow_samplerviews(struct zink_context *ctx, unsigned mask)
+{
+ u_foreach_bit(slot, mask)
+ update_descriptor_state_sampler(ctx, MESA_SHADER_FRAGMENT, slot, ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][MESA_SHADER_FRAGMENT][slot]);
+}
+
ALWAYS_INLINE static struct zink_resource *
-update_descriptor_state_image(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot, struct zink_resource *res)
+update_descriptor_state_image(struct zink_context *ctx, gl_shader_stage shader, unsigned slot, struct zink_resource *res)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
bool have_null_descriptors = screen->info.rb2_feats.nullDescriptor;
@@ -526,62 +814,130 @@ update_descriptor_state_image(struct zink_context *ctx, enum pipe_shader_type sh
ctx->di.descriptor_res[type][shader][slot] = res;
if (res) {
if (res->obj->is_buffer) {
- struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot);
- ctx->di.texel_images[shader][slot] = bv->buffer_view;
- ctx->di.image_surfaces[shader][slot].bufferview = bv;
- ctx->di.image_surfaces[shader][slot].is_buffer = true;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.db.texel_images[shader][slot].address = res->obj->bda + ctx->image_views[shader][slot].base.u.buf.offset;
+ ctx->di.db.texel_images[shader][slot].range = ctx->image_views[shader][slot].base.u.buf.size;
+ ctx->di.db.texel_images[shader][slot].format = zink_get_format(screen, ctx->image_views[shader][slot].base.format);
+ } else {
+ struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot);
+ ctx->di.t.texel_images[shader][slot] = bv->buffer_view;
+ }
} else {
struct zink_surface *surface = get_imageview_for_binding(ctx, shader, type, slot);
ctx->di.images[shader][slot].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
ctx->di.images[shader][slot].imageView = surface->image_view;
- ctx->di.image_surfaces[shader][slot].surface = surface;
- ctx->di.image_surfaces[shader][slot].is_buffer = false;
}
} else {
if (likely(have_null_descriptors)) {
memset(&ctx->di.images[shader][slot], 0, sizeof(ctx->di.images[shader][slot]));
- ctx->di.texel_images[shader][slot] = VK_NULL_HANDLE;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.db.texel_images[shader][slot].address = 0;
+ ctx->di.db.texel_images[shader][slot].range = VK_WHOLE_SIZE;
+ } else {
+ ctx->di.t.texel_images[shader][slot] = VK_NULL_HANDLE;
+ }
} else {
- struct zink_surface *null_surface = zink_csurface(ctx->dummy_surface[0]);
+ assert(zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB);
+ struct zink_surface *null_surface = zink_get_dummy_surface(ctx, 0);
struct zink_buffer_view *null_bufferview = ctx->dummy_bufferview;
ctx->di.images[shader][slot].imageView = null_surface->image_view;
ctx->di.images[shader][slot].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- ctx->di.texel_images[shader][slot] = null_bufferview->buffer_view;
+ ctx->di.t.texel_images[shader][slot] = null_bufferview->buffer_view;
}
- memset(&ctx->di.image_surfaces[shader][slot], 0, sizeof(ctx->di.image_surfaces[shader][slot]));
}
return res;
}
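+/* propagate the per-stage non-seamless cube mask into the shader key so affected shader variants are rebuilt */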
static void
+update_nonseamless_shader_key(struct zink_context *ctx, gl_shader_stage pstage)
+{
+ const uint32_t new_mask = ctx->di.emulate_nonseamless[pstage] & ctx->di.cubes[pstage];
+ if (pstage == MESA_SHADER_COMPUTE) {
+ if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask != new_mask)
+ ctx->compute_dirty = true;
+ ctx->compute_pipeline_state.key.base.nonseamless_cube_mask = new_mask;
+ } else {
+ if (zink_get_shader_key_base(ctx, pstage)->nonseamless_cube_mask != new_mask)
+ zink_set_shader_key_base(ctx, pstage)->nonseamless_cube_mask = new_mask;
+ }
+}
+
+static void
zink_bind_sampler_states(struct pipe_context *pctx,
- enum pipe_shader_type shader,
+ gl_shader_stage shader,
unsigned start_slot,
unsigned num_samplers,
void **samplers)
{
struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(pctx->screen);
for (unsigned i = 0; i < num_samplers; ++i) {
struct zink_sampler_state *state = samplers[i];
- if (ctx->sampler_states[shader][start_slot + i] != state)
- zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, 1);
+ if (samplers[i] == ctx->sampler_states[shader][start_slot + i])
+ continue;
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, 1);
ctx->sampler_states[shader][start_slot + i] = state;
- ctx->di.textures[shader][start_slot + i].sampler = state ? state->sampler : VK_NULL_HANDLE;
- if (state)
- zink_batch_usage_set(&state->batch_uses, ctx->batch.state);
+ if (state) {
+ ctx->di.textures[shader][start_slot + i].sampler = state->sampler;
+ if (state->sampler_clamped && !screen->have_D24_UNORM_S8_UINT) {
+ struct zink_surface *surface = get_imageview_for_binding(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot + i);
+ if (surface &&
+ ((surface->base.format == PIPE_FORMAT_Z24X8_UNORM && surface->ivci.format == VK_FORMAT_D32_SFLOAT) ||
+ (surface->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT && surface->ivci.format == VK_FORMAT_D32_SFLOAT_S8_UINT)))
+ ctx->di.textures[shader][start_slot + i].sampler = state->sampler_clamped;
+ }
+ } else {
+ ctx->di.textures[shader][start_slot + i].sampler = VK_NULL_HANDLE;
+ }
}
ctx->di.num_samplers[shader] = start_slot + num_samplers;
}
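+/* sampler bind path used when VK_EXT_non_seamless_cube_map is unavailable:
+ * track which bound cube samplers need emulation and refresh their views and shader keys */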
static void
+zink_bind_sampler_states_nonseamless(struct pipe_context *pctx,
+ gl_shader_stage shader,
+ unsigned start_slot,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ uint32_t old_mask = ctx->di.emulate_nonseamless[shader];
+ uint32_t mask = BITFIELD_RANGE(start_slot, num_samplers);
+ ctx->di.emulate_nonseamless[shader] &= ~mask;
+ for (unsigned i = 0; i < num_samplers; ++i) {
+ struct zink_sampler_state *state = samplers[i];
+ const uint32_t bit = BITFIELD_BIT(start_slot + i);
+ if (!state)
+ continue;
+ if (state->emulate_nonseamless)
+ ctx->di.emulate_nonseamless[shader] |= bit;
+ if (state->emulate_nonseamless != (old_mask & bit) && (ctx->di.cubes[shader] & bit)) {
+ struct zink_surface *surface = get_imageview_for_binding(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot + i);
+ if (surface && ctx->di.images[shader][start_slot + i].imageView != surface->image_view) {
+ ctx->di.images[shader][start_slot + i].imageView = surface->image_view;
+ update_descriptor_state_sampler(ctx, shader, start_slot + i, zink_resource(surface->base.texture));
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot + i, 1);
+ }
+ }
+ }
+ zink_bind_sampler_states(pctx, shader, start_slot, num_samplers, samplers);
+ update_nonseamless_shader_key(ctx, shader);
+}
+
+static void
zink_delete_sampler_state(struct pipe_context *pctx,
void *sampler_state)
{
struct zink_sampler_state *sampler = sampler_state;
struct zink_batch *batch = &zink_context(pctx)->batch;
- zink_descriptor_set_refs_clear(&sampler->desc_set_refs, sampler_state);
- util_dynarray_append(&batch->state->zombie_samplers, VkSampler,
- sampler->sampler);
+ /* may be called if context_create fails */
+ if (batch->state) {
+ util_dynarray_append(&batch->state->zombie_samplers, VkSampler,
+ sampler->sampler);
+ if (sampler->sampler_clamped)
+ util_dynarray_append(&batch->state->zombie_samplers, VkSampler,
+ sampler->sampler_clamped);
+ }
if (sampler->custom_border_color)
p_atomic_dec(&zink_screen(pctx->screen)->cur_custom_border_color_samplers);
FREE(sampler);
@@ -607,29 +963,57 @@ hash_bufferview(void *bvci)
return _mesa_hash_data((char*)bvci + offset, sizeof(VkBufferViewCreateInfo) - offset);
}
-static struct zink_buffer_view *
-get_buffer_view(struct zink_context *ctx, struct zink_resource *res, enum pipe_format format, uint32_t offset, uint32_t range)
+static VkBufferViewCreateInfo
+create_bvci(struct zink_context *ctx, struct zink_resource *res, enum pipe_format format, uint32_t offset, uint32_t range)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_buffer_view *buffer_view = NULL;
- VkBufferViewCreateInfo bvci = {0};
+ VkBufferViewCreateInfo bvci;
+ // Zero whole struct (including alignment holes), so hash_bufferview
+ // does not access potentially uninitialized data.
+ memset(&bvci, 0, sizeof(bvci));
bvci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
- bvci.buffer = res->obj->buffer;
+ bvci.pNext = NULL;
+ if (screen->format_props[format].bufferFeatures & VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT)
+ bvci.buffer = res->obj->storage_buffer ? res->obj->storage_buffer : res->obj->buffer;
+ else
+ bvci.buffer = res->obj->buffer;
bvci.format = zink_get_format(screen, format);
assert(bvci.format);
bvci.offset = offset;
bvci.range = !offset && range == res->base.b.width0 ? VK_WHOLE_SIZE : range;
+ unsigned blocksize = util_format_get_blocksize(format);
+ if (bvci.range != VK_WHOLE_SIZE) {
+ /* clamp out partial texels */
+ bvci.range -= bvci.range % blocksize;
+ if (bvci.offset + bvci.range >= res->base.b.width0)
+ bvci.range = VK_WHOLE_SIZE;
+ }
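+   /* never create a view larger than the device's maxTexelBufferElements limit */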
+ uint64_t clamp = blocksize * screen->info.props.limits.maxTexelBufferElements;
+ if (bvci.range == VK_WHOLE_SIZE && res->base.b.width0 > clamp)
+ bvci.range = clamp;
+ bvci.flags = 0;
+ return bvci;
+}
+
+static struct zink_buffer_view *
+get_buffer_view(struct zink_context *ctx, struct zink_resource *res, VkBufferViewCreateInfo *bvci)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_buffer_view *buffer_view = NULL;
- uint32_t hash = hash_bufferview(&bvci);
+ uint32_t hash = hash_bufferview(bvci);
simple_mtx_lock(&res->bufferview_mtx);
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->bufferview_cache, hash, &bvci);
+ struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->bufferview_cache, hash, bvci);
if (he) {
buffer_view = he->data;
p_atomic_inc(&buffer_view->reference.count);
} else {
VkBufferView view;
- if (VKSCR(CreateBufferView)(screen->dev, &bvci, NULL, &view) != VK_SUCCESS)
+ VkResult result = VKSCR(CreateBufferView)(screen->dev, bvci, NULL, &view);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateBufferView failed (%s)", vk_Result_to_str(result));
goto out;
+ }
buffer_view = CALLOC_STRUCT(zink_buffer_view);
if (!buffer_view) {
VKSCR(DestroyBufferView)(screen->dev, view, NULL);
@@ -637,8 +1021,7 @@ get_buffer_view(struct zink_context *ctx, struct zink_resource *res, enum pipe_f
}
pipe_reference_init(&buffer_view->reference, 1);
pipe_resource_reference(&buffer_view->pres, &res->base.b);
- util_dynarray_init(&buffer_view->desc_set_refs.refs, NULL);
- buffer_view->bvci = bvci;
+ buffer_view->bvci = *bvci;
buffer_view->buffer_view = view;
buffer_view->hash = hash;
_mesa_hash_table_insert_pre_hashed(&res->bufferview_cache, hash, &buffer_view->bvci, buffer_view);
@@ -678,15 +1061,58 @@ clamp_zs_swizzle(enum pipe_swizzle swizzle)
return swizzle;
}
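+/* helpers to remap swizzles for GL alpha/luminance(-alpha) formats
+ * that are backed by single- or dual-channel Vulkan formats */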
+ALWAYS_INLINE static enum pipe_swizzle
+clamp_alpha_swizzle(enum pipe_swizzle swizzle)
+{
+ if (swizzle == PIPE_SWIZZLE_W)
+ return PIPE_SWIZZLE_X;
+ if (swizzle < PIPE_SWIZZLE_W)
+ return PIPE_SWIZZLE_0;
+ return swizzle;
+}
+
+ALWAYS_INLINE static enum pipe_swizzle
+clamp_luminance_swizzle(enum pipe_swizzle swizzle)
+{
+ if (swizzle == PIPE_SWIZZLE_W)
+ return PIPE_SWIZZLE_1;
+ if (swizzle < PIPE_SWIZZLE_W)
+ return PIPE_SWIZZLE_X;
+ return swizzle;
+}
+
+ALWAYS_INLINE static enum pipe_swizzle
+clamp_luminance_alpha_swizzle(enum pipe_swizzle swizzle)
+{
+ if (swizzle == PIPE_SWIZZLE_W)
+ return PIPE_SWIZZLE_Y;
+ if (swizzle < PIPE_SWIZZLE_W)
+ return PIPE_SWIZZLE_X;
+ return swizzle;
+}
+
+ALWAYS_INLINE static bool
+viewtype_is_cube(const VkImageViewCreateInfo *ivci)
+{
+ return ivci->viewType == VK_IMAGE_VIEW_TYPE_CUBE ||
+ ivci->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
+}
+
static struct pipe_sampler_view *
zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres,
const struct pipe_sampler_view *state)
{
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_resource *res = zink_resource(pres);
- struct zink_sampler_view *sampler_view = CALLOC_STRUCT(zink_sampler_view);
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_sampler_view *sampler_view = CALLOC_STRUCT_CL(zink_sampler_view);
bool err;
+ if (!sampler_view) {
+ mesa_loge("ZINK: failed to allocate sampler_view!");
+ return NULL;
+ }
+
sampler_view->base = *state;
sampler_view->base.texture = NULL;
pipe_resource_reference(&sampler_view->base.texture, pres);
@@ -699,46 +1125,136 @@ zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres,
struct pipe_surface templ = {0};
templ.u.tex.level = state->u.tex.first_level;
templ.format = state->format;
+ /* avoid needing mutable for depth/stencil sampling */
+ if (util_format_is_depth_and_stencil(pres->format))
+ templ.format = pres->format;
if (state->target != PIPE_TEXTURE_3D) {
templ.u.tex.first_layer = state->u.tex.first_layer;
templ.u.tex.last_layer = state->u.tex.last_layer;
}
+ if (zink_is_swapchain(res)) {
+ if (!zink_kopper_acquire(ctx, res, UINT64_MAX)) {
+ FREE_CL(sampler_view);
+ return NULL;
+ }
+ }
+
ivci = create_ivci(screen, res, &templ, state->target);
ivci.subresourceRange.levelCount = state->u.tex.last_level - state->u.tex.first_level + 1;
ivci.subresourceRange.aspectMask = sampler_aspect_from_format(state->format);
+ bool red_depth_sampler_view = false;
/* samplers for stencil aspects of packed formats need to always use stencil swizzle */
if (ivci.subresourceRange.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
ivci.components.r = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r));
ivci.components.g = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g));
ivci.components.b = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b));
ivci.components.a = zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a));
+
+ /* If we're sampling depth and we might need to do shader rewrites for
+ * legacy shadow sampling, then set up an extra image view that just
+ * returns the red (depth) component, so you can always have the shadow
+ * result available in the red component for the in-shader swizzling.
+ * (Or if we have PVR's needs_zs_shader_swizzle and are sampling ONE
+ * value for stencil, which also uses that view).
+ */
+ if (ivci.subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
+ zink_screen(ctx->base.screen)->driver_workarounds.needs_zs_shader_swizzle) {
+ VkComponentSwizzle *swizzle = (VkComponentSwizzle*)&ivci.components;
+ for (unsigned i = 0; i < 4; i++) {
+ if (swizzle[i] == VK_COMPONENT_SWIZZLE_ONE ||
+ (swizzle[i] == VK_COMPONENT_SWIZZLE_ZERO && ivci.subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT))
+ red_depth_sampler_view = true;
+ }
+ /* this is the data that will be used in shader rewrites */
+ sampler_view->swizzle.s[0] = clamp_zs_swizzle(sampler_view->base.swizzle_r);
+ sampler_view->swizzle.s[1] = clamp_zs_swizzle(sampler_view->base.swizzle_g);
+ sampler_view->swizzle.s[2] = clamp_zs_swizzle(sampler_view->base.swizzle_b);
+ sampler_view->swizzle.s[3] = clamp_zs_swizzle(sampler_view->base.swizzle_a);
+ }
} else {
+ enum pipe_swizzle swizzle[4] = {
+ sampler_view->base.swizzle_r,
+ sampler_view->base.swizzle_g,
+ sampler_view->base.swizzle_b,
+ sampler_view->base.swizzle_a
+ };
/* if we have e.g., R8G8B8X8, then we have to ignore alpha since we're just emulating
* these formats
*/
- if (zink_format_is_voidable_rgba_variant(state->format)) {
- const struct util_format_description *desc = util_format_description(state->format);
- sampler_view->base.swizzle_r = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_r);
- sampler_view->base.swizzle_g = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_g);
- sampler_view->base.swizzle_b = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_b);
- sampler_view->base.swizzle_a = zink_clamp_void_swizzle(desc, sampler_view->base.swizzle_a);
- }
- ivci.components.r = zink_component_mapping(sampler_view->base.swizzle_r);
- ivci.components.g = zink_component_mapping(sampler_view->base.swizzle_g);
- ivci.components.b = zink_component_mapping(sampler_view->base.swizzle_b);
- ivci.components.a = zink_component_mapping(sampler_view->base.swizzle_a);
+ if (zink_format_is_voidable_rgba_variant(state->format)) {
+ const struct util_format_description *view_desc = util_format_description(state->format);
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = zink_clamp_void_swizzle(view_desc, swizzle[i]);
+ } else if (util_format_is_alpha(state->format) && res->format != VK_FORMAT_A8_UNORM_KHR) {
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = clamp_alpha_swizzle(swizzle[i]);
+ } else if (util_format_is_luminance(pres->format) ||
+ util_format_is_luminance_alpha(pres->format)) {
+ if (util_format_is_luminance(pres->format)) {
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = clamp_luminance_swizzle(swizzle[i]);
+ } else {
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = clamp_luminance_alpha_swizzle(swizzle[i]);
+ }
+ if (state->format != pres->format) {
+ /* luminance / luminance-alpha formats can be reinterpreted
+ * as red / red-alpha formats by the state-tracker, and we
+ * need to whack the green/blue channels here to the
+ * correct values for that to work.
+ */
+ enum pipe_format linear = util_format_linear(pres->format);
+ if (state->format == util_format_luminance_to_red(linear)) {
+ assert(swizzle[1] == PIPE_SWIZZLE_X ||
+ swizzle[1] == PIPE_SWIZZLE_0);
+ assert(swizzle[2] == PIPE_SWIZZLE_X ||
+ swizzle[2] == PIPE_SWIZZLE_0);
+ swizzle[1] = swizzle[2] = PIPE_SWIZZLE_0;
+ } else
+ assert(state->format == linear);
+ }
+ } else if (util_format_is_red_alpha(pres->format)) {
+ /* RA formats are mapped to RG with adjusted swizzle */
+ assert(util_format_is_red_green(vk_format_to_pipe_format(ivci.format)));
+ swizzle[3] = PIPE_SWIZZLE_Y;
+ }
+
+ ivci.components.r = zink_component_mapping(swizzle[0]);
+ ivci.components.g = zink_component_mapping(swizzle[1]);
+ ivci.components.b = zink_component_mapping(swizzle[2]);
+ ivci.components.a = zink_component_mapping(swizzle[3]);
}
assert(ivci.format);
- sampler_view->image_view = (struct zink_surface*)zink_get_surface(zink_context(pctx), pres, &templ, &ivci);
+ sampler_view->image_view = zink_get_surface(ctx, pres, &templ, &ivci);
+ if (!screen->info.have_EXT_non_seamless_cube_map && viewtype_is_cube(&sampler_view->image_view->ivci)) {
+ ivci.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ sampler_view->cube_array = zink_get_surface(ctx, pres, &templ, &ivci);
+ } else if (red_depth_sampler_view) {
+ /* there is only one component, and real swizzling can't be done here,
+ * so ensure the shader gets the sampled data
+ */
+ ivci.components.r = VK_COMPONENT_SWIZZLE_R;
+ ivci.components.g = VK_COMPONENT_SWIZZLE_R;
+ ivci.components.b = VK_COMPONENT_SWIZZLE_R;
+ ivci.components.a = VK_COMPONENT_SWIZZLE_R;
+ sampler_view->zs_view = zink_get_surface(ctx, pres, &templ, &ivci);
+ }
err = !sampler_view->image_view;
} else {
- sampler_view->buffer_view = get_buffer_view(zink_context(pctx), res, state->format, state->u.buf.offset, state->u.buf.size);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ /* always enforce limit clamping */
+ unsigned blocksize = util_format_get_blocksize(state->format);
+ sampler_view->tbo_size = MIN2(state->u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) * blocksize;
+ return &sampler_view->base;
+ }
+ VkBufferViewCreateInfo bvci = create_bvci(ctx, res, state->format, state->u.buf.offset, state->u.buf.size);
+ sampler_view->buffer_view = get_buffer_view(ctx, res, &bvci);
err = !sampler_view->buffer_view;
}
if (err) {
- FREE(sampler_view);
+ FREE_CL(sampler_view);
return NULL;
}
return &sampler_view->base;
@@ -749,13 +1265,19 @@ zink_destroy_buffer_view(struct zink_screen *screen, struct zink_buffer_view *bu
{
struct zink_resource *res = zink_resource(buffer_view->pres);
simple_mtx_lock(&res->bufferview_mtx);
+ if (buffer_view->reference.count) {
+ /* got a cache hit during deletion */
+ simple_mtx_unlock(&res->bufferview_mtx);
+ return;
+ }
struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->bufferview_cache, buffer_view->hash, &buffer_view->bvci);
assert(he);
_mesa_hash_table_remove(&res->bufferview_cache, he);
simple_mtx_unlock(&res->bufferview_mtx);
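+   /* defer VkBufferView destruction by queueing it on the resource object */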
+ simple_mtx_lock(&res->obj->view_lock);
+ util_dynarray_append(&res->obj->views, VkBufferView, buffer_view->buffer_view);
+ simple_mtx_unlock(&res->obj->view_lock);
pipe_resource_reference(&buffer_view->pres, NULL);
- VKSCR(DestroyBufferView)(screen->dev, buffer_view->buffer_view, NULL);
- zink_descriptor_set_refs_clear(&buffer_view->desc_set_refs, buffer_view);
FREE(buffer_view);
}
@@ -764,13 +1286,16 @@ zink_sampler_view_destroy(struct pipe_context *pctx,
struct pipe_sampler_view *pview)
{
struct zink_sampler_view *view = zink_sampler_view(pview);
- if (pview->texture->target == PIPE_BUFFER)
- zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL);
- else {
+ if (pview->texture->target == PIPE_BUFFER) {
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB)
+ zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL);
+ } else {
zink_surface_reference(zink_screen(pctx->screen), &view->image_view, NULL);
+ zink_surface_reference(zink_screen(pctx->screen), &view->cube_array, NULL);
+ zink_surface_reference(zink_screen(pctx->screen), &view->zs_view, NULL);
}
pipe_resource_reference(&pview->texture, NULL);
- FREE(view);
+ FREE_CL(view);
}
static void
@@ -781,68 +1306,7 @@ zink_get_sample_position(struct pipe_context *ctx,
{
/* TODO: handle this I guess */
assert(zink_screen(ctx->screen)->info.props.limits.standardSampleLocations);
- /* from 26.4. Multisampling */
- switch (sample_count) {
- case 0:
- case 1: {
- float pos[][2] = { {0.5,0.5}, };
- out_value[0] = pos[sample_index][0];
- out_value[1] = pos[sample_index][1];
- break;
- }
- case 2: {
- float pos[][2] = { {0.75,0.75},
- {0.25,0.25}, };
- out_value[0] = pos[sample_index][0];
- out_value[1] = pos[sample_index][1];
- break;
- }
- case 4: {
- float pos[][2] = { {0.375, 0.125},
- {0.875, 0.375},
- {0.125, 0.625},
- {0.625, 0.875}, };
- out_value[0] = pos[sample_index][0];
- out_value[1] = pos[sample_index][1];
- break;
- }
- case 8: {
- float pos[][2] = { {0.5625, 0.3125},
- {0.4375, 0.6875},
- {0.8125, 0.5625},
- {0.3125, 0.1875},
- {0.1875, 0.8125},
- {0.0625, 0.4375},
- {0.6875, 0.9375},
- {0.9375, 0.0625}, };
- out_value[0] = pos[sample_index][0];
- out_value[1] = pos[sample_index][1];
- break;
- }
- case 16: {
- float pos[][2] = { {0.5625, 0.5625},
- {0.4375, 0.3125},
- {0.3125, 0.625},
- {0.75, 0.4375},
- {0.1875, 0.375},
- {0.625, 0.8125},
- {0.8125, 0.6875},
- {0.6875, 0.1875},
- {0.375, 0.875},
- {0.5, 0.0625},
- {0.25, 0.125},
- {0.125, 0.75},
- {0.0, 0.5},
- {0.9375, 0.25},
- {0.875, 0.9375},
- {0.0625, 0.0}, };
- out_value[0] = pos[sample_index][0];
- out_value[1] = pos[sample_index][1];
- break;
- }
- default:
- unreachable("unhandled sample count!");
- }
+ u_default_get_sample_position(ctx, sample_count, sample_index, out_value);
}
static void
@@ -869,90 +1333,61 @@ update_existing_vbo(struct zink_context *ctx, unsigned slot)
if (!ctx->vertex_buffers[slot].buffer.resource)
return;
struct zink_resource *res = zink_resource(ctx->vertex_buffers[slot].buffer.resource);
+ res->vbo_bind_count--;
res->vbo_bind_mask &= ~BITFIELD_BIT(slot);
- ctx->vbufs[slot] = VK_NULL_HANDLE;
- ctx->vbuf_offsets[slot] = 0;
- update_res_bind_count(ctx, res, false, true);
-}
-
-ALWAYS_INLINE static struct zink_resource *
-set_vertex_buffer_clamped(struct zink_context *ctx, unsigned slot)
-{
- const struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[slot];
- struct zink_resource *res = zink_resource(ctx_vb->buffer.resource);
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (ctx_vb->buffer_offset > screen->info.props.limits.maxVertexInputAttributeOffset) {
- /* buffer offset exceeds maximum: make a tmp buffer at this offset */
- ctx->vbufs[slot] = zink_resource_tmp_buffer(screen, res, ctx_vb->buffer_offset, 0, &ctx->vbuf_offsets[slot]);
- util_dynarray_append(&res->obj->tmp, VkBuffer, ctx->vbufs[slot]);
- /* the driver is broken and sets a min alignment that's larger than its max offset: rebind as staging buffer */
- if (unlikely(ctx->vbuf_offsets[slot] > screen->info.props.limits.maxVertexInputAttributeOffset)) {
- static bool warned = false;
- if (!warned)
- debug_printf("zink: this vulkan driver is BROKEN! maxVertexInputAttributeOffset < VkMemoryRequirements::alignment\n");
- warned = true;
- }
- } else {
- ctx->vbufs[slot] = res->obj->buffer;
- ctx->vbuf_offsets[slot] = ctx_vb->buffer_offset;
+ if (!res->vbo_bind_count) {
+ res->gfx_barrier &= ~VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+ res->barrier_access[0] &= ~VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
}
- assert(ctx->vbufs[slot]);
- return res;
+ update_res_bind_count(ctx, res, false, true);
}
static void
zink_set_vertex_buffers(struct pipe_context *pctx,
- unsigned start_slot,
unsigned num_buffers,
- unsigned unbind_num_trailing_slots,
- bool take_ownership,
const struct pipe_vertex_buffer *buffers)
{
struct zink_context *ctx = zink_context(pctx);
+ const bool have_input_state = zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state;
const bool need_state_change = !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state &&
- !zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state;
- uint32_t enabled_buffers = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask;
- enabled_buffers |= u_bit_consecutive(start_slot, num_buffers);
- enabled_buffers &= ~u_bit_consecutive(start_slot + num_buffers, unbind_num_trailing_slots);
-
- if (buffers) {
- if (need_state_change)
- ctx->vertex_state_changed = true;
- for (unsigned i = 0; i < num_buffers; ++i) {
- const struct pipe_vertex_buffer *vb = buffers + i;
- struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[start_slot + i];
- update_existing_vbo(ctx, start_slot + i);
- if (!take_ownership)
- pipe_resource_reference(&ctx_vb->buffer.resource, vb->buffer.resource);
- else {
- pipe_resource_reference(&ctx_vb->buffer.resource, NULL);
- ctx_vb->buffer.resource = vb->buffer.resource;
- }
- if (vb->buffer.resource) {
- struct zink_resource *res = zink_resource(vb->buffer.resource);
- res->vbo_bind_mask |= BITFIELD_BIT(start_slot + i);
- update_res_bind_count(ctx, res, false, false);
- ctx_vb->stride = vb->stride;
- ctx_vb->buffer_offset = vb->buffer_offset;
- /* always barrier before possible rebind */
- zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
- set_vertex_buffer_clamped(ctx, start_slot + i);
- } else
- enabled_buffers &= ~BITFIELD_BIT(i);
- }
- } else {
- if (need_state_change)
- ctx->vertex_state_changed = true;
- for (unsigned i = 0; i < num_buffers; ++i) {
- update_existing_vbo(ctx, start_slot + i);
- pipe_resource_reference(&ctx->vertex_buffers[start_slot + i].buffer.resource, NULL);
+ !have_input_state;
+ unsigned last_count = util_last_bit(ctx->gfx_pipeline_state.vertex_buffers_enabled_mask);
+ uint32_t enabled_buffers = BITFIELD_MASK(num_buffers);
+
+ assert(!num_buffers || buffers);
+
+ for (unsigned i = 0; i < num_buffers; ++i) {
+ const struct pipe_vertex_buffer *vb = buffers + i;
+ struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[i];
+ update_existing_vbo(ctx, i);
+ pipe_resource_reference(&ctx_vb->buffer.resource, NULL);
+ ctx_vb->buffer.resource = vb->buffer.resource;
+
+ if (vb->buffer.resource) {
+ struct zink_resource *res = zink_resource(vb->buffer.resource);
+ res->vbo_bind_mask |= BITFIELD_BIT(i);
+ res->vbo_bind_count++;
+ res->gfx_barrier |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+ res->barrier_access[0] |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+ update_res_bind_count(ctx, res, false, false);
+ ctx_vb->buffer_offset = vb->buffer_offset;
+ /* always barrier before possible rebind */
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
+ zink_batch_resource_usage_set(&ctx->batch, res, false, true);
+ res->obj->unordered_read = false;
+ } else {
+ enabled_buffers &= ~BITFIELD_BIT(i);
}
}
- for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
- update_existing_vbo(ctx, start_slot + i);
- pipe_resource_reference(&ctx->vertex_buffers[start_slot + i].buffer.resource, NULL);
+ for (unsigned i = num_buffers; i < last_count; i++) {
+ update_existing_vbo(ctx, i);
+ pipe_resource_reference(&ctx->vertex_buffers[i].buffer.resource, NULL);
}
+ if (need_state_change)
+ ctx->vertex_state_changed = true;
+ else if (!have_input_state && ctx->gfx_pipeline_state.vertex_buffers_enabled_mask != enabled_buffers)
+ ctx->vertex_state_changed = true;
ctx->gfx_pipeline_state.vertex_buffers_enabled_mask = enabled_buffers;
ctx->vertex_buffers_dirty = num_buffers > 0;
#ifndef NDEBUG
@@ -971,14 +1406,9 @@ zink_set_viewport_states(struct pipe_context *pctx,
for (unsigned i = 0; i < num_viewports; ++i)
ctx->vp_state.viewport_states[start_slot + i] = state[i];
- ctx->vp_state.num_viewports = start_slot + num_viewports;
- if (!zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state) {
- if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
- ctx->gfx_pipeline_state.dirty = true;
- ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
- }
ctx->vp_state_changed = true;
+ zink_flush_dgc_if_enabled(ctx);
}
static void
@@ -991,11 +1421,12 @@ zink_set_scissor_states(struct pipe_context *pctx,
for (unsigned i = 0; i < num_scissors; i++)
ctx->vp_state.scissor_states[start_slot + i] = states[i];
ctx->scissor_changed = true;
+ zink_flush_dgc_if_enabled(ctx);
}
static void
zink_set_inlinable_constants(struct pipe_context *pctx,
- enum pipe_shader_type shader,
+ gl_shader_stage shader,
uint num_values, uint32_t *values)
{
struct zink_context *ctx = (struct zink_context *)pctx;
@@ -1003,50 +1434,75 @@ zink_set_inlinable_constants(struct pipe_context *pctx,
uint32_t *inlinable_uniforms;
struct zink_shader_key *key = NULL;
- if (shader == PIPE_SHADER_COMPUTE) {
- inlinable_uniforms = ctx->compute_inlinable_uniforms;
+ if (shader == MESA_SHADER_COMPUTE) {
+ key = &ctx->compute_pipeline_state.key;
} else {
+ assert(!zink_screen(pctx->screen)->optimal_keys ||
+ (shader == MESA_SHADER_GEOMETRY &&
+ ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
+ ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated));
key = &ctx->gfx_pipeline_state.shader_keys.key[shader];
- inlinable_uniforms = key->base.inlined_uniform_values;
}
+ inlinable_uniforms = key->base.inlined_uniform_values;
if (!(ctx->inlinable_uniforms_valid_mask & bit) ||
memcmp(inlinable_uniforms, values, num_values * 4)) {
memcpy(inlinable_uniforms, values, num_values * 4);
- ctx->dirty_shader_stages |= bit;
+ if (shader == MESA_SHADER_COMPUTE)
+ ctx->compute_dirty = true;
+ else
+ ctx->dirty_gfx_stages |= bit;
ctx->inlinable_uniforms_valid_mask |= bit;
- if (key)
- key->inline_uniforms = true;
+ key->inline_uniforms = true;
}
}
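+/* drop a resource's pipeline-stage barrier bits once no descriptors remain bound for that stage */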
ALWAYS_INLINE static void
-unbind_ubo(struct zink_context *ctx, struct zink_resource *res, enum pipe_shader_type pstage, unsigned slot)
+unbind_descriptor_stage(struct zink_resource *res, gl_shader_stage pstage)
+{
+ if (!res->sampler_binds[pstage] && !res->image_binds[pstage] && !res->all_bindless)
+ res->gfx_barrier &= ~zink_pipeline_flags_from_pipe_stage(pstage);
+}
+
+ALWAYS_INLINE static void
+unbind_buffer_descriptor_stage(struct zink_resource *res, gl_shader_stage pstage)
+{
+ if (!res->ubo_bind_mask[pstage] && !res->ssbo_bind_mask[pstage])
+ unbind_descriptor_stage(res, pstage);
+}
+
+ALWAYS_INLINE static void
+unbind_ubo(struct zink_context *ctx, struct zink_resource *res, gl_shader_stage pstage, unsigned slot)
{
if (!res)
return;
res->ubo_bind_mask[pstage] &= ~BITFIELD_BIT(slot);
- res->ubo_bind_count[pstage == PIPE_SHADER_COMPUTE]--;
- update_res_bind_count(ctx, res, pstage == PIPE_SHADER_COMPUTE, true);
+ res->ubo_bind_count[pstage == MESA_SHADER_COMPUTE]--;
+ unbind_buffer_descriptor_stage(res, pstage);
+ if (!res->ubo_bind_count[pstage == MESA_SHADER_COMPUTE])
+ res->barrier_access[pstage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_UNIFORM_READ_BIT;
+ update_res_bind_count(ctx, res, pstage == MESA_SHADER_COMPUTE, true);
}
static void
-invalidate_inlined_uniforms(struct zink_context *ctx, enum pipe_shader_type pstage)
+invalidate_inlined_uniforms(struct zink_context *ctx, gl_shader_stage pstage)
{
unsigned bit = BITFIELD_BIT(pstage);
if (!(ctx->inlinable_uniforms_valid_mask & bit))
return;
ctx->inlinable_uniforms_valid_mask &= ~bit;
- ctx->dirty_shader_stages |= bit;
- if (pstage == PIPE_SHADER_COMPUTE)
+ if (pstage == MESA_SHADER_COMPUTE) {
+ ctx->compute_dirty = true;
return;
-
+ }
+ assert(!zink_screen(ctx->base.screen)->optimal_keys || (pstage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound));
+ ctx->dirty_gfx_stages |= bit;
struct zink_shader_key *key = &ctx->gfx_pipeline_state.shader_keys.key[pstage];
key->inline_uniforms = false;
}
static void
zink_set_constant_buffer(struct pipe_context *pctx,
- enum pipe_shader_type shader, uint index,
+ gl_shader_stage shader, uint index,
bool take_ownership,
const struct pipe_constant_buffer *cb)
{
@@ -1067,15 +1523,19 @@ zink_set_constant_buffer(struct pipe_context *pctx,
if (new_res) {
if (new_res != res) {
unbind_ubo(ctx, res, shader, index);
- new_res->ubo_bind_count[shader == PIPE_SHADER_COMPUTE]++;
+ new_res->ubo_bind_count[shader == MESA_SHADER_COMPUTE]++;
new_res->ubo_bind_mask[shader] |= BITFIELD_BIT(index);
- update_res_bind_count(ctx, new_res, shader == PIPE_SHADER_COMPUTE, false);
+ new_res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(shader);
+ new_res->barrier_access[shader == MESA_SHADER_COMPUTE] |= VK_ACCESS_UNIFORM_READ_BIT;
+ update_res_bind_count(ctx, new_res, shader == MESA_SHADER_COMPUTE, false);
}
- zink_batch_resource_usage_set(&ctx->batch, new_res, false);
- zink_fake_buffer_barrier(new_res, VK_ACCESS_UNIFORM_READ_BIT,
- zink_pipeline_flags_from_pipe_stage(shader));
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, new_res, VK_ACCESS_UNIFORM_READ_BIT,
+ new_res->gfx_barrier);
+ zink_batch_resource_usage_set(&ctx->batch, new_res, false, true);
+ if (!ctx->unordered_blitting)
+ new_res->obj->unordered_read = false;
}
- update |= ((index || screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) && ctx->ubos[shader][index].buffer_offset != offset) ||
+ update |= ctx->ubos[shader][index].buffer_offset != offset ||
!!res != !!buffer || (res && res->obj->buffer != new_res->obj->buffer) ||
ctx->ubos[shader][index].buffer_size != cb->buffer_size;
@@ -1115,23 +1575,42 @@ zink_set_constant_buffer(struct pipe_context *pctx,
}
if (update)
- zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, index, 1);
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, index, 1);
}
ALWAYS_INLINE static void
-unbind_ssbo(struct zink_context *ctx, struct zink_resource *res, enum pipe_shader_type pstage, unsigned slot, bool writable)
+unbind_descriptor_reads(struct zink_resource *res, bool is_compute)
+{
+ if (!res->sampler_bind_count[is_compute] && !res->image_bind_count[is_compute] && !res->all_bindless)
+ res->barrier_access[is_compute] &= ~VK_ACCESS_SHADER_READ_BIT;
+}
+
+ALWAYS_INLINE static void
+unbind_buffer_descriptor_reads(struct zink_resource *res, bool is_compute)
+{
+ if (!res->ssbo_bind_count[is_compute] && !res->all_bindless)
+ unbind_descriptor_reads(res, is_compute);
+}
+
+ALWAYS_INLINE static void
+unbind_ssbo(struct zink_context *ctx, struct zink_resource *res, gl_shader_stage pstage, unsigned slot, bool writable)
{
if (!res)
return;
res->ssbo_bind_mask[pstage] &= ~BITFIELD_BIT(slot);
- update_res_bind_count(ctx, res, pstage == PIPE_SHADER_COMPUTE, true);
+ res->ssbo_bind_count[pstage == MESA_SHADER_COMPUTE]--;
+ unbind_buffer_descriptor_stage(res, pstage);
+ unbind_buffer_descriptor_reads(res, pstage == MESA_SHADER_COMPUTE);
+ update_res_bind_count(ctx, res, pstage == MESA_SHADER_COMPUTE, true);
if (writable)
- res->write_bind_count[pstage == PIPE_SHADER_COMPUTE]--;
+ res->write_bind_count[pstage == MESA_SHADER_COMPUTE]--;
+ if (!res->write_bind_count[pstage == MESA_SHADER_COMPUTE])
+ res->barrier_access[pstage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_SHADER_WRITE_BIT;
}
static void
zink_set_shader_buffers(struct pipe_context *pctx,
- enum pipe_shader_type p_stage,
+ gl_shader_stage p_stage,
unsigned start_slot, unsigned count,
const struct pipe_shader_buffer *buffers,
unsigned writable_bitmask)
@@ -1142,43 +1621,52 @@ zink_set_shader_buffers(struct pipe_context *pctx,
unsigned modified_bits = u_bit_consecutive(start_slot, count);
unsigned old_writable_mask = ctx->writable_ssbos[p_stage];
+ assert(!ctx->unordered_blitting);
ctx->writable_ssbos[p_stage] &= ~modified_bits;
ctx->writable_ssbos[p_stage] |= writable_bitmask << start_slot;
for (unsigned i = 0; i < count; i++) {
- struct pipe_shader_buffer *ssbo = &ctx->ssbos[p_stage][start_slot + i];
+ unsigned slot = start_slot + i;
+ struct pipe_shader_buffer *ssbo = &ctx->ssbos[p_stage][slot];
struct zink_resource *res = ssbo->buffer ? zink_resource(ssbo->buffer) : NULL;
- bool was_writable = old_writable_mask & BITFIELD64_BIT(start_slot + i);
+ bool was_writable = old_writable_mask & BITFIELD64_BIT(slot);
if (buffers && buffers[i].buffer) {
struct zink_resource *new_res = zink_resource(buffers[i].buffer);
if (new_res != res) {
- unbind_ssbo(ctx, res, p_stage, i, was_writable);
- new_res->ssbo_bind_mask[p_stage] |= BITFIELD_BIT(i);
- update_res_bind_count(ctx, new_res, p_stage == PIPE_SHADER_COMPUTE, false);
+ unbind_ssbo(ctx, res, p_stage, slot, was_writable);
+ new_res->ssbo_bind_mask[p_stage] |= BITFIELD_BIT(slot);
+ new_res->ssbo_bind_count[p_stage == MESA_SHADER_COMPUTE]++;
+ new_res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(p_stage);
+ update_res_bind_count(ctx, new_res, p_stage == MESA_SHADER_COMPUTE, false);
}
VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT;
- if (ctx->writable_ssbos[p_stage] & BITFIELD64_BIT(start_slot + i)) {
- new_res->write_bind_count[p_stage == PIPE_SHADER_COMPUTE]++;
+ if (ctx->writable_ssbos[p_stage] & BITFIELD64_BIT(slot)) {
+ new_res->write_bind_count[p_stage == MESA_SHADER_COMPUTE]++;
access |= VK_ACCESS_SHADER_WRITE_BIT;
}
pipe_resource_reference(&ssbo->buffer, &new_res->base.b);
- zink_batch_resource_usage_set(&ctx->batch, new_res, access & VK_ACCESS_SHADER_WRITE_BIT);
+ new_res->barrier_access[p_stage == MESA_SHADER_COMPUTE] |= access;
ssbo->buffer_offset = buffers[i].buffer_offset;
ssbo->buffer_size = MIN2(buffers[i].buffer_size, new_res->base.b.width0 - ssbo->buffer_offset);
util_range_add(&new_res->base.b, &new_res->valid_buffer_range, ssbo->buffer_offset,
ssbo->buffer_offset + ssbo->buffer_size);
- zink_fake_buffer_barrier(new_res, access,
- zink_pipeline_flags_from_pipe_stage(p_stage));
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, new_res, access,
+ new_res->gfx_barrier);
+ zink_batch_resource_usage_set(&ctx->batch, new_res, access & VK_ACCESS_SHADER_WRITE_BIT, true);
update = true;
- max_slot = MAX2(max_slot, start_slot + i);
- update_descriptor_state_ssbo(ctx, p_stage, start_slot + i, new_res);
+ max_slot = MAX2(max_slot, slot);
+ update_descriptor_state_ssbo(ctx, p_stage, slot, new_res);
+ if (zink_resource_access_is_write(access))
+ new_res->obj->unordered_write = false;
+ new_res->obj->unordered_read = false;
} else {
- update = !!res;
+ if (res)
+ update = true;
ssbo->buffer_offset = 0;
ssbo->buffer_size = 0;
if (res) {
- unbind_ssbo(ctx, res, p_stage, i, was_writable);
- update_descriptor_state_ssbo(ctx, p_stage, start_slot + i, NULL);
+ unbind_ssbo(ctx, res, p_stage, slot, was_writable);
+ update_descriptor_state_ssbo(ctx, p_stage, slot, NULL);
}
pipe_resource_reference(&ssbo->buffer, NULL);
}
@@ -1186,26 +1674,26 @@ zink_set_shader_buffers(struct pipe_context *pctx,
if (start_slot + count >= ctx->di.num_ssbos[p_stage])
ctx->di.num_ssbos[p_stage] = max_slot + 1;
if (update)
- zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, p_stage, ZINK_DESCRIPTOR_TYPE_SSBO, start_slot, count);
+ ctx->invalidate_descriptor_state(ctx, p_stage, ZINK_DESCRIPTOR_TYPE_SSBO, start_slot, count);
}
static void
update_binds_for_samplerviews(struct zink_context *ctx, struct zink_resource *res, bool is_compute)
{
- VkImageLayout layout = get_layout_for_binding(res, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, is_compute);
+ VkImageLayout layout = get_layout_for_binding(ctx, res, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, is_compute);
if (is_compute) {
- u_foreach_bit(slot, res->sampler_binds[PIPE_SHADER_COMPUTE]) {
- if (ctx->di.textures[PIPE_SHADER_COMPUTE][slot].imageLayout != layout) {
- update_descriptor_state_sampler(ctx, PIPE_SHADER_COMPUTE, slot, res);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, PIPE_SHADER_COMPUTE, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
+ u_foreach_bit(slot, res->sampler_binds[MESA_SHADER_COMPUTE]) {
+ if (ctx->di.textures[MESA_SHADER_COMPUTE][slot].imageLayout != layout) {
+ update_descriptor_state_sampler(ctx, MESA_SHADER_COMPUTE, slot, res);
+ ctx->invalidate_descriptor_state(ctx, MESA_SHADER_COMPUTE, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
}
}
} else {
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
u_foreach_bit(slot, res->sampler_binds[i]) {
if (ctx->di.textures[i][slot].imageLayout != layout) {
update_descriptor_state_sampler(ctx, i, slot, res);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
+ ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
}
}
}
@@ -1215,7 +1703,7 @@ update_binds_for_samplerviews(struct zink_context *ctx, struct zink_resource *re
static void
flush_pending_clears(struct zink_context *ctx, struct zink_resource *res)
{
- if (res->fb_binds && ctx->clears_enabled)
+ if (res->fb_bind_count && ctx->clears_enabled)
zink_fb_clears_apply(ctx, &res->base.b);
}
@@ -1231,155 +1719,334 @@ unbind_shader_image_counts(struct zink_context *ctx, struct zink_resource *res,
update_binds_for_samplerviews(ctx, res, is_compute);
}
-ALWAYS_INLINE static void
+ALWAYS_INLINE static bool
check_for_layout_update(struct zink_context *ctx, struct zink_resource *res, bool is_compute)
{
- VkImageLayout layout = res->bind_count[is_compute] ? zink_descriptor_util_image_layout_eval(res, is_compute) : VK_IMAGE_LAYOUT_UNDEFINED;
- VkImageLayout other_layout = res->bind_count[!is_compute] ? zink_descriptor_util_image_layout_eval(res, !is_compute) : VK_IMAGE_LAYOUT_UNDEFINED;
- if (res->bind_count[is_compute] && res->layout != layout)
- _mesa_set_add(ctx->need_barriers[is_compute], res);
- if (res->bind_count[!is_compute] && (layout != other_layout || res->layout != other_layout))
- _mesa_set_add(ctx->need_barriers[!is_compute], res);
+ VkImageLayout layout = res->bind_count[is_compute] ? zink_descriptor_util_image_layout_eval(ctx, res, is_compute) : VK_IMAGE_LAYOUT_UNDEFINED;
+ VkImageLayout other_layout = res->bind_count[!is_compute] ? zink_descriptor_util_image_layout_eval(ctx, res, !is_compute) : VK_IMAGE_LAYOUT_UNDEFINED;
+ bool ret = false;
+ if (!is_compute && res->fb_binds && !(ctx->feedback_loops & res->fb_binds)) {
+ /* always double check feedback loops */
+ ret = !!_mesa_set_add(ctx->need_barriers[0], res);
+ } else {
+ if (res->bind_count[is_compute] && layout && res->layout != layout)
+ ret = !!_mesa_set_add(ctx->need_barriers[is_compute], res);
+ if (res->bind_count[!is_compute] && other_layout && (layout != other_layout || res->layout != other_layout))
+ ret = !!_mesa_set_add(ctx->need_barriers[!is_compute], res);
+ }
+ return ret;
}
static void
-unbind_shader_image(struct zink_context *ctx, enum pipe_shader_type stage, unsigned slot)
+unbind_shader_image(struct zink_context *ctx, gl_shader_stage stage, unsigned slot)
{
struct zink_image_view *image_view = &ctx->image_views[stage][slot];
- bool is_compute = stage == PIPE_SHADER_COMPUTE;
+ bool is_compute = stage == MESA_SHADER_COMPUTE;
if (!image_view->base.resource)
return;
struct zink_resource *res = zink_resource(image_view->base.resource);
+ res->image_binds[stage] &= ~BITFIELD_BIT(slot);
unbind_shader_image_counts(ctx, res, is_compute, image_view->base.access & PIPE_IMAGE_ACCESS_WRITE);
-
+ if (!res->write_bind_count[is_compute])
+ res->barrier_access[stage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_SHADER_WRITE_BIT;
+
if (image_view->base.resource->target == PIPE_BUFFER) {
- if (zink_batch_usage_exists(image_view->buffer_view->batch_uses))
- zink_batch_reference_bufferview(&ctx->batch, image_view->buffer_view);
+ unbind_buffer_descriptor_stage(res, stage);
+ unbind_buffer_descriptor_reads(res, stage == MESA_SHADER_COMPUTE);
zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pipe_resource_reference(&image_view->base.resource, NULL);
} else {
+ unbind_descriptor_stage(res, stage);
+ unbind_descriptor_reads(res, stage == MESA_SHADER_COMPUTE);
if (!res->image_bind_count[is_compute])
check_for_layout_update(ctx, res, is_compute);
- if (zink_batch_usage_exists(image_view->surface->batch_uses))
- zink_batch_reference_surface(&ctx->batch, image_view->surface);
zink_surface_reference(zink_screen(ctx->base.screen), &image_view->surface, NULL);
}
- pipe_resource_reference(&image_view->base.resource, NULL);
image_view->base.resource = NULL;
image_view->surface = NULL;
}
+static struct zink_buffer_view *
+create_image_bufferview(struct zink_context *ctx, const struct pipe_image_view *view)
+{
+ struct zink_resource *res = zink_resource(view->resource);
+ VkBufferViewCreateInfo bvci = create_bvci(ctx, res, view->format, view->u.buf.offset, view->u.buf.size);
+ struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, &bvci);
+ if (!buffer_view)
+ return NULL;
+ util_range_add(&res->base.b, &res->valid_buffer_range, view->u.buf.offset,
+ view->u.buf.offset + view->u.buf.size);
+ return buffer_view;
+}
+
+static void
+finalize_image_bind(struct zink_context *ctx, struct zink_resource *res, bool is_compute)
+{
+ /* if this is the first image bind and there are sampler binds, the image's sampler layout
+ * must be updated to GENERAL
+ */
+ if (res->image_bind_count[is_compute] == 1 &&
+ res->bind_count[is_compute] > 1)
+ update_binds_for_samplerviews(ctx, res, is_compute);
+ if (!check_for_layout_update(ctx, res, is_compute)) {
+ /* no deferred barrier: unset unordered usage immediately */
+ // TODO: figure out a way to link up layouts between unordered and main cmdbuf
+ // if (zink_resource_access_is_write(res->barrier_access[is_compute]))
+ res->obj->unordered_write = false;
+ res->obj->unordered_read = false;
+ }
+}
+
+static struct zink_surface *
+create_image_surface(struct zink_context *ctx, const struct pipe_image_view *view, bool is_compute)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_resource *res = zink_resource(view->resource);
+ struct pipe_surface tmpl = {0};
+ enum pipe_texture_target target = res->base.b.target;
+ tmpl.format = view->format;
+ tmpl.u.tex.level = view->u.tex.level;
+ tmpl.u.tex.first_layer = view->u.tex.first_layer;
+ tmpl.u.tex.last_layer = view->u.tex.last_layer;
+ unsigned depth = 1 + tmpl.u.tex.last_layer - tmpl.u.tex.first_layer;
+ switch (target) {
+ case PIPE_TEXTURE_3D:
+ if (depth < u_minify(res->base.b.depth0, view->u.tex.level)) {
+ assert(depth == 1);
+ target = PIPE_TEXTURE_2D;
+ if (!screen->info.have_EXT_image_2d_view_of_3d ||
+ !screen->info.view2d_feats.image2DViewOf3D) {
+ static bool warned = false;
+ warn_missing_feature(warned, "image2DViewOf3D");
+ }
+ } else {
+ assert(tmpl.u.tex.first_layer == 0);
+ tmpl.u.tex.last_layer = 0;
+ }
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_1D_ARRAY:
+ if (depth < res->base.b.array_size && depth == 1)
+ target = target == PIPE_TEXTURE_2D_ARRAY ? PIPE_TEXTURE_2D : PIPE_TEXTURE_1D;
+ break;
+ default: break;
+ }
+ if (zink_format_needs_mutable(view->resource->format, view->format))
+ /* mutable not set by default */
+ zink_resource_object_init_mutable(ctx, res);
+ VkImageViewCreateInfo ivci = create_ivci(screen, res, &tmpl, target);
+ struct zink_surface *surface = zink_get_surface(ctx, view->resource, &tmpl, &ivci);
+ if (!surface)
+ return NULL;
+ if (is_compute)
+ flush_pending_clears(ctx, res);
+ return surface;
+}
+
static void
zink_set_shader_images(struct pipe_context *pctx,
- enum pipe_shader_type p_stage,
+ gl_shader_stage shader_type,
unsigned start_slot, unsigned count,
unsigned unbind_num_trailing_slots,
const struct pipe_image_view *images)
{
struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(pctx->screen);
bool update = false;
+ bool is_compute = shader_type == MESA_SHADER_COMPUTE;
+ assert(!ctx->unordered_blitting);
for (unsigned i = 0; i < count; i++) {
- struct zink_image_view *image_view = &ctx->image_views[p_stage][start_slot + i];
- if (images && images[i].resource) {
- struct zink_resource *res = zink_resource(images[i].resource);
- struct zink_resource *old_res = zink_resource(image_view->base.resource);
+ struct zink_image_view *a = &ctx->image_views[shader_type][start_slot + i];
+ const struct pipe_image_view *b = images ? &images[i] : NULL;
+ struct zink_resource *res = b ? zink_resource(b->resource) : NULL;
+ if (b && b->resource) {
if (!zink_resource_object_init_storage(ctx, res)) {
debug_printf("couldn't create storage image!");
continue;
}
- if (res != old_res) {
- if (old_res) {
- unbind_shader_image_counts(ctx, old_res, p_stage == PIPE_SHADER_COMPUTE, image_view->base.access & PIPE_IMAGE_ACCESS_WRITE);
- if (!old_res->obj->is_buffer && !old_res->image_bind_count[p_stage == PIPE_SHADER_COMPUTE])
- check_for_layout_update(ctx, old_res, p_stage == PIPE_SHADER_COMPUTE);
- }
- update_res_bind_count(ctx, res, p_stage == PIPE_SHADER_COMPUTE, false);
- }
- util_copy_image_view(&image_view->base, images + i);
+
VkAccessFlags access = 0;
- if (image_view->base.access & PIPE_IMAGE_ACCESS_WRITE) {
- zink_resource(image_view->base.resource)->write_bind_count[p_stage == PIPE_SHADER_COMPUTE]++;
+ if (b->access & PIPE_IMAGE_ACCESS_WRITE) {
access |= VK_ACCESS_SHADER_WRITE_BIT;
}
- if (image_view->base.access & PIPE_IMAGE_ACCESS_READ) {
+ if (b->access & PIPE_IMAGE_ACCESS_READ) {
access |= VK_ACCESS_SHADER_READ_BIT;
}
- res->image_bind_count[p_stage == PIPE_SHADER_COMPUTE]++;
- if (images[i].resource->target == PIPE_BUFFER) {
- image_view->buffer_view = get_buffer_view(ctx, res, images[i].format, images[i].u.buf.offset, images[i].u.buf.size);
- assert(image_view->buffer_view);
- util_range_add(&res->base.b, &res->valid_buffer_range, images[i].u.buf.offset,
- images[i].u.buf.offset + images[i].u.buf.size);
- zink_batch_usage_set(&image_view->buffer_view->batch_uses, ctx->batch.state);
- zink_fake_buffer_barrier(res, access,
- zink_pipeline_flags_from_pipe_stage(p_stage));
+
+ bool changed = false;
+ if (!a->base.resource || a->base.resource != b->resource) {
+ /* this needs a full unbind+bind */
+ changed = true;
+ unbind_shader_image(ctx, shader_type, start_slot + i);
+ update_res_bind_count(ctx, res, is_compute, false);
+ res->image_bind_count[is_compute]++;
+ /* always increment write_bind_count on new bind */
+ if (b->access & PIPE_IMAGE_ACCESS_WRITE)
+ res->write_bind_count[is_compute]++;
+ /* db mode refcounts these */
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && b->resource->target == PIPE_BUFFER)
+ pipe_resource_reference(&a->base.resource, b->resource);
} else {
- struct pipe_surface tmpl = {0};
- tmpl.format = images[i].format;
- tmpl.nr_samples = 1;
- tmpl.u.tex.level = images[i].u.tex.level;
- tmpl.u.tex.first_layer = images[i].u.tex.first_layer;
- tmpl.u.tex.last_layer = images[i].u.tex.last_layer;
- struct pipe_surface *psurf = pctx->create_surface(pctx, &res->base.b, &tmpl);
- /* this is actually a zink_ctx_surface, but we just want the inner surface */
- image_view->surface = zink_csurface(psurf);
- FREE(psurf);
- assert(image_view->surface);
- /* if this is the first image bind and there are sampler binds, the image's sampler layout
- * must be updated to GENERAL
- */
- if (res->image_bind_count[p_stage == PIPE_SHADER_COMPUTE] == 1 &&
- res->bind_count[p_stage == PIPE_SHADER_COMPUTE] > 1)
- update_binds_for_samplerviews(ctx, res, p_stage == PIPE_SHADER_COMPUTE);
- check_for_layout_update(ctx, res, p_stage == PIPE_SHADER_COMPUTE);
- zink_batch_usage_set(&image_view->surface->batch_uses, ctx->batch.state);
- flush_pending_clears(ctx, res);
- }
- zink_batch_resource_usage_set(&ctx->batch, zink_resource(image_view->base.resource),
- zink_resource_access_is_write(access));
- update = true;
- update_descriptor_state_image(ctx, p_stage, start_slot + i, res);
- } else if (image_view->base.resource) {
- update |= !!image_view->base.resource;
+ /* resource matches: check for write flag change and partial rebind */
+
+ /* previous bind didn't have write: increment */
+ if ((b->access & PIPE_IMAGE_ACCESS_WRITE) && !(a->base.access & PIPE_IMAGE_ACCESS_WRITE))
+ res->write_bind_count[is_compute]++;
+ /* previous bind had write: decrement */
+ else if (!(b->access & PIPE_IMAGE_ACCESS_WRITE) && (a->base.access & PIPE_IMAGE_ACCESS_WRITE)) {
+ res->write_bind_count[is_compute]--;
+ if (!res->write_bind_count[is_compute])
+ res->barrier_access[is_compute] &= ~VK_ACCESS_SHADER_WRITE_BIT;
+ }
+
+ /* this may need a partial rebind */
+ changed = a->base.format != b->format || zink_resource(a->base.resource)->obj != res->obj;
+ if (!changed) {
+ if (b->resource->target == PIPE_BUFFER) {
+ /* db mode has no partial rebind */
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB)
+ changed = !!memcmp(&a->base.u.buf, &b->u.buf, sizeof(b->u.buf));
+ } else {
+ /* no memcmp, these are bitfields */
+ changed = a->base.u.tex.first_layer != b->u.tex.first_layer ||
+ a->base.u.tex.last_layer != b->u.tex.last_layer ||
+ a->base.u.tex.level != b->u.tex.level;
+ }
+ }
+ }
+
+ if (changed) {
+ /* this is a partial rebind */
+ if (b->resource->target == PIPE_BUFFER) {
+ /* db has no partial rebind */
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) {
+ /* bufferview rebind: get updated bufferview and unref old one */
+ struct zink_buffer_view *bv = create_image_bufferview(ctx, b);
+ /* identical rebind was already checked above */
+ assert(bv && bv != a->buffer_view);
+ zink_buffer_view_reference(screen, &a->buffer_view, NULL);
+ /* ref already added by create */
+ a->buffer_view = bv;
+ }
+ } else {
+ /* image rebind: get updated surface and unref old one */
+ struct zink_surface *surface = create_image_surface(ctx, b, is_compute);
+ /* identical rebind was already checked above */
+ assert(surface && surface != a->surface);
+ zink_surface_reference(screen, &a->surface, NULL);
+ /* ref already added by create */
+ a->surface = surface;
+ }
+ }
- unbind_shader_image(ctx, p_stage, start_slot + i);
- update_descriptor_state_image(ctx, p_stage, start_slot + i, NULL);
+ /* these operations occur regardless of binding/rebinding */
+ res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(shader_type);
+ res->barrier_access[is_compute] |= access;
+ if (b->resource->target == PIPE_BUFFER) {
+ screen->buffer_barrier(ctx, res, access,
+ res->gfx_barrier);
+ zink_batch_resource_usage_set(&ctx->batch, res,
+ zink_resource_access_is_write(access), true);
+ if (zink_resource_access_is_write(access))
+ res->obj->unordered_write = false;
+ res->obj->unordered_read = false;
+ } else {
+ finalize_image_bind(ctx, res, is_compute);
+ zink_batch_resource_usage_set(&ctx->batch, res,
+ zink_resource_access_is_write(access), false);
+ }
+ memcpy(&a->base, images + i, sizeof(struct pipe_image_view));
+ if (b->resource->target == PIPE_BUFFER) {
+ /* always enforce limit clamping */
+ unsigned blocksize = util_format_get_blocksize(a->base.format);
+ a->base.u.buf.size = MIN2(a->base.u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) * blocksize;
+ }
+ update = true;
+ res->image_binds[shader_type] |= BITFIELD_BIT(start_slot + i);
+ } else if (a->base.resource) {
+ update = true;
+ unbind_shader_image(ctx, shader_type, start_slot + i);
}
+ update_descriptor_state_image(ctx, shader_type, start_slot + i, res);
}
for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
- update |= !!ctx->image_views[p_stage][start_slot + count + i].base.resource;
- unbind_shader_image(ctx, p_stage, start_slot + count + i);
- update_descriptor_state_image(ctx, p_stage, start_slot + count + i, NULL);
+ update |= !!ctx->image_views[shader_type][start_slot + count + i].base.resource;
+ unbind_shader_image(ctx, shader_type, start_slot + count + i);
+ update_descriptor_state_image(ctx, shader_type, start_slot + count + i, NULL);
}
- ctx->di.num_images[p_stage] = start_slot + count;
+ ctx->di.num_images[shader_type] = start_slot + count;
if (update)
- zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, p_stage, ZINK_DESCRIPTOR_TYPE_IMAGE, start_slot, count);
+ ctx->invalidate_descriptor_state(ctx, shader_type, ZINK_DESCRIPTOR_TYPE_IMAGE, start_slot, count);
}
-ALWAYS_INLINE static void
-check_samplerview_for_batch_ref(struct zink_context *ctx, struct zink_sampler_view *sv)
+static void
+update_feedback_loop_dynamic_state(struct zink_context *ctx)
{
- const struct zink_resource *res = zink_resource(sv->base.texture);
- if ((res->obj->is_buffer && zink_batch_usage_exists(sv->buffer_view->batch_uses)) ||
- (!res->obj->is_buffer && zink_batch_usage_exists(sv->image_view->batch_uses)))
- zink_batch_reference_sampler_view(&ctx->batch, sv);
+ if (!zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_dynamic_state)
+ return;
+ VkImageAspectFlags aspects = 0;
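+ /* ctx->feedback_loops uses bits 0..PIPE_MAX_COLOR_BUFS-1 for color attachments and bit PIPE_MAX_COLOR_BUFS for the zsbuf */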
+ if (ctx->feedback_loops & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS))
+ aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+ if (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS))
+ aspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+ VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->cmdbuf, aspects);
+}
+
+static void
+update_feedback_loop_state(struct zink_context *ctx, unsigned idx, unsigned feedback_loops)
+{
+ if (feedback_loops != ctx->feedback_loops) {
+ if (idx == PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop_zs) {
+ if (ctx->gfx_pipeline_state.feedback_loop_zs)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.feedback_loop_zs = false;
+ } else if (idx < PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop) {
+ if (ctx->gfx_pipeline_state.feedback_loop)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.feedback_loop = false;
+ }
+ update_feedback_loop_dynamic_state(ctx);
+ }
+ ctx->feedback_loops = feedback_loops;
}
ALWAYS_INLINE static void
-unbind_samplerview(struct zink_context *ctx, enum pipe_shader_type stage, unsigned slot)
+unbind_samplerview(struct zink_context *ctx, gl_shader_stage stage, unsigned slot)
{
struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[stage][slot]);
if (!sv || !sv->base.texture)
return;
struct zink_resource *res = zink_resource(sv->base.texture);
- check_samplerview_for_batch_ref(ctx, sv);
- update_res_bind_count(ctx, res, stage == PIPE_SHADER_COMPUTE, true);
+ res->sampler_bind_count[stage == MESA_SHADER_COMPUTE]--;
+ if (stage != MESA_SHADER_COMPUTE && !res->sampler_bind_count[0] && res->fb_bind_count) {
+ u_foreach_bit(idx, res->fb_binds) {
+ if (ctx->feedback_loops & BITFIELD_BIT(idx)) {
+ ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ ctx->rp_layout_changed = true;
+ }
+ update_feedback_loop_state(ctx, idx, ctx->feedback_loops & ~BITFIELD_BIT(idx));
+ }
+ }
+ update_res_bind_count(ctx, res, stage == MESA_SHADER_COMPUTE, true);
res->sampler_binds[stage] &= ~BITFIELD_BIT(slot);
+ if (res->obj->is_buffer) {
+ unbind_buffer_descriptor_stage(res, stage);
+ unbind_buffer_descriptor_reads(res, stage == MESA_SHADER_COMPUTE);
+ } else {
+ unbind_descriptor_stage(res, stage);
+ unbind_descriptor_reads(res, stage == MESA_SHADER_COMPUTE);
+ if (!res->sampler_bind_count[stage == MESA_SHADER_COMPUTE])
+ check_for_layout_update(ctx, res, stage == MESA_SHADER_COMPUTE);
+ }
+ assert(slot < 32);
+ ctx->di.zs_swizzle[stage].mask &= ~BITFIELD_BIT(slot);
}
static void
zink_set_sampler_views(struct pipe_context *pctx,
- enum pipe_shader_type shader_type,
+ gl_shader_stage shader_type,
unsigned start_slot,
unsigned num_views,
unsigned unbind_num_trailing_slots,
@@ -1387,81 +2054,518 @@ zink_set_sampler_views(struct pipe_context *pctx,
struct pipe_sampler_view **views)
{
struct zink_context *ctx = zink_context(pctx);
- unsigned i;
+
+ const uint32_t mask = BITFIELD_RANGE(start_slot, num_views);
+ uint32_t shadow_mask = ctx->di.zs_swizzle[shader_type].mask;
+ ctx->di.cubes[shader_type] &= ~mask;
bool update = false;
- for (i = 0; i < num_views; ++i) {
- struct pipe_sampler_view *pview = views ? views[i] : NULL;
- struct zink_sampler_view *a = zink_sampler_view(ctx->sampler_views[shader_type][start_slot + i]);
- struct zink_sampler_view *b = zink_sampler_view(pview);
- struct zink_resource *res = b ? zink_resource(b->base.texture) : NULL;
- if (b && b->base.texture) {
- if (!a || zink_resource(a->base.texture) != res) {
- if (a)
- unbind_samplerview(ctx, shader_type, start_slot + i);
- update_res_bind_count(ctx, res, shader_type == PIPE_SHADER_COMPUTE, false);
- } else if (a != b) {
- check_samplerview_for_batch_ref(ctx, a);
- }
- if (res->base.b.target == PIPE_BUFFER) {
- if (b->buffer_view->bvci.buffer != res->obj->buffer) {
- /* if this resource has been rebound while it wasn't set here,
- * its backing resource will have changed and thus we need to update
- * the bufferview
- */
- struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, b->base.format, b->base.u.buf.offset, b->base.u.buf.size);
- assert(buffer_view != b->buffer_view);
- if (zink_batch_usage_exists(b->buffer_view->batch_uses))
- zink_batch_reference_bufferview(&ctx->batch, b->buffer_view);
- zink_buffer_view_reference(zink_screen(ctx->base.screen), &b->buffer_view, NULL);
- b->buffer_view = buffer_view;
- update = true;
+ bool shadow_update = false;
+ if (views) {
+ for (unsigned i = 0; i < num_views; ++i) {
+ struct pipe_sampler_view *pview = views[i];
+ struct zink_sampler_view *a = zink_sampler_view(ctx->sampler_views[shader_type][start_slot + i]);
+ struct zink_sampler_view *b = zink_sampler_view(pview);
+
+ if (a == b) {
+ if (take_ownership) {
+ struct pipe_sampler_view *view = views[i];
+ pipe_sampler_view_reference(&view, NULL);
}
- zink_batch_usage_set(&b->buffer_view->batch_uses, ctx->batch.state);
- zink_fake_buffer_barrier(res, VK_ACCESS_SHADER_READ_BIT,
- zink_pipeline_flags_from_pipe_stage(shader_type));
- if (!a || a->buffer_view->buffer_view != b->buffer_view->buffer_view)
- update = true;
- } else if (!res->obj->is_buffer) {
- if (res->obj != b->image_view->obj) {
- struct pipe_surface *psurf = &b->image_view->base;
- VkImageView iv = b->image_view->image_view;
- zink_rebind_surface(ctx, &psurf);
- b->image_view = zink_surface(psurf);
- update |= iv != b->image_view->image_view;
- } else if (a != b)
- update = true;
- flush_pending_clears(ctx, res);
- check_for_layout_update(ctx, res, shader_type == PIPE_SHADER_COMPUTE);
- zink_batch_usage_set(&b->image_view->batch_uses, ctx->batch.state);
- if (!a)
- update = true;
- }
- res->sampler_binds[shader_type] |= BITFIELD_BIT(start_slot + i);
- zink_batch_resource_usage_set(&ctx->batch, res, false);
- } else if (a) {
- unbind_samplerview(ctx, shader_type, start_slot + i);
- update = true;
- }
- if (take_ownership) {
- pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], NULL);
- ctx->sampler_views[shader_type][start_slot + i] = pview;
- } else {
- pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], pview);
+ continue;
+ }
+
+ struct zink_resource *res = b ? zink_resource(b->base.texture) : NULL;
+ if (b && b->base.texture) {
+ if (!a || zink_resource(a->base.texture) != res) {
+ if (a)
+ unbind_samplerview(ctx, shader_type, start_slot + i);
+ update_res_bind_count(ctx, res, shader_type == MESA_SHADER_COMPUTE, false);
+ res->sampler_bind_count[shader_type == MESA_SHADER_COMPUTE]++;
+ res->gfx_barrier |= zink_pipeline_flags_from_pipe_stage(shader_type);
+ res->barrier_access[shader_type == MESA_SHADER_COMPUTE] |= VK_ACCESS_SHADER_READ_BIT;
+ }
+ if (res->base.b.target == PIPE_BUFFER) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (!a || a->base.texture != b->base.texture || zink_resource(a->base.texture)->obj != res->obj ||
+ memcmp(&a->base.u.buf, &b->base.u.buf, sizeof(b->base.u.buf)))
+ update = true;
+ } else if (b->buffer_view->bvci.buffer != res->obj->buffer) {
+ /* if this resource has been rebound while it wasn't set here,
+ * its backing resource will have changed and thus we need to update
+ * the bufferview
+ */
+ VkBufferViewCreateInfo bvci = b->buffer_view->bvci;
+ bvci.buffer = res->obj->buffer;
+ struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, &bvci);
+ assert(buffer_view != b->buffer_view);
+ zink_buffer_view_reference(zink_screen(ctx->base.screen), &b->buffer_view, NULL);
+ b->buffer_view = buffer_view;
+ update = true;
+ } else if (!a || a->buffer_view->buffer_view != b->buffer_view->buffer_view)
+ update = true;
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT,
+ res->gfx_barrier);
+ zink_batch_resource_usage_set(&ctx->batch, res, false, true);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = false;
+ } else {
+ if (zink_format_needs_mutable(res->base.b.format, b->image_view->base.format))
+ /* mutable not set by default */
+ zink_resource_object_init_mutable(ctx, res);
+ if (res->obj != b->image_view->obj) {
+ struct pipe_surface *psurf = &b->image_view->base;
+ VkImageView iv = b->image_view->image_view;
+ zink_rebind_surface(ctx, &psurf);
+ b->image_view = zink_surface(psurf);
+ update |= iv != b->image_view->image_view;
+ } else if (a != b)
+ update = true;
+ if (shader_type == MESA_SHADER_COMPUTE)
+ flush_pending_clears(ctx, res);
+ if (b->cube_array) {
+ ctx->di.cubes[shader_type] |= BITFIELD_BIT(start_slot + i);
+ }
+ if (!check_for_layout_update(ctx, res, shader_type == MESA_SHADER_COMPUTE) && !ctx->unordered_blitting) {
+ /* no deferred barrier: unset unordered usage immediately */
+ res->obj->unordered_read = false;
+ // TODO: figure out a way to link up layouts between unordered and main cmdbuf
+ res->obj->unordered_write = false;
+ }
+ if (!a)
+ update = true;
+ zink_batch_resource_usage_set(&ctx->batch, res, false, false);
+ if (b->zs_view) {
+ assert(start_slot + i < 32); //bitfield size
+ ctx->di.zs_swizzle[shader_type].mask |= BITFIELD_BIT(start_slot + i);
+ /* this is already gonna be slow, so don't bother trying to micro-optimize */
+ shadow_update |= memcmp(&ctx->di.zs_swizzle[shader_type].swizzle[start_slot + i],
+ &b->swizzle, sizeof(struct zink_zs_swizzle));
+ memcpy(&ctx->di.zs_swizzle[shader_type].swizzle[start_slot + i], &b->swizzle, sizeof(struct zink_zs_swizzle));
+ } else {
+ assert(start_slot + i < 32); //bitfield size
+ ctx->di.zs_swizzle[shader_type].mask &= ~BITFIELD_BIT(start_slot + i);
+ }
+ }
+ res->sampler_binds[shader_type] |= BITFIELD_BIT(start_slot + i);
+ } else if (a) {
+ unbind_samplerview(ctx, shader_type, start_slot + i);
+ update = true;
+ }
+ if (take_ownership) {
+ pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], NULL);
+ ctx->sampler_views[shader_type][start_slot + i] = pview;
+ } else {
+ pipe_sampler_view_reference(&ctx->sampler_views[shader_type][start_slot + i], pview);
+ }
+ update_descriptor_state_sampler(ctx, shader_type, start_slot + i, res);
}
- update_descriptor_state_sampler(ctx, shader_type, start_slot + i, res);
+ } else {
+ unbind_num_trailing_slots += num_views;
+ num_views = 0;
}
- for (; i < num_views + unbind_num_trailing_slots; ++i) {
- update |= !!ctx->sampler_views[shader_type][start_slot + i];
- unbind_samplerview(ctx, shader_type, start_slot + i);
+ for (unsigned i = 0; i < unbind_num_trailing_slots; ++i) {
+ unsigned slot = start_slot + num_views + i;
+ update |= !!ctx->sampler_views[shader_type][slot];
+ unbind_samplerview(ctx, shader_type, slot);
pipe_sampler_view_reference(
- &ctx->sampler_views[shader_type][start_slot + i],
+ &ctx->sampler_views[shader_type][slot],
NULL);
- update_descriptor_state_sampler(ctx, shader_type, start_slot + i, NULL);
+ update_descriptor_state_sampler(ctx, shader_type, slot, NULL);
}
ctx->di.num_sampler_views[shader_type] = start_slot + num_views;
- if (update)
- zink_screen(pctx->screen)->context_invalidate_descriptor_state(ctx, shader_type, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, num_views);
+ if (update) {
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ ctx->invalidate_descriptor_state(ctx, shader_type, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, start_slot, num_views);
+ if (!screen->info.have_EXT_non_seamless_cube_map)
+ update_nonseamless_shader_key(ctx, shader_type);
+ shadow_update |= shadow_mask != ctx->di.zs_swizzle[shader_type].mask;
+ zink_set_zs_needs_shader_swizzle_key(ctx, shader_type, shadow_update);
+ }
+}
+
+static uint64_t
+zink_create_texture_handle(struct pipe_context *pctx, struct pipe_sampler_view *view, const struct pipe_sampler_state *state)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_resource *res = zink_resource(view->texture);
+ struct zink_sampler_view *sv = zink_sampler_view(view);
+ struct zink_bindless_descriptor *bd;
+ bd = calloc(1, sizeof(struct zink_bindless_descriptor));
+ if (!bd)
+ return 0;
+
+ bd->sampler = pctx->create_sampler_state(pctx, state);
+ if (!bd->sampler) {
+ free(bd);
+ return 0;
+ }
+
+ bd->ds.is_buffer = res->base.b.target == PIPE_BUFFER;
+ if (res->base.b.target == PIPE_BUFFER) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ pipe_resource_reference(&bd->ds.db.pres, view->texture);
+ bd->ds.db.format = view->format;
+ bd->ds.db.offset = view->u.buf.offset;
+ bd->ds.db.size = view->u.buf.size;
+ } else {
+ zink_buffer_view_reference(zink_screen(pctx->screen), &bd->ds.bufferview, sv->buffer_view);
+ }
+ } else {
+ zink_surface_reference(zink_screen(pctx->screen), &bd->ds.surface, sv->image_view);
+ }
+ uint64_t handle = util_idalloc_alloc(&ctx->di.bindless[bd->ds.is_buffer].tex_slots);
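+ /* buffer handles live in a separate range offset by ZINK_MAX_BINDLESS_HANDLES so ZINK_BINDLESS_IS_BUFFER() can tell them apart */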
+ if (bd->ds.is_buffer)
+ handle += ZINK_MAX_BINDLESS_HANDLES;
+ bd->handle = handle;
+ _mesa_hash_table_insert(&ctx->di.bindless[bd->ds.is_buffer].tex_handles, (void*)(uintptr_t)handle, bd);
+ return handle;
+}
+
+static void
+zink_delete_texture_handle(struct pipe_context *pctx, uint64_t handle)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle);
+ struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].tex_handles, (void*)(uintptr_t)handle);
+ assert(he);
+ struct zink_bindless_descriptor *bd = he->data;
+ struct zink_descriptor_surface *ds = &bd->ds;
+ _mesa_hash_table_remove(&ctx->di.bindless[is_buffer].tex_handles, he);
+ uint32_t h = handle;
+ util_dynarray_append(&ctx->batch.state->bindless_releases[0], uint32_t, h);
+
+ if (ds->is_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ pipe_resource_reference(&ds->db.pres, NULL);
+ } else {
+ zink_buffer_view_reference(zink_screen(pctx->screen), &ds->bufferview, NULL);
+ }
+ } else {
+ zink_surface_reference(zink_screen(pctx->screen), &ds->surface, NULL);
+ pctx->delete_sampler_state(pctx, bd->sampler);
+ }
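+ /* ds is the first member of struct zink_bindless_descriptor, so this frees the whole descriptor allocated in zink_create_texture_handle */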
+ free(ds);
+}
+
+static void
+rebind_bindless_bufferview(struct zink_context *ctx, struct zink_resource *res, struct zink_descriptor_surface *ds)
+{
+ /* descriptor buffer is unaffected by this */
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ return;
+ /* if this resource has been rebound while it wasn't set here,
+ * its backing resource will have changed and thus we need to update
+ * the bufferview
+ */
+ VkBufferViewCreateInfo bvci = ds->bufferview->bvci;
+ bvci.buffer = res->obj->buffer;
+ struct zink_buffer_view *buffer_view = get_buffer_view(ctx, res, &bvci);
+ assert(buffer_view != ds->bufferview);
+ zink_buffer_view_reference(zink_screen(ctx->base.screen), &ds->bufferview, NULL);
+ ds->bufferview = buffer_view;
+}
+
+static void
+zero_bindless_descriptor(struct zink_context *ctx, uint32_t handle, bool is_buffer, bool is_image)
+{
+ if (likely(zink_screen(ctx->base.screen)->info.rb2_feats.nullDescriptor)) {
+ if (is_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.bindless[is_image].db.buffer_infos[handle].address = 0;
+ ctx->di.bindless[is_image].db.buffer_infos[handle].range = 0;
+ } else {
+ VkBufferView *bv = &ctx->di.bindless[is_image].t.buffer_infos[handle];
+ *bv = VK_NULL_HANDLE;
+ }
+ } else {
+ VkDescriptorImageInfo *ii = &ctx->di.bindless[is_image].img_infos[handle];
+ memset(ii, 0, sizeof(*ii));
+ }
+ } else {
+ if (is_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.bindless[is_image].db.buffer_infos[handle].address = zink_resource(ctx->dummy_bufferview->pres)->obj->bda;
+ ctx->di.bindless[is_image].db.buffer_infos[handle].range = 1;
+ } else {
+ VkBufferView *bv = &ctx->di.bindless[is_image].t.buffer_infos[handle];
+ struct zink_buffer_view *null_bufferview = ctx->dummy_bufferview;
+ *bv = null_bufferview->buffer_view;
+ }
+ } else {
+ struct zink_surface *null_surface = zink_get_dummy_surface(ctx, 0);
+ VkDescriptorImageInfo *ii = &ctx->di.bindless[is_image].img_infos[handle];
+ ii->sampler = VK_NULL_HANDLE;
+ ii->imageView = null_surface->image_view;
+ ii->imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+ }
+}
+
+static void
+unbind_bindless_descriptor(struct zink_context *ctx, struct zink_resource *res)
+{
+ if (!res->bindless[1]) {
+ /* check to remove write access */
+ for (unsigned i = 0; i < 2; i++) {
+ if (!res->write_bind_count[i])
+ res->barrier_access[i] &= ~VK_ACCESS_SHADER_WRITE_BIT;
+ }
+ }
+ bool is_buffer = res->base.b.target == PIPE_BUFFER;
+ if (!res->all_bindless) {
+ /* check to remove read access */
+ if (is_buffer) {
+ for (unsigned i = 0; i < 2; i++)
+ unbind_buffer_descriptor_reads(res, i);
+ } else {
+ for (unsigned i = 0; i < 2; i++)
+ unbind_descriptor_reads(res, i);
+ }
+ }
+ for (unsigned i = 0; i < 2; i++) {
+ if (!res->image_bind_count[i])
+ check_for_layout_update(ctx, res, i);
+ }
+}
+
+static void
+zink_make_texture_handle_resident(struct pipe_context *pctx, uint64_t handle, bool resident)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle);
+ struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].tex_handles, (void*)(uintptr_t)handle);
+ assert(he);
+ struct zink_bindless_descriptor *bd = he->data;
+ struct zink_descriptor_surface *ds = &bd->ds;
+ struct zink_resource *res = zink_descriptor_surface_resource(ds);
+ if (is_buffer)
+ handle -= ZINK_MAX_BINDLESS_HANDLES;
+ if (resident) {
+ update_res_bind_count(ctx, res, false, false);
+ update_res_bind_count(ctx, res, true, false);
+ res->bindless[0]++;
+ if (is_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.bindless[0].db.buffer_infos[handle].address = res->obj->bda + ds->db.offset;
+ ctx->di.bindless[0].db.buffer_infos[handle].range = ds->db.size;
+ ctx->di.bindless[0].db.buffer_infos[handle].format = zink_get_format(zink_screen(ctx->base.screen), ds->db.format);
+ } else {
+ if (ds->bufferview->bvci.buffer != res->obj->buffer)
+ rebind_bindless_bufferview(ctx, res, ds);
+ VkBufferView *bv = &ctx->di.bindless[0].t.buffer_infos[handle];
+ *bv = ds->bufferview->buffer_view;
+ }
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
+ zink_batch_resource_usage_set(&ctx->batch, res, false, true);
+ res->obj->unordered_read = false;
+ } else {
+ VkDescriptorImageInfo *ii = &ctx->di.bindless[0].img_infos[handle];
+ ii->sampler = bd->sampler->sampler;
+ ii->imageView = ds->surface->image_view;
+ ii->imageLayout = zink_descriptor_util_image_layout_eval(ctx, res, false);
+ flush_pending_clears(ctx, res);
+ if (!check_for_layout_update(ctx, res, false)) {
+ res->obj->unordered_read = false;
+ // TODO: figure out a way to link up layouts between unordered and main cmdbuf
+ res->obj->unordered_write = false;
+ }
+ if (!check_for_layout_update(ctx, res, true)) {
+ res->obj->unordered_read = false;
+ // TODO: figure out a way to link up layouts between unordered and main cmdbuf
+ res->obj->unordered_write = false;
+ }
+ zink_batch_resource_usage_set(&ctx->batch, res, false, false);
+ res->obj->unordered_write = false;
+ }
+ res->gfx_barrier |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ res->barrier_access[0] |= VK_ACCESS_SHADER_READ_BIT;
+ res->barrier_access[1] |= VK_ACCESS_SHADER_READ_BIT;
+ util_dynarray_append(&ctx->di.bindless[0].resident, struct zink_bindless_descriptor *, bd);
+ uint32_t h = is_buffer ? handle + ZINK_MAX_BINDLESS_HANDLES : handle;
+ util_dynarray_append(&ctx->di.bindless[0].updates, uint32_t, h);
+ } else {
+ zero_bindless_descriptor(ctx, handle, is_buffer, false);
+ util_dynarray_delete_unordered(&ctx->di.bindless[0].resident, struct zink_bindless_descriptor *, bd);
+ update_res_bind_count(ctx, res, false, true);
+ update_res_bind_count(ctx, res, true, true);
+ res->bindless[0]--;
+ unbind_bindless_descriptor(ctx, res);
+ }
+ ctx->di.bindless_dirty[0] = true;
+}
+
+static uint64_t
+zink_create_image_handle(struct pipe_context *pctx, const struct pipe_image_view *view)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_resource *res = zink_resource(view->resource);
+ struct zink_bindless_descriptor *bd;
+ if (!zink_resource_object_init_storage(ctx, res)) {
+ debug_printf("couldn't create storage image!");
+ return 0;
+ }
+ bd = calloc(1, sizeof(struct zink_bindless_descriptor));
+ if (!bd)
+ return 0;
+ bd->sampler = NULL;
+
+ bd->ds.is_buffer = res->base.b.target == PIPE_BUFFER;
+ if (res->base.b.target == PIPE_BUFFER)
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ pipe_resource_reference(&bd->ds.db.pres, view->resource);
+ bd->ds.db.format = view->format;
+ bd->ds.db.offset = view->u.buf.offset;
+ bd->ds.db.size = view->u.buf.size;
+ } else {
+ bd->ds.bufferview = create_image_bufferview(ctx, view);
+ }
+ else
+ bd->ds.surface = create_image_surface(ctx, view, false);
+ uint64_t handle = util_idalloc_alloc(&ctx->di.bindless[bd->ds.is_buffer].img_slots);
+ if (bd->ds.is_buffer)
+ handle += ZINK_MAX_BINDLESS_HANDLES;
+ bd->handle = handle;
+ _mesa_hash_table_insert(&ctx->di.bindless[bd->ds.is_buffer].img_handles, (void*)(uintptr_t)handle, bd);
+ return handle;
+}
+
+static void
+zink_delete_image_handle(struct pipe_context *pctx, uint64_t handle)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle);
+ struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].img_handles, (void*)(uintptr_t)handle);
+ assert(he);
+ struct zink_descriptor_surface *ds = he->data;
+ _mesa_hash_table_remove(&ctx->di.bindless[is_buffer].img_handles, he);
+ uint32_t h = handle;
+ util_dynarray_append(&ctx->batch.state->bindless_releases[1], uint32_t, h);
+
+ if (ds->is_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ pipe_resource_reference(&ds->db.pres, NULL);
+ } else {
+ zink_buffer_view_reference(zink_screen(pctx->screen), &ds->bufferview, NULL);
+ }
+ } else {
+ zink_surface_reference(zink_screen(pctx->screen), &ds->surface, NULL);
+ }
+ free(ds);
+}
+
+static void
+zink_make_image_handle_resident(struct pipe_context *pctx, uint64_t handle, unsigned paccess, bool resident)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle);
+ struct hash_entry *he = _mesa_hash_table_search(&ctx->di.bindless[is_buffer].img_handles, (void*)(uintptr_t)handle);
+ assert(he);
+ struct zink_bindless_descriptor *bd = he->data;
+ struct zink_descriptor_surface *ds = &bd->ds;
+ bd->access = paccess;
+ struct zink_resource *res = zink_descriptor_surface_resource(ds);
+ VkAccessFlags access = 0;
+ if (paccess & PIPE_IMAGE_ACCESS_WRITE) {
+ if (resident) {
+ res->write_bind_count[0]++;
+ res->write_bind_count[1]++;
+ } else {
+ res->write_bind_count[0]--;
+ res->write_bind_count[1]--;
+ }
+ access |= VK_ACCESS_SHADER_WRITE_BIT;
+ }
+ if (paccess & PIPE_IMAGE_ACCESS_READ) {
+ access |= VK_ACCESS_SHADER_READ_BIT;
+ }
+ if (is_buffer)
+ handle -= ZINK_MAX_BINDLESS_HANDLES;
+ if (resident) {
+ update_res_bind_count(ctx, res, false, false);
+ update_res_bind_count(ctx, res, true, false);
+ res->image_bind_count[0]++;
+ res->image_bind_count[1]++;
+ res->bindless[1]++;
+ if (is_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.bindless[0].db.buffer_infos[handle].address = res->obj->bda + ds->db.offset;
+ ctx->di.bindless[0].db.buffer_infos[handle].range = ds->db.size;
+ ctx->di.bindless[0].db.buffer_infos[handle].format = zink_get_format(zink_screen(ctx->base.screen), ds->db.format);
+ } else {
+ if (ds->bufferview->bvci.buffer != res->obj->buffer)
+ rebind_bindless_bufferview(ctx, res, ds);
+ VkBufferView *bv = &ctx->di.bindless[1].t.buffer_infos[handle];
+ *bv = ds->bufferview->buffer_view;
+ }
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
+ zink_batch_resource_usage_set(&ctx->batch, res, zink_resource_access_is_write(access), true);
+ if (zink_resource_access_is_write(access))
+ res->obj->unordered_write = false;
+ res->obj->unordered_read = false;
+ } else {
+ VkDescriptorImageInfo *ii = &ctx->di.bindless[1].img_infos[handle];
+ ii->sampler = VK_NULL_HANDLE;
+ ii->imageView = ds->surface->image_view;
+ ii->imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ finalize_image_bind(ctx, res, false);
+ finalize_image_bind(ctx, res, true);
+ zink_batch_resource_usage_set(&ctx->batch, res, zink_resource_access_is_write(access), false);
+ res->obj->unordered_write = false;
+ }
+ res->gfx_barrier |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ res->barrier_access[0] |= access;
+ res->barrier_access[1] |= access;
+ util_dynarray_append(&ctx->di.bindless[1].resident, struct zink_bindless_descriptor *, bd);
+ uint32_t h = is_buffer ? handle + ZINK_MAX_BINDLESS_HANDLES : handle;
+ util_dynarray_append(&ctx->di.bindless[1].updates, uint32_t, h);
+ } else {
+ zero_bindless_descriptor(ctx, handle, is_buffer, true);
+ util_dynarray_delete_unordered(&ctx->di.bindless[1].resident, struct zink_bindless_descriptor *, bd);
+ unbind_shader_image_counts(ctx, res, false, false);
+ unbind_shader_image_counts(ctx, res, true, false);
+ res->bindless[1]--;
+ unbind_bindless_descriptor(ctx, res);
+ }
+ ctx->di.bindless_dirty[1] = true;
+}
+
+static void
+zink_set_global_binding(struct pipe_context *pctx,
+ unsigned first, unsigned count,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct zink_context *ctx = zink_context(pctx);
+
+ size_t size = ctx->di.global_bindings.capacity;
+ if (!util_dynarray_resize(&ctx->di.global_bindings, struct pipe_resource*, first + count + 8))
+ unreachable("zink: out of memory somehow");
+ if (size != ctx->di.global_bindings.capacity) {
+ uint8_t *data = ctx->di.global_bindings.data;
+ memset(data + size, 0, ctx->di.global_bindings.capacity - size);
+ }
+
+ struct pipe_resource **globals = ctx->di.global_bindings.data;
+ for (unsigned i = 0; i < count; i++) {
+ if (resources && resources[i]) {
+ struct zink_resource *res = zink_resource(resources[i]);
+
+ util_range_add(&res->base.b, &res->valid_buffer_range, 0, res->base.b.width0);
+ pipe_resource_reference(&globals[first + i], resources[i]);
+
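+ /* handles[i] carries a byte offset on input; add the buffer's device address to produce the global pointer written back to the caller */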
+ uint64_t addr = 0;
+ memcpy(&addr, handles[i], sizeof(addr));
+ addr += zink_resource_get_address(zink_screen(pctx->screen), res);
+ memcpy(handles[i], &addr, sizeof(addr));
+ zink_resource_usage_set(res, ctx->batch.state, true);
+ res->obj->unordered_read = res->obj->unordered_write = false;
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
+ } else if (globals[first + i]) {
+ zink_batch_reference_resource(&ctx->batch, zink_resource(globals[first + i]));
+ pipe_resource_reference(&globals[first + i], NULL);
+ }
+ }
}
static void
@@ -1493,316 +2597,619 @@ static void
zink_set_patch_vertices(struct pipe_context *pctx, uint8_t patch_vertices)
{
struct zink_context *ctx = zink_context(pctx);
- ctx->gfx_pipeline_state.patch_vertices = patch_vertices;
+ if (zink_set_tcs_key_patches(ctx, patch_vertices)) {
+ ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch = patch_vertices;
+ if (zink_screen(ctx->base.screen)->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, patch_vertices);
+ else
+ ctx->gfx_pipeline_state.dirty = true;
+ zink_flush_dgc_if_enabled(ctx);
+ }
}
-void
+static void
+init_null_fbfetch(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ ctx->di.null_fbfetch_init = true;
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB)
+ return;
+ VkDescriptorGetInfoEXT info;
+ info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT;
+ info.pNext = NULL;
+ info.type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+ info.data.pInputAttachmentImage = &ctx->di.fbfetch;
+ if (screen->info.db_props.inputAttachmentDescriptorSize)
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.inputAttachmentDescriptorSize, ctx->di.fbfetch_db);
+}
+
+bool
zink_update_fbfetch(struct zink_context *ctx)
{
const bool had_fbfetch = ctx->di.fbfetch.imageLayout == VK_IMAGE_LAYOUT_GENERAL;
- if (!ctx->gfx_stages[PIPE_SHADER_FRAGMENT] ||
- !ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) {
+ if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT] ||
+ !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_fbfetch_output) {
if (!had_fbfetch)
- return;
+ return false;
+ zink_batch_no_rp(ctx);
ctx->di.fbfetch.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
ctx->di.fbfetch.imageView = zink_screen(ctx->base.screen)->info.rb2_feats.nullDescriptor ?
VK_NULL_HANDLE :
- zink_csurface(ctx->dummy_surface[0])->image_view;
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, PIPE_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1);
- return;
+ zink_get_dummy_surface(ctx, 0)->image_view;
+ ctx->invalidate_descriptor_state(ctx, MESA_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1);
+ return true;
}
bool changed = !had_fbfetch;
if (ctx->fb_state.cbufs[0]) {
VkImageView fbfetch = zink_csurface(ctx->fb_state.cbufs[0])->image_view;
+ if (!fbfetch)
+ /* swapchain image: retry later */
+ return false;
changed |= fbfetch != ctx->di.fbfetch.imageView;
ctx->di.fbfetch.imageView = zink_csurface(ctx->fb_state.cbufs[0])->image_view;
+
+ bool fbfetch_ms = ctx->fb_state.cbufs[0]->texture->nr_samples > 1;
+ if (zink_get_fs_base_key(ctx)->fbfetch_ms != fbfetch_ms)
+ zink_set_fs_base_key(ctx)->fbfetch_ms = fbfetch_ms;
+ } else {
+ ctx->di.fbfetch.imageView = zink_screen(ctx->base.screen)->info.rb2_feats.nullDescriptor ?
+ VK_NULL_HANDLE :
+ zink_get_dummy_surface(ctx, 0)->image_view;
}
+ bool ret = false;
ctx->di.fbfetch.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- if (changed)
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, PIPE_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1);
+ if (changed) {
+ ctx->invalidate_descriptor_state(ctx, MESA_SHADER_FRAGMENT, ZINK_DESCRIPTOR_TYPE_UBO, 0, 1);
+ if (!had_fbfetch) {
+ ret = true;
+ zink_batch_no_rp(ctx);
+ }
+ }
+ return ret;
}
-static size_t
-rp_state_size(const struct zink_render_pass_pipeline_state *pstate)
+void
+zink_update_vk_sample_locations(struct zink_context *ctx)
{
- return offsetof(struct zink_render_pass_pipeline_state, attachments) +
- sizeof(pstate->attachments[0]) * pstate->num_attachments;
+ if (ctx->gfx_pipeline_state.sample_locations_enabled && ctx->sample_locations_changed) {
+ unsigned samples = ctx->gfx_pipeline_state.rast_samples + 1;
+ unsigned idx = util_logbase2_ceil(MAX2(samples, 1));
+ VkExtent2D grid_size = zink_screen(ctx->base.screen)->maxSampleLocationGridSize[idx];
+
+ for (unsigned pixel = 0; pixel < grid_size.width * grid_size.height; pixel++) {
+ for (unsigned sample = 0; sample < samples; sample++) {
+ unsigned pixel_x = pixel % grid_size.width;
+ unsigned pixel_y = pixel / grid_size.width;
+ unsigned wi = pixel * samples + sample;
+ unsigned ri = (pixel_y * grid_size.width + pixel_x % grid_size.width);
+ ri = ri * samples + sample;
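+ /* each packed sample location stores 4-bit x/y subpixel coordinates in one byte; unpack to floats in [0,1], inverting y */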
+ ctx->vk_sample_locations[wi].x = (ctx->sample_locations[ri] & 0xf) / 16.0f;
+ ctx->vk_sample_locations[wi].y = (16 - (ctx->sample_locations[ri] >> 4)) / 16.0f;
+ }
+ }
+ }
}
-static uint32_t
-hash_rp_state(const void *key)
-{
- const struct zink_render_pass_pipeline_state *s = key;
- return _mesa_hash_data(key, rp_state_size(s));
+static unsigned
+find_rp_state(struct zink_context *ctx)
+{
+ bool found = false;
+ /* calc the state idx using the samples to account for msrtss */
+ unsigned idx = zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ?
+ util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples + 1) : 0;
+ struct set_entry *he = _mesa_set_search_or_add(&ctx->rendering_state_cache[idx], &ctx->gfx_pipeline_state.rendering_info, &found);
+ struct zink_rendering_info *info;
+ if (found) {
+ info = (void*)he->key;
+ return info->id;
+ }
+ info = ralloc(ctx, struct zink_rendering_info);
+ memcpy(info, &ctx->gfx_pipeline_state.rendering_info, sizeof(VkPipelineRenderingCreateInfo));
+ info->id = ctx->rendering_state_cache[idx].entries;
+ he->key = info;
+ return info->id;
}
-static bool
-equals_rp_state(const void *a, const void *b)
+unsigned
+zink_update_rendering_info(struct zink_context *ctx)
{
- return !memcmp(a, b, rp_state_size(a));
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]);
+ ctx->gfx_pipeline_state.rendering_formats[i] = surf ? surf->info.format[0] : VK_FORMAT_UNDEFINED;
+ }
+ ctx->gfx_pipeline_state.rendering_info.depthAttachmentFormat = VK_FORMAT_UNDEFINED;
+ ctx->gfx_pipeline_state.rendering_info.stencilAttachmentFormat = VK_FORMAT_UNDEFINED;
+ if (ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx)) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf);
+ bool has_depth = util_format_has_depth(util_format_description(ctx->fb_state.zsbuf->format));
+ bool has_stencil = util_format_has_stencil(util_format_description(ctx->fb_state.zsbuf->format));
+
+ if (has_depth)
+ ctx->gfx_pipeline_state.rendering_info.depthAttachmentFormat = surf->info.format[0];
+ if (has_stencil)
+ ctx->gfx_pipeline_state.rendering_info.stencilAttachmentFormat = surf->info.format[0];
+ }
+ return find_rp_state(ctx);
}
-static uint32_t
-hash_render_pass_state(const void *key)
+static unsigned
+calc_max_dummy_fbo_size(struct zink_context *ctx)
{
- struct zink_render_pass_state* s = (struct zink_render_pass_state*)key;
- return _mesa_hash_data(key, offsetof(struct zink_render_pass_state, rts) + sizeof(s->rts[0]) * s->num_rts);
+ unsigned size = MAX2(ctx->fb_state.width, ctx->fb_state.height);
+ return size ? size : MIN2(256, zink_screen(ctx->base.screen)->info.props.limits.maxImageDimension2D);
}
-static bool
-equals_render_pass_state(const void *a, const void *b)
+static unsigned
+begin_rendering(struct zink_context *ctx)
{
- const struct zink_render_pass_state *s_a = a, *s_b = b;
- if (s_a->num_rts != s_b->num_rts)
- return false;
- return memcmp(a, b, offsetof(struct zink_render_pass_state, rts) + sizeof(s_a->rts[0]) * s_a->num_rts) == 0;
-}
+ unsigned clear_buffers = 0;
+ ctx->gfx_pipeline_state.render_pass = NULL;
+ zink_update_vk_sample_locations(ctx);
+ bool has_swapchain = zink_render_update_swapchain(ctx);
+ if (has_swapchain)
+ zink_render_fixup_swapchain(ctx);
+ bool has_depth = false;
+ bool has_stencil = false;
+ bool changed_layout = false;
+ bool changed_size = false;
+ bool zsbuf_used = zink_is_zsbuf_used(ctx);
+ bool use_tc_info = !ctx->blitting && ctx->track_renderpasses;
+ if (ctx->rp_changed || ctx->rp_layout_changed || (!ctx->batch.in_rp && ctx->rp_loadop_changed)) {
+ /* init imageviews, base loadOp, formats */
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]);
+ if (!surf)
+ continue;
-static struct zink_render_pass *
-get_render_pass(struct zink_context *ctx)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- const struct pipe_framebuffer_state *fb = &ctx->fb_state;
- struct zink_render_pass_state state = {0};
- uint32_t clears = 0;
- state.swapchain_init = ctx->new_swapchain;
- state.samples = fb->samples > 0;
+ if (!zink_resource(surf->base.texture)->valid)
+ ctx->dynamic_fb.attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+ else
+ ctx->dynamic_fb.attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ if (use_tc_info) {
+ if (ctx->dynamic_fb.tc_info.cbuf_invalidate & BITFIELD_BIT(i))
+ ctx->dynamic_fb.attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+ else
+ ctx->dynamic_fb.attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ }
+ }
+
+ /* unset depth and stencil info: reset below */
+ VkImageLayout zlayout = ctx->dynamic_fb.info.pDepthAttachment ? ctx->dynamic_fb.info.pDepthAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED;
+ VkImageLayout slayout = ctx->dynamic_fb.info.pStencilAttachment ? ctx->dynamic_fb.info.pStencilAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED;
+ ctx->dynamic_fb.info.pDepthAttachment = NULL;
+ ctx->dynamic_fb.info.pStencilAttachment = NULL;
+
+ if (ctx->fb_state.zsbuf && zsbuf_used) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf);
+ has_depth = util_format_has_depth(util_format_description(ctx->fb_state.zsbuf->format));
+ has_stencil = util_format_has_stencil(util_format_description(ctx->fb_state.zsbuf->format));
+
+ /* depth may or may not be used but init it anyway */
+ if (zink_resource(surf->base.texture)->valid)
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ else
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+
+ if (use_tc_info) {
+ if (ctx->dynamic_fb.tc_info.zsbuf_invalidate)
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+ else
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ }
+
+ /* stencil may or may not be used but init it anyway */
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].loadOp = ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].storeOp = ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].storeOp;
+
+ if (has_depth) {
+ ctx->dynamic_fb.info.pDepthAttachment = &ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS];
+ /* stencil info only set for clears below */
+ }
+ if (has_stencil) {
+ /* must be stencil-only */
+ ctx->dynamic_fb.info.pStencilAttachment = &ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS + 1];
+ }
+ } else {
+ ctx->dynamic_fb.info.pDepthAttachment = NULL;
+ }
+ if (zlayout != (ctx->dynamic_fb.info.pDepthAttachment ? ctx->dynamic_fb.info.pDepthAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED))
+ changed_layout = true;
+ if (slayout != (ctx->dynamic_fb.info.pStencilAttachment ? ctx->dynamic_fb.info.pStencilAttachment->imageLayout : VK_IMAGE_LAYOUT_UNDEFINED))
+ changed_layout = true;
- u_foreach_bit(i, ctx->fbfetch_outputs)
- state.rts[i].fbfetch = true;
+ /* similar to begin_render_pass(), but just filling in VkRenderingInfo */
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ /* these are no-ops */
+ if (!ctx->fb_state.cbufs[i] || !zink_fb_clear_enabled(ctx, i))
+ continue;
+ /* these need actual clear calls inside the rp */
+ struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0);
+ if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) {
+ clear_buffers |= (PIPE_CLEAR_COLOR0 << i);
+ if (zink_fb_clear_count(&ctx->fb_clears[i]) < 2 ||
+ zink_fb_clear_element_needs_explicit(clear))
+ continue;
+ }
+ /* we now know there's one clear that can be done here */
+ memcpy(&ctx->dynamic_fb.attachments[i].clearValue, &clear->color, sizeof(float) * 4);
+ ctx->dynamic_fb.attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+ }
+ if (ctx->fb_state.zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) {
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
+ struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
+ if (!zink_fb_clear_element_needs_explicit(clear)) {
+ /* base zs clear info */
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].clearValue.depthStencil.depth = clear->zs.depth;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].clearValue.depthStencil.stencil = clear->zs.stencil;
+ /* always init separate stencil attachment */
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].clearValue.depthStencil.stencil = clear->zs.stencil;
+ if ((zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH))
+ /* initiate a depth clear */
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+ if ((zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL)) {
+ /* use a stencil clear, also set stencil attachment */
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+ }
+ }
+ }
+ if (changed_size || changed_layout)
+ ctx->rp_changed = true;
+ ctx->rp_loadop_changed = false;
+ ctx->rp_layout_changed = false;
+ }
+ /* always assemble clear_buffers mask:
+ * if a scissored clear must be triggered during glFlush,
+ * the renderpass metadata may be unchanged (e.g., LOAD from previous rp),
+ * but the buffer mask must still be returned
+ */
+ if (ctx->clears_enabled) {
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ /* these are no-ops */
+ if (!ctx->fb_state.cbufs[i] || !zink_fb_clear_enabled(ctx, i))
+ continue;
+ /* these need actual clear calls inside the rp */
+ if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i]))
+ clear_buffers |= (PIPE_CLEAR_COLOR0 << i);
+ }
+ if (ctx->fb_state.zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) {
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
+ struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
+ if (zink_fb_clear_needs_explicit(fb_clear)) {
+ for (int j = !zink_fb_clear_element_needs_explicit(clear);
+ (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear);
+ j++)
+ clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits;
+ }
+ }
+ }
+
+ if (!ctx->rp_changed && ctx->batch.in_rp)
+ return 0;
+ ctx->rp_changed = false;
- for (int i = 0; i < fb->nr_cbufs; i++) {
- struct pipe_surface *surf = fb->cbufs[i];
+ /* update pipeline info id for compatibility VUs */
+ unsigned rp_state = zink_update_rendering_info(ctx);
+ /* validate zs VUs: attachment must be null or format must be valid */
+ assert(!ctx->dynamic_fb.info.pDepthAttachment || ctx->gfx_pipeline_state.rendering_info.depthAttachmentFormat);
+ assert(!ctx->dynamic_fb.info.pStencilAttachment || ctx->gfx_pipeline_state.rendering_info.stencilAttachmentFormat);
+ bool rp_changed = ctx->gfx_pipeline_state.rp_state != rp_state;
+ if (!rp_changed && ctx->batch.in_rp)
+ return 0;
+
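+   /* the renderpass state changed: end any active rendering and re-prepare every attachment */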
+ zink_batch_no_rp(ctx);
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ VkImageView iv = VK_NULL_HANDLE;
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]);
if (surf) {
- state.rts[i].format = zink_get_format(screen, surf->format);
- state.rts[i].samples = surf->texture->nr_samples > 0 ? surf->texture->nr_samples :
- VK_SAMPLE_COUNT_1_BIT;
- state.rts[i].clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]);
- clears |= !!state.rts[i].clear_color ? PIPE_CLEAR_COLOR0 << i : 0;
- state.rts[i].swapchain = surf->texture->bind & PIPE_BIND_SCANOUT;
+ iv = zink_prep_fb_attachment(ctx, surf, i);
+ if (!iv)
+ /* dead swapchain */
+ return 0;
+ ctx->dynamic_fb.attachments[i].imageLayout = zink_resource(surf->base.texture)->layout;
+ }
+ ctx->dynamic_fb.attachments[i].imageView = iv;
+ }
+ if (has_swapchain) {
+ ASSERTED struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture);
+ zink_render_fixup_swapchain(ctx);
+ if (res->use_damage)
+ ctx->dynamic_fb.info.renderArea = res->damage;
+ /* clamp for late swapchain resize */
+ if (res->base.b.width0 < ctx->dynamic_fb.info.renderArea.extent.width)
+ ctx->dynamic_fb.info.renderArea.extent.width = res->base.b.width0;
+ if (res->base.b.height0 < ctx->dynamic_fb.info.renderArea.extent.height)
+ ctx->dynamic_fb.info.renderArea.extent.height = res->base.b.height0;
+ }
+ if (ctx->fb_state.zsbuf && zsbuf_used) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf);
+ VkImageView iv = zink_prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs);
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].imageView = iv;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].imageLayout = zink_resource(surf->base.texture)->layout;
+ assert(ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].imageView = iv;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].imageLayout = zink_resource(surf->base.texture)->layout;
+ assert(ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
+ if (ctx->transient_attachments & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) {
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS + 1].resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
} else {
- state.rts[i].format = VK_FORMAT_R8_UINT;
- state.rts[i].samples = fb->samples;
- }
- state.num_rts++;
- }
- state.num_cbufs = fb->nr_cbufs;
-
- if (fb->zsbuf) {
- struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture);
- struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
- state.rts[fb->nr_cbufs].format = zsbuf->format;
- state.rts[fb->nr_cbufs].samples = zsbuf->base.b.nr_samples > 0 ? zsbuf->base.b.nr_samples : VK_SAMPLE_COUNT_1_BIT;
- state.rts[fb->nr_cbufs].clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) &&
- !zink_fb_clear_first_needs_explicit(fb_clear) &&
- (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH);
- state.rts[fb->nr_cbufs].clear_stencil = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) &&
- !zink_fb_clear_first_needs_explicit(fb_clear) &&
- (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL);
- if (state.rts[fb->nr_cbufs].clear_color)
- clears |= PIPE_CLEAR_DEPTH;
- if (state.rts[fb->nr_cbufs].clear_stencil)
- clears |= PIPE_CLEAR_STENCIL;
- const uint64_t outputs_written = ctx->gfx_stages[PIPE_SHADER_FRAGMENT] ?
- ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.outputs_written : 0;
- bool needs_write = (ctx->dsa_state && ctx->dsa_state->hw_state.depth_write) ||
- outputs_written & (BITFIELD64_BIT(FRAG_RESULT_DEPTH) | BITFIELD64_BIT(FRAG_RESULT_STENCIL));
- state.rts[fb->nr_cbufs].needs_write = needs_write || state.rts[fb->nr_cbufs].clear_color || state.rts[fb->nr_cbufs].clear_stencil;
- state.num_rts++;
- }
- state.have_zsbuf = fb->zsbuf != NULL;
- assert(clears == ctx->rp_clears_enabled);
- state.clears = clears;
- uint32_t hash = hash_render_pass_state(&state);
- struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->render_pass_cache, hash,
- &state);
- struct zink_render_pass *rp;
- if (entry) {
- rp = entry->data;
- assert(rp->state.clears == clears);
- } else {
- struct zink_render_pass_pipeline_state pstate;
- pstate.samples = state.samples;
- rp = zink_create_render_pass(screen, &state, &pstate);
- if (!_mesa_hash_table_insert_pre_hashed(ctx->render_pass_cache, hash, &rp->state, rp))
- return NULL;
- bool found = false;
- struct set_entry *entry = _mesa_set_search_or_add(&ctx->render_pass_state_cache, &pstate, &found);
- struct zink_render_pass_pipeline_state *ppstate;
- if (!found) {
- entry->key = ralloc(ctx, struct zink_render_pass_pipeline_state);
- ppstate = (void*)entry->key;
- memcpy(ppstate, &pstate, rp_state_size(&pstate));
- ppstate->id = ctx->render_pass_state_cache.entries;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS].resolveMode = 0;
+ ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS + 1].resolveMode = 0;
}
- ppstate = (void*)entry->key;
- rp->pipeline_state = ppstate->id;
}
- return rp;
+ ctx->zsbuf_unused = !zsbuf_used;
+ assert(ctx->fb_state.width >= ctx->dynamic_fb.info.renderArea.extent.width);
+ assert(ctx->fb_state.height >= ctx->dynamic_fb.info.renderArea.extent.height);
+ ctx->gfx_pipeline_state.dirty |= rp_changed;
+ ctx->gfx_pipeline_state.rp_state = rp_state;
+
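+   /* rast_samples is stored as (sample count - 1), so +1 below recovers the
+    * VkSampleCountFlagBits value; msrtss is only chained into the rendering
+    * info when transient attachments are in use
+    */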
+ VkMultisampledRenderToSingleSampledInfoEXT msrtss = {
+ VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT,
+ NULL,
+ VK_TRUE,
+ ctx->gfx_pipeline_state.rast_samples + 1,
+ };
+
+ if (zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled)
+ ctx->dynamic_fb.info.pNext = ctx->transient_attachments ? &msrtss : NULL;
+ assert(!ctx->transient_attachments || msrtss.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT);
+ VKCTX(CmdBeginRendering)(ctx->batch.state->cmdbuf, &ctx->dynamic_fb.info);
+ ctx->batch.in_rp = true;
+ return clear_buffers;
}
-static uint32_t
-hash_framebuffer_imageless(const void *key)
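+/* drivers needing sanitised gl_Layer get a push constant indicating whether the
+ * current framebuffer is actually layered
+ */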
+ALWAYS_INLINE static void
+update_layered_rendering_state(struct zink_context *ctx)
{
- struct zink_framebuffer_state* s = (struct zink_framebuffer_state*)key;
- return _mesa_hash_data(key, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments);
+ if (!zink_screen(ctx->base.screen)->driver_workarounds.needs_sanitised_layer)
+ return;
+   unsigned framebuffer_is_layered = zink_framebuffer_get_num_layers(&ctx->fb_state) > 1;
+ VKCTX(CmdPushConstants)(
+ ctx->batch.state->cmdbuf,
+ zink_screen(ctx->base.screen)->gfx_push_constant_layout,
+ VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, framebuffer_is_layered), sizeof(unsigned),
+      &framebuffer_is_layered);
}
-static bool
-equals_framebuffer_imageless(const void *a, const void *b)
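+/* keep a framebuffer surface's resource (and any transient msaa resource) alive
+ * for the duration of the current batch
+ */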
+ALWAYS_INLINE static void
+batch_ref_fb_surface(struct zink_context *ctx, struct pipe_surface *psurf)
{
- struct zink_framebuffer_state *s = (struct zink_framebuffer_state*)a;
- return memcmp(a, b, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments) == 0;
+ if (!psurf)
+ return;
+ zink_batch_reference_resource(&ctx->batch, zink_resource(psurf->texture));
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ if (transient)
+ zink_batch_reference_resource(&ctx->batch, zink_resource(transient->base.texture));
}
-static void
-setup_framebuffer(struct zink_context *ctx)
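+/* ensure a renderpass is active on the current cmdbuf: applies pending void/fb
+ * clears, begins rendering, and resumes suspended queries
+ */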
+void
+zink_batch_rp(struct zink_context *ctx)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass;
-
- if (ctx->gfx_pipeline_state.sample_locations_enabled && ctx->sample_locations_changed) {
- unsigned samples = ctx->gfx_pipeline_state.rast_samples + 1;
- unsigned idx = util_logbase2_ceil(MAX2(samples, 1));
- VkExtent2D grid_size = screen->maxSampleLocationGridSize[idx];
-
- for (unsigned pixel = 0; pixel < grid_size.width * grid_size.height; pixel++) {
- for (unsigned sample = 0; sample < samples; sample++) {
- unsigned pixel_x = pixel % grid_size.width;
- unsigned pixel_y = pixel / grid_size.width;
- unsigned wi = pixel * samples + sample;
- unsigned ri = (pixel_y * grid_size.width + pixel_x % grid_size.width);
- ri = ri * samples + sample;
- ctx->vk_sample_locations[wi].x = (ctx->sample_locations[ri] & 0xf) / 16.0f;
- ctx->vk_sample_locations[wi].y = (16 - (ctx->sample_locations[ri] >> 4)) / 16.0f;
- }
- }
+ assert(!(ctx->batch.in_rp && ctx->rp_changed));
+ if (!ctx->track_renderpasses && !ctx->blitting) {
+ if (ctx->rp_tc_info_updated)
+ zink_parse_tc_info(ctx);
+ }
+ if (ctx->batch.in_rp && !ctx->rp_layout_changed)
+ return;
+ bool in_rp = ctx->batch.in_rp;
+ if (!in_rp && ctx->void_clears) {
+ union pipe_color_union color;
+ color.f[0] = color.f[1] = color.f[2] = 0;
+ color.f[3] = 1.0;
+ ctx->base.clear(&ctx->base, ctx->void_clears, NULL, &color, 0, 0);
+ ctx->void_clears = 0;
+ }
+ if (!ctx->blitting) {
+ if (ctx->rp_tc_info_updated)
+ update_tc_info(ctx);
+ ctx->rp_tc_info_updated = false;
}
+ bool maybe_has_query_ends = !ctx->track_renderpasses || ctx->dynamic_fb.tc_info.has_query_ends;
+ ctx->queries_in_rp = maybe_has_query_ends;
+   /* if possible, resume any queries that were stopped when the previous rp ended while still outside the renderpass */
+ if (!ctx->queries_disabled && !maybe_has_query_ends) {
+ zink_resume_queries(ctx, &ctx->batch);
+ zink_query_update_gs_states(ctx);
+ }
+ unsigned clear_buffers;
+   /* fall back to a full renderpass when dynamic rendering can't be used:
+    * - transient attachments without EXT_multisampled_render_to_single_sampled
+    * - fbfetch without KHR_dynamic_rendering_local_read (no input attachments)
+    */
+ if (!zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering ||
+ (ctx->transient_attachments && !zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled) ||
+ (ctx->fbfetch_outputs && !zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering_local_read))
+ clear_buffers = zink_begin_render_pass(ctx);
+ else
+ clear_buffers = begin_rendering(ctx);
+ assert(!ctx->rp_changed);
- if (rp)
- ctx->rp_changed |= ctx->rp_clears_enabled != rp->state.clears;
- if (ctx->rp_changed)
- rp = get_render_pass(ctx);
+   /* update the renderpass counter for the HUD */
+ ctx->hud.render_passes++;
- ctx->fb_changed |= rp != ctx->gfx_pipeline_state.render_pass;
- if (rp->pipeline_state != ctx->gfx_pipeline_state.rp_state) {
- ctx->gfx_pipeline_state.rp_state = rp->pipeline_state;
- ctx->gfx_pipeline_state.dirty = true;
+ if (!in_rp && ctx->batch.in_rp) {
+ /* only hit this for valid swapchain and new renderpass */
+ if (ctx->render_condition.query)
+ zink_start_conditional_render(ctx);
+ zink_clear_framebuffer(ctx, clear_buffers);
+ if (ctx->pipeline_changed[0]) {
+ for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++)
+ batch_ref_fb_surface(ctx, ctx->fb_state.cbufs[i]);
+ batch_ref_fb_surface(ctx, ctx->fb_state.zsbuf);
+ }
}
+   /* it couldn't be determined earlier that queries wouldn't split renderpasses, so ensure queries are started inside the renderpass */
+ if (!ctx->queries_disabled && maybe_has_query_ends) {
+ zink_resume_queries(ctx, &ctx->batch);
+ zink_query_update_gs_states(ctx);
+ }
+}
- ctx->rp_changed = false;
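+/* end the active renderpass (legacy or dynamic rendering), stopping conditional
+ * rendering and suspending in-renderpass queries first
+ */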
+void
+zink_batch_no_rp_safe(struct zink_context *ctx)
+{
+ if (!ctx->batch.in_rp)
+ return;
+ zink_flush_dgc_if_enabled(ctx);
+ if (ctx->render_condition.query)
+ zink_stop_conditional_render(ctx);
+   /* suspend all queries that were started in a renderpass;
+    * they can then be resumed upon beginning a new renderpass
+    */
+ if (!ctx->queries_disabled)
+ zink_query_renderpass_suspend(ctx);
+ if (ctx->gfx_pipeline_state.render_pass)
+ zink_end_render_pass(ctx);
+ else {
+ VKCTX(CmdEndRendering)(ctx->batch.state->cmdbuf);
+ ctx->batch.in_rp = false;
+ }
+ assert(!ctx->batch.in_rp);
+}
- if (!ctx->fb_changed)
+void
+zink_batch_no_rp(struct zink_context *ctx)
+{
+ if (!ctx->batch.in_rp)
return;
+ if (ctx->track_renderpasses && !ctx->blitting)
+ tc_renderpass_info_reset(&ctx->dynamic_fb.tc_info);
+ zink_batch_no_rp_safe(ctx);
+}
- ctx->init_framebuffer(screen, ctx->framebuffer, rp);
- ctx->fb_changed = false;
- ctx->gfx_pipeline_state.render_pass = rp;
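+/* propagate the resource's current image layout into every sampler descriptor
+ * that binds it; only the cached layout is updated, the rest of the descriptor
+ * state is left alone
+ */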
+ALWAYS_INLINE static void
+update_res_sampler_layouts(struct zink_context *ctx, struct zink_resource *res)
+{
+ unsigned find = res->sampler_bind_count[0];
+ for (unsigned i = 0; find && i < MESA_SHADER_COMPUTE; i++) {
+ u_foreach_bit(slot, res->sampler_binds[i]) {
+ /* only set layout, skip rest of update */
+ if (ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i][slot] == res)
+ ctx->di.textures[i][slot].imageLayout = zink_descriptor_util_image_layout_eval(ctx, res, false);
+ find--;
+ if (!find) break;
+ }
+ }
}
-static VkImageView
-prep_fb_attachment(struct zink_context *ctx, struct pipe_surface *psurf, unsigned i)
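+/* transition a framebuffer attachment to the layout needed for the upcoming
+ * renderpass and return the image view to bind; returns VK_NULL_HANDLE if a
+ * swapchain image can't be acquired
+ */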
+VkImageView
+zink_prep_fb_attachment(struct zink_context *ctx, struct zink_surface *surf, unsigned i)
{
- if (!psurf)
- return zink_csurface(ctx->dummy_surface[util_logbase2_ceil(ctx->fb_state.samples)])->image_view;
-
- struct zink_surface *surf = zink_csurface(psurf);
- zink_batch_resource_usage_set(&ctx->batch, zink_resource(surf->base.texture), true);
- zink_batch_usage_set(&surf->batch_uses, ctx->batch.state);
+ struct zink_resource *res;
+ if (!surf) {
+ surf = zink_get_dummy_surface(ctx, util_logbase2_ceil(ctx->fb_state.samples));
+ res = zink_resource(surf->base.texture);
+ } else {
+ res = zink_resource(surf->base.texture);
+ zink_batch_resource_usage_set(&ctx->batch, res, true, false);
+ }
- struct zink_resource *res = zink_resource(surf->base.texture);
VkAccessFlags access;
VkPipelineStageFlags pipeline;
- VkImageLayout layout = zink_render_pass_attachment_get_barrier_info(ctx->gfx_pipeline_state.render_pass,
- i, &pipeline, &access);
- zink_resource_image_barrier(ctx, res, layout, access, pipeline);
+ if (zink_is_swapchain(res)) {
+ if (!zink_kopper_acquire(ctx, res, UINT64_MAX))
+ return VK_NULL_HANDLE;
+ zink_surface_swapchain_update(ctx, surf);
+ if (!i)
+ zink_update_fbfetch(ctx);
+ }
+ if (ctx->blitting)
+ return surf->image_view;
+ VkImageLayout layout;
+ /* depth attachment is stored as the last attachment, but bitfields always use PIPE_MAX_COLOR_BUFS */
+ int idx = i == ctx->fb_state.nr_cbufs ? PIPE_MAX_COLOR_BUFS : i;
+ if (ctx->feedback_loops & BITFIELD_BIT(idx)) {
+ /* reevaluate feedback loop in case layout change eliminates the loop */
+ if (!res->sampler_bind_count[0] || (idx == PIPE_MAX_COLOR_BUFS && !zink_is_zsbuf_write(ctx)))
+ update_feedback_loop_state(ctx, i, ctx->feedback_loops & ~BITFIELD_BIT(idx));
+ }
+ if (ctx->track_renderpasses) {
+ layout = zink_tc_renderpass_info_parse(ctx, &ctx->dynamic_fb.tc_info, idx, &pipeline, &access);
+ assert(i < ctx->fb_state.nr_cbufs || layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL || !zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS));
+ if (i == ctx->fb_state.nr_cbufs && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS))
+ assert(ctx->dynamic_fb.tc_info.zsbuf_clear || ctx->dynamic_fb.tc_info.zsbuf_clear_partial || ctx->dynamic_fb.tc_info.zsbuf_load);
+ } else {
+ if (ctx->gfx_pipeline_state.render_pass) {
+ layout = zink_render_pass_attachment_get_barrier_info(&ctx->gfx_pipeline_state.render_pass->state.rts[i],
+ i < ctx->fb_state.nr_cbufs, &pipeline, &access);
+ } else {
+ struct zink_rt_attrib rt;
+ if (i < ctx->fb_state.nr_cbufs)
+ zink_init_color_attachment(ctx, i, &rt);
+ else
+ zink_init_zs_attachment(ctx, &rt);
+ layout = zink_render_pass_attachment_get_barrier_info(&rt, i < ctx->fb_state.nr_cbufs, &pipeline, &access);
+ /* avoid unnecessary read-only layout change */
+ if (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
+ res->layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
+ !res->bind_count[0])
+ layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+ }
+ }
+ /*
+ The image subresources for a storage image must be in the VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR or
+ VK_IMAGE_LAYOUT_GENERAL layout in order to access its data in a shader.
+ - 14.1.1. Storage Image
+ */
+ if (res->image_bind_count[0])
+ layout = VK_IMAGE_LAYOUT_GENERAL;
+ else if (!zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout &&
+ layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT)
+ layout = VK_IMAGE_LAYOUT_GENERAL;
+ if (res->valid || res->layout != layout)
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, layout, access, pipeline);
+ if (!(res->aspect & VK_IMAGE_ASPECT_COLOR_BIT))
+ ctx->zsbuf_readonly = res->layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+ res->obj->unordered_read = res->obj->unordered_write = false;
+ if (i == ctx->fb_state.nr_cbufs && res->sampler_bind_count[0])
+ update_res_sampler_layouts(ctx, res);
return surf->image_view;
}
-static unsigned
-begin_render_pass(struct zink_context *ctx)
-{
- struct zink_batch *batch = &ctx->batch;
- struct pipe_framebuffer_state *fb_state = &ctx->fb_state;
+static uint32_t
+hash_rendering_state(const void *key)
+{
+ const VkPipelineRenderingCreateInfo *info = key;
+ uint32_t hash = 0;
+ /*
+ uint32_t colorAttachmentCount;
+ const VkFormat* pColorAttachmentFormats;
+ VkFormat depthAttachmentFormat;
+ VkFormat stencilAttachmentFormat;
+ * this data is not optimally arranged, so it must be manually hashed
+ */
+ hash = XXH32(&info->colorAttachmentCount, sizeof(uint32_t), hash);
+ hash = XXH32(&info->depthAttachmentFormat, sizeof(uint32_t), hash);
+ hash = XXH32(&info->stencilAttachmentFormat, sizeof(VkFormat), hash);
+ return XXH32(info->pColorAttachmentFormats, sizeof(VkFormat) * info->colorAttachmentCount, hash);
+}
- VkRenderPassBeginInfo rpbi = {0};
- rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
- rpbi.renderPass = ctx->gfx_pipeline_state.render_pass->render_pass;
- rpbi.renderArea.offset.x = 0;
- rpbi.renderArea.offset.y = 0;
- rpbi.renderArea.extent.width = fb_state->width;
- rpbi.renderArea.extent.height = fb_state->height;
+static bool
+equals_rendering_state(const void *a, const void *b)
+{
+ const VkPipelineRenderingCreateInfo *ai = a;
+ const VkPipelineRenderingCreateInfo *bi = b;
+ return ai->colorAttachmentCount == bi->colorAttachmentCount &&
+ ai->depthAttachmentFormat == bi->depthAttachmentFormat &&
+ ai->stencilAttachmentFormat == bi->stencilAttachmentFormat &&
+ !memcmp(ai->pColorAttachmentFormats, bi->pColorAttachmentFormats, sizeof(VkFormat) * ai->colorAttachmentCount);
+}
- VkClearValue clears[PIPE_MAX_COLOR_BUFS + 1] = {0};
- unsigned clear_buffers = 0;
- uint32_t clear_validate = 0;
- for (int i = 0; i < fb_state->nr_cbufs; i++) {
- /* these are no-ops */
- if (!fb_state->cbufs[i] || !zink_fb_clear_enabled(ctx, i))
- continue;
- /* these need actual clear calls inside the rp */
- struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0);
- if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) {
- clear_buffers |= (PIPE_CLEAR_COLOR0 << i);
- if (zink_fb_clear_count(&ctx->fb_clears[i]) < 2 ||
- zink_fb_clear_element_needs_explicit(clear))
- continue;
- }
- /* we now know there's one clear that can be done here */
- zink_fb_clear_util_unpack_clear_color(clear, fb_state->cbufs[i]->format, (void*)&clears[i].color);
- rpbi.clearValueCount = i + 1;
- clear_validate |= PIPE_CLEAR_COLOR0 << i;
- assert(ctx->framebuffer->rp->state.clears);
- }
- if (fb_state->zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) {
- struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
- struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
- if (!zink_fb_clear_element_needs_explicit(clear)) {
- clears[fb_state->nr_cbufs].depthStencil.depth = clear->zs.depth;
- clears[fb_state->nr_cbufs].depthStencil.stencil = clear->zs.stencil;
- rpbi.clearValueCount = fb_state->nr_cbufs + 1;
- clear_validate |= clear->zs.bits;
- assert(ctx->framebuffer->rp->state.clears);
- }
- if (zink_fb_clear_needs_explicit(fb_clear)) {
- for (int j = !zink_fb_clear_element_needs_explicit(clear);
- (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear);
- j++)
- clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits;
- }
- }
- assert(clear_validate == ctx->framebuffer->rp->state.clears);
- rpbi.pClearValues = &clears[0];
- rpbi.framebuffer = ctx->framebuffer->fb;
-
- assert(ctx->gfx_pipeline_state.render_pass && ctx->framebuffer);
-
- VkRenderPassAttachmentBeginInfo infos;
- VkImageView att[PIPE_MAX_COLOR_BUFS + 1];
- infos.sType = VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO;
- infos.pNext = NULL;
- infos.attachmentCount = ctx->framebuffer->state.num_attachments;
- infos.pAttachments = att;
- for (int i = 0; i < ctx->fb_state.nr_cbufs; i++)
- att[i] = prep_fb_attachment(ctx, ctx->fb_state.cbufs[i], i);
- att[ctx->fb_state.nr_cbufs] = prep_fb_attachment(ctx, ctx->fb_state.zsbuf, ctx->fb_state.nr_cbufs);
- if (zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer) {
-#ifndef NDEBUG
- for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
- assert(!ctx->fb_state.cbufs[i] || zink_resource(ctx->fb_state.cbufs[i]->texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage);
- assert(!ctx->fb_state.cbufs[i] || zink_resource(ctx->fb_state.cbufs[i]->texture)->obj->vkflags == ctx->framebuffer->state.infos[i].flags);
- }
- assert(!ctx->fb_state.zsbuf || zink_resource(ctx->fb_state.zsbuf->texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage);
- assert(!ctx->fb_state.zsbuf || zink_resource(ctx->fb_state.zsbuf->texture)->obj->vkflags == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].flags);
-#endif
- rpbi.pNext = &infos;
- }
+static uint32_t
+hash_framebuffer_imageless(const void *key)
+{
+ struct zink_framebuffer_state* s = (struct zink_framebuffer_state*)key;
+ return _mesa_hash_data(key, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments);
+}
- VKCTX(CmdBeginRenderPass)(batch->state->cmdbuf, &rpbi, VK_SUBPASS_CONTENTS_INLINE);
- batch->in_rp = true;
- ctx->new_swapchain = false;
- return clear_buffers;
+static bool
+equals_framebuffer_imageless(const void *a, const void *b)
+{
+ struct zink_framebuffer_state *s = (struct zink_framebuffer_state*)a;
+ return memcmp(a, b, offsetof(struct zink_framebuffer_state, infos) + sizeof(s->infos[0]) * s->num_attachments) == 0;
}
void
@@ -1832,38 +3239,15 @@ zink_evaluate_depth_buffer(struct pipe_context *pctx)
zink_batch_no_rp(ctx);
}
-void
-zink_begin_render_pass(struct zink_context *ctx)
-{
- setup_framebuffer(ctx);
- assert(ctx->gfx_pipeline_state.render_pass);
- unsigned clear_buffers = begin_render_pass(ctx);
-
- if (ctx->render_condition.query)
- zink_start_conditional_render(ctx);
- zink_clear_framebuffer(ctx, clear_buffers);
-}
-
-void
-zink_end_render_pass(struct zink_context *ctx)
-{
- if (ctx->batch.in_rp) {
- if (ctx->render_condition.query)
- zink_stop_conditional_render(ctx);
- VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf);
- }
- ctx->batch.in_rp = false;
-}
-
static void
sync_flush(struct zink_context *ctx, struct zink_batch_state *bs)
{
- if (zink_screen(ctx->base.screen)->threaded)
+ if (zink_screen(ctx->base.screen)->threaded_submit)
util_queue_fence_wait(&bs->flush_completed);
}
static inline VkAccessFlags
-get_access_flags_for_binding(struct zink_context *ctx, enum zink_descriptor_type type, enum pipe_shader_type stage, unsigned idx)
+get_access_flags_for_binding(struct zink_context *ctx, enum zink_descriptor_type type, gl_shader_stage stage, unsigned idx)
{
VkAccessFlags flags = 0;
switch (type) {
@@ -1893,7 +3277,7 @@ get_access_flags_for_binding(struct zink_context *ctx, enum zink_descriptor_type
}
static void
-update_resource_refs_for_stage(struct zink_context *ctx, enum pipe_shader_type stage)
+update_resource_refs_for_stage(struct zink_context *ctx, gl_shader_stage stage)
{
struct zink_batch *batch = &ctx->batch;
unsigned max_slot[] = {
@@ -1902,32 +3286,25 @@ update_resource_refs_for_stage(struct zink_context *ctx, enum pipe_shader_type s
[ZINK_DESCRIPTOR_TYPE_SSBO] = ctx->di.num_ssbos[stage],
[ZINK_DESCRIPTOR_TYPE_IMAGE] = ctx->di.num_images[stage]
};
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
for (unsigned j = 0; j < max_slot[i]; j++) {
if (ctx->di.descriptor_res[i][stage][j]) {
struct zink_resource *res = ctx->di.descriptor_res[i][stage][j];
if (!res)
continue;
+ bool is_buffer = res->obj->is_buffer;
bool is_write = zink_resource_access_is_write(get_access_flags_for_binding(ctx, i, stage, j));
- zink_batch_resource_usage_set(batch, res, is_write);
-
- struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[stage][j]);
- struct zink_sampler_state *sampler_state = ctx->sampler_states[stage][j];
- struct zink_image_view *iv = &ctx->image_views[stage][j];
- if (sampler_state && i == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW && j <= ctx->di.num_samplers[stage])
- zink_batch_usage_set(&sampler_state->batch_uses, ctx->batch.state);
- if (sv && i == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW && j <= ctx->di.num_sampler_views[stage]) {
- if (res->obj->is_buffer)
- zink_batch_usage_set(&sv->buffer_view->batch_uses, ctx->batch.state);
- else
- zink_batch_usage_set(&sv->image_view->batch_uses, ctx->batch.state);
- zink_batch_reference_sampler_view(batch, sv);
- } else if (i == ZINK_DESCRIPTOR_TYPE_IMAGE && j <= ctx->di.num_images[stage]) {
- if (res->obj->is_buffer)
- zink_batch_usage_set(&iv->buffer_view->batch_uses, ctx->batch.state);
+ if (zink_is_swapchain(res)) {
+ if (!zink_kopper_acquire(ctx, res, UINT64_MAX))
+ /* technically this is a failure condition, but there's no safe way out */
+ continue;
+ }
+ zink_batch_resource_usage_set(batch, res, is_write, is_buffer);
+ if (!ctx->unordered_blitting) {
+ if (is_write || !res->obj->is_buffer)
+ res->obj->unordered_read = res->obj->unordered_write = false;
else
- zink_batch_usage_set(&iv->surface->batch_uses, ctx->batch.state);
- zink_batch_reference_image_view(batch, iv);
+ res->obj->unordered_read = false;
}
}
}
@@ -1939,39 +3316,106 @@ zink_update_descriptor_refs(struct zink_context *ctx, bool compute)
{
struct zink_batch *batch = &ctx->batch;
if (compute) {
- update_resource_refs_for_stage(ctx, PIPE_SHADER_COMPUTE);
+ update_resource_refs_for_stage(ctx, MESA_SHADER_COMPUTE);
if (ctx->curr_compute)
zink_batch_reference_program(batch, &ctx->curr_compute->base);
} else {
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++)
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
update_resource_refs_for_stage(ctx, i);
unsigned vertex_buffers_enabled_mask = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask;
unsigned last_vbo = util_last_bit(vertex_buffers_enabled_mask);
for (unsigned i = 0; i < last_vbo + 1; i++) {
- if (ctx->vertex_buffers[i].buffer.resource)
- zink_batch_resource_usage_set(batch, zink_resource(ctx->vertex_buffers[i].buffer.resource), false);
+ struct zink_resource *res = zink_resource(ctx->vertex_buffers[i].buffer.resource);
+ if (res) {
+ zink_batch_resource_usage_set(batch, res, false, true);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = false;
+ }
}
if (ctx->curr_program)
zink_batch_reference_program(batch, &ctx->curr_program->base);
}
+ if (ctx->di.bindless_refs_dirty) {
+ ctx->di.bindless_refs_dirty = false;
+ for (unsigned i = 0; i < 2; i++) {
+ util_dynarray_foreach(&ctx->di.bindless[i].resident, struct zink_bindless_descriptor*, bd) {
+ struct zink_resource *res = zink_descriptor_surface_resource(&(*bd)->ds);
+ zink_batch_resource_usage_set(&ctx->batch, res, (*bd)->access & PIPE_IMAGE_ACCESS_WRITE, res->obj->is_buffer);
+ if (!ctx->unordered_blitting) {
+ if ((*bd)->access & PIPE_IMAGE_ACCESS_WRITE || !res->obj->is_buffer)
+ res->obj->unordered_read = res->obj->unordered_write = false;
+ else
+ res->obj->unordered_read = false;
+ }
+ }
+ }
+ }
+
+ unsigned global_count = util_dynarray_num_elements(&ctx->di.global_bindings, struct zink_resource*);
+ struct zink_resource **globals = ctx->di.global_bindings.data;
+ for (unsigned i = 0; i < global_count; i++) {
+ struct zink_resource *res = globals[i];
+ if (!res)
+ continue;
+ zink_batch_resource_usage_set(batch, res, true, true);
+ res->obj->unordered_read = res->obj->unordered_write = false;
+ }
+}
+
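+/* re-emit color write enables: the main cmdbuf honors disable_color_writes while
+ * the reordered cmdbuf always keeps writes enabled; depth writes are restored from
+ * the bound dsa state
+ */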
+static void
+reapply_color_write(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ assert(screen->info.have_EXT_color_write_enable);
+ const VkBool32 enables[PIPE_MAX_COLOR_BUFS] = {1, 1, 1, 1, 1, 1, 1, 1};
+ const VkBool32 disables[PIPE_MAX_COLOR_BUFS] = {0};
+ const unsigned max_att = MIN2(PIPE_MAX_COLOR_BUFS, screen->info.props.limits.maxColorAttachments);
+ VKCTX(CmdSetColorWriteEnableEXT)(ctx->batch.state->cmdbuf, max_att, ctx->disable_color_writes ? disables : enables);
+ VKCTX(CmdSetColorWriteEnableEXT)(ctx->batch.state->reordered_cmdbuf, max_att, enables);
+ assert(screen->info.have_EXT_extended_dynamic_state);
+ if (ctx->dsa_state)
+ VKCTX(CmdSetDepthWriteEnable)(ctx->batch.state->cmdbuf, ctx->disable_color_writes ? VK_FALSE : ctx->dsa_state->hw_state.depth_write);
}
static void
stall(struct zink_context *ctx)
{
- sync_flush(ctx, zink_batch_state(ctx->last_fence));
- zink_vkfence_wait(zink_screen(ctx->base.screen), ctx->last_fence, PIPE_TIMEOUT_INFINITE);
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ sync_flush(ctx, ctx->last_batch_state);
+ zink_screen_timeline_wait(screen, ctx->last_batch_state->fence.batch_id, OS_TIMEOUT_INFINITE);
zink_batch_reset_all(ctx);
}
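+/* recompute the set of extended-dynamic-state3 states that must be re-emitted,
+ * masking off any the device does not support
+ */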
+void
+zink_reset_ds3_states(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (!screen->info.have_EXT_extended_dynamic_state3)
+ return;
+ if (screen->have_full_ds3)
+ ctx->ds3_states = UINT32_MAX;
+ else
+ ctx->ds3_states = BITFIELD_MASK(ZINK_DS3_BLEND_A2C);
+ if (!screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToOneEnable)
+ ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_BLEND_A21);
+ if (!screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable)
+ ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON);
+ if (screen->driver_workarounds.no_linestipple)
+ ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE);
+}
+
static void
flush_batch(struct zink_context *ctx, bool sync)
{
struct zink_batch *batch = &ctx->batch;
+ assert(!ctx->unordered_blitting);
if (ctx->clears_enabled)
/* start rp to do all the clears */
- zink_begin_render_pass(ctx);
- zink_end_render_pass(ctx);
+ zink_batch_rp(ctx);
+ zink_batch_no_rp_safe(ctx);
+
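+   /* wait for unsynchronized access to finish, then mark a flush as in progress;
+    * flush_fence is signaled again once the new batch has been started
+    */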
+ util_queue_fence_wait(&ctx->unsync_fence);
+ util_queue_fence_reset(&ctx->flush_fence);
zink_end_batch(ctx, batch);
ctx->deferred_fence = NULL;
@@ -1981,8 +3425,9 @@ flush_batch(struct zink_context *ctx, bool sync)
if (ctx->batch.state->is_device_lost) {
check_device_lost(ctx);
} else {
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
zink_start_batch(ctx, batch);
- if (zink_screen(ctx->base.screen)->info.have_EXT_transform_feedback && ctx->num_so_targets)
+ if (screen->info.have_EXT_transform_feedback && ctx->num_so_targets)
ctx->dirty_so_targets = true;
ctx->pipeline_changed[0] = ctx->pipeline_changed[1] = true;
zink_select_draw_vbo(ctx);
@@ -1990,9 +3435,25 @@ flush_batch(struct zink_context *ctx, bool sync)
if (ctx->oom_stall)
stall(ctx);
+ zink_reset_ds3_states(ctx);
+
ctx->oom_flush = false;
ctx->oom_stall = false;
+ ctx->dd.bindless_bound = false;
+ ctx->di.bindless_refs_dirty = true;
+ ctx->sample_locations_changed = ctx->gfx_pipeline_state.sample_locations_enabled;
+ if (zink_screen(ctx->base.screen)->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) {
+ VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
+ VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->reordered_cmdbuf, 1);
+ }
+ update_feedback_loop_dynamic_state(ctx);
+ if (screen->info.have_EXT_color_write_enable)
+ reapply_color_write(ctx);
+ update_layered_rendering_state(ctx);
+ tc_renderpass_info_reset(&ctx->dynamic_fb.tc_info);
+ ctx->rp_tc_info_updated = true;
}
+ util_queue_fence_signal(&ctx->flush_fence);
}
void
@@ -2016,30 +3477,124 @@ static bool
rebind_fb_state(struct zink_context *ctx, struct zink_resource *match_res, bool from_set_fb)
{
bool rebind = false;
- for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++)
rebind |= rebind_fb_surface(ctx, &ctx->fb_state.cbufs[i], match_res);
- if (from_set_fb && ctx->fb_state.cbufs[i] && ctx->fb_state.cbufs[i]->texture->bind & PIPE_BIND_SCANOUT)
- ctx->new_swapchain = true;
- }
rebind |= rebind_fb_surface(ctx, &ctx->fb_state.zsbuf, match_res);
return rebind;
}
static void
-unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, bool changed)
+unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, unsigned idx, bool changed)
{
+ ctx->dynamic_fb.attachments[idx].imageView = VK_NULL_HANDLE;
if (!surf)
return;
+ struct zink_resource *res = zink_resource(surf->texture);
if (changed) {
- zink_fb_clears_apply(ctx, surf->texture);
- if (zink_batch_usage_exists(zink_csurface(surf)->batch_uses))
- zink_batch_reference_surface(&ctx->batch, zink_csurface(surf));
ctx->rp_changed = true;
}
- struct zink_resource *res = zink_resource(surf->texture);
- res->fb_binds--;
- if (!res->fb_binds)
- check_resource_for_batch_ref(ctx, res);
+ res->fb_bind_count--;
+ if (!res->fb_bind_count && !res->bind_count[0])
+ _mesa_set_remove_key(ctx->need_barriers[0], res);
+ unsigned feedback_loops = ctx->feedback_loops;
+ if (ctx->feedback_loops & BITFIELD_BIT(idx)) {
+ ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ ctx->rp_layout_changed = true;
+ }
+ ctx->feedback_loops &= ~BITFIELD_BIT(idx);
+ if (feedback_loops != ctx->feedback_loops) {
+ if (idx == PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop_zs) {
+ if (ctx->gfx_pipeline_state.feedback_loop_zs)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.feedback_loop_zs = false;
+ } else if (idx < PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop) {
+ if (ctx->gfx_pipeline_state.feedback_loop)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.feedback_loop = false;
+ }
+ }
+ res->fb_binds &= ~BITFIELD_BIT(idx);
+ /* this is called just before the resource loses a reference, so a refcount==1 means the resource will be destroyed */
+ if (!res->fb_bind_count && res->base.b.reference.count > 1) {
+ if (ctx->track_renderpasses && !ctx->blitting) {
+ if (!(res->base.b.bind & PIPE_BIND_DISPLAY_TARGET) && util_format_is_depth_or_stencil(surf->format))
+ /* assume that all depth buffers which are not swapchain images will be used for sampling to avoid splitting renderpasses */
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+ if (!zink_is_swapchain(res) && !util_format_is_depth_or_stencil(surf->format))
+ /* assume that all color buffers which are not swapchain images will be used for sampling to avoid splitting renderpasses */
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+ }
+ if (res->sampler_bind_count[0]) {
+ update_res_sampler_layouts(ctx, res);
+ if (res->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && !ctx->blitting)
+ _mesa_set_add(ctx->need_barriers[0], res);
+ }
+ }
+}
+
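+/* when rasterizer discard is active alongside primitives-generated queries, fragment
+ * output is irrelevant: either disable color/depth writes via EXT_color_write_enable
+ * or, if the fs has side effects, bind a null fragment shader instead
+ */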
+void
+zink_set_null_fs(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ bool prev_disable_fs = ctx->disable_fs;
+ ctx->disable_fs = ctx->rast_state && ctx->rast_state->base.rasterizer_discard &&
+ (ctx->primitives_generated_active || (!ctx->queries_disabled && ctx->primitives_generated_suspended));
+ struct zink_shader *zs = ctx->gfx_stages[MESA_SHADER_FRAGMENT];
+ unsigned compact = screen->compact_descriptors ? ZINK_DESCRIPTOR_COMPACT : 0;
+ /* can't use CWE if side effects */
+ bool no_cwe = (zs && (zs->ssbos_used || zs->bindless || zs->num_bindings[ZINK_DESCRIPTOR_TYPE_IMAGE - compact])) ||
+ ctx->fs_query_active || ctx->occlusion_query_active || !screen->info.have_EXT_color_write_enable;
+ bool prev_disable_color_writes = ctx->disable_color_writes;
+ ctx->disable_color_writes = ctx->disable_fs && !no_cwe;
+
+ if (ctx->disable_fs == prev_disable_fs) {
+ /* if this is a true no-op then return */
+ if (!ctx->disable_fs || ctx->disable_color_writes == !no_cwe)
+ return;
+ /* else changing disable modes */
+ }
+
+ /* either of these cases requires removing the previous mode */
+ if (!ctx->disable_fs || (prev_disable_fs && prev_disable_color_writes != !no_cwe)) {
+ if (prev_disable_color_writes)
+ reapply_color_write(ctx);
+ else
+ ctx->base.bind_fs_state(&ctx->base, ctx->saved_fs);
+ ctx->saved_fs = NULL;
+ /* fs/CWE reenabled, fs active, done */
+ if (!ctx->disable_fs)
+ return;
+ }
+
+ /* always use CWE when possible */
+ if (!no_cwe) {
+ reapply_color_write(ctx);
+ return;
+ }
+ /* otherwise need to bind a null fs */
+ if (!ctx->null_fs) {
+ nir_shader *nir = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, &screen->nir_options, "null_fs").shader;
+ nir->info.separate_shader = true;
+ ctx->null_fs = pipe_shader_from_nir(&ctx->base, nir);
+ }
+ ctx->saved_fs = ctx->gfx_stages[MESA_SHADER_FRAGMENT];
+ ctx->base.bind_fs_state(&ctx->base, ctx->null_fs);
+}
+
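+/* a surface flagged needs_mutable requires its resource to be reinitialized with
+ * mutable format support; recreate the surface against the updated resource and
+ * swap it into place
+ */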
+static void
+check_framebuffer_surface_mutable(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurf;
+ if (!csurf->needs_mutable)
+ return;
+ zink_resource_object_init_mutable(ctx, zink_resource(psurf->texture));
+ struct pipe_surface *psurf2 = pctx->create_surface(pctx, psurf->texture, psurf);
+ pipe_resource_reference(&psurf2->texture, NULL);
+ struct zink_ctx_surface *csurf2 = (struct zink_ctx_surface *)psurf2;
+ zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, csurf2->surf);
+ pctx->surface_destroy(pctx, psurf2);
+ csurf->needs_mutable = false;
}
static void
@@ -2047,99 +3602,188 @@ zink_set_framebuffer_state(struct pipe_context *pctx,
const struct pipe_framebuffer_state *state)
{
struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ unsigned samples = state->nr_cbufs || state->zsbuf ? 0 : state->samples;
+ unsigned w = ctx->fb_state.width;
+ unsigned h = ctx->fb_state.height;
+ unsigned layers = MAX2(zink_framebuffer_get_num_layers(state), 1);
+ bool flush_clears = ctx->clears_enabled &&
+ (ctx->dynamic_fb.info.layerCount != layers ||
+ state->width != w || state->height != h);
+ if (ctx->clears_enabled && !flush_clears) {
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ if (i >= state->nr_cbufs || !ctx->fb_state.cbufs[i] || !state->cbufs[i])
+ flush_clears |= zink_fb_clear_enabled(ctx, i);
+ else if (zink_fb_clear_enabled(ctx, i) && ctx->fb_state.cbufs[i] != state->cbufs[i]) {
+ struct zink_surface *a = zink_csurface(ctx->fb_state.cbufs[i]);
+ struct zink_surface *b = zink_csurface(state->cbufs[i]);
+ if (a == b)
+ continue;
+ if (!a || !b || memcmp(&a->base.u.tex, &b->base.u.tex, sizeof(b->base.u.tex)) ||
+ a->base.texture != b->base.texture)
+ flush_clears = true;
+ else if (a->base.format != b->base.format)
+ zink_fb_clear_rewrite(ctx, i, a->base.format, b->base.format);
+ }
+ }
+ }
+ if (ctx->fb_state.zsbuf != state->zsbuf)
+ flush_clears |= zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS);
+ if (flush_clears) {
+ bool queries_disabled = ctx->queries_disabled;
+ ctx->queries_disabled = true;
+ zink_batch_rp(ctx);
+ ctx->queries_disabled = queries_disabled;
+ }
+ /* need to ensure we start a new rp on next draw */
+ zink_batch_no_rp_safe(ctx);
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
- struct pipe_surface *surf = ctx->fb_state.cbufs[i];
- unbind_fb_surface(ctx, surf, i >= state->nr_cbufs || surf != state->cbufs[i]);
+ struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
+ if (i < state->nr_cbufs)
+ ctx->rp_changed |= !!zink_transient_surface(psurf) != !!zink_transient_surface(state->cbufs[i]);
+ unbind_fb_surface(ctx, psurf, i, i >= state->nr_cbufs || psurf != state->cbufs[i]);
+ if (psurf && ctx->needs_present == zink_resource(psurf->texture))
+ ctx->needs_present = NULL;
}
if (ctx->fb_state.zsbuf) {
- struct pipe_surface *surf = ctx->fb_state.zsbuf;
- struct zink_resource *res = zink_resource(surf->texture);
- bool changed = surf != state->zsbuf;
- unbind_fb_surface(ctx, surf, changed);
+ struct pipe_surface *psurf = ctx->fb_state.zsbuf;
+ struct zink_resource *res = zink_resource(psurf->texture);
+ bool changed = psurf != state->zsbuf;
+ unbind_fb_surface(ctx, psurf, PIPE_MAX_COLOR_BUFS, changed);
+ if (!changed)
+ ctx->rp_changed |= !!zink_transient_surface(psurf) != !!zink_transient_surface(state->zsbuf);
if (changed && unlikely(res->obj->needs_zs_evaluate))
/* have to flush zs eval while the sample location data still exists,
* so just throw some random barrier */
- zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
}
/* renderpass changes if the number or types of attachments change */
ctx->rp_changed |= ctx->fb_state.nr_cbufs != state->nr_cbufs;
ctx->rp_changed |= !!ctx->fb_state.zsbuf != !!state->zsbuf;
-
- unsigned w = ctx->fb_state.width;
- unsigned h = ctx->fb_state.height;
+ if (ctx->fb_state.nr_cbufs != state->nr_cbufs) {
+ ctx->blend_state_changed |= screen->have_full_ds3;
+ if (state->nr_cbufs && screen->have_full_ds3)
+ ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_ON) | BITFIELD_BIT(ZINK_DS3_BLEND_WRITE) | BITFIELD_BIT(ZINK_DS3_BLEND_EQ);
+ }
util_copy_framebuffer_state(&ctx->fb_state, state);
- zink_update_fbfetch(ctx);
- unsigned prev_void_alpha_attachments = ctx->gfx_pipeline_state.void_alpha_attachments;
- ctx->gfx_pipeline_state.void_alpha_attachments = 0;
+ ctx->rp_changed |= zink_update_fbfetch(ctx);
+ ctx->transient_attachments = 0;
+ ctx->fb_layer_mismatch = 0;
+
+ ctx->dynamic_fb.info.renderArea.offset.x = 0;
+ ctx->dynamic_fb.info.renderArea.offset.y = 0;
+ ctx->dynamic_fb.info.renderArea.extent.width = state->width;
+ ctx->dynamic_fb.info.renderArea.extent.height = state->height;
+ ctx->dynamic_fb.info.colorAttachmentCount = ctx->fb_state.nr_cbufs;
+ ctx->rp_changed |= ctx->dynamic_fb.info.layerCount != layers;
+ ctx->dynamic_fb.info.layerCount = layers;
+ ctx->gfx_pipeline_state.rendering_info.colorAttachmentCount = ctx->fb_state.nr_cbufs;
+
+ ctx->void_clears = 0;
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
- struct pipe_surface *surf = ctx->fb_state.cbufs[i];
- if (surf) {
- zink_resource(surf->texture)->fb_binds++;
- ctx->gfx_pipeline_state.void_alpha_attachments |= util_format_has_alpha1(surf->format) ? BITFIELD_BIT(i) : 0;
+ struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
+ if (psurf) {
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ if (transient || psurf->nr_samples)
+ ctx->transient_attachments |= BITFIELD_BIT(i);
+ if (!samples)
+ samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? psurf->nr_samples : 1);
+ struct zink_resource *res = zink_resource(psurf->texture);
+ check_framebuffer_surface_mutable(pctx, psurf);
+ if (zink_csurface(psurf)->info.layerCount > layers)
+ ctx->fb_layer_mismatch |= BITFIELD_BIT(i);
+ if (res->modifiers) {
+ assert(!ctx->needs_present || ctx->needs_present == res);
+ ctx->needs_present = res;
+ }
+ if (res->obj->dt) {
+ /* #6274 */
+ if (!zink_screen(ctx->base.screen)->info.have_KHR_swapchain_mutable_format &&
+ psurf->format != res->base.b.format) {
+ static bool warned = false;
+ if (!warned) {
+ mesa_loge("zink: SRGB framebuffer unsupported without KHR_swapchain_mutable_format");
+ warned = true;
+ }
+ }
+ }
+ res->fb_bind_count++;
+ res->fb_binds |= BITFIELD_BIT(i);
+ batch_ref_fb_surface(ctx, ctx->fb_state.cbufs[i]);
+ if (util_format_has_alpha1(psurf->format)) {
+ if (!res->valid && !zink_fb_clear_full_exists(ctx, i))
+ ctx->void_clears |= (PIPE_CLEAR_COLOR0 << i);
+ }
}
}
- if (ctx->gfx_pipeline_state.void_alpha_attachments != prev_void_alpha_attachments)
- ctx->gfx_pipeline_state.dirty = true;
+ unsigned depth_bias_scale_factor = ctx->depth_bias_scale_factor;
if (ctx->fb_state.zsbuf) {
- struct pipe_surface *surf = ctx->fb_state.zsbuf;
- zink_resource(surf->texture)->fb_binds++;
+ struct pipe_surface *psurf = ctx->fb_state.zsbuf;
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ check_framebuffer_surface_mutable(pctx, psurf);
+ batch_ref_fb_surface(ctx, ctx->fb_state.zsbuf);
+ if (transient || psurf->nr_samples)
+ ctx->transient_attachments |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
+ if (!samples)
+ samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? psurf->nr_samples : 1);
+ if (zink_csurface(psurf)->info.layerCount > layers)
+ ctx->fb_layer_mismatch |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
+ zink_resource(psurf->texture)->fb_bind_count++;
+ zink_resource(psurf->texture)->fb_binds |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
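+      /* select the scale factor used for unscaled depth bias (offset_units_unscaled):
+       * fixed-point Z16/Z24 formats use per-driver workaround values, while 32-bit
+       * depth formats use 1<<23, presumably matching float mantissa precision
+       */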
+ switch (psurf->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z16_UNORM_S8_UINT:
+ ctx->depth_bias_scale_factor = zink_screen(ctx->base.screen)->driver_workarounds.z16_unscaled_bias;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X24S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ ctx->depth_bias_scale_factor = zink_screen(ctx->base.screen)->driver_workarounds.z24_unscaled_bias;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_UNORM:
+ ctx->depth_bias_scale_factor = 1<<23;
+ break;
+ default:
+ ctx->depth_bias_scale_factor = 0;
+ }
+ } else {
+ ctx->depth_bias_scale_factor = 0;
}
+ if (depth_bias_scale_factor != ctx->depth_bias_scale_factor &&
+ ctx->rast_state && ctx->rast_state->base.offset_units_unscaled)
+ ctx->rast_state_changed = true;
+ rebind_fb_state(ctx, NULL, true);
+ ctx->fb_state.samples = MAX2(samples, 1);
+ zink_update_framebuffer_state(ctx);
if (ctx->fb_state.width != w || ctx->fb_state.height != h)
ctx->scissor_changed = true;
- rebind_fb_state(ctx, NULL, true);
- ctx->fb_state.samples = util_framebuffer_get_num_samples(state);
- /* get_framebuffer adds a ref if the fb is reused or created;
- * always do get_framebuffer first to avoid deleting the same fb
- * we're about to use
- */
- struct zink_framebuffer *fb = ctx->get_framebuffer(ctx);
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (ctx->framebuffer && !screen->info.have_KHR_imageless_framebuffer) {
- simple_mtx_lock(&screen->framebuffer_mtx);
- struct hash_entry *he = _mesa_hash_table_search(&screen->framebuffer_cache, &ctx->framebuffer->state);
- if (ctx->framebuffer && !ctx->framebuffer->state.num_attachments) {
- /* if this has no attachments then its lifetime has ended */
- _mesa_hash_table_remove(&screen->framebuffer_cache, he);
- he = NULL;
- /* ensure an unflushed fb doesn't get destroyed by deferring it */
- util_dynarray_append(&ctx->batch.state->dead_framebuffers, struct zink_framebuffer*, ctx->framebuffer);
- ctx->framebuffer = NULL;
- }
- /* a framebuffer loses 1 ref every time we unset it;
- * we do NOT add refs here, as the ref has already been added in
- * get_framebuffer()
- */
- if (zink_framebuffer_reference(screen, &ctx->framebuffer, NULL) && he)
- _mesa_hash_table_remove(&screen->framebuffer_cache, he);
- simple_mtx_unlock(&screen->framebuffer_mtx);
- }
- ctx->fb_changed |= ctx->framebuffer != fb;
- ctx->framebuffer = fb;
uint8_t rast_samples = ctx->fb_state.samples - 1;
- /* update the shader key if applicable:
- * if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
- * in GL, rast_samples==1 means ignore gl_SampleMask[]
- * in VK, gl_SampleMask[] is never ignored
- */
- if (rast_samples != ctx->gfx_pipeline_state.rast_samples &&
- (!ctx->gfx_stages[PIPE_SHADER_FRAGMENT] ||
- ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK)))
- zink_set_fs_key(ctx)->samples = ctx->fb_state.samples > 0;
+ if (rast_samples != ctx->gfx_pipeline_state.rast_samples)
+ zink_update_fs_key_samples(ctx);
if (ctx->gfx_pipeline_state.rast_samples != rast_samples) {
ctx->sample_locations_changed |= ctx->gfx_pipeline_state.sample_locations_enabled;
- ctx->gfx_pipeline_state.dirty = true;
+ zink_flush_dgc_if_enabled(ctx);
+ if (screen->have_full_ds3)
+ ctx->sample_mask_changed = true;
+ else
+ ctx->gfx_pipeline_state.dirty = true;
}
ctx->gfx_pipeline_state.rast_samples = rast_samples;
- /* need to ensure we start a new rp on next draw */
- zink_batch_no_rp(ctx);
/* this is an ideal time to oom flush since it won't split a renderpass */
- if (ctx->oom_flush)
+ if (ctx->oom_flush && !ctx->unordered_blitting)
flush_batch(ctx, false);
+ else
+ update_layered_rendering_state(ctx);
+
+ ctx->rp_tc_info_updated = !ctx->blitting;
}
static void
@@ -2148,14 +3792,32 @@ zink_set_blend_color(struct pipe_context *pctx,
{
struct zink_context *ctx = zink_context(pctx);
memcpy(ctx->blend_constants, color->color, sizeof(float) * 4);
+
+ ctx->blend_color_changed = true;
+ zink_flush_dgc_if_enabled(ctx);
}
static void
zink_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
{
struct zink_context *ctx = zink_context(pctx);
+ if (ctx->gfx_pipeline_state.sample_mask == sample_mask)
+ return;
ctx->gfx_pipeline_state.sample_mask = sample_mask;
+ zink_flush_dgc_if_enabled(ctx);
+ if (zink_screen(pctx->screen)->have_full_ds3)
+ ctx->sample_mask_changed = true;
+ else
+ ctx->gfx_pipeline_state.dirty = true;
+}
+
+static void
+zink_set_min_samples(struct pipe_context *pctx, unsigned min_samples)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ ctx->gfx_pipeline_state.min_samples = min_samples - 1;
ctx->gfx_pipeline_state.dirty = true;
+ zink_flush_dgc_if_enabled(ctx);
}
static void
@@ -2170,402 +3832,7 @@ zink_set_sample_locations(struct pipe_context *pctx, size_t size, const uint8_t
if (locations)
memcpy(ctx->sample_locations, locations, size);
-}
-
-static VkAccessFlags
-access_src_flags(VkImageLayout layout)
-{
- switch (layout) {
- case VK_IMAGE_LAYOUT_UNDEFINED:
- return 0;
-
- case VK_IMAGE_LAYOUT_GENERAL:
- return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
-
- case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
- return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
- return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
-
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
- case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
- return VK_ACCESS_SHADER_READ_BIT;
-
- case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
- return VK_ACCESS_TRANSFER_READ_BIT;
-
- case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
- return VK_ACCESS_TRANSFER_WRITE_BIT;
-
- case VK_IMAGE_LAYOUT_PREINITIALIZED:
- return VK_ACCESS_HOST_WRITE_BIT;
-
- case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
- return 0;
-
- default:
- unreachable("unexpected layout");
- }
-}
-
-static VkAccessFlags
-access_dst_flags(VkImageLayout layout)
-{
- switch (layout) {
- case VK_IMAGE_LAYOUT_UNDEFINED:
- return 0;
-
- case VK_IMAGE_LAYOUT_GENERAL:
- return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
-
- case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
- return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
- return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-
- case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
- return VK_ACCESS_SHADER_READ_BIT;
-
- case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
- return VK_ACCESS_TRANSFER_READ_BIT;
-
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
- return VK_ACCESS_SHADER_READ_BIT;
-
- case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
- return VK_ACCESS_TRANSFER_WRITE_BIT;
-
- case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
- return 0;
-
- default:
- unreachable("unexpected layout");
- }
-}
-
-static VkPipelineStageFlags
-pipeline_dst_stage(VkImageLayout layout)
-{
- switch (layout) {
- case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
- return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
- return VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
-
- case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
- return VK_PIPELINE_STAGE_TRANSFER_BIT;
- case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
- return VK_PIPELINE_STAGE_TRANSFER_BIT;
-
- case VK_IMAGE_LAYOUT_GENERAL:
- return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-
- case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
- return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
-
- default:
- return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
- }
-}
-
-#define ALL_READ_ACCESS_FLAGS \
- (VK_ACCESS_INDIRECT_COMMAND_READ_BIT | \
- VK_ACCESS_INDEX_READ_BIT | \
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | \
- VK_ACCESS_UNIFORM_READ_BIT | \
- VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | \
- VK_ACCESS_SHADER_READ_BIT | \
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | \
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | \
- VK_ACCESS_TRANSFER_READ_BIT |\
- VK_ACCESS_HOST_READ_BIT |\
- VK_ACCESS_MEMORY_READ_BIT |\
- VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |\
- VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT |\
- VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT |\
- VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR |\
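+/* like zink_batch_no_rp_safe(), but also resets the tc renderpass info when
+ * renderpass tracking is enabled
+ */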
- VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV |\
- VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT |\
- VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV |\
- VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV |\
- VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV)
-
-
-bool
-zink_resource_access_is_write(VkAccessFlags flags)
-{
- return (flags & ALL_READ_ACCESS_FLAGS) != flags;
-}
-
-bool
-zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- if (!pipeline)
- pipeline = pipeline_dst_stage(new_layout);
- if (!flags)
- flags = access_dst_flags(new_layout);
- return res->layout != new_layout || (res->obj->access_stage & pipeline) != pipeline ||
- (res->obj->access & flags) != flags ||
- zink_resource_access_is_write(res->obj->access) ||
- zink_resource_access_is_write(flags);
-}
-
-bool
-zink_resource_image_barrier_init(VkImageMemoryBarrier *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- if (!pipeline)
- pipeline = pipeline_dst_stage(new_layout);
- if (!flags)
- flags = access_dst_flags(new_layout);
-
- VkImageSubresourceRange isr = {
- res->aspect,
- 0, VK_REMAINING_MIP_LEVELS,
- 0, VK_REMAINING_ARRAY_LAYERS
- };
- *imb = (VkImageMemoryBarrier){
- VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- NULL,
- res->obj->access ? res->obj->access : access_src_flags(res->layout),
- flags,
- res->layout,
- new_layout,
- VK_QUEUE_FAMILY_IGNORED,
- VK_QUEUE_FAMILY_IGNORED,
- res->obj->image,
- isr
- };
- return res->obj->needs_zs_evaluate || zink_resource_image_needs_barrier(res, new_layout, flags, pipeline);
-}
-
-static inline bool
-is_shader_pipline_stage(VkPipelineStageFlags pipeline)
-{
- return pipeline & (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
-}
-
-static void
-resource_check_defer_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkPipelineStageFlags pipeline)
-{
- assert(res->obj->is_buffer);
- if (res->bind_count[0] - res->so_bind_count > 0) {
- if ((res->obj->is_buffer && res->vbo_bind_mask && !(pipeline & VK_PIPELINE_STAGE_VERTEX_INPUT_BIT)) ||
- ((!res->obj->is_buffer || util_bitcount(res->vbo_bind_mask) != res->bind_count[0]) && !is_shader_pipline_stage(pipeline)))
- /* gfx rebind */
- _mesa_set_add(ctx->need_barriers[0], res);
- }
- if (res->bind_count[1] && !(pipeline & VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT))
- /* compute rebind */
- _mesa_set_add(ctx->need_barriers[1], res);
-}
-
-static inline VkCommandBuffer
-get_cmdbuf(struct zink_context *ctx, struct zink_resource *res)
-{
- if ((res->obj->access && !res->obj->unordered_barrier) || !ctx->batch.in_rp) {
- zink_batch_no_rp(ctx);
- res->obj->unordered_barrier = false;
- return ctx->batch.state->cmdbuf;
- }
- res->obj->unordered_barrier = true;
- ctx->batch.state->has_barriers = true;
- return ctx->batch.state->barrier_cmdbuf;
-}
-
-static void
-resource_check_defer_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout layout, VkPipelineStageFlags pipeline)
-{
- assert(!res->obj->is_buffer);
-
- bool is_compute = pipeline == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
- /* if this is a non-shader barrier and there are binds, always queue a shader barrier */
- bool is_shader = is_shader_pipline_stage(pipeline);
- if ((is_shader || !res->bind_count[is_compute]) &&
- /* if no layout change is needed between gfx and compute, do nothing */
- !res->bind_count[!is_compute] && (!is_compute || !res->fb_binds))
- return;
-
- if (res->bind_count[!is_compute] && is_shader) {
- /* if the layout is the same between gfx and compute, do nothing */
- if (layout == zink_descriptor_util_image_layout_eval(res, !is_compute))
- return;
- }
- /* queue a layout change if a layout change will be needed */
- if (res->bind_count[!is_compute])
- _mesa_set_add(ctx->need_barriers[!is_compute], res);
- /* also queue a layout change if this is a non-shader layout */
- if (res->bind_count[is_compute] && !is_shader)
- _mesa_set_add(ctx->need_barriers[is_compute], res);
-}
-
-void
-zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res,
- VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- VkImageMemoryBarrier imb;
- if (!pipeline)
- pipeline = pipeline_dst_stage(new_layout);
-
- if (!zink_resource_image_barrier_init(&imb, res, new_layout, flags, pipeline))
- return;
- /* only barrier if we're changing layout or doing something besides read -> read */
- VkCommandBuffer cmdbuf = get_cmdbuf(ctx, res);
- assert(new_layout);
- if (!res->obj->access_stage)
- imb.srcAccessMask = 0;
- if (res->obj->needs_zs_evaluate)
- imb.pNext = &res->obj->zs_evaluate;
- res->obj->needs_zs_evaluate = false;
- if (res->dmabuf_acquire) {
- imb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_FOREIGN_EXT;
- imb.dstQueueFamilyIndex = zink_screen(ctx->base.screen)->gfx_queue;
- res->dmabuf_acquire = false;
- }
- VKCTX(CmdPipelineBarrier)(
- cmdbuf,
- res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- pipeline,
- 0,
- 0, NULL,
- 0, NULL,
- 1, &imb
- );
-
- resource_check_defer_image_barrier(ctx, res, new_layout, pipeline);
-
- if (res->obj->unordered_barrier) {
- res->obj->access |= imb.dstAccessMask;
- res->obj->access_stage |= pipeline;
- } else {
- res->obj->access = imb.dstAccessMask;
- res->obj->access_stage = pipeline;
- }
- res->layout = new_layout;
-}
-
-
-VkPipelineStageFlags
-zink_pipeline_flags_from_stage(VkShaderStageFlagBits stage)
-{
- switch (stage) {
- case VK_SHADER_STAGE_VERTEX_BIT:
- return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
- case VK_SHADER_STAGE_FRAGMENT_BIT:
- return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
- case VK_SHADER_STAGE_GEOMETRY_BIT:
- return VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
- case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
- return VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT;
- case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
- return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
- case VK_SHADER_STAGE_COMPUTE_BIT:
- return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
- default:
- unreachable("unknown shader stage bit");
- }
-}
-
-ALWAYS_INLINE static VkPipelineStageFlags
-pipeline_access_stage(VkAccessFlags flags)
-{
- if (flags & (VK_ACCESS_UNIFORM_READ_BIT |
- VK_ACCESS_SHADER_READ_BIT |
- VK_ACCESS_SHADER_WRITE_BIT))
- return VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV |
- VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV |
- VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
- return VK_PIPELINE_STAGE_TRANSFER_BIT;
-}
-
-ALWAYS_INLINE static bool
-zink_resource_buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- if (!res->obj->access || !res->obj->access_stage)
- return true;
- if (!pipeline)
- pipeline = pipeline_access_stage(flags);
- return zink_resource_access_is_write(res->obj->access) ||
- zink_resource_access_is_write(flags) ||
- ((res->obj->access_stage & pipeline) != pipeline && !(res->obj->access_stage & (pipeline - 1))) ||
- (res->obj->access & flags) != flags;
-}
-
-void
-zink_fake_buffer_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- res->obj->access = flags;
- res->obj->access_stage = pipeline;
-}
-
-void
-zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- VkMemoryBarrier bmb;
- if (!pipeline)
- pipeline = pipeline_access_stage(flags);
- if (!zink_resource_buffer_needs_barrier(res, flags, pipeline))
- return;
-
- bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
- bmb.pNext = NULL;
- bmb.srcAccessMask = res->obj->access;
- bmb.dstAccessMask = flags;
- if (!res->obj->access_stage)
- bmb.srcAccessMask = 0;
- VkCommandBuffer cmdbuf = get_cmdbuf(ctx, res);
- /* only barrier if we're changing layout or doing something besides read -> read */
- VKCTX(CmdPipelineBarrier)(
- cmdbuf,
- res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access),
- pipeline,
- 0,
- 1, &bmb,
- 0, NULL,
- 0, NULL
- );
-
- resource_check_defer_buffer_barrier(ctx, res, pipeline);
-
- if (res->obj->unordered_barrier) {
- res->obj->access |= bmb.dstAccessMask;
- res->obj->access_stage |= pipeline;
- } else {
- res->obj->access = bmb.dstAccessMask;
- res->obj->access_stage = pipeline;
- }
-}
-
-bool
-zink_resource_needs_barrier(struct zink_resource *res, VkImageLayout layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
-{
- if (res->base.b.target == PIPE_BUFFER)
- return zink_resource_buffer_needs_barrier(res, flags, pipeline);
- return zink_resource_image_needs_barrier(res, layout, flags, pipeline);
-}
-
-VkShaderStageFlagBits
-zink_shader_stage(enum pipe_shader_type type)
-{
- VkShaderStageFlagBits stages[] = {
- [PIPE_SHADER_VERTEX] = VK_SHADER_STAGE_VERTEX_BIT,
- [PIPE_SHADER_FRAGMENT] = VK_SHADER_STAGE_FRAGMENT_BIT,
- [PIPE_SHADER_GEOMETRY] = VK_SHADER_STAGE_GEOMETRY_BIT,
- [PIPE_SHADER_TESS_CTRL] = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
- [PIPE_SHADER_TESS_EVAL] = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
- [PIPE_SHADER_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
- };
- return stages[type];
+ zink_flush_dgc_if_enabled(ctx);
}
static void
@@ -2577,32 +3844,85 @@ zink_flush(struct pipe_context *pctx,
bool deferred = flags & PIPE_FLUSH_DEFERRED;
bool deferred_fence = false;
struct zink_batch *batch = &ctx->batch;
- struct zink_fence *fence = NULL;
+ struct zink_batch_state *bs = NULL;
struct zink_screen *screen = zink_screen(ctx->base.screen);
- unsigned submit_count = 0;
+ VkSemaphore export_sem = VK_NULL_HANDLE;
/* triggering clears will force has_work */
- if (!deferred && ctx->clears_enabled)
+ if (!deferred && ctx->clears_enabled) {
+ /* if fbfetch outputs are active, disable them when flushing clears */
+ unsigned fbfetch_outputs = ctx->fbfetch_outputs;
+ if (fbfetch_outputs) {
+ ctx->fbfetch_outputs = 0;
+ ctx->rp_changed = true;
+ }
+ if (ctx->fb_state.zsbuf)
+ zink_blit_barriers(ctx, NULL, zink_resource(ctx->fb_state.zsbuf->texture), false);
+
+ for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ if (ctx->fb_state.cbufs[i])
+ zink_blit_barriers(ctx, NULL, zink_resource(ctx->fb_state.cbufs[i]->texture), false);
+ }
+ ctx->blitting = true;
/* start rp to do all the clears */
- zink_begin_render_pass(ctx);
+ zink_batch_rp(ctx);
+ ctx->blitting = false;
+ ctx->fbfetch_outputs = fbfetch_outputs;
+ ctx->rp_changed |= fbfetch_outputs > 0;
+ }
+
+ if (flags & PIPE_FLUSH_END_OF_FRAME) {
+#ifdef HAVE_RENDERDOC_APP_H
+ p_atomic_inc(&screen->renderdoc_frame);
+#endif
+ if (ctx->needs_present && ctx->needs_present->obj->dt_idx != UINT32_MAX &&
+ zink_is_swapchain(ctx->needs_present)) {
+ zink_kopper_readback_update(ctx, ctx->needs_present);
+ screen->image_barrier(ctx, ctx->needs_present, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+ }
+ ctx->needs_present = NULL;
+ }
+
+ if (flags & PIPE_FLUSH_FENCE_FD) {
+ assert(!deferred && pfence);
+ const VkExportSemaphoreCreateInfo esci = {
+ .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ };
+ const VkSemaphoreCreateInfo sci = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &esci,
+ };
+ VkResult result = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &export_sem);
+ if (zink_screen_handle_vkresult(screen, result)) {
+ assert(!batch->state->signal_semaphore);
+ batch->state->signal_semaphore = export_sem;
+ batch->has_work = true;
+ } else {
+ mesa_loge("ZINK: vkCreateSemaphore failed (%s)", vk_Result_to_str(result));
+
+ /* let the flush proceed; leave the sem null so fence_get_fd returns -1 */
+ export_sem = VK_NULL_HANDLE;
+ }
+ }
if (!batch->has_work) {
if (pfence) {
/* reuse last fence */
- fence = ctx->last_fence;
+ bs = ctx->last_batch_state;
}
if (!deferred) {
- struct zink_batch_state *last = zink_batch_state(ctx->last_fence);
+ struct zink_batch_state *last = ctx->last_batch_state;
if (last) {
sync_flush(ctx, last);
if (last->is_device_lost)
check_device_lost(ctx);
}
}
- tc_driver_internal_flush_notify(ctx->tc);
+ if (ctx->tc && !ctx->track_renderpasses)
+ tc_driver_internal_flush_notify(ctx->tc);
} else {
- fence = &batch->state->fence;
- submit_count = batch->state->submit_count;
+ bs = batch->state;
if (deferred && !(flags & PIPE_FLUSH_FENCE_FD) && pfence)
deferred_fence = true;
else
@@ -2622,35 +3942,33 @@ zink_flush(struct pipe_context *pctx,
*pfence = (struct pipe_fence_handle *)mfence;
}
- mfence->fence = fence;
- if (fence)
- mfence->submit_count = submit_count;
+ assert(!mfence->fence);
+ mfence->fence = &bs->fence;
+ mfence->sem = export_sem;
+ if (bs) {
+ mfence->submit_count = bs->usage.submit_count;
+ util_dynarray_append(&bs->fence.mfences, struct zink_tc_fence *, mfence);
+ }
+ if (export_sem) {
+ pipe_reference(NULL, &mfence->reference);
+ util_dynarray_append(&ctx->batch.state->fences, struct zink_tc_fence*, mfence);
+ }
if (deferred_fence) {
- assert(fence);
+ assert(bs);
mfence->deferred_ctx = pctx;
- assert(!ctx->deferred_fence || ctx->deferred_fence == fence);
- ctx->deferred_fence = fence;
+ assert(!ctx->deferred_fence || ctx->deferred_fence == &bs->fence);
+ ctx->deferred_fence = &bs->fence;
}
- if (!fence || flags & TC_FLUSH_ASYNC) {
+ if (!bs || flags & TC_FLUSH_ASYNC) {
if (!util_queue_fence_is_signalled(&mfence->ready))
util_queue_fence_signal(&mfence->ready);
}
}
- if (fence) {
+ if (bs) {
if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC)))
- sync_flush(ctx, zink_batch_state(fence));
-
- if (flags & PIPE_FLUSH_END_OF_FRAME && !(flags & TC_FLUSH_ASYNC) && !deferred) {
- /* if the first frame has not yet occurred, we need an explicit fence here
- * in some cases in order to correctly draw the first frame, though it's
- * unknown at this time why this is the case
- */
- if (!ctx->first_frame_done)
- zink_vkfence_wait(screen, fence, PIPE_TIMEOUT_INFINITE);
- ctx->first_frame_done = true;
- }
+ sync_flush(ctx, bs);
}
}
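/* The export_sem created above for PIPE_FLUSH_FENCE_FD is signaled when the
 * batch submits; the sync fd itself is pulled from it later, presumably in the
 * fence_get_fd path. A hedged sketch of that retrieval (example_get_sync_fd is
 * hypothetical, and it assumes the loader table exposes GetSemaphoreFdKHR):
 */
static int
example_get_sync_fd(struct zink_screen *screen, VkSemaphore export_sem)
{
   const VkSemaphoreGetFdInfoKHR sgfi = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
      .semaphore = export_sem,
      .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
   };
   int fd = -1;
   if (!export_sem || VKSCR(GetSemaphoreFdKHR)(screen->dev, &sgfi, &fd) != VK_SUCCESS)
      return -1; /* matches the "null sem -> -1" convention noted above */
   return fd;
}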
@@ -2661,51 +3979,28 @@ zink_fence_wait(struct pipe_context *pctx)
if (ctx->batch.has_work)
pctx->flush(pctx, NULL, PIPE_FLUSH_HINT_FINISH);
- if (ctx->last_fence)
+ if (ctx->last_batch_state)
stall(ctx);
}
void
-zink_wait_on_batch(struct zink_context *ctx, uint32_t batch_id)
+zink_wait_on_batch(struct zink_context *ctx, uint64_t batch_id)
{
- struct zink_batch_state *bs = ctx->batch.state;
- assert(bs);
- if (!batch_id || bs->fence.batch_id == batch_id)
+ struct zink_batch_state *bs;
+ if (!batch_id) {
/* not submitted yet */
flush_batch(ctx, true);
- if (ctx->have_timelines) {
- if (!zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, UINT64_MAX))
- check_device_lost(ctx);
- return;
+ bs = ctx->last_batch_state;
+ assert(bs);
+ batch_id = bs->fence.batch_id;
}
- simple_mtx_lock(&ctx->batch_mtx);
- struct zink_fence *fence;
-
- assert(batch_id || ctx->last_fence);
- if (ctx->last_fence && (!batch_id || batch_id == zink_batch_state(ctx->last_fence)->fence.batch_id))
- fence = ctx->last_fence;
- else {
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->batch_states, batch_id, (void*)(uintptr_t)batch_id);
- if (!he) {
- simple_mtx_unlock(&ctx->batch_mtx);
- /* if we can't find it, it either must have finished already or is on a different context */
- if (!zink_screen_check_last_finished(zink_screen(ctx->base.screen), batch_id)) {
- /* if it hasn't finished, it's on another context, so force a flush so there's something to wait on */
- ctx->batch.has_work = true;
- zink_fence_wait(&ctx->base);
- }
- return;
- }
- fence = he->data;
- }
- simple_mtx_unlock(&ctx->batch_mtx);
- assert(fence);
- sync_flush(ctx, zink_batch_state(fence));
- zink_vkfence_wait(zink_screen(ctx->base.screen), fence, PIPE_TIMEOUT_INFINITE);
+ assert(batch_id);
+ if (!zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, UINT64_MAX))
+ check_device_lost(ctx);
}
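/* zink_screen_timeline_wait() boils down to a timeline-semaphore wait on the
 * screen's monotonically increasing batch counter. A rough sketch, assuming
 * the screen-wide timeline semaphore is screen->sem and that WaitSemaphores
 * (or its KHR alias) is in the dispatch table (example_timeline_wait is
 * illustrative only):
 */
static bool
example_timeline_wait(struct zink_screen *screen, uint64_t batch_id, uint64_t timeout_ns)
{
   const VkSemaphoreWaitInfo swi = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
      .semaphoreCount = 1,
      .pSemaphores = &screen->sem,
      .pValues = &batch_id,
   };
   return VKSCR(WaitSemaphores)(screen->dev, &swi, timeout_ns) == VK_SUCCESS;
}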
bool
-zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id, bool have_lock)
+zink_check_batch_completion(struct zink_context *ctx, uint64_t batch_id)
{
assert(ctx->batch.state);
if (!batch_id)
@@ -2715,76 +4010,60 @@ zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id, bool ha
if (zink_screen_check_last_finished(zink_screen(ctx->base.screen), batch_id))
return true;
- if (ctx->have_timelines) {
- bool success = zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, 0);
- if (!success)
- check_device_lost(ctx);
- return success;
- }
- struct zink_fence *fence;
-
- if (!have_lock)
- simple_mtx_lock(&ctx->batch_mtx);
-
- if (ctx->last_fence && batch_id == zink_batch_state(ctx->last_fence)->fence.batch_id)
- fence = ctx->last_fence;
- else {
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->batch_states, batch_id, (void*)(uintptr_t)batch_id);
- /* if we can't find it, it either must have finished already or is on a different context */
- if (!he) {
- if (!have_lock)
- simple_mtx_unlock(&ctx->batch_mtx);
- /* return compare against last_finished, since this has info from all contexts */
- return zink_screen_check_last_finished(zink_screen(ctx->base.screen), batch_id);
- }
- fence = he->data;
- }
- if (!have_lock)
- simple_mtx_unlock(&ctx->batch_mtx);
- assert(fence);
- if (zink_screen(ctx->base.screen)->threaded &&
- !util_queue_fence_is_signalled(&zink_batch_state(fence)->flush_completed))
- return false;
- return zink_vkfence_wait(zink_screen(ctx->base.screen), fence, 0);
+ bool success = zink_screen_timeline_wait(zink_screen(ctx->base.screen), batch_id, 0);
+ if (!success)
+ check_device_lost(ctx);
+ return success;
}
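/* Passing a zero timeout to the timeline wait above turns it into a poll; an
 * equivalent sketch that queries the counter directly (again assuming
 * screen->sem is the timeline semaphore; example_timeline_poll is hypothetical):
 */
static bool
example_timeline_poll(struct zink_screen *screen, uint64_t batch_id)
{
   uint64_t value = 0;
   if (VKSCR(GetSemaphoreCounterValue)(screen->dev, screen->sem, &value) != VK_SUCCESS)
      return false;
   return value >= batch_id;
}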
static void
zink_texture_barrier(struct pipe_context *pctx, unsigned flags)
{
struct zink_context *ctx = zink_context(pctx);
+ VkAccessFlags dst = flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER ?
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT :
+ VK_ACCESS_SHADER_READ_BIT;
+
if (!ctx->framebuffer || !ctx->framebuffer->state.num_attachments)
return;
- VkMemoryBarrier bmb;
- bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
- bmb.pNext = NULL;
- bmb.srcAccessMask = 0;
- bmb.dstAccessMask = 0;
- zink_batch_no_rp(ctx);
- if (ctx->fb_state.zsbuf) {
- VkMemoryBarrier dmb;
- dmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ /* if this is a fb barrier, flush all pending clears */
+ if (ctx->rp_clears_enabled && dst == VK_ACCESS_INPUT_ATTACHMENT_READ_BIT)
+ zink_batch_rp(ctx);
+
+ /* only an fbfetch barrier can stay inside the renderpass; otherwise end it */
+ if (!ctx->fbfetch_outputs)
+ zink_batch_no_rp(ctx);
+
+ if (zink_screen(ctx->base.screen)->info.have_KHR_synchronization2) {
+ VkDependencyInfo dep = {0};
+ dep.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
+ dep.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
+ dep.memoryBarrierCount = 1;
+
+ VkMemoryBarrier2 dmb = {0};
+ dmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
dmb.pNext = NULL;
- dmb.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- dmb.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
- VKCTX(CmdPipelineBarrier)(
- ctx->batch.state->cmdbuf,
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
- 0,
- 1, &dmb,
- 0, NULL,
- 0, NULL
- );
+ dmb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ dmb.dstAccessMask = dst;
+ dmb.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ dmb.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
+ dep.pMemoryBarriers = &dmb;
+
+ /* in case zs fbfetch is ever supported:
+ if (ctx->fb_state.zsbuf) {
+ const VkPipelineStageFlagBits2 depth_flags = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT;
+ dmb.dstAccessMask |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ dmb.srcStageMask |= depth_flags;
+ dmb.dstStageMask |= depth_flags;
+ }
+ */
+ VKCTX(CmdPipelineBarrier2)(ctx->batch.state->cmdbuf, &dep);
} else {
- bmb.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- bmb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT;
- }
- if (ctx->fb_state.nr_cbufs > 0) {
- bmb.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- bmb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT;
- }
- if (bmb.srcAccessMask)
+ VkMemoryBarrier bmb = {0};
+ bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ bmb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ bmb.dstAccessMask = dst;
VKCTX(CmdPipelineBarrier)(
ctx->batch.state->cmdbuf,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
@@ -2794,6 +4073,7 @@ zink_texture_barrier(struct pipe_context *pctx, unsigned flags)
0, NULL,
0, NULL
);
+ }
}
static inline void
@@ -2805,7 +4085,7 @@ mem_barrier(struct zink_context *ctx, VkPipelineStageFlags src_stage, VkPipeline
mb.pNext = NULL;
mb.srcAccessMask = src;
mb.dstAccessMask = dst;
- zink_end_render_pass(ctx);
+ zink_batch_no_rp(ctx);
VKCTX(CmdPipelineBarrier)(batch->state->cmdbuf, src_stage, dst_stage, 0, 1, &mb, 0, NULL, 0, NULL);
}
@@ -2829,11 +4109,11 @@ zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute)
VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_UNIFORM_READ_BIT);
+ if (ctx->memory_barrier & PIPE_BARRIER_INDIRECT_BUFFER)
+ mem_barrier(ctx, src, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
if (!is_compute) {
- if (ctx->memory_barrier & PIPE_BARRIER_INDIRECT_BUFFER)
- mem_barrier(ctx, src, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
- VK_ACCESS_SHADER_WRITE_BIT,
- VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
if (ctx->memory_barrier & PIPE_BARRIER_VERTEX_BUFFER)
mem_barrier(ctx, gfx_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
@@ -2878,227 +4158,19 @@ zink_flush_resource(struct pipe_context *pctx,
struct pipe_resource *pres)
{
struct zink_context *ctx = zink_context(pctx);
- /* TODO: this is not futureproof and should be updated once proper
- * WSI support is added
- */
- if (pres->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
- pipe_resource_reference(&ctx->batch.state->flush_res, pres);
-}
-
-void
-zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
- unsigned dst_offset, unsigned src_offset, unsigned size)
-{
- VkBufferCopy region;
- region.srcOffset = src_offset;
- region.dstOffset = dst_offset;
- region.size = size;
-
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- zink_batch_reference_resource_rw(batch, src, false);
- zink_batch_reference_resource_rw(batch, dst, true);
- util_range_add(&dst->base.b, &dst->valid_buffer_range, dst_offset, dst_offset + size);
- zink_resource_buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0);
- zink_resource_buffer_barrier(ctx, dst, VK_ACCESS_TRANSFER_WRITE_BIT, 0);
- VKCTX(CmdCopyBuffer)(batch->state->cmdbuf, src->obj->buffer, dst->obj->buffer, 1, &region);
-}
-
-void
-zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
- unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
- unsigned src_level, const struct pipe_box *src_box, enum pipe_map_flags map_flags)
-{
- struct zink_resource *img = dst->base.b.target == PIPE_BUFFER ? src : dst;
- struct zink_resource *buf = dst->base.b.target == PIPE_BUFFER ? dst : src;
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
-
- bool buf2img = buf == src;
-
- if (buf2img) {
- zink_resource_image_barrier(ctx, img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, 0);
- zink_resource_buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
- } else {
- zink_resource_image_barrier(ctx, img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0);
- zink_resource_buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
- util_range_add(&dst->base.b, &dst->valid_buffer_range, dstx, dstx + src_box->width);
- }
-
- VkBufferImageCopy region = {0};
- region.bufferOffset = buf2img ? src_box->x : dstx;
- region.bufferRowLength = 0;
- region.bufferImageHeight = 0;
- region.imageSubresource.mipLevel = buf2img ? dst_level : src_level;
- switch (img->base.b.target) {
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_1D_ARRAY:
- /* these use layer */
- region.imageSubresource.baseArrayLayer = buf2img ? dstz : src_box->z;
- region.imageSubresource.layerCount = src_box->depth;
- region.imageOffset.z = 0;
- region.imageExtent.depth = 1;
- break;
- case PIPE_TEXTURE_3D:
- /* this uses depth */
- region.imageSubresource.baseArrayLayer = 0;
- region.imageSubresource.layerCount = 1;
- region.imageOffset.z = buf2img ? dstz : src_box->z;
- region.imageExtent.depth = src_box->depth;
- break;
- default:
- /* these must only copy one layer */
- region.imageSubresource.baseArrayLayer = 0;
- region.imageSubresource.layerCount = 1;
- region.imageOffset.z = 0;
- region.imageExtent.depth = 1;
- }
- region.imageOffset.x = buf2img ? dstx : src_box->x;
- region.imageOffset.y = buf2img ? dsty : src_box->y;
-
- region.imageExtent.width = src_box->width;
- region.imageExtent.height = src_box->height;
-
- zink_batch_reference_resource_rw(batch, img, buf2img);
- zink_batch_reference_resource_rw(batch, buf, !buf2img);
-
- /* we're using u_transfer_helper_deinterleave, which means we'll be getting PIPE_MAP_* usage
- * to indicate whether to copy either the depth or stencil aspects
- */
- unsigned aspects = 0;
- if (map_flags) {
- assert((map_flags & (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)) !=
- (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY));
- if (map_flags & PIPE_MAP_DEPTH_ONLY)
- aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
- else if (map_flags & PIPE_MAP_STENCIL_ONLY)
- aspects = VK_IMAGE_ASPECT_STENCIL_BIT;
- }
- if (!aspects)
- aspects = img->aspect;
- while (aspects) {
- int aspect = 1 << u_bit_scan(&aspects);
- region.imageSubresource.aspectMask = aspect;
-
- /* this may or may not work with multisampled depth/stencil buffers depending on the driver implementation:
- *
- * srcImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT
- * - vkCmdCopyImageToBuffer spec
- *
- * dstImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT
- * - vkCmdCopyBufferToImage spec
- */
- if (buf2img)
- VKCTX(CmdCopyBufferToImage)(batch->state->cmdbuf, buf->obj->buffer, img->obj->image, img->layout, 1, &region);
- else
- VKCTX(CmdCopyImageToBuffer)(batch->state->cmdbuf, img->obj->image, img->layout, buf->obj->buffer, 1, &region);
- }
-}
-
-static void
-zink_resource_copy_region(struct pipe_context *pctx,
- struct pipe_resource *pdst,
- unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *psrc,
- unsigned src_level, const struct pipe_box *src_box)
-{
- struct zink_resource *dst = zink_resource(pdst);
- struct zink_resource *src = zink_resource(psrc);
- struct zink_context *ctx = zink_context(pctx);
- if (dst->base.b.target != PIPE_BUFFER && src->base.b.target != PIPE_BUFFER) {
- VkImageCopy region = {0};
- if (util_format_get_num_planes(src->base.b.format) == 1 &&
- util_format_get_num_planes(dst->base.b.format) == 1) {
- /* If neither the calling command’s srcImage nor the calling command’s dstImage
- * has a multi-planar image format then the aspectMask member of srcSubresource
- * and dstSubresource must match
- *
- * -VkImageCopy spec
- */
- assert(src->aspect == dst->aspect);
- } else
- unreachable("planar formats not yet handled");
-
- zink_fb_clears_apply_or_discard(ctx, pdst, (struct u_rect){dstx, dstx + src_box->width, dsty, dsty + src_box->height}, false);
- zink_fb_clears_apply_region(ctx, psrc, zink_rect_from_box(src_box));
-
- region.srcSubresource.aspectMask = src->aspect;
- region.srcSubresource.mipLevel = src_level;
- switch (src->base.b.target) {
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_1D_ARRAY:
- /* these use layer */
- region.srcSubresource.baseArrayLayer = src_box->z;
- region.srcSubresource.layerCount = src_box->depth;
- region.srcOffset.z = 0;
- region.extent.depth = 1;
- break;
- case PIPE_TEXTURE_3D:
- /* this uses depth */
- region.srcSubresource.baseArrayLayer = 0;
- region.srcSubresource.layerCount = 1;
- region.srcOffset.z = src_box->z;
- region.extent.depth = src_box->depth;
- break;
- default:
- /* these must only copy one layer */
- region.srcSubresource.baseArrayLayer = 0;
- region.srcSubresource.layerCount = 1;
- region.srcOffset.z = 0;
- region.extent.depth = 1;
- }
-
- region.srcOffset.x = src_box->x;
- region.srcOffset.y = src_box->y;
-
- region.dstSubresource.aspectMask = dst->aspect;
- region.dstSubresource.mipLevel = dst_level;
- switch (dst->base.b.target) {
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_1D_ARRAY:
- /* these use layer */
- region.dstSubresource.baseArrayLayer = dstz;
- region.dstSubresource.layerCount = src_box->depth;
- region.dstOffset.z = 0;
- break;
- case PIPE_TEXTURE_3D:
- /* this uses depth */
- region.dstSubresource.baseArrayLayer = 0;
- region.dstSubresource.layerCount = 1;
- region.dstOffset.z = dstz;
- break;
- default:
- /* these must only copy one layer */
- region.dstSubresource.baseArrayLayer = 0;
- region.dstSubresource.layerCount = 1;
- region.dstOffset.z = 0;
+ struct zink_resource *res = zink_resource(pres);
+ if (res->obj->dt) {
+ if (zink_kopper_acquired(res->obj->dt, res->obj->dt_idx) && (!ctx->clears_enabled || !res->fb_bind_count)) {
+ zink_batch_no_rp_safe(ctx);
+ zink_kopper_readback_update(ctx, res);
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+ zink_batch_reference_resource_rw(&ctx->batch, res, true);
+ } else {
+ ctx->needs_present = res;
}
-
- region.dstOffset.x = dstx;
- region.dstOffset.y = dsty;
- region.extent.width = src_box->width;
- region.extent.height = src_box->height;
-
- struct zink_batch *batch = &ctx->batch;
- zink_batch_no_rp(ctx);
- zink_batch_reference_resource_rw(batch, src, false);
- zink_batch_reference_resource_rw(batch, dst, true);
-
- zink_resource_setup_transfer_layouts(ctx, src, dst);
- VKCTX(CmdCopyImage)(batch->state->cmdbuf, src->obj->image, src->layout,
- dst->obj->image, dst->layout,
- 1, &region);
- } else if (dst->base.b.target == PIPE_BUFFER &&
- src->base.b.target == PIPE_BUFFER) {
- zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width);
- } else
- zink_copy_image_buffer(ctx, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0);
+ ctx->batch.swapchain = res;
+ } else if (res->dmabuf)
+ res->queue = VK_QUEUE_FAMILY_FOREIGN_EXT;
}
static struct pipe_stream_output_target *
@@ -3112,11 +4184,7 @@ zink_create_stream_output_target(struct pipe_context *pctx,
if (!t)
return NULL;
- /* using PIPE_BIND_CUSTOM here lets us create a custom pipe buffer resource,
- * which allows us to differentiate and use VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT
- * as we must for this case
- */
- t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, 4);
+ t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT, PIPE_USAGE_DEFAULT, 4);
if (!t->counter_buffer) {
FREE(t);
return NULL;
@@ -3151,6 +4219,11 @@ zink_set_stream_output_targets(struct pipe_context *pctx,
{
struct zink_context *ctx = zink_context(pctx);
+ /* counter buffer validity rules:
+ * - unbinding always sets counter_buffer_valid=false
+ * - on resume (indicated by offset==-1), the saved counter value stays valid
+ * - otherwise the counter buffer is invalidated below
+ */
+
if (num_targets == 0) {
for (unsigned i = 0; i < ctx->num_so_targets; i++) {
if (ctx->so_targets[i]) {
@@ -3169,15 +4242,8 @@ zink_set_stream_output_targets(struct pipe_context *pctx,
pipe_so_target_reference(&ctx->so_targets[i], targets[i]);
if (!t)
continue;
- struct zink_resource *res = zink_resource(t->counter_buffer);
- if (offsets[0] == (unsigned)-1)
- ctx->xfb_barrier |= zink_resource_buffer_needs_barrier(res,
- VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
- else
- ctx->xfb_barrier |= zink_resource_buffer_needs_barrier(res,
- VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
+ if (offsets[0] != (unsigned)-1)
+ t->counter_buffer_valid = false;
struct zink_resource *so = zink_resource(ctx->so_targets[i]->buffer);
if (so) {
so->so_bind_count++;
@@ -3199,6 +4265,7 @@ zink_set_stream_output_targets(struct pipe_context *pctx,
/* TODO: possibly avoid rebinding on resume if resuming from same buffers? */
ctx->dirty_so_targets = true;
}
+ zink_flush_dgc_if_enabled(ctx);
}
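/* counter_buffer_valid ultimately decides whether the draw path resumes from
 * the saved byte count or starts over: when valid, the counter buffer is handed
 * to the begin call so the GPU reloads the previous offset. A hedged sketch of
 * that distinction (example_begin_xfb is illustrative only; the counter value
 * is assumed to live at offset 0 of the 4-byte counter buffer created above):
 */
static void
example_begin_xfb(struct zink_context *ctx, struct zink_so_target *t)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   struct zink_resource *counter = zink_resource(t->counter_buffer);
   VkDeviceSize offset = 0;
   if (t->counter_buffer_valid)
      /* resume: reload the previously written counter value */
      VKCTX(CmdBeginTransformFeedbackEXT)(cmdbuf, 0, 1, &counter->obj->buffer, &offset);
   else
      /* fresh start: no counter buffer, writing begins at the bound offset */
      VKCTX(CmdBeginTransformFeedbackEXT)(cmdbuf, 0, 0, NULL, NULL);
}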
void
@@ -3228,24 +4295,26 @@ zink_rebind_framebuffer(struct zink_context *ctx, struct zink_resource *res)
return;
zink_batch_no_rp(ctx);
- if (zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer) {
- struct zink_framebuffer *fb = ctx->get_framebuffer(ctx);
- ctx->fb_changed |= ctx->framebuffer != fb;
- ctx->framebuffer = fb;
- }
+ struct zink_framebuffer *fb = zink_get_framebuffer(ctx);
+ ctx->fb_changed |= ctx->framebuffer != fb;
+ ctx->framebuffer = fb;
}
ALWAYS_INLINE static struct zink_resource *
-rebind_ubo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot)
+rebind_ubo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot)
{
struct zink_resource *res = update_descriptor_state_ubo(ctx, shader, slot,
ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_UBO][shader][slot]);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, slot, 1);
+ if (res) {
+ res->obj->unordered_read = false;
+ res->obj->access |= VK_ACCESS_SHADER_READ_BIT;
+ }
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO, slot, 1);
return res;
}
ALWAYS_INLINE static struct zink_resource *
-rebind_ssbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot)
+rebind_ssbo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot)
{
const struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][slot];
struct zink_resource *res = zink_resource(ssbo->buffer);
@@ -3254,58 +4323,86 @@ rebind_ssbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slo
util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset,
ssbo->buffer_offset + ssbo->buffer_size);
update_descriptor_state_ssbo(ctx, shader, slot, res);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SSBO, slot, 1);
+ if (res) {
+ res->obj->unordered_read = false;
+ res->obj->access |= VK_ACCESS_SHADER_READ_BIT;
+ if (ctx->writable_ssbos[shader] & BITFIELD_BIT(slot)) {
+ res->obj->unordered_write = false;
+ res->obj->access |= VK_ACCESS_SHADER_WRITE_BIT;
+ }
+ }
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SSBO, slot, 1);
return res;
}
ALWAYS_INLINE static struct zink_resource *
-rebind_tbo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot)
+rebind_tbo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot)
{
struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[shader][slot]);
if (!sampler_view || sampler_view->base.texture->target != PIPE_BUFFER)
return NULL;
struct zink_resource *res = zink_resource(sampler_view->base.texture);
- if (zink_batch_usage_exists(sampler_view->buffer_view->batch_uses))
- zink_batch_reference_bufferview(&ctx->batch, sampler_view->buffer_view);
- zink_buffer_view_reference(zink_screen(ctx->base.screen), &sampler_view->buffer_view, NULL);
- sampler_view->buffer_view = get_buffer_view(ctx, res, sampler_view->base.format,
- sampler_view->base.u.buf.offset, sampler_view->base.u.buf.size);
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) {
+ VkBufferViewCreateInfo bvci = sampler_view->buffer_view->bvci;
+ bvci.buffer = res->obj->buffer;
+ zink_buffer_view_reference(zink_screen(ctx->base.screen), &sampler_view->buffer_view, NULL);
+ sampler_view->buffer_view = get_buffer_view(ctx, res, &bvci);
+ }
update_descriptor_state_sampler(ctx, shader, slot, res);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
+ if (res) {
+ res->obj->unordered_read = false;
+ res->obj->access |= VK_ACCESS_SHADER_READ_BIT;
+ }
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, slot, 1);
return res;
}
ALWAYS_INLINE static struct zink_resource *
-rebind_ibo(struct zink_context *ctx, enum pipe_shader_type shader, unsigned slot)
+rebind_ibo(struct zink_context *ctx, gl_shader_stage shader, unsigned slot)
{
struct zink_image_view *image_view = &ctx->image_views[shader][slot];
struct zink_resource *res = zink_resource(image_view->base.resource);
if (!res || res->base.b.target != PIPE_BUFFER)
return NULL;
- zink_descriptor_set_refs_clear(&image_view->buffer_view->desc_set_refs, image_view->buffer_view);
- if (zink_batch_usage_exists(image_view->buffer_view->batch_uses))
- zink_batch_reference_bufferview(&ctx->batch, image_view->buffer_view);
- zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL);
+ VkBufferViewCreateInfo bvci;
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) {
+ bvci = image_view->buffer_view->bvci;
+ bvci.buffer = res->obj->buffer;
+ zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL);
+ }
if (!zink_resource_object_init_storage(ctx, res)) {
debug_printf("couldn't create storage image!");
return NULL;
}
- image_view->buffer_view = get_buffer_view(ctx, res, image_view->base.format,
- image_view->base.u.buf.offset, image_view->base.u.buf.size);
- assert(image_view->buffer_view);
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) {
+ image_view->buffer_view = get_buffer_view(ctx, res, &bvci);
+ assert(image_view->buffer_view);
+ }
+ if (res) {
+ res->obj->unordered_read = false;
+ res->obj->access |= VK_ACCESS_SHADER_READ_BIT;
+ if (image_view->base.access & PIPE_IMAGE_ACCESS_WRITE) {
+ res->obj->unordered_write = false;
+ res->obj->access |= VK_ACCESS_SHADER_WRITE_BIT;
+ }
+ }
util_range_add(&res->base.b, &res->valid_buffer_range, image_view->base.u.buf.offset,
image_view->base.u.buf.offset + image_view->base.u.buf.size);
update_descriptor_state_image(ctx, shader, slot, res);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_IMAGE, slot, 1);
+ ctx->invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_IMAGE, slot, 1);
return res;
}
static unsigned
-rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_t rebind_mask, const unsigned expected_num_rebinds)
+rebind_buffer(struct zink_context *ctx, struct zink_resource *res, uint32_t rebind_mask, const unsigned expected_num_rebinds)
{
unsigned num_rebinds = 0;
bool has_write = false;
+ if (!zink_resource_has_binds(res))
+ return 0;
+
+ assert(!res->bindless[1]); //TODO
if ((rebind_mask & BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER)) || (!rebind_mask && res->so_bind_count && ctx->num_so_targets)) {
for (unsigned i = 0; i < ctx->num_so_targets; i++) {
if (ctx->so_targets[i]) {
@@ -3316,25 +4413,29 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_
}
}
}
+ rebind_mask &= ~BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
}
- if (num_rebinds && expected_num_rebinds == num_rebinds)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
if ((rebind_mask & BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER)) || (!rebind_mask && res->vbo_bind_mask)) {
u_foreach_bit(slot, res->vbo_bind_mask) {
if (ctx->vertex_buffers[slot].buffer.resource != &res->base.b) //wrong context
goto end;
- set_vertex_buffer_clamped(ctx, slot);
+ res->obj->access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+ res->obj->access_stage |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+ res->obj->unordered_read = false;
num_rebinds++;
}
+ rebind_mask &= ~BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
ctx->vertex_buffers_dirty = true;
}
- if (num_rebinds && expected_num_rebinds == num_rebinds)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const uint32_t ubo_mask = rebind_mask ?
- rebind_mask & BITFIELD_RANGE(TC_BINDING_UBO_VS, PIPE_SHADER_TYPES) :
- ((res->ubo_bind_count[0] ? BITFIELD_RANGE(TC_BINDING_UBO_VS, (PIPE_SHADER_TYPES - 1)) : 0) |
+ rebind_mask & BITFIELD_RANGE(TC_BINDING_UBO_VS, MESA_SHADER_STAGES) :
+ ((res->ubo_bind_count[0] ? BITFIELD_RANGE(TC_BINDING_UBO_VS, (MESA_SHADER_STAGES - 1)) : 0) |
(res->ubo_bind_count[1] ? BITFIELD_BIT(TC_BINDING_UBO_CS) : 0));
u_foreach_bit(shader, ubo_mask >> TC_BINDING_UBO_VS) {
u_foreach_bit(slot, res->ubo_bind_mask[shader]) {
@@ -3344,12 +4445,13 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_
num_rebinds++;
}
}
- if (num_rebinds && expected_num_rebinds == num_rebinds)
+ rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_UBO_VS, MESA_SHADER_STAGES);
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const unsigned ssbo_mask = rebind_mask ?
- rebind_mask & BITFIELD_RANGE(TC_BINDING_SSBO_VS, PIPE_SHADER_TYPES) :
- BITFIELD_RANGE(TC_BINDING_SSBO_VS, PIPE_SHADER_TYPES);
+ rebind_mask & BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES) :
+ BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES);
u_foreach_bit(shader, ssbo_mask >> TC_BINDING_SSBO_VS) {
u_foreach_bit(slot, res->ssbo_bind_mask[shader]) {
struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][slot];
@@ -3360,11 +4462,12 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_
num_rebinds++;
}
}
- if (num_rebinds && expected_num_rebinds == num_rebinds)
+ rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES);
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const unsigned sampler_mask = rebind_mask ?
- rebind_mask & BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, PIPE_SHADER_TYPES) :
- BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, PIPE_SHADER_TYPES);
+ rebind_mask & BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES) :
+ BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES);
u_foreach_bit(shader, sampler_mask >> TC_BINDING_SAMPLERVIEW_VS) {
u_foreach_bit(slot, res->sampler_binds[shader]) {
struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[shader][slot]);
@@ -3374,12 +4477,13 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_
num_rebinds++;
}
}
- if (num_rebinds && expected_num_rebinds == num_rebinds)
+ rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES);
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const unsigned image_mask = rebind_mask ?
- rebind_mask & BITFIELD_RANGE(TC_BINDING_IMAGE_VS, PIPE_SHADER_TYPES) :
- BITFIELD_RANGE(TC_BINDING_IMAGE_VS, PIPE_SHADER_TYPES);
+ rebind_mask & BITFIELD_RANGE(TC_BINDING_IMAGE_VS, MESA_SHADER_STAGES) :
+ BITFIELD_RANGE(TC_BINDING_IMAGE_VS, MESA_SHADER_STAGES);
unsigned num_image_rebinds_remaining = rebind_mask ? expected_num_rebinds - num_rebinds : res->image_bind_count[0] + res->image_bind_count[1];
u_foreach_bit(shader, image_mask >> TC_BINDING_IMAGE_VS) {
for (unsigned slot = 0; num_image_rebinds_remaining && slot < ctx->di.num_images[shader]; slot++) {
@@ -3395,24 +4499,372 @@ rebind_buffer(struct zink_context *ctx, struct zink_resource *res, const uint32_
}
}
end:
- zink_batch_resource_usage_set(&ctx->batch, res, has_write);
+ if (num_rebinds)
+ zink_batch_resource_usage_set(&ctx->batch, res, has_write, true);
return num_rebinds;
}
+void
+zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
+ unsigned dst_offset, unsigned src_offset, unsigned size)
+{
+ VkBufferCopy region;
+ region.srcOffset = src_offset;
+ region.dstOffset = dst_offset;
+ region.size = size;
+
+ struct zink_batch *batch = &ctx->batch;
+
+ struct pipe_box box;
+ u_box_3d((int)src_offset, 0, 0, (int)size, 0, 0, &box);
+ /* must barrier if something wrote the valid buffer range */
+ bool valid_write = zink_check_valid_buffer_src_access(ctx, src, src_offset, size);
+ bool unordered_src = !valid_write && !zink_check_unordered_transfer_access(src, 0, &box);
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0);
+ bool unordered_dst = zink_resource_buffer_transfer_dst_barrier(ctx, dst, dst_offset, size);
+ bool can_unorder = unordered_dst && unordered_src && !ctx->no_reorder;
+ VkCommandBuffer cmdbuf = can_unorder ? ctx->batch.state->reordered_cmdbuf : zink_get_cmdbuf(ctx, src, dst);
+ ctx->batch.state->has_barriers |= can_unorder;
+ zink_batch_reference_resource_rw(batch, src, false);
+ zink_batch_reference_resource_rw(batch, dst, true);
+ if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
+ VkMemoryBarrier mb;
+ mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ mb.pNext = NULL;
+ mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ VKCTX(CmdPipelineBarrier)(cmdbuf,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, 1, &mb, 0, NULL, 0, NULL);
+ }
+ bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_buffer(%d)", size);
+ VKCTX(CmdCopyBuffer)(cmdbuf, src->obj->buffer, dst->obj->buffer, 1, &region);
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+}
+
+void
+zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
+ unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
+ unsigned src_level, const struct pipe_box *src_box, enum pipe_map_flags map_flags)
+{
+ struct zink_resource *img = dst->base.b.target == PIPE_BUFFER ? src : dst;
+ struct zink_resource *use_img = img;
+ struct zink_resource *buf = dst->base.b.target == PIPE_BUFFER ? dst : src;
+ struct zink_batch *batch = &ctx->batch;
+ bool needs_present_readback = false;
+
+ bool buf2img = buf == src;
+ bool unsync = !!(map_flags & PIPE_MAP_UNSYNCHRONIZED);
+ if (unsync) {
+ util_queue_fence_wait(&ctx->flush_fence);
+ util_queue_fence_reset(&ctx->unsync_fence);
+ }
+
+ if (buf2img) {
+ if (zink_is_swapchain(img)) {
+ if (!zink_kopper_acquire(ctx, img, UINT64_MAX))
+ return;
+ }
+ struct pipe_box box = *src_box;
+ box.x = dstx;
+ box.y = dsty;
+ box.z = dstz;
+ zink_resource_image_transfer_dst_barrier(ctx, img, dst_level, &box, unsync);
+ if (!unsync)
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+ } else {
+ assert(!(map_flags & PIPE_MAP_UNSYNCHRONIZED));
+ if (zink_is_swapchain(img))
+ needs_present_readback = zink_kopper_acquire_readback(ctx, img, &use_img);
+ zink_screen(ctx->base.screen)->image_barrier(ctx, use_img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0);
+ zink_resource_buffer_transfer_dst_barrier(ctx, buf, dstx, src_box->width);
+ }
+
+ VkBufferImageCopy region = {0};
+ region.bufferOffset = buf2img ? src_box->x : dstx;
+ region.bufferRowLength = 0;
+ region.bufferImageHeight = 0;
+ region.imageSubresource.mipLevel = buf2img ? dst_level : src_level;
+ enum pipe_texture_target img_target = img->base.b.target;
+ if (img->need_2D)
+ img_target = img_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY;
+ switch (img_target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_1D_ARRAY:
+ /* these use layer */
+ region.imageSubresource.baseArrayLayer = buf2img ? dstz : src_box->z;
+ region.imageSubresource.layerCount = src_box->depth;
+ region.imageOffset.z = 0;
+ region.imageExtent.depth = 1;
+ break;
+ case PIPE_TEXTURE_3D:
+ /* this uses depth */
+ region.imageSubresource.baseArrayLayer = 0;
+ region.imageSubresource.layerCount = 1;
+ region.imageOffset.z = buf2img ? dstz : src_box->z;
+ region.imageExtent.depth = src_box->depth;
+ break;
+ default:
+ /* these must only copy one layer */
+ region.imageSubresource.baseArrayLayer = 0;
+ region.imageSubresource.layerCount = 1;
+ region.imageOffset.z = 0;
+ region.imageExtent.depth = 1;
+ }
+ region.imageOffset.x = buf2img ? dstx : src_box->x;
+ region.imageOffset.y = buf2img ? dsty : src_box->y;
+
+ region.imageExtent.width = src_box->width;
+ region.imageExtent.height = src_box->height;
+
+ VkCommandBuffer cmdbuf = unsync ?
+ ctx->batch.state->unsynchronized_cmdbuf :
+ /* never promote to unordered if swapchain was acquired */
+ needs_present_readback ?
+ ctx->batch.state->cmdbuf :
+ buf2img ? zink_get_cmdbuf(ctx, buf, use_img) : zink_get_cmdbuf(ctx, use_img, buf);
+ zink_batch_reference_resource_rw(batch, use_img, buf2img);
+ zink_batch_reference_resource_rw(batch, buf, !buf2img);
+ if (unsync) {
+ ctx->batch.state->has_unsync = true;
+ use_img->obj->unsync_access = true;
+ }
+
+ /* we're using u_transfer_helper_deinterleave, which means we'll be getting PIPE_MAP_* usage
+ * to indicate whether to copy either the depth or stencil aspects
+ */
+ unsigned aspects = 0;
+ if (map_flags) {
+ assert((map_flags & (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)) !=
+ (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY));
+ if (map_flags & PIPE_MAP_DEPTH_ONLY)
+ aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
+ else if (map_flags & PIPE_MAP_STENCIL_ONLY)
+ aspects = VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ if (!aspects)
+ aspects = img->aspect;
+ if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
+ VkMemoryBarrier mb;
+ mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ mb.pNext = NULL;
+ mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ VKCTX(CmdPipelineBarrier)(cmdbuf,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, 1, &mb, 0, NULL, 0, NULL);
+ }
+ while (aspects) {
+ int aspect = 1 << u_bit_scan(&aspects);
+ region.imageSubresource.aspectMask = aspect;
+
+ /* MSAA transfers should have already been handled by U_TRANSFER_HELPER_MSAA_MAP, since
+ * there's no way to resolve using this interface:
+ *
+ * srcImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT
+ * - vkCmdCopyImageToBuffer spec
+ *
+ * dstImage must have a sample count equal to VK_SAMPLE_COUNT_1_BIT
+ * - vkCmdCopyBufferToImage spec
+ */
+ assert(img->base.b.nr_samples <= 1);
+ bool marker;
+ if (buf2img) {
+ marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_buffer2image(%s, %dx%dx%d)",
+ util_format_short_name(dst->base.b.format),
+ region.imageExtent.width,
+ region.imageExtent.height,
+ MAX2(region.imageSubresource.layerCount, region.imageExtent.depth));
+ VKCTX(CmdCopyBufferToImage)(cmdbuf, buf->obj->buffer, use_img->obj->image, use_img->layout, 1, &region);
+ } else {
+ marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_image2buffer(%s, %dx%dx%d)",
+ util_format_short_name(src->base.b.format),
+ region.imageExtent.width,
+ region.imageExtent.height,
+ MAX2(region.imageSubresource.layerCount, region.imageExtent.depth));
+ VKCTX(CmdCopyImageToBuffer)(cmdbuf, use_img->obj->image, use_img->layout, buf->obj->buffer, 1, &region);
+ }
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+ }
+ if (unsync)
+ util_queue_fence_signal(&ctx->unsync_fence);
+ if (needs_present_readback) {
+ assert(!unsync);
+ if (buf2img) {
+ img->obj->unordered_write = false;
+ buf->obj->unordered_read = false;
+ } else {
+ img->obj->unordered_read = false;
+ buf->obj->unordered_write = false;
+ }
+ zink_kopper_present_readback(ctx, img);
+ }
+
+ if (ctx->oom_flush && !ctx->batch.in_rp && !ctx->unordered_blitting)
+ flush_batch(ctx, false);
+}
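/* Worked example of the layer-vs-depth split in the switch above, using
 * hypothetical values: a src_box with z=2, depth=4 targets layers 2..5 of an
 * array texture but slices 2..5 of a 3D texture (example_region_layers_vs_depth
 * is illustrative only):
 */
static void
example_region_layers_vs_depth(VkBufferImageCopy *region, const struct pipe_box *src_box, bool is_array)
{
   if (is_array) {
      /* array textures address the third dimension via layers */
      region->imageSubresource.baseArrayLayer = src_box->z;   /* 2 */
      region->imageSubresource.layerCount = src_box->depth;   /* 4 */
      region->imageOffset.z = 0;
      region->imageExtent.depth = 1;
   } else {
      /* 3D textures address it via the z offset and extent */
      region->imageSubresource.baseArrayLayer = 0;
      region->imageSubresource.layerCount = 1;
      region->imageOffset.z = src_box->z;                      /* 2 */
      region->imageExtent.depth = src_box->depth;              /* 4 */
   }
}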
+
+static void
+zink_resource_copy_region(struct pipe_context *pctx,
+ struct pipe_resource *pdst,
+ unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *psrc,
+ unsigned src_level, const struct pipe_box *src_box)
+{
+ struct zink_resource *dst = zink_resource(pdst);
+ struct zink_resource *src = zink_resource(psrc);
+ struct zink_context *ctx = zink_context(pctx);
+ if (dst->base.b.target != PIPE_BUFFER && src->base.b.target != PIPE_BUFFER) {
+ VkImageCopy region;
+ /* fill struct holes */
+ memset(&region, 0, sizeof(region));
+ if (util_format_get_num_planes(src->base.b.format) == 1 &&
+ util_format_get_num_planes(dst->base.b.format) == 1) {
+ /* If neither the calling command’s srcImage nor the calling command’s dstImage
+ * has a multi-planar image format then the aspectMask member of srcSubresource
+ * and dstSubresource must match
+ *
+ * -VkImageCopy spec
+ */
+ assert(src->aspect == dst->aspect);
+ } else
+ unreachable("planar formats not yet handled");
+
+
+ region.srcSubresource.aspectMask = src->aspect;
+ region.srcSubresource.mipLevel = src_level;
+ enum pipe_texture_target src_target = src->base.b.target;
+ if (src->need_2D)
+ src_target = src_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY;
+ switch (src_target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_1D_ARRAY:
+ /* these use layer */
+ region.srcSubresource.baseArrayLayer = src_box->z;
+ region.srcSubresource.layerCount = src_box->depth;
+ region.srcOffset.z = 0;
+ region.extent.depth = 1;
+ break;
+ case PIPE_TEXTURE_3D:
+ /* this uses depth */
+ region.srcSubresource.baseArrayLayer = 0;
+ region.srcSubresource.layerCount = 1;
+ region.srcOffset.z = src_box->z;
+ region.extent.depth = src_box->depth;
+ break;
+ default:
+ /* these must only copy one layer */
+ region.srcSubresource.baseArrayLayer = 0;
+ region.srcSubresource.layerCount = 1;
+ region.srcOffset.z = 0;
+ region.extent.depth = 1;
+ }
+
+ region.srcOffset.x = src_box->x;
+ region.srcOffset.y = src_box->y;
+
+ region.dstSubresource.aspectMask = dst->aspect;
+ region.dstSubresource.mipLevel = dst_level;
+ enum pipe_texture_target dst_target = dst->base.b.target;
+ if (dst->need_2D)
+ dst_target = dst_target == PIPE_TEXTURE_1D ? PIPE_TEXTURE_2D : PIPE_TEXTURE_2D_ARRAY;
+ switch (dst_target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_1D_ARRAY:
+ /* these use layer */
+ region.dstSubresource.baseArrayLayer = dstz;
+ region.dstSubresource.layerCount = src_box->depth;
+ region.dstOffset.z = 0;
+ break;
+ case PIPE_TEXTURE_3D:
+ /* this uses depth */
+ region.dstSubresource.baseArrayLayer = 0;
+ region.dstSubresource.layerCount = 1;
+ region.dstOffset.z = dstz;
+ break;
+ default:
+ /* these must only copy one layer */
+ region.dstSubresource.baseArrayLayer = 0;
+ region.dstSubresource.layerCount = 1;
+ region.dstOffset.z = 0;
+ }
+
+ region.dstOffset.x = dstx;
+ region.dstOffset.y = dsty;
+ region.extent.width = src_box->width;
+ region.extent.height = src_box->height;
+
+ /* ignore no-op copies */
+ if (src == dst &&
+ !memcmp(&region.dstOffset, &region.srcOffset, sizeof(region.srcOffset)) &&
+ !memcmp(&region.dstSubresource, &region.srcSubresource, sizeof(region.srcSubresource)))
+ return;
+
+ zink_fb_clears_apply_or_discard(ctx, pdst, (struct u_rect){dstx, dstx + src_box->width, dsty, dsty + src_box->height}, false);
+ zink_fb_clears_apply_region(ctx, psrc, zink_rect_from_box(src_box));
+
+ struct zink_batch *batch = &ctx->batch;
+ zink_resource_setup_transfer_layouts(ctx, src, dst);
+ VkCommandBuffer cmdbuf = zink_get_cmdbuf(ctx, src, dst);
+ zink_batch_reference_resource_rw(batch, src, false);
+ zink_batch_reference_resource_rw(batch, dst, true);
+
+ if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
+ VkMemoryBarrier mb;
+ mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ mb.pNext = NULL;
+ mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ VKCTX(CmdPipelineBarrier)(cmdbuf,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, 1, &mb, 0, NULL, 0, NULL);
+ }
+ bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_image(%s->%s, %dx%dx%d)",
+ util_format_short_name(psrc->format),
+ util_format_short_name(pdst->format),
+ region.extent.width,
+ region.extent.height,
+ MAX2(region.srcSubresource.layerCount, region.extent.depth));
+ VKCTX(CmdCopyImage)(cmdbuf, src->obj->image, src->layout,
+ dst->obj->image, dst->layout,
+ 1, &region);
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+ } else if (dst->base.b.target == PIPE_BUFFER &&
+ src->base.b.target == PIPE_BUFFER) {
+ zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width);
+ } else
+ zink_copy_image_buffer(ctx, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0);
+ if (ctx->oom_flush && !ctx->batch.in_rp && !ctx->unordered_blitting)
+ flush_batch(ctx, false);
+}
+
static bool
zink_resource_commit(struct pipe_context *pctx, struct pipe_resource *pres, unsigned level, struct pipe_box *box, bool commit)
{
struct zink_context *ctx = zink_context(pctx);
struct zink_resource *res = zink_resource(pres);
- struct zink_screen *screen = zink_screen(pctx->screen);
/* if any current usage exists, flush the queue */
if (zink_resource_has_unflushed_usage(res))
zink_flush_queue(ctx);
- bool ret = zink_bo_commit(screen, res, box->x, box->width, commit);
- if (!ret)
+ VkSemaphore sem = VK_NULL_HANDLE;
+ bool ret = zink_bo_commit(ctx, res, level, box, commit, &sem);
+ if (ret) {
+ if (sem)
+ zink_batch_add_wait_semaphore(&ctx->batch, sem);
+ } else {
check_device_lost(ctx);
+ }
return ret;
}
@@ -3420,43 +4872,44 @@ zink_resource_commit(struct pipe_context *pctx, struct pipe_resource *pres, unsi
static void
rebind_image(struct zink_context *ctx, struct zink_resource *res)
{
- zink_rebind_framebuffer(ctx, res);
- if (!zink_resource_has_binds(res))
- return;
- for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
- if (res->sampler_binds[i]) {
- for (unsigned j = 0; j < ctx->di.num_sampler_views[i]; j++) {
- struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]);
- if (sv && sv->base.texture == &res->base.b) {
- struct pipe_surface *psurf = &sv->image_view->base;
- zink_rebind_surface(ctx, &psurf);
- sv->image_view = zink_surface(psurf);
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, j, 1);
- update_descriptor_state_sampler(ctx, i, j, res);
- }
- }
- }
- if (!res->image_bind_count[i == PIPE_SHADER_COMPUTE])
- continue;
- for (unsigned j = 0; j < ctx->di.num_images[i]; j++) {
- if (zink_resource(ctx->image_views[i][j].base.resource) == res) {
- zink_screen(ctx->base.screen)->context_invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_IMAGE, j, 1);
- update_descriptor_state_sampler(ctx, i, j, res);
- _mesa_set_add(ctx->need_barriers[i == PIPE_SHADER_COMPUTE], res);
- }
- }
- }
+ assert(!ctx->blitting);
+ if (res->fb_binds)
+ zink_rebind_framebuffer(ctx, res);
+ if (!zink_resource_has_binds(res))
+ return;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (res->sampler_binds[i]) {
+ for (unsigned j = 0; j < ctx->di.num_sampler_views[i]; j++) {
+ struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]);
+ if (sv && sv->base.texture == &res->base.b) {
+ struct pipe_surface *psurf = &sv->image_view->base;
+ zink_rebind_surface(ctx, &psurf);
+ sv->image_view = zink_surface(psurf);
+ ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, j, 1);
+ update_descriptor_state_sampler(ctx, i, j, res);
+ }
+ }
+ }
+ if (!res->image_bind_count[i == MESA_SHADER_COMPUTE])
+ continue;
+ for (unsigned j = 0; j < ctx->di.num_images[i]; j++) {
+ if (zink_resource(ctx->image_views[i][j].base.resource) == res) {
+ ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_IMAGE, j, 1);
+ update_descriptor_state_image(ctx, i, j, res);
+ _mesa_set_add(ctx->need_barriers[i == MESA_SHADER_COMPUTE], res);
+ }
+ }
+ }
}
bool
zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res)
{
- /* force counter buffer reset */
- res->so_valid = false;
- if (!zink_resource_has_binds(res))
- return true;
- if (res->base.b.target == PIPE_BUFFER)
+ if (res->base.b.target == PIPE_BUFFER) {
+ /* force counter buffer reset */
+ res->so_valid = false;
return rebind_buffer(ctx, res, 0, 0) == res->bind_count[0] + res->bind_count[1];
+ }
rebind_image(ctx, res);
return false;
}
@@ -3465,33 +4918,66 @@ void
zink_rebind_all_buffers(struct zink_context *ctx)
{
struct zink_batch *batch = &ctx->batch;
- u_foreach_bit(slot, ctx->gfx_pipeline_state.vertex_buffers_enabled_mask)
- set_vertex_buffer_clamped(ctx, slot);
ctx->vertex_buffers_dirty = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask > 0;
ctx->dirty_so_targets = ctx->num_so_targets > 0;
if (ctx->num_so_targets)
- zink_resource_buffer_barrier(ctx, zink_resource(ctx->dummy_xfb_buffer),
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, zink_resource(ctx->dummy_xfb_buffer),
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
- for (unsigned shader = PIPE_SHADER_VERTEX; shader < PIPE_SHADER_TYPES; shader++) {
+ for (unsigned shader = MESA_SHADER_VERTEX; shader < MESA_SHADER_STAGES; shader++) {
for (unsigned slot = 0; slot < ctx->di.num_ubos[shader]; slot++) {
struct zink_resource *res = rebind_ubo(ctx, shader, slot);
if (res)
- zink_batch_resource_usage_set(batch, res, false);
+ zink_batch_resource_usage_set(batch, res, false, true);
}
for (unsigned slot = 0; slot < ctx->di.num_sampler_views[shader]; slot++) {
struct zink_resource *res = rebind_tbo(ctx, shader, slot);
if (res)
- zink_batch_resource_usage_set(batch, res, false);
+ zink_batch_resource_usage_set(batch, res, false, true);
}
for (unsigned slot = 0; slot < ctx->di.num_ssbos[shader]; slot++) {
struct zink_resource *res = rebind_ssbo(ctx, shader, slot);
if (res)
- zink_batch_resource_usage_set(batch, res, (ctx->writable_ssbos[shader] & BITFIELD64_BIT(slot)) != 0);
+ zink_batch_resource_usage_set(batch, res, (ctx->writable_ssbos[shader] & BITFIELD64_BIT(slot)) != 0, true);
}
for (unsigned slot = 0; slot < ctx->di.num_images[shader]; slot++) {
struct zink_resource *res = rebind_ibo(ctx, shader, slot);
if (res)
- zink_batch_resource_usage_set(batch, res, (ctx->image_views[shader][slot].base.access & PIPE_IMAGE_ACCESS_WRITE) != 0);
+ zink_batch_resource_usage_set(batch, res, (ctx->image_views[shader][slot].base.access & PIPE_IMAGE_ACCESS_WRITE) != 0, true);
+ }
+ }
+}
+
+void
+zink_rebind_all_images(struct zink_context *ctx)
+{
+ assert(!ctx->blitting);
+ rebind_fb_state(ctx, NULL, false);
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ for (unsigned j = 0; j < ctx->di.num_sampler_views[i]; j++) {
+ struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]);
+ if (!sv || !sv->image_view || sv->image_view->base.texture->target == PIPE_BUFFER)
+ continue;
+ struct zink_resource *res = zink_resource(sv->image_view->base.texture);
+ if (res->obj != sv->image_view->obj) {
+ struct pipe_surface *psurf = &sv->image_view->base;
+ zink_rebind_surface(ctx, &psurf);
+ sv->image_view = zink_surface(psurf);
+ ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, j, 1);
+ update_descriptor_state_sampler(ctx, i, j, res);
+ }
+ }
+ for (unsigned j = 0; j < ctx->di.num_images[i]; j++) {
+ struct zink_image_view *image_view = &ctx->image_views[i][j];
+ struct zink_resource *res = zink_resource(image_view->base.resource);
+ if (!res || res->base.b.target == PIPE_BUFFER)
+ continue;
+ if (ctx->image_views[i][j].surface->obj != res->obj) {
+ zink_surface_reference(zink_screen(ctx->base.screen), &image_view->surface, NULL);
+ image_view->surface = create_image_surface(ctx, &image_view->base, i == MESA_SHADER_COMPUTE);
+ ctx->invalidate_descriptor_state(ctx, i, ZINK_DESCRIPTOR_TYPE_IMAGE, j, 1);
+ update_descriptor_state_image(ctx, i, j, res);
+ _mesa_set_add(ctx->need_barriers[i == MESA_SHADER_COMPUTE], res);
+ }
}
}
}
@@ -3510,16 +4996,19 @@ zink_context_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resou
assert(d->obj);
assert(s->obj);
util_idalloc_mt_free(&screen->buffer_ids, delete_buffer_id);
- zink_resource_object_reference(screen, NULL, s->obj);
- if (zink_resource_has_unflushed_usage(d) ||
- (zink_resource_has_usage(d) && zink_resource_has_binds(d)))
- zink_batch_reference_resource_move(&ctx->batch, d);
- else
- zink_resource_object_reference(screen, &d->obj, NULL);
- d->obj = s->obj;
+ zink_batch_reference_resource(&ctx->batch, d);
+ /* don't be too creative */
+ zink_resource_object_reference(screen, &d->obj, s->obj);
+ d->valid_buffer_range = s->valid_buffer_range;
+ zink_resource_copies_reset(d);
/* force counter buffer reset */
d->so_valid = false;
- if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) != num_rebinds)
+ /* FIXME: tc buffer sharedness tracking */
+ if (!num_rebinds) {
+ num_rebinds = d->bind_count[0] + d->bind_count[1];
+ rebind_mask = 0;
+ }
+ if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) < num_rebinds)
ctx->buffer_rebind_counter = p_atomic_inc_return(&screen->buffer_rebind_counter);
}
@@ -3529,6 +5018,8 @@ zink_context_is_resource_busy(struct pipe_screen *pscreen, struct pipe_resource
struct zink_screen *screen = zink_screen(pscreen);
struct zink_resource *res = zink_resource(pres);
uint32_t check_usage = 0;
+ if (usage & PIPE_MAP_UNSYNCHRONIZED && (!res->obj->unsync_access || zink_is_swapchain(res)))
+ return true;
if (usage & PIPE_MAP_READ)
check_usage |= ZINK_RESOURCE_ACCESS_WRITE;
if (usage & PIPE_MAP_WRITE)
@@ -3561,20 +5052,291 @@ zink_emit_string_marker(struct pipe_context *pctx,
free(temp);
}
+VkIndirectCommandsLayoutTokenNV *
+zink_dgc_add_token(struct zink_context *ctx, VkIndirectCommandsTokenTypeNV type, void **mem)
+{
+ size_t size = 0;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ VkIndirectCommandsLayoutTokenNV *ret = util_dynarray_grow(&ctx->dgc.tokens, VkIndirectCommandsLayoutTokenNV, 1);
+ ret->sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV;
+ ret->pNext = NULL;
+ ret->tokenType = type;
+ ret->vertexDynamicStride = ctx->gfx_pipeline_state.uses_dynamic_stride;
+ ret->indirectStateFlags = 0;
+ ret->indexTypeCount = 0;
+ switch (type) {
+ case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
+ ret->stream = ZINK_DGC_VBO;
+ size = sizeof(VkBindVertexBufferIndirectCommandNV);
+ break;
+ case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
+ ret->stream = ZINK_DGC_IB;
+ size = sizeof(VkBindIndexBufferIndirectCommandNV);
+ break;
+ case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
+ ret->stream = ZINK_DGC_PSO;
+ size = sizeof(VkBindShaderGroupIndirectCommandNV);
+ break;
+ case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
+ ret->stream = ZINK_DGC_PUSH;
+ ret->pushconstantPipelineLayout = ctx->dgc.last_prog->base.layout;
+ ret->pushconstantShaderStageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
+ size = sizeof(float) * 6; // size for full tess level upload every time
+ break;
+ case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
+ ret->stream = ZINK_DGC_DRAW;
+ size = sizeof(VkDrawIndirectCommand);
+ break;
+ case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
+ ret->stream = ZINK_DGC_DRAW;
+ size = sizeof(VkDrawIndexedIndirectCommand);
+ break;
+ default:
+ unreachable("ack");
+ }
+ struct zink_resource *old = NULL;
+ unsigned stream_count = screen->info.nv_dgc_props.maxIndirectCommandsStreamCount >= ZINK_DGC_MAX ? ZINK_DGC_MAX : 1;
+ if (stream_count == 1)
+ ret->stream = 0;
+ unsigned stream = ret->stream;
+ bool max_exceeded = !ctx->dgc.max_size[stream];
+ ret->offset = ctx->dgc.cur_offsets[stream];
+ if (ctx->dgc.buffers[stream]) {
+ /* detect end of buffer */
+ if (ctx->dgc.bind_offsets[stream] + ctx->dgc.cur_offsets[stream] + size > ctx->dgc.buffers[stream]->base.b.width0) {
+ old = ctx->dgc.buffers[stream];
+ ctx->dgc.buffers[stream] = NULL;
+ max_exceeded = true;
+ }
+ }
+ if (!ctx->dgc.buffers[stream]) {
+ if (max_exceeded)
+ ctx->dgc.max_size[stream] += size * 5;
+ uint8_t *ptr;
+ unsigned offset;
+ u_upload_alloc(ctx->dgc.upload[stream], 0, ctx->dgc.max_size[stream],
+ screen->info.props.limits.minMemoryMapAlignment, &offset,
+ (struct pipe_resource **)&ctx->dgc.buffers[stream], (void **)&ptr);
+ size_t cur_size = old ? (ctx->dgc.cur_offsets[stream] - ctx->dgc.bind_offsets[stream]) : 0;
+ if (old) {
+ struct pipe_resource *pold = &old->base.b;
+ /* copy and delete old buffer */
+ zink_batch_reference_resource_rw(&ctx->batch, old, true);
+ memcpy(ptr + offset, ctx->dgc.maps[stream] + ctx->dgc.bind_offsets[stream], cur_size);
+ pipe_resource_reference(&pold, NULL);
+ }
+ ctx->dgc.maps[stream] = ptr;
+ ctx->dgc.bind_offsets[stream] = offset;
+ ctx->dgc.cur_offsets[stream] = cur_size;
+ }
+ *mem = ctx->dgc.maps[stream] + ctx->dgc.cur_offsets[stream];
+ ctx->dgc.cur_offsets[stream] += size;
+ return ret;
+}
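
A rough usage sketch (not part of this change) of the token helper above: a hypothetical caller reserves a draw token and fills the returned slot with a VkDrawIndirectCommand, whose layout is fixed by the Vulkan spec. The emit_dgc_draw name and the ctx->dgc.valid assignment are assumptions about how a caller would mark the stream for the later flush.

   /* hypothetical caller of zink_dgc_add_token(); illustration only */
   static void
   emit_dgc_draw(struct zink_context *ctx, uint32_t vertex_count, uint32_t instance_count)
   {
      void *mem;
      zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, &mem);
      VkDrawIndirectCommand draw = {
         .vertexCount = vertex_count,
         .instanceCount = instance_count,
         .firstVertex = 0,
         .firstInstance = 0,
      };
      /* the DRAW_NV token reserved exactly sizeof(VkDrawIndirectCommand) bytes at *mem */
      memcpy(mem, &draw, sizeof(draw));
      /* assumption: callers flag the stream so zink_flush_dgc() will emit it */
      ctx->dgc.valid = true;
   }
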
+
+void
+zink_flush_dgc(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_batch_state *bs = ctx->batch.state;
+ if (!ctx->dgc.valid)
+ return;
+
+ /* tokens should be created as they are used */
+ unsigned num_cmds = util_dynarray_num_elements(&ctx->dgc.tokens, VkIndirectCommandsLayoutTokenNV);
+ assert(num_cmds);
+ VkIndirectCommandsLayoutTokenNV *cmds = ctx->dgc.tokens.data;
+ uint32_t strides[ZINK_DGC_MAX] = {0};
+
+ unsigned stream_count = screen->info.nv_dgc_props.maxIndirectCommandsStreamCount >= ZINK_DGC_MAX ? ZINK_DGC_MAX : 1;
+ VkIndirectCommandsStreamNV streams[ZINK_DGC_MAX];
+ for (unsigned i = 0; i < stream_count; i++) {
+ if (ctx->dgc.buffers[i]) {
+ streams[i].buffer = ctx->dgc.buffers[i]->obj->buffer;
+ streams[i].offset = ctx->dgc.bind_offsets[i];
+ } else {
+ streams[i].buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
+ streams[i].offset = 0;
+ }
+ }
+ /* this is a stupid pipeline that will never actually be used as anything but a container */
+ VkPipeline pipeline = VK_NULL_HANDLE;
+ if (screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1) {
+ /* RADV doesn't support shader pipeline binds, so use this hacky path */
+ pipeline = ctx->gfx_pipeline_state.pipeline;
+ } else {
+ VkPrimitiveTopology vkmode = zink_primitive_topology(ctx->gfx_pipeline_state.gfx_prim_mode);
+ pipeline = zink_create_gfx_pipeline(screen, ctx->dgc.last_prog, ctx->dgc.last_prog->objs, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map, vkmode, false, &ctx->dgc.pipelines);
+ assert(pipeline);
+ util_dynarray_append(&bs->dgc.pipelines, VkPipeline, pipeline);
+ VKCTX(CmdBindPipelineShaderGroupNV)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline, 0);
+ }
+ unsigned remaining = num_cmds;
+ for (unsigned i = 0; i < num_cmds; i += screen->info.nv_dgc_props.maxIndirectCommandsTokenCount, remaining -= screen->info.nv_dgc_props.maxIndirectCommandsTokenCount) {
+ VkIndirectCommandsLayoutCreateInfoNV lci = {
+ VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV,
+ NULL,
+ 0,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ MIN2(remaining, screen->info.nv_dgc_props.maxIndirectCommandsTokenCount),
+ cmds + i,
+ stream_count,
+ strides
+ };
+ VkIndirectCommandsLayoutNV iclayout;
+ ASSERTED VkResult res = VKSCR(CreateIndirectCommandsLayoutNV)(screen->dev, &lci, NULL, &iclayout);
+ assert(res == VK_SUCCESS);
+ util_dynarray_append(&bs->dgc.layouts, VkIndirectCommandsLayoutNV, iclayout);
+
+ /* a lot of hacks to set up a preprocess buffer */
+ VkGeneratedCommandsMemoryRequirementsInfoNV info = {
+ VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV,
+ NULL,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline,
+ iclayout,
+ 1
+ };
+ VkMemoryRequirements2 reqs = {
+ VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2
+ };
+ VKSCR(GetGeneratedCommandsMemoryRequirementsNV)(screen->dev, &info, &reqs);
+ struct pipe_resource templ = {0};
+ templ.target = PIPE_BUFFER;
+ templ.format = PIPE_FORMAT_R8_UNORM;
+ templ.bind = 0;
+ templ.usage = PIPE_USAGE_IMMUTABLE;
+ templ.flags = 0;
+ templ.width0 = reqs.memoryRequirements.size;
+ templ.height0 = 1;
+ templ.depth0 = 1;
+ templ.array_size = 1;
+ uint64_t params[] = {reqs.memoryRequirements.size, reqs.memoryRequirements.alignment, reqs.memoryRequirements.memoryTypeBits};
+ struct pipe_resource *pres = screen->base.resource_create_with_modifiers(&screen->base, &templ, params, 3);
+ assert(pres);
+ zink_batch_reference_resource_rw(&ctx->batch, zink_resource(pres), true);
+
+ VkGeneratedCommandsInfoNV gen = {
+ VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_NV,
+ NULL,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline,
+ iclayout,
+ stream_count,
+ streams,
+ 1,
+ zink_resource(pres)->obj->buffer,
+ 0,
+ pres->width0,
+ VK_NULL_HANDLE,
+ 0,
+ VK_NULL_HANDLE,
+ 0
+ };
+ VKCTX(CmdExecuteGeneratedCommandsNV)(ctx->batch.state->cmdbuf, VK_FALSE, &gen);
+
+ pipe_resource_reference(&pres, NULL);
+ }
+ util_dynarray_clear(&ctx->dgc.pipelines);
+ util_dynarray_clear(&ctx->dgc.tokens);
+ ctx->dgc.valid = false;
+ ctx->pipeline_changed[0] = true;
+ zink_select_draw_vbo(ctx);
+}
+
+struct pipe_surface *
+zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index)
+{
+ unsigned size = calc_max_dummy_fbo_size(ctx);
+ bool needs_null_init = false;
+ if (ctx->dummy_surface[samples_index]) {
+ /* delete old surface if ETOOSMALL */
+ struct zink_resource *res = zink_resource(ctx->dummy_surface[samples_index]->texture);
+ if (res->base.b.width0 > size || res->base.b.height0 > size) {
+ pipe_surface_release(&ctx->base, &ctx->dummy_surface[samples_index]);
+ needs_null_init = !samples_index && ctx->di.null_fbfetch_init;
+ if (!samples_index)
+ ctx->di.null_fbfetch_init = false;
+ }
+ }
+ if (!ctx->dummy_surface[samples_index]) {
+ ctx->dummy_surface[samples_index] = zink_surface_create_null(ctx, PIPE_TEXTURE_2D, size, size, BITFIELD_BIT(samples_index));
+ assert(ctx->dummy_surface[samples_index]);
+ /* this may be read with imageLoad, which per the GL spec must return 0 */
+ if (!samples_index) {
+ union pipe_color_union color = {0};
+ struct pipe_box box;
+ u_box_2d(0, 0, size, size, &box);
+ ctx->base.clear_texture(&ctx->base, ctx->dummy_surface[samples_index]->texture, 0, &box, &color);
+ }
+ }
+ if (needs_null_init)
+ init_null_fbfetch(ctx);
+ return ctx->dummy_surface[samples_index];
+}
+
+struct zink_surface *
+zink_get_dummy_surface(struct zink_context *ctx, int samples_index)
+{
+ return zink_csurface(zink_get_dummy_pipe_surface(ctx, samples_index));
+}
+
+static void
+zink_tc_parse_dsa(void *state, struct tc_renderpass_info *info)
+{
+ struct zink_depth_stencil_alpha_state *cso = state;
+ info->zsbuf_write_dsa |= (cso->hw_state.depth_write || cso->hw_state.stencil_test);
+ info->zsbuf_read_dsa |= (cso->hw_state.depth_test || cso->hw_state.stencil_test);
+ /* TODO: if zsbuf fbfetch is ever supported */
+}
+
+static void
+zink_tc_parse_fs(void *state, struct tc_renderpass_info *info)
+{
+ struct zink_shader *zs = state;
+ info->zsbuf_write_fs |= zs->info.outputs_written & (BITFIELD64_BIT(FRAG_RESULT_DEPTH) | BITFIELD64_BIT(FRAG_RESULT_STENCIL));
+ /* TODO: if >1 fbfetch attachment is ever supported */
+ info->cbuf_fbfetch |= zs->info.fs.uses_fbfetch_output ? BITFIELD_BIT(0) : 0;
+}
+
+void
+zink_parse_tc_info(struct zink_context *ctx)
+{
+ struct tc_renderpass_info *info = &ctx->dynamic_fb.tc_info;
+ /* reset cso info first */
+ info->data16[2] = 0;
+ if (ctx->gfx_stages[MESA_SHADER_FRAGMENT])
+ zink_tc_parse_fs(ctx->gfx_stages[MESA_SHADER_FRAGMENT], info);
+ if (ctx->dsa_state)
+ zink_tc_parse_dsa(ctx->dsa_state, info);
+ if (ctx->zsbuf_unused == zink_is_zsbuf_used(ctx))
+ ctx->rp_layout_changed = true;
+}
+
struct pipe_context *
zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
struct zink_screen *screen = zink_screen(pscreen);
struct zink_context *ctx = rzalloc(NULL, struct zink_context);
+ bool is_copy_only = (flags & ZINK_CONTEXT_COPY_ONLY) > 0;
+ bool is_compute_only = (flags & PIPE_CONTEXT_COMPUTE_ONLY) > 0;
+ bool is_robust = (flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS) > 0;
if (!ctx)
goto fail;
- ctx->have_timelines = screen->info.have_KHR_timeline_semaphore;
+ ctx->flags = flags;
ctx->pipeline_changed[0] = ctx->pipeline_changed[1] = true;
ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch = 1;
+ ctx->gfx_pipeline_state.uses_dynamic_stride = screen->info.have_EXT_extended_dynamic_state ||
+ screen->info.have_EXT_vertex_input_dynamic_state;
ctx->compute_pipeline_state.dirty = true;
ctx->fb_changed = ctx->rp_changed = true;
- ctx->gfx_pipeline_state.gfx_prim_mode = PIPE_PRIM_MAX;
+ ctx->sample_mask_changed = true;
+ ctx->gfx_pipeline_state.gfx_prim_mode = MESA_PRIM_COUNT;
+ ctx->gfx_pipeline_state.shader_rast_prim = MESA_PRIM_COUNT;
+ ctx->gfx_pipeline_state.rast_prim = MESA_PRIM_COUNT;
zink_init_draw_functions(ctx, screen);
zink_init_grid_functions(ctx);
@@ -3582,22 +5344,15 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
ctx->base.screen = pscreen;
ctx->base.priv = priv;
- if (screen->info.have_KHR_imageless_framebuffer) {
- ctx->get_framebuffer = zink_get_framebuffer_imageless;
- ctx->init_framebuffer = zink_init_framebuffer_imageless;
- } else {
- ctx->get_framebuffer = zink_get_framebuffer;
- ctx->init_framebuffer = zink_init_framebuffer;
- }
-
ctx->base.destroy = zink_context_destroy;
+ ctx->base.set_debug_callback = zink_set_debug_callback;
ctx->base.get_device_reset_status = zink_get_device_reset_status;
ctx->base.set_device_reset_callback = zink_set_device_reset_callback;
zink_context_state_init(&ctx->base);
ctx->base.create_sampler_state = zink_create_sampler_state;
- ctx->base.bind_sampler_states = zink_bind_sampler_states;
+ ctx->base.bind_sampler_states = screen->info.have_EXT_non_seamless_cube_map ? zink_bind_sampler_states : zink_bind_sampler_states_nonseamless;
ctx->base.delete_sampler_state = zink_delete_sampler_state;
ctx->base.create_sampler_view = zink_create_sampler_view;
@@ -3623,15 +5378,20 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
ctx->base.set_tess_state = zink_set_tess_state;
ctx->base.set_patch_vertices = zink_set_patch_vertices;
+ ctx->base.set_min_samples = zink_set_min_samples;
+ ctx->gfx_pipeline_state.min_samples = 0;
ctx->base.set_sample_mask = zink_set_sample_mask;
+ ctx->gfx_pipeline_state.sample_mask = UINT32_MAX;
ctx->base.clear = zink_clear;
- ctx->base.clear_texture = zink_clear_texture;
+ ctx->base.clear_texture = screen->info.have_KHR_dynamic_rendering ? zink_clear_texture_dynamic : zink_clear_texture;
ctx->base.clear_buffer = zink_clear_buffer;
ctx->base.clear_render_target = zink_clear_render_target;
ctx->base.clear_depth_stencil = zink_clear_depth_stencil;
+ ctx->base.create_fence_fd = zink_create_fence_fd;
ctx->base.fence_server_sync = zink_fence_server_sync;
+ ctx->base.fence_server_signal = zink_fence_server_signal;
ctx->base.flush = zink_flush;
ctx->base.memory_barrier = zink_memory_barrier;
ctx->base.texture_barrier = zink_texture_barrier;
@@ -3645,6 +5405,8 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
ctx->base.set_stream_output_targets = zink_set_stream_output_targets;
ctx->base.flush_resource = zink_flush_resource;
+ if (screen->info.have_KHR_buffer_device_address)
+ ctx->base.set_global_binding = zink_set_global_binding;
ctx->base.emit_string_marker = zink_emit_string_marker;
@@ -3652,6 +5414,10 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
zink_context_resource_init(&ctx->base);
zink_context_query_init(&ctx->base);
+ util_queue_fence_init(&ctx->flush_fence);
+ util_queue_fence_init(&ctx->unsync_fence);
+
+ list_inithead(&ctx->query_pools);
_mesa_set_init(&ctx->update_barriers[0][0], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
_mesa_set_init(&ctx->update_barriers[1][0], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
_mesa_set_init(&ctx->update_barriers[0][1], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
@@ -3659,12 +5425,6 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
ctx->need_barriers[0] = &ctx->update_barriers[0][0];
ctx->need_barriers[1] = &ctx->update_barriers[1][0];
- util_dynarray_init(&ctx->free_batch_states, ctx);
- _mesa_hash_table_init(&ctx->batch_states, ctx, NULL, _mesa_key_pointer_equal);
-
- ctx->gfx_pipeline_state.have_EXT_extended_dynamic_state = screen->info.have_EXT_extended_dynamic_state;
- ctx->gfx_pipeline_state.have_EXT_extended_dynamic_state2 = screen->info.have_EXT_extended_dynamic_state2;
-
slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);
@@ -3673,89 +5433,228 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
for (int i = 0; i < ARRAY_SIZE(ctx->fb_clears); i++)
util_dynarray_init(&ctx->fb_clears[i].clears, ctx);
- ctx->blitter = util_blitter_create(&ctx->base);
- if (!ctx->blitter)
- goto fail;
+ if (zink_debug & ZINK_DEBUG_DGC) {
+ util_dynarray_init(&ctx->dgc.pipelines, ctx);
+ util_dynarray_init(&ctx->dgc.tokens, ctx);
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.upload); i++)
+ ctx->dgc.upload[i] = u_upload_create_default(&ctx->base);
+ }
+
+ if (!is_copy_only) {
+ ctx->blitter = util_blitter_create(&ctx->base);
+ if (!ctx->blitter)
+ goto fail;
+ }
+ zink_set_last_vertex_key(ctx)->last_vertex_stage = true;
ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base.last_vertex_stage = true;
- ctx->last_vertex_stage_dirty = true;
- ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX].size = sizeof(struct zink_vs_key_base);
- ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_TESS_EVAL].size = sizeof(struct zink_vs_key_base);
- ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_GEOMETRY].size = sizeof(struct zink_vs_key_base);
- ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_FRAGMENT].size = sizeof(struct zink_fs_key);
- _mesa_hash_table_init(&ctx->compute_program_cache, ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ zink_set_tcs_key_patches(ctx, 1);
+ if (!screen->optimal_keys) {
+ ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].size = sizeof(struct zink_vs_key_base);
+ ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_EVAL].size = sizeof(struct zink_vs_key_base);
+ ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_CTRL].size = sizeof(struct zink_tcs_key);
+ ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_GEOMETRY].size = sizeof(struct zink_gs_key);
+ ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].size = sizeof(struct zink_fs_key);
+
+ /* this condition must be updated if new fields are added to zink_cs_key */
+ if (screen->driver_workarounds.lower_robustImageAccess2)
+ ctx->compute_pipeline_state.key.size = sizeof(struct zink_cs_key);
+
+ if (is_robust && screen->driver_workarounds.lower_robustImageAccess2) {
+ ctx->compute_pipeline_state.key.key.cs.robust_access = true;
+ for (gl_shader_stage pstage = MESA_SHADER_VERTEX; pstage < MESA_SHADER_FRAGMENT; pstage++)
+ ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base.robust_access = true;
+ ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.robust_access = true;
+ }
+ }
_mesa_hash_table_init(&ctx->framebuffer_cache, ctx, hash_framebuffer_imageless, equals_framebuffer_imageless);
- _mesa_set_init(&ctx->render_pass_state_cache, ctx, hash_rp_state, equals_rp_state);
- ctx->render_pass_cache = _mesa_hash_table_create(NULL,
- hash_render_pass_state,
- equals_render_pass_state);
- if (!ctx->render_pass_cache)
+ if (!zink_init_render_pass(ctx))
goto fail;
-
- const uint8_t data[] = {0};
- ctx->dummy_vertex_buffer = pipe_buffer_create(&screen->base,
- PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SHADER_IMAGE, PIPE_USAGE_IMMUTABLE, sizeof(data));
- if (!ctx->dummy_vertex_buffer)
- goto fail;
- ctx->dummy_xfb_buffer = pipe_buffer_create(&screen->base,
- PIPE_BIND_STREAM_OUTPUT, PIPE_USAGE_DEFAULT, sizeof(data));
- if (!ctx->dummy_xfb_buffer)
- goto fail;
- for (unsigned i = 0; i < ARRAY_SIZE(ctx->dummy_surface); i++) {
- if (!(screen->info.props.limits.framebufferDepthSampleCounts & BITFIELD_BIT(i)))
- continue;
- ctx->dummy_surface[i] = zink_surface_create_null(ctx, PIPE_TEXTURE_2D, 1024, 1024, BITFIELD_BIT(i));
- if (!ctx->dummy_surface[i])
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->rendering_state_cache); i++)
+ _mesa_set_init(&ctx->rendering_state_cache[i], ctx, hash_rendering_state, equals_rendering_state);
+ ctx->dynamic_fb.info.pColorAttachments = ctx->dynamic_fb.attachments;
+ ctx->dynamic_fb.info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO;
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->dynamic_fb.attachments); i++) {
+ VkRenderingAttachmentInfo *att = &ctx->dynamic_fb.attachments[i];
+ att->sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
+ att->imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ }
+ ctx->gfx_pipeline_state.rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO;
+ ctx->gfx_pipeline_state.rendering_info.pColorAttachmentFormats = ctx->gfx_pipeline_state.rendering_formats;
+ ctx->gfx_pipeline_state.feedback_loop = screen->driver_workarounds.always_feedback_loop;
+ ctx->gfx_pipeline_state.feedback_loop_zs = screen->driver_workarounds.always_feedback_loop_zs;
+
+ const uint32_t data[] = {0};
+ if (!is_copy_only) {
+ ctx->dummy_vertex_buffer = pipe_buffer_create(&screen->base,
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SHADER_IMAGE, PIPE_USAGE_IMMUTABLE, sizeof(data));
+ if (!ctx->dummy_vertex_buffer)
+ goto fail;
+ ctx->dummy_xfb_buffer = pipe_buffer_create(&screen->base,
+ PIPE_BIND_STREAM_OUTPUT, PIPE_USAGE_IMMUTABLE, sizeof(data));
+ if (!ctx->dummy_xfb_buffer)
goto fail;
}
- ctx->dummy_bufferview = get_buffer_view(ctx, zink_resource(ctx->dummy_vertex_buffer), PIPE_FORMAT_R8_UNORM, 0, sizeof(data));
- if (!ctx->dummy_bufferview)
- goto fail;
-
- if (!zink_descriptor_layouts_init(ctx))
- goto fail;
+ if (!is_copy_only) {
+ VkBufferViewCreateInfo bvci = create_bvci(ctx, zink_resource(ctx->dummy_vertex_buffer), PIPE_FORMAT_R8G8B8A8_UNORM, 0, sizeof(data));
+ ctx->dummy_bufferview = get_buffer_view(ctx, zink_resource(ctx->dummy_vertex_buffer), &bvci);
+ if (!ctx->dummy_bufferview)
+ goto fail;
- if (!screen->descriptors_init(ctx)) {
- zink_screen_init_descriptor_funcs(screen, true);
- if (!screen->descriptors_init(ctx))
+ if (!zink_descriptors_init(ctx))
goto fail;
}
- ctx->have_timelines = screen->info.have_KHR_timeline_semaphore;
- simple_mtx_init(&ctx->batch_mtx, mtx_plain);
+ if (!is_copy_only && !is_compute_only) {
+ ctx->base.create_texture_handle = zink_create_texture_handle;
+ ctx->base.delete_texture_handle = zink_delete_texture_handle;
+ ctx->base.make_texture_handle_resident = zink_make_texture_handle_resident;
+ ctx->base.create_image_handle = zink_create_image_handle;
+ ctx->base.delete_image_handle = zink_delete_image_handle;
+ ctx->base.make_image_handle_resident = zink_make_image_handle_resident;
+ for (unsigned i = 0; i < 2; i++) {
+ _mesa_hash_table_init(&ctx->di.bindless[i].img_handles, ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ _mesa_hash_table_init(&ctx->di.bindless[i].tex_handles, ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+ /* allocate 1024 slots and reserve slot 0 */
+ util_idalloc_init(&ctx->di.bindless[i].tex_slots, ZINK_MAX_BINDLESS_HANDLES);
+ util_idalloc_alloc(&ctx->di.bindless[i].tex_slots);
+ util_idalloc_init(&ctx->di.bindless[i].img_slots, ZINK_MAX_BINDLESS_HANDLES);
+ util_idalloc_alloc(&ctx->di.bindless[i].img_slots);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ ctx->di.bindless[i].db.buffer_infos = malloc(sizeof(VkDescriptorAddressInfoEXT) * ZINK_MAX_BINDLESS_HANDLES);
+ if (!ctx->di.bindless[i].db.buffer_infos) {
+ mesa_loge("ZINK: failed to allocate ctx->di.bindless[%d].db.buffer_infos!",i);
+ goto fail;
+ }
+ for (unsigned j = 0; j < ZINK_MAX_BINDLESS_HANDLES; j++) {
+ ctx->di.bindless[i].db.buffer_infos[j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT;
+ ctx->di.bindless[i].db.buffer_infos[j].pNext = NULL;
+ }
+ } else {
+ ctx->di.bindless[i].t.buffer_infos = malloc(sizeof(VkBufferView) * ZINK_MAX_BINDLESS_HANDLES);
+ if (!ctx->di.bindless[i].t.buffer_infos) {
+ mesa_loge("ZINK: failed to allocate ctx->di.bindless[%d].t.buffer_infos!",i);
+ goto fail;
+ }
+ }
+ ctx->di.bindless[i].img_infos = malloc(sizeof(VkDescriptorImageInfo) * ZINK_MAX_BINDLESS_HANDLES);
+ if (!ctx->di.bindless[i].img_infos) {
+ mesa_loge("ZINK: failed to allocate ctx->di.bindless[%d].img_infos!",i);
+ goto fail;
+ }
+ util_dynarray_init(&ctx->di.bindless[i].updates, NULL);
+ util_dynarray_init(&ctx->di.bindless[i].resident, NULL);
+ }
+ }
+
+ simple_mtx_init(&ctx->batch.ref_lock, mtx_plain);
zink_start_batch(ctx, &ctx->batch);
if (!ctx->batch.state)
goto fail;
- pipe_buffer_write(&ctx->base, ctx->dummy_vertex_buffer, 0, sizeof(data), data);
- pipe_buffer_write(&ctx->base, ctx->dummy_xfb_buffer, 0, sizeof(data), data);
-
- for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
- /* need to update these based on screen config for null descriptors */
- for (unsigned j = 0; j < 32; j++) {
- update_descriptor_state_ubo(ctx, i, j, NULL);
- update_descriptor_state_sampler(ctx, i, j, NULL);
- update_descriptor_state_ssbo(ctx, i, j, NULL);
- update_descriptor_state_image(ctx, i, j, NULL);
+ if (screen->compact_descriptors)
+ ctx->invalidate_descriptor_state = zink_context_invalidate_descriptor_state_compact;
+ else
+ ctx->invalidate_descriptor_state = zink_context_invalidate_descriptor_state;
+ if (!is_copy_only && !is_compute_only) {
+ pipe_buffer_write_nooverlap(&ctx->base, ctx->dummy_vertex_buffer, 0, sizeof(data), data);
+ pipe_buffer_write_nooverlap(&ctx->base, ctx->dummy_xfb_buffer, 0, sizeof(data), data);
+ if (screen->info.have_EXT_color_write_enable)
+ reapply_color_write(ctx);
+
+ /* set on startup just to avoid validation errors if a draw comes through without
+ * a tess shader later
+ */
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) {
+ VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, 1);
+ VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->reordered_cmdbuf, 1);
+ }
+ }
+ if (!is_copy_only) {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ /* need to update these based on screen config for null descriptors */
+ for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.t.ubos[i]); j++) {
+ update_descriptor_state_ubo(ctx, i, j, NULL);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ ctx->di.db.ubos[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT;
+ }
+ for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.textures[i]); j++) {
+ update_descriptor_state_sampler(ctx, i, j, NULL);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ ctx->di.db.tbos[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT;
+ }
+ for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.t.ssbos[i]); j++) {
+ update_descriptor_state_ssbo(ctx, i, j, NULL);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ ctx->di.db.ssbos[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT;
+ }
+ for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.images[i]); j++) {
+ update_descriptor_state_image(ctx, i, j, NULL);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ ctx->di.db.texel_images[i][j].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT;
+ }
}
+
+ p_atomic_inc(&screen->base.num_contexts);
}
- if (!screen->info.rb2_feats.nullDescriptor)
- ctx->di.fbfetch.imageView = zink_csurface(ctx->dummy_surface[0])->image_view;
- p_atomic_inc(&screen->base.num_contexts);
zink_select_draw_vbo(ctx);
zink_select_launch_grid(ctx);
+ if (!is_copy_only && zink_debug & ZINK_DEBUG_SHADERDB) {
+ if (!screen->info.have_EXT_vertex_input_dynamic_state) {
+ struct pipe_vertex_element velems[32] = {0};
+ for (unsigned i = 0; i < ARRAY_SIZE(velems); i++)
+ velems[i].src_format = PIPE_FORMAT_R8G8B8_UNORM;
+ void *state = ctx->base.create_vertex_elements_state(&ctx->base, ARRAY_SIZE(velems), velems);
+ ctx->base.bind_vertex_elements_state(&ctx->base, state);
+ }
+ ctx->gfx_pipeline_state.sample_mask = BITFIELD_MASK(32);
+ struct pipe_framebuffer_state fb = {0};
+ fb.cbufs[0] = zink_get_dummy_pipe_surface(ctx, 0);
+ fb.nr_cbufs = 1;
+ fb.width = fb.height = 256;
+ ctx->base.set_framebuffer_state(&ctx->base, &fb);
+ ctx->disable_fs = true;
+ struct pipe_depth_stencil_alpha_state dsa = {0};
+ void *state = ctx->base.create_depth_stencil_alpha_state(&ctx->base, &dsa);
+ ctx->base.bind_depth_stencil_alpha_state(&ctx->base, state);
+
+ struct pipe_blend_state blend = {
+ .rt[0].colormask = 0xF
+ };
+
+ void *blend_state = ctx->base.create_blend_state(&ctx->base, &blend);
+ ctx->base.bind_blend_state(&ctx->base, blend_state);
+
+ zink_batch_rp(ctx);
+ }
+
+ if (!is_compute_only && zink_debug & ZINK_DEBUG_NOREORDER)
+ ctx->no_reorder = true;
+
if (!(flags & PIPE_CONTEXT_PREFER_THREADED) || flags & PIPE_CONTEXT_COMPUTE_ONLY) {
return &ctx->base;
}
struct threaded_context *tc = (struct threaded_context*)threaded_context_create(&ctx->base, &screen->transfer_pool,
zink_context_replace_buffer_storage,
- zink_create_tc_fence_for_tc,
- zink_context_is_resource_busy, true, &ctx->tc);
+ &(struct threaded_context_options){
+ .create_fence = zink_create_tc_fence_for_tc,
+ .is_resource_busy = zink_context_is_resource_busy,
+ .driver_calls_flush_notify = !screen->driver_workarounds.track_renderpasses,
+ .unsynchronized_get_device_reset_status = true,
+ .unsynchronized_texture_subdata = true,
+ .parse_renderpass_info = screen->driver_workarounds.track_renderpasses,
+ .dsa_parse = zink_tc_parse_dsa,
+ .fs_parse = zink_tc_parse_fs,
+ },
+ &ctx->tc);
if (tc && (struct zink_context*)tc != ctx) {
+ ctx->track_renderpasses = screen->driver_workarounds.track_renderpasses;
threaded_context_init_bytes_mapped_limit(tc, 4);
ctx->base.set_context_param = zink_set_context_param;
}
@@ -3767,3 +5666,143 @@ fail:
zink_context_destroy(&ctx->base);
return NULL;
}
+
+struct zink_context *
+zink_tc_context_unwrap(struct pipe_context *pctx, bool threaded)
+{
+ /* need to get the actual zink_context, not the threaded context */
+ if (threaded)
+ pctx = threaded_context_unwrap_sync(pctx);
+ pctx = trace_get_possibly_threaded_context(pctx);
+ return zink_context(pctx);
+}
+
+static bool
+add_implicit_feedback_loop(struct zink_context *ctx, struct zink_resource *res)
+{
+ /* can only feedback loop with fb+sampler bind; image bind must be GENERAL */
+ if (!res->fb_bind_count || !res->sampler_bind_count[0] || res->image_bind_count[0])
+ return false;
+ if (!(res->aspect & VK_IMAGE_ASPECT_COLOR_BIT) && !zink_is_zsbuf_write(ctx))
+ /* if zsbuf isn't used then it effectively has no fb binds */
+ /* if zsbuf isn't written to then it'll be fine with read-only access */
+ return false;
+ bool is_feedback = false;
+ /* avoid false positives when a texture is bound but not used */
+ u_foreach_bit(vkstage, res->gfx_barrier) {
+ VkPipelineStageFlags vkstagebit = BITFIELD_BIT(vkstage);
+ if (vkstagebit < VK_PIPELINE_STAGE_VERTEX_SHADER_BIT || vkstagebit > VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)
+ continue;
+ /* in-range VkPipelineStageFlagBits can be converted to VkShaderStageFlags with a bitshift */
+ gl_shader_stage stage = vk_to_mesa_shader_stage((VkShaderStageFlagBits)(vkstagebit >> 3));
+ /* check shader texture usage against resource's sampler binds */
+ if ((ctx->gfx_stages[stage] && (res->sampler_binds[stage] & ctx->gfx_stages[stage]->info.textures_used[0])))
+ is_feedback = true;
+ }
+ if (!is_feedback)
+ return false;
+ if (ctx->feedback_loops & res->fb_binds)
+ /* already added */
+ return true;
+ /* new feedback loop detected */
+ if (res->aspect == VK_IMAGE_ASPECT_COLOR_BIT) {
+ if (!ctx->gfx_pipeline_state.feedback_loop)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.feedback_loop = true;
+ } else {
+ if (!ctx->gfx_pipeline_state.feedback_loop_zs)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.feedback_loop_zs = true;
+ }
+ ctx->rp_layout_changed = true;
+ ctx->feedback_loops |= res->fb_binds;
+ u_foreach_bit(idx, res->fb_binds) {
+ if (zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout)
+ ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
+ else
+ ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+ update_feedback_loop_dynamic_state(ctx);
+ return true;
+}
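
As a side note on the ">> 3" conversion used in the loop above: the graphics-stage VkPipelineStageFlagBits and VkShaderStageFlagBits values (from vulkan_core.h) line up so that a single right shift translates one into the other, for example:

   /* values from the Vulkan headers, shown for illustration:
    *   VK_PIPELINE_STAGE_VERTEX_SHADER_BIT    = 0x08 -> 0x08 >> 3 = 0x01 = VK_SHADER_STAGE_VERTEX_BIT
    *   VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT  = 0x40 -> 0x40 >> 3 = 0x08 = VK_SHADER_STAGE_GEOMETRY_BIT
    *   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT  = 0x80 -> 0x80 >> 3 = 0x10 = VK_SHADER_STAGE_FRAGMENT_BIT
    */
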
+
+void
+zink_update_barriers(struct zink_context *ctx, bool is_compute,
+ struct pipe_resource *index, struct pipe_resource *indirect, struct pipe_resource *indirect_draw_count)
+{
+ assert(!ctx->blitting);
+ if (!ctx->need_barriers[is_compute]->entries)
+ return;
+ struct set *need_barriers = ctx->need_barriers[is_compute];
+ ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute];
+ ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]];
+ ASSERTED bool check_rp = ctx->batch.in_rp && ctx->dynamic_fb.tc_info.zsbuf_invalidate;
+ set_foreach(need_barriers, he) {
+ struct zink_resource *res = (struct zink_resource *)he->key;
+ if (res->bind_count[is_compute]) {
+ VkPipelineStageFlagBits pipeline = is_compute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : res->gfx_barrier;
+ if (res->base.b.target == PIPE_BUFFER)
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, res->barrier_access[is_compute], pipeline);
+ else {
+ bool is_feedback = is_compute ? false : add_implicit_feedback_loop(ctx, res);
+ VkImageLayout layout = zink_descriptor_util_image_layout_eval(ctx, res, is_compute);
+ /* GENERAL is only used for feedback loops and storage image binds */
+ if (is_feedback || layout != VK_IMAGE_LAYOUT_GENERAL || res->image_bind_count[is_compute])
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, layout, res->barrier_access[is_compute], pipeline);
+ assert(!check_rp || check_rp == ctx->batch.in_rp);
+ if (is_feedback)
+ update_res_sampler_layouts(ctx, res);
+ }
+ if (zink_resource_access_is_write(res->barrier_access[is_compute]) ||
+ // TODO: figure out a way to link up layouts between unordered and main cmdbuf
+ res->base.b.target != PIPE_BUFFER)
+ res->obj->unordered_write = false;
+ res->obj->unordered_read = false;
+ /* always barrier on draw if this resource has either multiple image write binds or
+ * image write binds and image read binds
+ */
+ if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1)
+ _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res);
+ }
+ _mesa_set_remove(need_barriers, he);
+ if (!need_barriers->entries)
+ break;
+ }
+}
+
+/**
+ * Emits a debug marker in the cmd stream to be captured by perfetto during
+ * execution on the GPU.
+ */
+bool
+zink_cmd_debug_marker_begin(struct zink_context *ctx, VkCommandBuffer cmdbuf, const char *fmt, ...)
+{
+ if (!zink_tracing)
+ return false;
+
+ char *name;
+ va_list va;
+ va_start(va, fmt);
+ int ret = vasprintf(&name, fmt, va);
+ va_end(va);
+
+ if (ret == -1)
+ return false;
+
+ VkDebugUtilsLabelEXT info = { 0 };
+ info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
+ info.pLabelName = name;
+
+ VKCTX(CmdBeginDebugUtilsLabelEXT)(cmdbuf ? cmdbuf : ctx->batch.state->cmdbuf, &info);
+
+ free(name);
+ return true;
+}
+
+void
+zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf, bool emitted)
+{
+ if (emitted)
+ VKCTX(CmdEndDebugUtilsLabelEXT)(cmdbuf);
+}
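
For reference, the intended call pattern for the marker pair above (mirroring the image-copy path earlier in this file); the label text and the width/height variables are illustrative only, with ctx and cmdbuf assumed in scope:

   bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "my_pass(%ux%u)", width, height);
   /* ... record Vulkan commands into cmdbuf ... */
   zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
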
diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h
index abfe0199c15..79b6c087bff 100644
--- a/src/gallium/drivers/zink/zink_context.h
+++ b/src/gallium/drivers/zink/zink_context.h
@@ -24,29 +24,17 @@
#ifndef ZINK_CONTEXT_H
#define ZINK_CONTEXT_H
-#define ZINK_FBFETCH_BINDING 6 //COMPUTE+1
-#define ZINK_SHADER_COUNT (PIPE_SHADER_TYPES - 1)
-
-#define ZINK_DEFAULT_MAX_DESCS 5000
-#define ZINK_DEFAULT_DESC_CLAMP (ZINK_DEFAULT_MAX_DESCS * 0.9)
-
-#include "zink_clear.h"
-#include "zink_pipeline.h"
-#include "zink_batch.h"
-#include "zink_compiler.h"
-#include "zink_descriptors.h"
-#include "zink_surface.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_state.h"
#include "util/u_rect.h"
-#include "util/u_threaded_context.h"
+#include "zink_types.h"
+#include "vk_enum_to_str.h"
-#include "util/slab.h"
-#include "util/list.h"
-#include "util/u_dynarray.h"
+#define GFX_SHADER_BITS (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \
+ VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)
-#include <vulkan/vulkan.h>
+#define pipe_buffer_write "use tc_buffer_write to avoid breaking threaded context"
#ifdef __cplusplus
extern "C" {
@@ -62,285 +50,17 @@ struct zink_rasterizer_state;
struct zink_resource;
struct zink_vertex_elements_state;
-enum zink_blit_flags {
- ZINK_BLIT_NORMAL = 1 << 0,
- ZINK_BLIT_SAVE_FS = 1 << 1,
- ZINK_BLIT_SAVE_FB = 1 << 2,
- ZINK_BLIT_SAVE_TEXTURES = 1 << 3,
- ZINK_BLIT_NO_COND_RENDER = 1 << 4,
-};
-
-struct zink_sampler_state {
- VkSampler sampler;
- uint32_t hash;
- struct zink_descriptor_refs desc_set_refs;
- struct zink_batch_usage *batch_uses;
- bool custom_border_color;
-};
-
-struct zink_buffer_view {
- struct pipe_reference reference;
- struct pipe_resource *pres;
- VkBufferViewCreateInfo bvci;
- VkBufferView buffer_view;
- uint32_t hash;
- struct zink_batch_usage *batch_uses;
- struct zink_descriptor_refs desc_set_refs;
-};
-
-struct zink_sampler_view {
- struct pipe_sampler_view base;
- union {
- struct zink_surface *image_view;
- struct zink_buffer_view *buffer_view;
- };
-};
-
-struct zink_image_view {
- struct pipe_image_view base;
- union {
- struct zink_surface *surface;
- struct zink_buffer_view *buffer_view;
- };
-};
-
-static inline struct zink_sampler_view *
-zink_sampler_view(struct pipe_sampler_view *pview)
-{
- return (struct zink_sampler_view *)pview;
-}
+#define perf_debug(ctx, ...) do { \
+ util_debug_message(&ctx->dbg, PERF_INFO, __VA_ARGS__); \
+} while(0)
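
A hypothetical call site for the macro above; the message text is illustrative only:

   perf_debug(ctx, "zink: stalling on previous submit");
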
-struct zink_so_target {
- struct pipe_stream_output_target base;
- struct pipe_resource *counter_buffer;
- VkDeviceSize counter_buffer_offset;
- uint32_t stride;
- bool counter_buffer_valid;
-};
-static inline struct zink_so_target *
-zink_so_target(struct pipe_stream_output_target *so_target)
+static inline struct zink_resource *
+zink_descriptor_surface_resource(struct zink_descriptor_surface *ds)
{
- return (struct zink_so_target *)so_target;
-}
-
-struct zink_viewport_state {
- struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS];
- struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS];
- uint8_t num_viewports;
-};
-
-
-struct zink_descriptor_surface {
- union {
- struct zink_surface *surface;
- struct zink_buffer_view *bufferview;
- };
- bool is_buffer;
-};
-
-typedef void (*pipe_draw_vbo_func)(struct pipe_context *pipe,
- const struct pipe_draw_info *info,
- unsigned drawid_offset,
- const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count_bias *draws,
- unsigned num_draws);
-
-typedef void (*pipe_launch_grid_func)(struct pipe_context *pipe, const struct pipe_grid_info *info);
-
-typedef enum {
- ZINK_NO_MULTIDRAW,
- ZINK_MULTIDRAW,
-} zink_multidraw;
-
-typedef enum {
- ZINK_NO_DYNAMIC_STATE,
- ZINK_DYNAMIC_STATE,
-} zink_dynamic_state;
-
-typedef enum {
- ZINK_NO_DYNAMIC_STATE2,
- ZINK_DYNAMIC_STATE2,
-} zink_dynamic_state2;
-
-typedef enum {
- ZINK_NO_DYNAMIC_VERTEX_INPUT,
- ZINK_DYNAMIC_VERTEX_INPUT,
-} zink_dynamic_vertex_input;
-
-struct zink_context {
- struct pipe_context base;
- struct threaded_context *tc;
- struct slab_child_pool transfer_pool;
- struct slab_child_pool transfer_pool_unsync;
- struct blitter_context *blitter;
-
- pipe_draw_vbo_func draw_vbo[2]; //batch changed
- pipe_launch_grid_func launch_grid[2]; //batch changed
-
- struct pipe_device_reset_callback reset;
-
- uint32_t curr_batch; //the current batch id
-
- simple_mtx_t batch_mtx;
- struct zink_fence *deferred_fence;
- struct zink_fence *last_fence; //the last command buffer submitted
- struct hash_table batch_states; //submitted batch states
- struct util_dynarray free_batch_states; //unused batch states
- bool oom_flush;
- bool oom_stall;
- struct zink_batch batch;
-
- unsigned shader_has_inlinable_uniforms_mask;
- unsigned inlinable_uniforms_valid_mask;
- uint32_t compute_inlinable_uniforms[MAX_INLINABLE_UNIFORMS];
-
- struct pipe_constant_buffer ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
- struct pipe_shader_buffer ssbos[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
- uint32_t writable_ssbos[PIPE_SHADER_TYPES];
- struct zink_image_view image_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
-
- struct pipe_framebuffer_state fb_state;
- struct zink_framebuffer *(*get_framebuffer)(struct zink_context*);
- void (*init_framebuffer)(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp);
- struct hash_table framebuffer_cache;
-
- struct zink_vertex_elements_state *element_state;
- struct zink_rasterizer_state *rast_state;
- struct zink_depth_stencil_alpha_state *dsa_state;
-
- struct hash_table desc_set_layouts[ZINK_DESCRIPTOR_TYPES];
- bool pipeline_changed[2]; //gfx, compute
-
- struct zink_shader *gfx_stages[ZINK_SHADER_COUNT];
- struct zink_shader *last_vertex_stage;
- bool shader_reads_drawid;
- bool shader_reads_basevertex;
- struct zink_gfx_pipeline_state gfx_pipeline_state;
- /* there are 5 gfx stages, but VS and FS are assumed to be always present,
- * thus only 3 stages need to be considered, giving 2^3 = 8 program caches.
- */
- struct hash_table program_cache[8];
- uint32_t gfx_hash;
- struct zink_gfx_program *curr_program;
-
- struct zink_descriptor_data *dd;
-
- struct zink_shader *compute_stage;
- struct zink_compute_pipeline_state compute_pipeline_state;
- struct hash_table compute_program_cache;
- struct zink_compute_program *curr_compute;
-
- unsigned shader_stages : ZINK_SHADER_COUNT; /* mask of bound gfx shader stages */
- unsigned dirty_shader_stages : 6; /* mask of changed shader stages */
- bool last_vertex_stage_dirty;
-
- struct set render_pass_state_cache;
- struct hash_table *render_pass_cache;
- bool new_swapchain;
- bool fb_changed;
- bool rp_changed;
-
- struct zink_framebuffer *framebuffer;
- struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1];
- uint16_t clears_enabled;
- uint16_t rp_clears_enabled;
- uint16_t fbfetch_outputs;
-
- VkBuffer vbufs[PIPE_MAX_ATTRIBS];
- unsigned vbuf_offsets[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
- bool vertex_buffers_dirty;
-
- void *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
-
- struct zink_viewport_state vp_state;
- bool vp_state_changed;
- bool scissor_changed;
-
- float blend_constants[4];
-
- bool sample_locations_changed;
- VkSampleLocationEXT vk_sample_locations[PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE * PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE];
- uint8_t sample_locations[2 * 4 * 8 * 16];
-
- struct pipe_stencil_ref stencil_ref;
-
- union {
- struct {
- float default_inner_level[2];
- float default_outer_level[4];
- };
- float tess_levels[6];
- };
-
- struct list_head suspended_queries;
- struct list_head primitives_generated_queries;
- bool queries_disabled, render_condition_active;
- struct {
- struct zink_query *query;
- bool inverted;
- } render_condition;
-
- struct pipe_resource *dummy_vertex_buffer;
- struct pipe_resource *dummy_xfb_buffer;
- struct pipe_surface *dummy_surface[7];
- struct zink_buffer_view *dummy_bufferview;
-
- unsigned buffer_rebind_counter;
-
- struct {
- /* descriptor info */
- VkDescriptorBufferInfo ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t push_valid;
- uint8_t num_ubos[PIPE_SHADER_TYPES];
-
- VkDescriptorBufferInfo ssbos[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
- uint8_t num_ssbos[PIPE_SHADER_TYPES];
-
- VkDescriptorImageInfo textures[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- VkBufferView tbos[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- uint8_t num_samplers[PIPE_SHADER_TYPES];
- uint8_t num_sampler_views[PIPE_SHADER_TYPES];
-
- VkDescriptorImageInfo images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
- VkBufferView texel_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
- uint8_t num_images[PIPE_SHADER_TYPES];
-
- VkDescriptorImageInfo fbfetch;
-
- struct zink_resource *descriptor_res[ZINK_DESCRIPTOR_TYPES][PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- struct zink_descriptor_surface sampler_surfaces[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- struct zink_descriptor_surface image_surfaces[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
- } di;
- struct set *need_barriers[2]; //gfx, compute
- struct set update_barriers[2][2]; //[gfx, compute][current, next]
- uint8_t barrier_set_idx[2];
- unsigned memory_barrier;
-
- uint32_t num_so_targets;
- struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_OUTPUTS];
- bool dirty_so_targets;
- bool xfb_barrier;
- bool first_frame_done;
- bool have_timelines;
-
- bool gfx_dirty;
-
- bool is_device_lost;
- bool primitive_restart;
- bool vertex_state_changed : 1;
- bool blend_state_changed : 1;
- bool rast_state_changed : 1;
- bool dsa_state_changed : 1;
- bool stencil_ref_changed : 1;
-};
-
-static inline struct zink_context *
-zink_context(struct pipe_context *context)
-{
- return (struct zink_context *)context;
+ return ds->is_buffer ?
+ zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB ? zink_resource(ds->db.pres) : zink_resource(ds->bufferview->pres) :
+ (struct zink_resource*)ds->surface->base.texture;
}
static inline bool
@@ -351,18 +71,45 @@ zink_fb_clear_enabled(const struct zink_context *ctx, unsigned idx)
return ctx->clears_enabled & (PIPE_CLEAR_COLOR0 << idx);
}
+static inline uint32_t
+zink_program_cache_stages(uint32_t stages_present)
+{
+ return (stages_present & ((1 << MESA_SHADER_TESS_CTRL) |
+ (1 << MESA_SHADER_TESS_EVAL) |
+ (1 << MESA_SHADER_GEOMETRY))) >> 1;
+}
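
A quick worked example of the index computed above, using the gl_shader_stage numbering (VERTEX=0, TESS_CTRL=1, TESS_EVAL=2, GEOMETRY=3, FRAGMENT=4):

   /* illustration only:
    *   VS+FS           -> no optional-stage bits  -> index 0
    *   VS+GS+FS        -> bit 3 set               -> 0x8 >> 1 = 4
    *   VS+TCS+TES+FS   -> bits 1 and 2 set        -> 0x6 >> 1 = 3
    * i.e. the three optional stages select one of 8 gfx program caches
    */
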
+
+static ALWAYS_INLINE bool
+zink_is_zsbuf_used(const struct zink_context *ctx)
+{
+ return ctx->blitting || tc_renderpass_info_is_zsbuf_used(&ctx->dynamic_fb.tc_info);
+}
+
+static ALWAYS_INLINE bool
+zink_is_zsbuf_write(const struct zink_context *ctx)
+{
+ if (!zink_is_zsbuf_used(ctx))
+ return false;
+ return ctx->dynamic_fb.tc_info.zsbuf_write_fs || ctx->dynamic_fb.tc_info.zsbuf_write_dsa ||
+ ctx->dynamic_fb.tc_info.zsbuf_clear || ctx->dynamic_fb.tc_info.zsbuf_clear_partial;
+}
+
void
zink_fence_wait(struct pipe_context *ctx);
void
-zink_wait_on_batch(struct zink_context *ctx, uint32_t batch_id);
-
+zink_wait_on_batch(struct zink_context *ctx, uint64_t batch_id);
+void
+zink_reset_ds3_states(struct zink_context *ctx);
bool
-zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id, bool have_lock);
-
+zink_check_batch_completion(struct zink_context *ctx, uint64_t batch_id);
+VkCommandBuffer
+zink_get_cmdbuf(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst);
+unsigned
+zink_update_rendering_info(struct zink_context *ctx);
void
zink_flush_queue(struct zink_context *ctx);
-void
+bool
zink_update_fbfetch(struct zink_context *ctx);
bool
zink_resource_access_is_write(VkAccessFlags flags);
@@ -370,56 +117,59 @@ zink_resource_access_is_write(VkAccessFlags flags);
void
zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
void
-zink_fake_buffer_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
-bool
-zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
-bool
+zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+void
zink_resource_image_barrier_init(VkImageMemoryBarrier *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
void
+zink_resource_image_barrier2_init(VkImageMemoryBarrier2 *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+void
zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res,
VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
-
+void
+zink_resource_image_barrier2(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+bool
+zink_check_unordered_transfer_access(struct zink_resource *res, unsigned level, const struct pipe_box *box);
bool
-zink_resource_needs_barrier(struct zink_resource *res, VkImageLayout layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+zink_check_valid_buffer_src_access(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size);
+void
+zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box, bool unsync);
+bool
+zink_resource_buffer_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size);
+void
+zink_synchronization_init(struct zink_screen *screen);
void
zink_update_descriptor_refs(struct zink_context *ctx, bool compute);
void
zink_init_vk_sample_locations(struct zink_context *ctx, VkSampleLocationsInfoEXT *loc);
void
-zink_begin_render_pass(struct zink_context *ctx);
-void
-zink_end_render_pass(struct zink_context *ctx);
+zink_batch_rp(struct zink_context *ctx);
-static inline void
-zink_batch_rp(struct zink_context *ctx)
-{
- if (!ctx->batch.in_rp)
- zink_begin_render_pass(ctx);
-}
+void
+zink_batch_no_rp(struct zink_context *ctx);
+void
+zink_batch_no_rp_safe(struct zink_context *ctx);
-static inline void
-zink_batch_no_rp(struct zink_context *ctx)
-{
- zink_end_render_pass(ctx);
- assert(!ctx->batch.in_rp);
-}
+VkImageView
+zink_prep_fb_attachment(struct zink_context *ctx, struct zink_surface *surf, unsigned i);
+void
+zink_update_vk_sample_locations(struct zink_context *ctx);
static inline VkPipelineStageFlags
-zink_pipeline_flags_from_pipe_stage(enum pipe_shader_type pstage)
+zink_pipeline_flags_from_pipe_stage(gl_shader_stage pstage)
{
switch (pstage) {
- case PIPE_SHADER_VERTEX:
+ case MESA_SHADER_VERTEX:
return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
- case PIPE_SHADER_FRAGMENT:
+ case MESA_SHADER_FRAGMENT:
return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
- case PIPE_SHADER_GEOMETRY:
+ case MESA_SHADER_GEOMETRY:
return VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
- case PIPE_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_CTRL:
return VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT;
- case PIPE_SHADER_TESS_EVAL:
+ case MESA_SHADER_TESS_EVAL:
return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
- case PIPE_SHADER_COMPUTE:
+ case MESA_SHADER_COMPUTE:
return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
default:
unreachable("unknown shader stage");
@@ -428,13 +178,44 @@ zink_pipeline_flags_from_pipe_stage(enum pipe_shader_type pstage)
void
zink_rebind_all_buffers(struct zink_context *ctx);
+void
+zink_rebind_all_images(struct zink_context *ctx);
void
+zink_parse_tc_info(struct zink_context *ctx);
+void
zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute);
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen);
void
zink_init_grid_functions(struct zink_context *ctx);
+struct zink_context *
+zink_tc_context_unwrap(struct pipe_context *pctx, bool threaded);
+
+void
+zink_update_barriers(struct zink_context *ctx, bool is_compute,
+ struct pipe_resource *index, struct pipe_resource *indirect, struct pipe_resource *indirect_draw_count);
+
+
+bool
+zink_cmd_debug_marker_begin(struct zink_context *ctx, VkCommandBuffer cmdbuf, const char *fmt, ...);
+void
+zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf, bool emitted);
+void
+zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
+ unsigned dst_offset, unsigned src_offset, unsigned size);
+
+VkIndirectCommandsLayoutTokenNV *
+zink_dgc_add_token(struct zink_context *ctx, VkIndirectCommandsTokenTypeNV type, void **mem);
+void
+zink_flush_dgc(struct zink_context *ctx);
+
+static ALWAYS_INLINE void
+zink_flush_dgc_if_enabled(struct zink_context *ctx)
+{
+ if (unlikely(zink_debug & ZINK_DEBUG_DGC))
+ zink_flush_dgc(ctx);
+}
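A minimal call-site sketch for the DGC helper above (illustrative only, not part of this patch; the wrapper name and its placement in the draw path are assumptions):

static void
example_emit_draw(struct zink_context *ctx)
{
   /* ... record the draw as usual ... */

   /* cheap no-op unless the DGC bit is set in zink_debug */
   zink_flush_dgc_if_enabled(ctx);
}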
#ifdef __cplusplus
}
@@ -444,9 +225,6 @@ zink_init_grid_functions(struct zink_context *ctx);
VkPipelineStageFlags
zink_pipeline_flags_from_stage(VkShaderStageFlagBits stage);
-VkShaderStageFlagBits
-zink_shader_stage(enum pipe_shader_type type);
-
struct pipe_context *
zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
@@ -455,6 +233,8 @@ zink_context_query_init(struct pipe_context *ctx);
void
zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags);
+void
+zink_blit_barriers(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst, bool whole_dst);
void
zink_blit(struct pipe_context *pctx,
@@ -482,12 +262,14 @@ zink_component_mapping(enum pipe_swizzle swizzle)
case PIPE_SWIZZLE_W: return VK_COMPONENT_SWIZZLE_A;
case PIPE_SWIZZLE_0: return VK_COMPONENT_SWIZZLE_ZERO;
case PIPE_SWIZZLE_1: return VK_COMPONENT_SWIZZLE_ONE;
- case PIPE_SWIZZLE_NONE: return VK_COMPONENT_SWIZZLE_IDENTITY; // ???
default:
unreachable("unexpected swizzle");
}
}
+void
+zink_update_shadow_samplerviews(struct zink_context *ctx, unsigned mask);
+
enum pipe_swizzle
zink_clamp_void_swizzle(const struct util_format_description *desc, enum pipe_swizzle swizzle);
@@ -496,10 +278,8 @@ zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res);
void
zink_rebind_framebuffer(struct zink_context *ctx, struct zink_resource *res);
-
void
-zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
- unsigned dst_offset, unsigned src_offset, unsigned size);
+zink_set_null_fs(struct zink_context *ctx);
void
zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
@@ -509,6 +289,11 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, stru
void
zink_destroy_buffer_view(struct zink_screen *screen, struct zink_buffer_view *buffer_view);
+struct pipe_surface *
+zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index);
+struct zink_surface *
+zink_get_dummy_surface(struct zink_context *ctx, int samples_index);
+
void
debug_describe_zink_buffer_view(char *buf, const struct zink_buffer_view *ptr);
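A rough sketch of how the new transfer-dst barrier and the relocated copy helper declared above fit together (illustrative only, not part of this patch; the wrapper name and the zero offsets are assumptions):

static void
example_full_buffer_copy(struct zink_context *ctx,
                         struct zink_resource *dst, struct zink_resource *src,
                         unsigned size)
{
   /* make dst safe to write as a transfer destination, then record the copy */
   zink_resource_buffer_transfer_dst_barrier(ctx, dst, 0, size);
   zink_copy_buffer(ctx, dst, src, 0, 0, size);
}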
diff --git a/src/gallium/drivers/zink/zink_descriptors.c b/src/gallium/drivers/zink/zink_descriptors.c
index 9a0025ce25f..790daa30330 100644
--- a/src/gallium/drivers/zink/zink_descriptors.c
+++ b/src/gallium/drivers/zink/zink_descriptors.c
@@ -1,5 +1,6 @@
/*
* Copyright © 2020 Mike Blumenkrantz
+ * Copyright © 2022 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,316 +25,17 @@
* Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
*/
-#include "tgsi/tgsi_from_mesa.h"
-
-
-
#include "zink_context.h"
+#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
+#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
-
-struct zink_descriptor_pool {
- struct pipe_reference reference;
- enum zink_descriptor_type type;
- struct hash_table *desc_sets;
- struct hash_table *free_desc_sets;
- struct util_dynarray alloc_desc_sets;
- VkDescriptorPool descpool;
- struct zink_descriptor_pool_key key;
- unsigned num_resources;
- unsigned num_sets_allocated;
- simple_mtx_t mtx;
-};
-
-struct zink_descriptor_set {
- struct zink_descriptor_pool *pool;
- struct pipe_reference reference; //incremented for batch usage
- VkDescriptorSet desc_set;
- uint32_t hash;
- bool invalid;
- bool punted;
- bool recycled;
- struct zink_descriptor_state_key key;
- struct zink_batch_usage *batch_uses;
-#ifndef NDEBUG
- /* for extra debug asserts */
- unsigned num_resources;
-#endif
- union {
- struct zink_resource_object **res_objs;
- struct {
- struct zink_descriptor_surface *surfaces;
- struct zink_sampler_state **sampler_states;
- };
- };
-};
-
-union zink_program_descriptor_refs {
- struct zink_resource **res;
- struct zink_descriptor_surface *dsurf;
- struct {
- struct zink_descriptor_surface *dsurf;
- struct zink_sampler_state **sampler_state;
- } sampler;
-};
-
-struct zink_program_descriptor_data_cached {
- struct zink_program_descriptor_data base;
- struct zink_descriptor_pool *pool[ZINK_DESCRIPTOR_TYPES];
- struct zink_descriptor_set *last_set[ZINK_DESCRIPTOR_TYPES];
- unsigned num_refs[ZINK_DESCRIPTOR_TYPES];
- union zink_program_descriptor_refs *refs[ZINK_DESCRIPTOR_TYPES];
- unsigned cache_misses[ZINK_DESCRIPTOR_TYPES];
-};
-
-
-static inline struct zink_program_descriptor_data_cached *
-pdd_cached(struct zink_program *pg)
-{
- return (struct zink_program_descriptor_data_cached*)pg->dd;
-}
-
-static bool
-batch_add_desc_set(struct zink_batch *batch, struct zink_descriptor_set *zds)
-{
- if (zink_batch_usage_matches(zds->batch_uses, batch->state) ||
- !batch_ptr_add_usage(batch, batch->state->dd->desc_sets, zds))
- return false;
- pipe_reference(NULL, &zds->reference);
- zink_batch_usage_set(&zds->batch_uses, batch->state);
- return true;
-}
-
-static void
-debug_describe_zink_descriptor_pool(char *buf, const struct zink_descriptor_pool *ptr)
-{
- sprintf(buf, "zink_descriptor_pool");
-}
-
-static inline uint32_t
-get_sampler_view_hash(const struct zink_sampler_view *sampler_view)
-{
- if (!sampler_view)
- return 0;
- return sampler_view->base.target == PIPE_BUFFER ?
- sampler_view->buffer_view->hash : sampler_view->image_view->hash;
-}
-
-static inline uint32_t
-get_image_view_hash(const struct zink_image_view *image_view)
-{
- if (!image_view || !image_view->base.resource)
- return 0;
- return image_view->base.resource->target == PIPE_BUFFER ?
- image_view->buffer_view->hash : image_view->surface->hash;
-}
-
-uint32_t
-zink_get_sampler_view_hash(struct zink_context *ctx, struct zink_sampler_view *sampler_view, bool is_buffer)
-{
- return get_sampler_view_hash(sampler_view) ? get_sampler_view_hash(sampler_view) :
- (is_buffer ? zink_screen(ctx->base.screen)->null_descriptor_hashes.buffer_view :
- zink_screen(ctx->base.screen)->null_descriptor_hashes.image_view);
-}
-
-uint32_t
-zink_get_image_view_hash(struct zink_context *ctx, struct zink_image_view *image_view, bool is_buffer)
-{
- return get_image_view_hash(image_view) ? get_image_view_hash(image_view) :
- (is_buffer ? zink_screen(ctx->base.screen)->null_descriptor_hashes.buffer_view :
- zink_screen(ctx->base.screen)->null_descriptor_hashes.image_view);
-}
-
-#ifndef NDEBUG
-static uint32_t
-get_descriptor_surface_hash(struct zink_context *ctx, struct zink_descriptor_surface *dsurf)
-{
- return dsurf->is_buffer ? (dsurf->bufferview ? dsurf->bufferview->hash : zink_screen(ctx->base.screen)->null_descriptor_hashes.buffer_view) :
- (dsurf->surface ? dsurf->surface->hash : zink_screen(ctx->base.screen)->null_descriptor_hashes.image_view);
-}
-#endif
-
-static bool
-desc_state_equal(const void *a, const void *b)
-{
- const struct zink_descriptor_state_key *a_k = (void*)a;
- const struct zink_descriptor_state_key *b_k = (void*)b;
-
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- if (a_k->exists[i] != b_k->exists[i])
- return false;
- if (a_k->exists[i] && b_k->exists[i] &&
- a_k->state[i] != b_k->state[i])
- return false;
- }
- return true;
-}
-
-static uint32_t
-desc_state_hash(const void *key)
-{
- const struct zink_descriptor_state_key *d_key = (void*)key;
- uint32_t hash = 0;
- bool first = true;
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- if (d_key->exists[i]) {
- if (!first)
- hash = XXH32(&d_key->state[i], sizeof(uint32_t), hash);
- else
- hash = d_key->state[i];
- first = false;
- }
- }
- return hash;
-}
-
-static void
-pop_desc_set_ref(struct zink_descriptor_set *zds, struct util_dynarray *refs)
-{
- size_t size = sizeof(struct zink_descriptor_reference);
- unsigned num_elements = refs->size / size;
- for (unsigned i = 0; i < num_elements; i++) {
- struct zink_descriptor_reference *ref = util_dynarray_element(refs, struct zink_descriptor_reference, i);
- if (&zds->invalid == ref->invalid) {
- memcpy(util_dynarray_element(refs, struct zink_descriptor_reference, i),
- util_dynarray_pop_ptr(refs, struct zink_descriptor_reference), size);
- break;
- }
- }
-}
-
-static void
-descriptor_set_invalidate(struct zink_descriptor_set *zds)
-{
- zds->invalid = true;
- for (unsigned i = 0; i < zds->pool->key.layout->num_descriptors; i++) {
- switch (zds->pool->type) {
- case ZINK_DESCRIPTOR_TYPE_UBO:
- case ZINK_DESCRIPTOR_TYPE_SSBO:
- if (zds->res_objs[i])
- pop_desc_set_ref(zds, &zds->res_objs[i]->desc_set_refs.refs);
- zds->res_objs[i] = NULL;
- break;
- case ZINK_DESCRIPTOR_TYPE_IMAGE:
- if (zds->surfaces[i].is_buffer) {
- if (zds->surfaces[i].bufferview)
- pop_desc_set_ref(zds, &zds->surfaces[i].bufferview->desc_set_refs.refs);
- zds->surfaces[i].bufferview = NULL;
- } else {
- if (zds->surfaces[i].surface)
- pop_desc_set_ref(zds, &zds->surfaces[i].surface->desc_set_refs.refs);
- zds->surfaces[i].surface = NULL;
- }
- break;
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- if (zds->surfaces[i].is_buffer) {
- if (zds->surfaces[i].bufferview)
- pop_desc_set_ref(zds, &zds->surfaces[i].bufferview->desc_set_refs.refs);
- zds->surfaces[i].bufferview = NULL;
- } else {
- if (zds->surfaces[i].surface)
- pop_desc_set_ref(zds, &zds->surfaces[i].surface->desc_set_refs.refs);
- zds->surfaces[i].surface = NULL;
- }
- if (zds->sampler_states[i])
- pop_desc_set_ref(zds, &zds->sampler_states[i]->desc_set_refs.refs);
- zds->sampler_states[i] = NULL;
- break;
- default:
- break;
- }
- }
-}
-
-#ifndef NDEBUG
-static void
-descriptor_pool_clear(struct hash_table *ht)
-{
- _mesa_hash_table_clear(ht, NULL);
-}
-#endif
-
-static void
-descriptor_pool_free(struct zink_screen *screen, struct zink_descriptor_pool *pool)
-{
- if (!pool)
- return;
- if (pool->descpool)
- VKSCR(DestroyDescriptorPool)(screen->dev, pool->descpool, NULL);
-
- simple_mtx_lock(&pool->mtx);
-#ifndef NDEBUG
- if (pool->desc_sets)
- descriptor_pool_clear(pool->desc_sets);
- if (pool->free_desc_sets)
- descriptor_pool_clear(pool->free_desc_sets);
-#endif
- if (pool->desc_sets)
- _mesa_hash_table_destroy(pool->desc_sets, NULL);
- if (pool->free_desc_sets)
- _mesa_hash_table_destroy(pool->free_desc_sets, NULL);
-
- simple_mtx_unlock(&pool->mtx);
- util_dynarray_fini(&pool->alloc_desc_sets);
- simple_mtx_destroy(&pool->mtx);
- ralloc_free(pool);
-}
-
-static struct zink_descriptor_pool *
-descriptor_pool_create(struct zink_screen *screen, enum zink_descriptor_type type,
- struct zink_descriptor_layout_key *layout_key, VkDescriptorPoolSize *sizes, unsigned num_type_sizes)
-{
- struct zink_descriptor_pool *pool = rzalloc(NULL, struct zink_descriptor_pool);
- if (!pool)
- return NULL;
- pipe_reference_init(&pool->reference, 1);
- pool->type = type;
- pool->key.layout = layout_key;
- pool->key.num_type_sizes = num_type_sizes;
- size_t types_size = num_type_sizes * sizeof(VkDescriptorPoolSize);
- pool->key.sizes = ralloc_size(pool, types_size);
- if (!pool->key.sizes) {
- ralloc_free(pool);
- return NULL;
- }
- memcpy(pool->key.sizes, sizes, types_size);
- simple_mtx_init(&pool->mtx, mtx_plain);
- for (unsigned i = 0; i < layout_key->num_descriptors; i++) {
- pool->num_resources += layout_key->bindings[i].descriptorCount;
- }
- pool->desc_sets = _mesa_hash_table_create(NULL, desc_state_hash, desc_state_equal);
- if (!pool->desc_sets)
- goto fail;
-
- pool->free_desc_sets = _mesa_hash_table_create(NULL, desc_state_hash, desc_state_equal);
- if (!pool->free_desc_sets)
- goto fail;
-
- util_dynarray_init(&pool->alloc_desc_sets, NULL);
-
- VkDescriptorPoolCreateInfo dpci = {0};
- dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
- dpci.pPoolSizes = sizes;
- dpci.poolSizeCount = num_type_sizes;
- dpci.flags = 0;
- dpci.maxSets = ZINK_DEFAULT_MAX_DESCS;
- if (VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &pool->descpool) != VK_SUCCESS) {
- debug_printf("vkCreateDescriptorPool failed\n");
- goto fail;
- }
-
- return pool;
-fail:
- descriptor_pool_free(screen, pool);
- return NULL;
-}
-
static VkDescriptorSetLayout
descriptor_layout_create(struct zink_screen *screen, enum zink_descriptor_type t, VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings)
{
@@ -343,16 +45,17 @@ descriptor_layout_create(struct zink_screen *screen, enum zink_descriptor_type t
dcslci.pNext = NULL;
VkDescriptorSetLayoutBindingFlagsCreateInfo fci = {0};
VkDescriptorBindingFlags flags[ZINK_MAX_DESCRIPTORS_PER_TYPE];
- if (screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) {
- dcslci.pNext = &fci;
- if (t == ZINK_DESCRIPTOR_TYPES)
- dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
- fci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO;
- fci.bindingCount = num_bindings;
- fci.pBindingFlags = flags;
- for (unsigned i = 0; i < num_bindings; i++) {
- flags[i] = 0;
- }
+ dcslci.pNext = &fci;
+ /* TODO bindless */
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && t != ZINK_DESCRIPTOR_BINDLESS)
+ dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
+ else if (t == ZINK_DESCRIPTOR_TYPE_UNIFORMS)
+ dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
+ fci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO;
+ fci.bindingCount = num_bindings;
+ fci.pBindingFlags = flags;
+ for (unsigned i = 0; i < num_bindings; i++) {
+ flags[i] = 0;
}
dcslci.bindingCount = num_bindings;
dcslci.pBindings = bindings;
@@ -367,8 +70,9 @@ descriptor_layout_create(struct zink_screen *screen, enum zink_descriptor_type t
return VK_NULL_HANDLE;
}
}
- if (VKSCR(CreateDescriptorSetLayout)(screen->dev, &dcslci, 0, &dsl) != VK_SUCCESS)
- debug_printf("vkCreateDescriptorSetLayout failed\n");
+ VkResult result = VKSCR(CreateDescriptorSetLayout)(screen->dev, &dcslci, 0, &dsl);
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkCreateDescriptorSetLayout failed (%s)", vk_Result_to_str(result));
return dsl;
}
@@ -377,8 +81,10 @@ hash_descriptor_layout(const void *key)
{
uint32_t hash = 0;
const struct zink_descriptor_layout_key *k = key;
- hash = XXH32(&k->num_descriptors, sizeof(unsigned), hash);
- hash = XXH32(k->bindings, k->num_descriptors * sizeof(VkDescriptorSetLayoutBinding), hash);
+ hash = XXH32(&k->num_bindings, sizeof(unsigned), hash);
+ /* only hash first 3 members: no holes and the rest are always constant */
+ for (unsigned i = 0; i < k->num_bindings; i++)
+ hash = XXH32(&k->bindings[i], offsetof(VkDescriptorSetLayoutBinding, stageFlags), hash);
return hash;
}
@@ -388,113 +94,158 @@ equals_descriptor_layout(const void *a, const void *b)
{
const struct zink_descriptor_layout_key *a_k = a;
const struct zink_descriptor_layout_key *b_k = b;
- return a_k->num_descriptors == b_k->num_descriptors &&
- !memcmp(a_k->bindings, b_k->bindings, a_k->num_descriptors * sizeof(VkDescriptorSetLayoutBinding));
+ return a_k->num_bindings == b_k->num_bindings &&
+ (!a_k->num_bindings || !memcmp(a_k->bindings, b_k->bindings, a_k->num_bindings * sizeof(VkDescriptorSetLayoutBinding)));
}
static struct zink_descriptor_layout *
-create_layout(struct zink_context *ctx, enum zink_descriptor_type type,
+create_layout(struct zink_screen *screen, enum zink_descriptor_type type,
VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings,
struct zink_descriptor_layout_key **layout_key)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkDescriptorSetLayout dsl = descriptor_layout_create(screen, type, bindings, MAX2(num_bindings, 1));
+ VkDescriptorSetLayout dsl = descriptor_layout_create(screen, type, bindings, num_bindings);
if (!dsl)
return NULL;
- struct zink_descriptor_layout_key *k = ralloc(ctx, struct zink_descriptor_layout_key);
- k->use_count = 0;
- k->num_descriptors = num_bindings;
- size_t bindings_size = MAX2(num_bindings, 1) * sizeof(VkDescriptorSetLayoutBinding);
- k->bindings = ralloc_size(k, bindings_size);
- if (!k->bindings) {
- ralloc_free(k);
- VKSCR(DestroyDescriptorSetLayout)(screen->dev, dsl, NULL);
- return NULL;
+ size_t bindings_size = num_bindings * sizeof(VkDescriptorSetLayoutBinding);
+ struct zink_descriptor_layout_key *k = ralloc_size(screen, sizeof(struct zink_descriptor_layout_key) + bindings_size);
+ k->num_bindings = num_bindings;
+ if (num_bindings) {
+ k->bindings = (void *)(k + 1);
+ memcpy(k->bindings, bindings, bindings_size);
}
- memcpy(k->bindings, bindings, bindings_size);
- struct zink_descriptor_layout *layout = rzalloc(ctx, struct zink_descriptor_layout);
+ struct zink_descriptor_layout *layout = rzalloc(screen, struct zink_descriptor_layout);
layout->layout = dsl;
*layout_key = k;
return layout;
}
-struct zink_descriptor_layout *
-zink_descriptor_util_layout_get(struct zink_context *ctx, enum zink_descriptor_type type,
+static struct zink_descriptor_layout *
+descriptor_util_layout_get(struct zink_screen *screen, enum zink_descriptor_type type,
VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings,
struct zink_descriptor_layout_key **layout_key)
{
uint32_t hash = 0;
struct zink_descriptor_layout_key key = {
- .num_descriptors = num_bindings,
+ .num_bindings = num_bindings,
.bindings = bindings,
};
- VkDescriptorSetLayoutBinding null_binding;
- if (!bindings) {
- null_binding.binding = 0;
- null_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- null_binding.descriptorCount = 1;
- null_binding.pImmutableSamplers = NULL;
- null_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
- VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
- VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_COMPUTE_BIT;
- key.bindings = &null_binding;
- }
-
- if (type != ZINK_DESCRIPTOR_TYPES) {
+ /* push descriptor layouts are unique and can't be reused */
+ if (type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) {
hash = hash_descriptor_layout(&key);
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->desc_set_layouts[type], hash, &key);
+ simple_mtx_lock(&screen->desc_set_layouts_lock);
+ struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&screen->desc_set_layouts[type], hash, &key);
+ simple_mtx_unlock(&screen->desc_set_layouts_lock);
if (he) {
*layout_key = (void*)he->key;
return he->data;
}
}
- struct zink_descriptor_layout *layout = create_layout(ctx, type, bindings ? bindings : &null_binding, num_bindings, layout_key);
- if (layout && type != ZINK_DESCRIPTOR_TYPES) {
- _mesa_hash_table_insert_pre_hashed(&ctx->desc_set_layouts[type], hash, *layout_key, layout);
+ struct zink_descriptor_layout *layout = create_layout(screen, type, bindings, num_bindings, layout_key);
+ if (layout && type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) {
+ simple_mtx_lock(&screen->desc_set_layouts_lock);
+ _mesa_hash_table_insert_pre_hashed(&screen->desc_set_layouts[type], hash, *layout_key, layout);
+ simple_mtx_unlock(&screen->desc_set_layouts_lock);
}
return layout;
}
+
+static uint32_t
+hash_descriptor_pool_key(const void *key)
+{
+ uint32_t hash = 0;
+ const struct zink_descriptor_pool_key *k = key;
+ hash = XXH32(&k->layout, sizeof(void*), hash);
+ for (unsigned i = 0; i < k->num_type_sizes; i++)
+ hash = XXH32(&k->sizes[i], sizeof(VkDescriptorPoolSize), hash);
+
+ return hash;
+}
+
+static bool
+equals_descriptor_pool_key(const void *a, const void *b)
+{
+ const struct zink_descriptor_pool_key *a_k = a;
+ const struct zink_descriptor_pool_key *b_k = b;
+ const unsigned a_num_type_sizes = a_k->num_type_sizes;
+ const unsigned b_num_type_sizes = b_k->num_type_sizes;
+ return a_k->layout == b_k->layout &&
+ a_num_type_sizes == b_num_type_sizes &&
+ !memcmp(a_k->sizes, b_k->sizes, b_num_type_sizes * sizeof(VkDescriptorPoolSize));
+}
+
+static struct zink_descriptor_pool_key *
+descriptor_util_pool_key_get(struct zink_screen *screen, enum zink_descriptor_type type,
+ struct zink_descriptor_layout_key *layout_key,
+ VkDescriptorPoolSize *sizes, unsigned num_type_sizes)
+{
+ uint32_t hash = 0;
+ struct zink_descriptor_pool_key key;
+ key.num_type_sizes = num_type_sizes;
+ /* push descriptor pools can't be shared/reused by other types */
+ if (type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) {
+ key.layout = layout_key;
+ memcpy(key.sizes, sizes, num_type_sizes * sizeof(VkDescriptorPoolSize));
+ hash = hash_descriptor_pool_key(&key);
+ simple_mtx_lock(&screen->desc_pool_keys_lock);
+ struct set_entry *he = _mesa_set_search_pre_hashed(&screen->desc_pool_keys[type], hash, &key);
+ simple_mtx_unlock(&screen->desc_pool_keys_lock);
+ if (he)
+ return (void*)he->key;
+ }
+
+ struct zink_descriptor_pool_key *pool_key = rzalloc(screen, struct zink_descriptor_pool_key);
+ pool_key->layout = layout_key;
+ pool_key->num_type_sizes = num_type_sizes;
+ assert(pool_key->num_type_sizes);
+ memcpy(pool_key->sizes, sizes, num_type_sizes * sizeof(VkDescriptorPoolSize));
+ if (type != ZINK_DESCRIPTOR_TYPE_UNIFORMS) {
+ simple_mtx_lock(&screen->desc_pool_keys_lock);
+ _mesa_set_add_pre_hashed(&screen->desc_pool_keys[type], hash, pool_key);
+ pool_key->id = screen->desc_pool_keys[type].entries - 1;
+ simple_mtx_unlock(&screen->desc_pool_keys_lock);
+ }
+ return pool_key;
+}
+
static void
init_push_binding(VkDescriptorSetLayoutBinding *binding, unsigned i, VkDescriptorType type)
{
- binding->binding = tgsi_processor_to_shader_stage(i);
+ binding->binding = i;
binding->descriptorType = type;
binding->descriptorCount = 1;
- binding->stageFlags = zink_shader_stage(i);
+ binding->stageFlags = mesa_to_vk_shader_stage(i);
binding->pImmutableSamplers = NULL;
}
static VkDescriptorType
get_push_types(struct zink_screen *screen, enum zink_descriptor_type *dsl_type)
{
- *dsl_type = screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY &&
- screen->info.have_KHR_push_descriptor ? ZINK_DESCRIPTOR_TYPES : ZINK_DESCRIPTOR_TYPE_UBO;
- return screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY ?
- VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
+ *dsl_type = screen->info.have_KHR_push_descriptor ? ZINK_DESCRIPTOR_TYPE_UNIFORMS : ZINK_DESCRIPTOR_TYPE_UBO;
+ return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
}
static struct zink_descriptor_layout *
create_gfx_layout(struct zink_context *ctx, struct zink_descriptor_layout_key **layout_key, bool fbfetch)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkDescriptorSetLayoutBinding bindings[PIPE_SHADER_TYPES];
+ VkDescriptorSetLayoutBinding bindings[MESA_SHADER_STAGES];
enum zink_descriptor_type dsl_type;
VkDescriptorType vktype = get_push_types(screen, &dsl_type);
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++)
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
init_push_binding(&bindings[i], i, vktype);
if (fbfetch) {
- bindings[ZINK_SHADER_COUNT].binding = ZINK_FBFETCH_BINDING;
- bindings[ZINK_SHADER_COUNT].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
- bindings[ZINK_SHADER_COUNT].descriptorCount = 1;
- bindings[ZINK_SHADER_COUNT].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
- bindings[ZINK_SHADER_COUNT].pImmutableSamplers = NULL;
+ bindings[ZINK_GFX_SHADER_COUNT].binding = ZINK_FBFETCH_BINDING;
+ bindings[ZINK_GFX_SHADER_COUNT].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+ bindings[ZINK_GFX_SHADER_COUNT].descriptorCount = 1;
+ bindings[ZINK_GFX_SHADER_COUNT].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
+ bindings[ZINK_GFX_SHADER_COUNT].pImmutableSamplers = NULL;
}
- return create_layout(ctx, dsl_type, bindings, fbfetch ? ARRAY_SIZE(bindings) : ARRAY_SIZE(bindings) - 1, layout_key);
+ return create_layout(screen, dsl_type, bindings, fbfetch ? ARRAY_SIZE(bindings) : ARRAY_SIZE(bindings) - 1, layout_key);
}
bool
@@ -504,106 +255,42 @@ zink_descriptor_util_push_layouts_get(struct zink_context *ctx, struct zink_desc
VkDescriptorSetLayoutBinding compute_binding;
enum zink_descriptor_type dsl_type;
VkDescriptorType vktype = get_push_types(screen, &dsl_type);
- init_push_binding(&compute_binding, PIPE_SHADER_COMPUTE, vktype);
+ init_push_binding(&compute_binding, MESA_SHADER_COMPUTE, vktype);
dsls[0] = create_gfx_layout(ctx, &layout_keys[0], false);
- dsls[1] = create_layout(ctx, dsl_type, &compute_binding, 1, &layout_keys[1]);
+ dsls[1] = create_layout(screen, dsl_type, &compute_binding, 1, &layout_keys[1]);
return dsls[0] && dsls[1];
}
-void
-zink_descriptor_util_init_null_set(struct zink_context *ctx, VkDescriptorSet desc_set)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkDescriptorBufferInfo push_info;
- VkWriteDescriptorSet push_wd;
- push_wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
- push_wd.pNext = NULL;
- push_wd.dstBinding = 0;
- push_wd.dstArrayElement = 0;
- push_wd.descriptorCount = 1;
- push_wd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- push_wd.dstSet = desc_set;
- push_wd.pBufferInfo = &push_info;
- push_info.buffer = screen->info.rb2_feats.nullDescriptor ?
- VK_NULL_HANDLE :
- zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
- push_info.offset = 0;
- push_info.range = VK_WHOLE_SIZE;
- VKSCR(UpdateDescriptorSets)(screen->dev, 1, &push_wd, 0, NULL);
-}
-
VkImageLayout
-zink_descriptor_util_image_layout_eval(const struct zink_resource *res, bool is_compute)
-{
- return res->image_bind_count[is_compute] ? VK_IMAGE_LAYOUT_GENERAL :
- res->aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ?
- //Vulkan-Docs#1490
- //(res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL :
- //res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL :
- (res->aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
- res->aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
- VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) :
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-}
-
-static uint32_t
-hash_descriptor_pool(const void *key)
-{
- uint32_t hash = 0;
- const struct zink_descriptor_pool_key *k = key;
- hash = XXH32(&k->num_type_sizes, sizeof(unsigned), hash);
- hash = XXH32(&k->layout, sizeof(k->layout), hash);
- hash = XXH32(k->sizes, k->num_type_sizes * sizeof(VkDescriptorPoolSize), hash);
-
- return hash;
-}
-
-static bool
-equals_descriptor_pool(const void *a, const void *b)
-{
- const struct zink_descriptor_pool_key *a_k = a;
- const struct zink_descriptor_pool_key *b_k = b;
- return a_k->num_type_sizes == b_k->num_type_sizes &&
- a_k->layout == b_k->layout &&
- !memcmp(a_k->sizes, b_k->sizes, a_k->num_type_sizes * sizeof(VkDescriptorPoolSize));
-}
-
-static struct zink_descriptor_pool *
-descriptor_pool_get(struct zink_context *ctx, enum zink_descriptor_type type,
- struct zink_descriptor_layout_key *layout_key, VkDescriptorPoolSize *sizes, unsigned num_type_sizes)
+zink_descriptor_util_image_layout_eval(const struct zink_context *ctx, const struct zink_resource *res, bool is_compute)
{
- uint32_t hash = 0;
- if (type != ZINK_DESCRIPTOR_TYPES) {
- struct zink_descriptor_pool_key key = {
- .layout = layout_key,
- .num_type_sizes = num_type_sizes,
- .sizes = sizes,
- };
-
- hash = hash_descriptor_pool(&key);
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(ctx->dd->descriptor_pools[type], hash, &key);
- if (he)
- return (void*)he->data;
+ if (res->bindless[0] || res->bindless[1]) {
+ /* bindless needs most permissive layout */
+ if (res->image_bind_count[0] || res->image_bind_count[1])
+ return VK_IMAGE_LAYOUT_GENERAL;
+ return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
- struct zink_descriptor_pool *pool = descriptor_pool_create(zink_screen(ctx->base.screen), type, layout_key, sizes, num_type_sizes);
- if (type != ZINK_DESCRIPTOR_TYPES)
- _mesa_hash_table_insert_pre_hashed(ctx->dd->descriptor_pools[type], hash, &pool->key, pool);
- return pool;
-}
-
-static bool
-get_invalidated_desc_set(struct zink_descriptor_set *zds)
-{
- if (!zds->invalid)
- return false;
- return p_atomic_read(&zds->reference.count) == 1;
+ if (res->image_bind_count[is_compute])
+ return VK_IMAGE_LAYOUT_GENERAL;
+ if (!is_compute && res->fb_bind_count && res->sampler_bind_count[0]) {
+ /* feedback loop */
+ if (!(res->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || zink_is_zsbuf_write(ctx)) {
+ if (zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout)
+ return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
+ return VK_IMAGE_LAYOUT_GENERAL;
+ }
+ }
+ if (res->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+ return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+ return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
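Caller-side sketch and a rough summary of the precedence implemented by the new layout evaluation above (illustrative only; the surrounding variables are assumed):

VkImageLayout layout = zink_descriptor_util_image_layout_eval(ctx, res, is_compute);
/* roughly, in order:
 *   bindless: GENERAL if bound as a shader image anywhere, else SHADER_READ_ONLY_OPTIMAL
 *   bound as a shader image for this stage      -> GENERAL
 *   framebuffer + sampler bind (feedback loop)  -> ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
 *                                                  or GENERAL without the EXT layout
 *   depth/stencil attachment usage              -> DEPTH_STENCIL_READ_ONLY_OPTIMAL
 *   everything else                             -> SHADER_READ_ONLY_OPTIMAL
 */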
bool
zink_descriptor_util_alloc_sets(struct zink_screen *screen, VkDescriptorSetLayout dsl, VkDescriptorPool pool, VkDescriptorSet *sets, unsigned num_sets)
{
VkDescriptorSetAllocateInfo dsai;
- VkDescriptorSetLayout *layouts = alloca(sizeof(*layouts) * num_sets);
+ VkDescriptorSetLayout layouts[100];
+ assert(num_sets <= ARRAY_SIZE(layouts));
memset((void *)&dsai, 0, sizeof(dsai));
dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
dsai.pNext = NULL;
@@ -613,1142 +300,1559 @@ zink_descriptor_util_alloc_sets(struct zink_screen *screen, VkDescriptorSetLayou
layouts[i] = dsl;
dsai.pSetLayouts = layouts;
- if (VKSCR(AllocateDescriptorSets)(screen->dev, &dsai, sets) != VK_SUCCESS) {
- debug_printf("ZINK: %" PRIu64 " failed to allocate descriptor set :/\n", (uint64_t)dsl);
+ VkResult result = VKSCR(AllocateDescriptorSets)(screen->dev, &dsai, sets);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: %" PRIu64 " failed to allocate descriptor set :/ (%s)", (uint64_t)dsl, vk_Result_to_str(result));
return false;
}
return true;
}
-unsigned
-zink_descriptor_program_num_sizes(struct zink_program *pg, enum zink_descriptor_type type)
+static void
+init_db_template_entry(struct zink_screen *screen, struct zink_shader *shader, enum zink_descriptor_type type,
+ unsigned idx, struct zink_descriptor_template *entry, unsigned *entry_idx)
+{
+ int index = shader->bindings[type][idx].index;
+ gl_shader_stage stage = clamp_stage(&shader->info);
+ entry->count = shader->bindings[type][idx].size;
+
+ switch (shader->bindings[type][idx].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.db.ubos[stage][index]);
+ entry->stride = sizeof(VkDescriptorAddressInfoEXT);
+ entry->db_size = screen->info.db_props.robustUniformBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ entry->offset = offsetof(struct zink_context, di.textures[stage][index]);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ entry->db_size = screen->info.db_props.combinedImageSamplerDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ entry->offset = offsetof(struct zink_context, di.textures[stage][index]);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ entry->db_size = screen->info.db_props.sampledImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ entry->offset = offsetof(struct zink_context, di.textures[stage][index]);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ entry->db_size = screen->info.db_props.samplerDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.db.tbos[stage][index]);
+ entry->stride = sizeof(VkDescriptorAddressInfoEXT);
+ entry->db_size = screen->info.db_props.robustUniformTexelBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.db.ssbos[stage][index]);
+ entry->stride = sizeof(VkDescriptorAddressInfoEXT);
+ entry->db_size = screen->info.db_props.robustStorageBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ entry->offset = offsetof(struct zink_context, di.images[stage][index]);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ entry->db_size = screen->info.db_props.storageImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.db.texel_images[stage][index]);
+ entry->stride = sizeof(VkDescriptorAddressInfoEXT);
+ entry->db_size = screen->info.db_props.robustStorageTexelBufferDescriptorSize;
+ break;
+ default:
+ unreachable("unknown type");
+ }
+ (*entry_idx)++;
+}
+
+static void
+init_template_entry(struct zink_shader *shader, enum zink_descriptor_type type,
+ unsigned idx, VkDescriptorUpdateTemplateEntry *entry, unsigned *entry_idx)
+{
+ int index = shader->bindings[type][idx].index;
+ gl_shader_stage stage = clamp_stage(&shader->info);
+ entry->dstArrayElement = 0;
+ entry->dstBinding = shader->bindings[type][idx].binding;
+ entry->descriptorCount = shader->bindings[type][idx].size;
+ if (shader->bindings[type][idx].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
+ /* filter out DYNAMIC type here since this is just the uniform set */
+ entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ else
+ entry->descriptorType = shader->bindings[type][idx].type;
+ switch (shader->bindings[type][idx].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ entry->offset = offsetof(struct zink_context, di.t.ubos[stage][index]);
+ entry->stride = sizeof(VkDescriptorBufferInfo);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ entry->offset = offsetof(struct zink_context, di.textures[stage][index]);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.t.tbos[stage][index]);
+ entry->stride = sizeof(VkBufferView);
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.t.ssbos[stage][index]);
+ entry->stride = sizeof(VkDescriptorBufferInfo);
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ entry->offset = offsetof(struct zink_context, di.images[stage][index]);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ entry->offset = offsetof(struct zink_context, di.t.texel_images[stage][index]);
+ entry->stride = sizeof(VkBufferView);
+ break;
+ default:
+ unreachable("unknown type");
+ }
+ (*entry_idx)++;
+}
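Why the entries above carry offsetof() values into struct zink_context: the context pointer itself can be passed as pData when the template is applied, so each entry's offset/stride walks the di.* arrays in place. A hedged sketch of that update call (the dispatch macro usage and local names are assumptions; vkUpdateDescriptorSetWithTemplate takes device, set, template, pData):

/* 'set' is the VkDescriptorSet being filled, 't' the template built from these entries */
VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, set, t, ctx);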
+
+static void
+init_program_db(struct zink_screen *screen, struct zink_program *pg, enum zink_descriptor_type type, VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings, VkDescriptorSetLayout dsl)
+{
+ VkDeviceSize val;
+ VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, dsl, &val);
+ pg->dd.db_size[type] = val;
+ pg->dd.db_offset[type] = rzalloc_array(pg, uint32_t, num_bindings);
+ for (unsigned i = 0; i < num_bindings; i++) {
+ VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, dsl, bindings[i].binding, &val);
+ pg->dd.db_offset[type][i] = val;
+ }
+}
+
+static uint16_t
+descriptor_program_num_sizes(VkDescriptorPoolSize *sizes, enum zink_descriptor_type type)
{
switch (type) {
case ZINK_DESCRIPTOR_TYPE_UBO:
- return 1;
+ return !!sizes[ZDS_INDEX_UBO].descriptorCount;
case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- return !!pg->dd->sizes[ZDS_INDEX_COMBINED_SAMPLER].descriptorCount +
- !!pg->dd->sizes[ZDS_INDEX_UNIFORM_TEXELS].descriptorCount;
+ return !!sizes[ZDS_INDEX_COMBINED_SAMPLER].descriptorCount +
+ !!sizes[ZDS_INDEX_UNIFORM_TEXELS].descriptorCount +
+ !!sizes[ZDS_INDEX_SAMPLER].descriptorCount;
case ZINK_DESCRIPTOR_TYPE_SSBO:
- return 1;
+ return !!sizes[ZDS_INDEX_STORAGE_BUFFER].descriptorCount;
case ZINK_DESCRIPTOR_TYPE_IMAGE:
- return !!pg->dd->sizes[ZDS_INDEX_STORAGE_IMAGE].descriptorCount +
- !!pg->dd->sizes[ZDS_INDEX_STORAGE_TEXELS].descriptorCount;
+ return !!sizes[ZDS_INDEX_STORAGE_IMAGE].descriptorCount +
+ !!sizes[ZDS_INDEX_STORAGE_TEXELS].descriptorCount;
default: break;
}
unreachable("unknown type");
}
-static struct zink_descriptor_set *
-allocate_desc_set(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, unsigned descs_used, bool is_compute)
+static uint16_t
+descriptor_program_num_sizes_compact(VkDescriptorPoolSize *sizes, unsigned desc_set)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- bool push_set = type == ZINK_DESCRIPTOR_TYPES;
- struct zink_descriptor_pool *pool = push_set ? ctx->dd->push_pool[is_compute] : pdd_cached(pg)->pool[type];
-#define DESC_BUCKET_FACTOR 10
- unsigned bucket_size = pool->key.layout->num_descriptors ? DESC_BUCKET_FACTOR : 1;
- if (pool->key.layout->num_descriptors) {
- for (unsigned desc_factor = DESC_BUCKET_FACTOR; desc_factor < descs_used; desc_factor *= DESC_BUCKET_FACTOR)
- bucket_size = desc_factor;
- }
- /* never grow more than this many at a time */
- bucket_size = MIN2(bucket_size, ZINK_DEFAULT_MAX_DESCS);
- VkDescriptorSet *desc_set = alloca(sizeof(*desc_set) * bucket_size);
- if (!zink_descriptor_util_alloc_sets(screen, push_set ? ctx->dd->push_dsl[is_compute]->layout : pg->dsl[type + 1], pool->descpool, desc_set, bucket_size))
- return VK_NULL_HANDLE;
-
- struct zink_descriptor_set *alloc = ralloc_array(pool, struct zink_descriptor_set, bucket_size);
- assert(alloc);
- unsigned num_resources = pool->num_resources;
- struct zink_resource_object **res_objs = NULL;
- void **samplers = NULL;
- struct zink_descriptor_surface *surfaces = NULL;
- switch (type) {
+ switch (desc_set) {
+ case ZINK_DESCRIPTOR_TYPE_UBO:
+ return !!sizes[ZDS_INDEX_COMP_UBO].descriptorCount + !!sizes[ZDS_INDEX_COMP_STORAGE_BUFFER].descriptorCount;
case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- samplers = rzalloc_array(pool, void*, num_resources * bucket_size);
- assert(samplers);
- FALLTHROUGH;
+ return !!sizes[ZDS_INDEX_COMP_COMBINED_SAMPLER].descriptorCount +
+ !!sizes[ZDS_INDEX_COMP_UNIFORM_TEXELS].descriptorCount +
+ !!sizes[ZDS_INDEX_COMP_SAMPLER].descriptorCount +
+ !!sizes[ZDS_INDEX_COMP_STORAGE_IMAGE].descriptorCount +
+ !!sizes[ZDS_INDEX_COMP_STORAGE_TEXELS].descriptorCount;
+ case ZINK_DESCRIPTOR_TYPE_SSBO:
case ZINK_DESCRIPTOR_TYPE_IMAGE:
- surfaces = rzalloc_array(pool, struct zink_descriptor_surface, num_resources * bucket_size);
- assert(surfaces);
- break;
- default:
- res_objs = rzalloc_array(pool, struct zink_resource_object*, num_resources * bucket_size);
- assert(res_objs);
- break;
- }
- for (unsigned i = 0; i < bucket_size; i ++) {
- struct zink_descriptor_set *zds = &alloc[i];
- pipe_reference_init(&zds->reference, 1);
- zds->pool = pool;
- zds->hash = 0;
- zds->batch_uses = NULL;
- zds->invalid = true;
- zds->punted = zds->recycled = false;
-#ifndef NDEBUG
- zds->num_resources = num_resources;
-#endif
- switch (type) {
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- zds->sampler_states = (struct zink_sampler_state**)&samplers[i * pool->key.layout->num_descriptors];
- FALLTHROUGH;
- case ZINK_DESCRIPTOR_TYPE_IMAGE:
- zds->surfaces = &surfaces[i * pool->key.layout->num_descriptors];
- break;
- default:
- zds->res_objs = (struct zink_resource_object**)&res_objs[i * pool->key.layout->num_descriptors];
- break;
- }
- zds->desc_set = desc_set[i];
- if (i > 0)
- util_dynarray_append(&pool->alloc_desc_sets, struct zink_descriptor_set *, zds);
+ default: break;
}
- pool->num_sets_allocated += bucket_size;
- return alloc;
+ unreachable("unknown type");
}
-static void
-populate_zds_key(struct zink_context *ctx, enum zink_descriptor_type type, bool is_compute,
- struct zink_descriptor_state_key *key, uint32_t push_usage)
+/* create all the descriptor objects for a program:
+ * called during program creation
+ * may be called from threads (no unsafe ctx use!)
+ */
+bool
+zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg)
{
- if (is_compute) {
- for (unsigned i = 1; i < ZINK_SHADER_COUNT; i++)
- key->exists[i] = false;
- key->exists[0] = true;
- if (type == ZINK_DESCRIPTOR_TYPES)
- key->state[0] = ctx->dd->push_state[is_compute];
- else
- key->state[0] = ctx->dd->descriptor_states[is_compute].state[type];
- } else if (type == ZINK_DESCRIPTOR_TYPES) {
- /* gfx only */
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- if (push_usage & BITFIELD_BIT(i)) {
- key->exists[i] = true;
- key->state[i] = ctx->dd->gfx_push_state[i];
- } else
- key->exists[i] = false;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ VkDescriptorSetLayoutBinding bindings[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES * 64];
+ VkDescriptorUpdateTemplateEntry entries[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES * 64];
+ unsigned num_bindings[ZINK_DESCRIPTOR_BASE_TYPES] = {0};
+ uint8_t has_bindings = 0;
+ unsigned push_count = 0;
+ uint16_t num_type_sizes[ZINK_DESCRIPTOR_BASE_TYPES];
+ VkDescriptorPoolSize sizes[ZDS_INDEX_MAX] = {0}; //zink_descriptor_size_index
+
+ struct zink_shader **stages;
+ if (pg->is_compute)
+ stages = &((struct zink_compute_program*)pg)->shader;
+ else
+ stages = ((struct zink_gfx_program*)pg)->shaders;
+
+ if (!pg->is_compute && stages[MESA_SHADER_FRAGMENT]->info.fs.uses_fbfetch_output) {
+ push_count = 1;
+ pg->dd.fbfetch = true;
+ }
+
+ unsigned entry_idx[ZINK_DESCRIPTOR_BASE_TYPES] = {0};
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ unsigned desc_set_size[ZINK_DESCRIPTOR_BASE_TYPES];
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
+ desc_set_size[i] = zink_program_num_bindings_typed(pg, i);
+ if (screen->compact_descriptors) {
+ desc_set_size[ZINK_DESCRIPTOR_TYPE_UBO] += desc_set_size[ZINK_DESCRIPTOR_TYPE_SSBO];
+ desc_set_size[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] += desc_set_size[ZINK_DESCRIPTOR_TYPE_IMAGE];
+ desc_set_size[ZINK_DESCRIPTOR_TYPE_SSBO] = 0;
+ desc_set_size[ZINK_DESCRIPTOR_TYPE_IMAGE] = 0;
}
- } else {
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- key->exists[i] = ctx->dd->gfx_descriptor_states[i].valid[type];
- key->state[i] = ctx->dd->gfx_descriptor_states[i].state[type];
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ if (desc_set_size[i])
+ pg->dd.db_template[i] = rzalloc_array(pg, struct zink_descriptor_template, desc_set_size[i]);
}
}
-}
-static void
-punt_invalid_set(struct zink_descriptor_set *zds, struct hash_entry *he)
-{
- /* this is no longer usable, so we punt it for now until it gets recycled */
- assert(!zds->recycled);
- if (!he)
- he = _mesa_hash_table_search_pre_hashed(zds->pool->desc_sets, zds->hash, &zds->key);
- _mesa_hash_table_remove(zds->pool->desc_sets, he);
- zds->punted = true;
-}
+ unsigned num_shaders = pg->is_compute ? 1 : ZINK_GFX_SHADER_COUNT;
+ bool have_push = screen->info.have_KHR_push_descriptor;
+ /* iterate over the shaders and generate binding/layout/template structs */
+ for (int i = 0; i < num_shaders; i++) {
+ struct zink_shader *shader = stages[i];
+ if (!shader)
+ continue;
-static struct zink_descriptor_set *
-zink_descriptor_set_get(struct zink_context *ctx,
- enum zink_descriptor_type type,
- bool is_compute,
- bool *cache_hit)
-{
- *cache_hit = false;
- struct zink_descriptor_set *zds;
- struct zink_program *pg = is_compute ? (struct zink_program *)ctx->curr_compute : (struct zink_program *)ctx->curr_program;
- struct zink_batch *batch = &ctx->batch;
- bool push_set = type == ZINK_DESCRIPTOR_TYPES;
- struct zink_descriptor_pool *pool = push_set ? ctx->dd->push_pool[is_compute] : pdd_cached(pg)->pool[type];
- unsigned descs_used = 1;
- assert(type <= ZINK_DESCRIPTOR_TYPES);
-
- assert(pool->key.layout->num_descriptors);
- uint32_t hash = push_set ? ctx->dd->push_state[is_compute] :
- ctx->dd->descriptor_states[is_compute].state[type];
-
- struct zink_descriptor_set *last_set = push_set ? ctx->dd->last_set[is_compute] : pdd_cached(pg)->last_set[type];
- /* if the current state hasn't changed since the last time it was used,
- * it's impossible for this set to not be valid, which means that an
- * early return here can be done safely and with no locking
- */
- if (last_set && ((push_set && !ctx->dd->changed[is_compute][ZINK_DESCRIPTOR_TYPES]) ||
- (!push_set && !ctx->dd->changed[is_compute][type]))) {
- *cache_hit = true;
- return last_set;
- }
-
- struct zink_descriptor_state_key key;
- populate_zds_key(ctx, type, is_compute, &key, pg->dd->push_usage);
-
- simple_mtx_lock(&pool->mtx);
- if (last_set && last_set->hash == hash && desc_state_equal(&last_set->key, &key)) {
- zds = last_set;
- *cache_hit = !zds->invalid;
- if (zds->recycled) {
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->free_desc_sets, hash, &key);
- if (he)
- _mesa_hash_table_remove(pool->free_desc_sets, he);
- zds->recycled = false;
+ gl_shader_stage stage = clamp_stage(&shader->info);
+ VkShaderStageFlagBits stage_flags = mesa_to_vk_shader_stage(stage);
+ /* uniform ubos handled in push */
+ if (shader->has_uniforms) {
+ pg->dd.push_usage |= BITFIELD64_BIT(stage);
+ push_count++;
}
- if (zds->invalid) {
- if (zink_batch_usage_exists(zds->batch_uses))
- punt_invalid_set(zds, NULL);
- else
- /* this set is guaranteed to be in pool->alloc_desc_sets */
- goto skip_hash_tables;
- zds = NULL;
+ for (int j = 0; j < ZINK_DESCRIPTOR_BASE_TYPES; j++) {
+ unsigned desc_type = screen->desc_set_id[j] - 1;
+ for (int k = 0; k < shader->num_bindings[j]; k++) {
+ assert(num_bindings[desc_type] < ARRAY_SIZE(bindings[desc_type]));
+ VkDescriptorSetLayoutBinding *binding = &bindings[desc_type][num_bindings[desc_type]];
+ binding->binding = shader->bindings[j][k].binding;
+ binding->descriptorType = shader->bindings[j][k].type;
+ binding->descriptorCount = shader->bindings[j][k].size;
+ binding->stageFlags = stage_flags;
+ binding->pImmutableSamplers = NULL;
+
+ unsigned idx = screen->compact_descriptors ? zink_vktype_to_size_idx_comp(shader->bindings[j][k].type) :
+ zink_vktype_to_size_idx(shader->bindings[j][k].type);
+ sizes[idx].descriptorCount += shader->bindings[j][k].size;
+ sizes[idx].type = shader->bindings[j][k].type;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ init_db_template_entry(screen, shader, j, k, &pg->dd.db_template[desc_type][entry_idx[desc_type]], &entry_idx[desc_type]);
+ else
+ init_template_entry(shader, j, k, &entries[desc_type][entry_idx[desc_type]], &entry_idx[desc_type]);
+ num_bindings[desc_type]++;
+ has_bindings |= BITFIELD_BIT(desc_type);
+ }
+ num_type_sizes[desc_type] = screen->compact_descriptors ?
+ descriptor_program_num_sizes_compact(sizes, desc_type) :
+ descriptor_program_num_sizes(sizes, j);
}
- if (zds)
- goto out;
- }
-
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->desc_sets, hash, &key);
- bool recycled = false, punted = false;
- if (he) {
- zds = (void*)he->data;
- if (zds->invalid && zink_batch_usage_exists(zds->batch_uses)) {
- punt_invalid_set(zds, he);
- zds = NULL;
- punted = true;
- }
+ pg->dd.bindless |= shader->bindless;
+ }
+ pg->dd.binding_usage = has_bindings;
+ if (!has_bindings && !push_count && !pg->dd.bindless) {
+ pg->layout = zink_pipeline_layout_create(screen, pg->dsl, pg->num_dsl, pg->is_compute, 0);
+ if (pg->layout)
+ pg->compat_id = _mesa_hash_data(pg->dsl, pg->num_dsl * sizeof(pg->dsl[0]));
+ return !!pg->layout;
}
- if (!he) {
- he = _mesa_hash_table_search_pre_hashed(pool->free_desc_sets, hash, &key);
- recycled = true;
- }
- if (he && !punted) {
- zds = (void*)he->data;
- *cache_hit = !zds->invalid;
- if (recycled) {
- /* need to migrate this entry back to the in-use hash */
- _mesa_hash_table_remove(pool->free_desc_sets, he);
- goto out;
+
+ pg->dsl[pg->num_dsl++] = push_count ? ctx->dd.push_dsl[pg->is_compute]->layout : ctx->dd.dummy_dsl->layout;
+ /* iterate over the found descriptor types and create layouts / pool keys */
+ if (has_bindings) {
+ for (unsigned i = 0; i < ARRAY_SIZE(sizes); i++)
+ sizes[i].descriptorCount *= MAX_LAZY_DESCRIPTORS;
+ u_foreach_bit(desc_type, has_bindings) {
+ /* descriptor sets must be bound contiguously, so add null sets for any that are "missing" */
+ for (unsigned i = 0; i < desc_type; i++) {
+ /* push set is always 0 */
+ if (!pg->dsl[i + 1]) {
+ /* inject a null dsl */
+ pg->dsl[pg->num_dsl++] = ctx->dd.dummy_dsl->layout;
+ pg->dd.binding_usage |= BITFIELD_BIT(i);
+ }
+ }
+ struct zink_descriptor_layout_key *key;
+ pg->dd.layouts[pg->num_dsl] = descriptor_util_layout_get(screen, desc_type, bindings[desc_type], num_bindings[desc_type], &key);
+ unsigned idx = screen->compact_descriptors ? zink_descriptor_type_to_size_idx_comp(desc_type) :
+ zink_descriptor_type_to_size_idx(desc_type);
+ /* some sets can have multiple descriptor types: ensure the size arrays for these types are contiguous for creating the pool key */
+ VkDescriptorPoolSize *sz = &sizes[idx];
+ VkDescriptorPoolSize sz2[5];
+ if (screen->compact_descriptors || (pg->is_compute && stages[0]->info.stage == MESA_SHADER_KERNEL)) {
+ unsigned found = 0;
+ while (found < num_type_sizes[desc_type]) {
+ if (sz->descriptorCount) {
+ memcpy(&sz2[found], sz, sizeof(VkDescriptorPoolSize));
+ found++;
+ }
+ sz++;
+ }
+ sz = sz2;
+ } else {
+ if (!sz->descriptorCount)
+ sz++;
+ }
+ pg->dd.pool_key[desc_type] = descriptor_util_pool_key_get(screen, desc_type, key, sz, num_type_sizes[desc_type]);
+ pg->dd.pool_key[desc_type]->use_count++;
+ pg->dsl[pg->num_dsl] = pg->dd.layouts[pg->num_dsl]->layout;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ init_program_db(screen, pg, desc_type, bindings[desc_type], num_bindings[desc_type], pg->dsl[pg->num_dsl]);
+ pg->num_dsl++;
}
- goto quick_out;
- }
-skip_hash_tables:
- if (util_dynarray_num_elements(&pool->alloc_desc_sets, struct zink_descriptor_set *)) {
- /* grab one off the allocated array */
- zds = util_dynarray_pop(&pool->alloc_desc_sets, struct zink_descriptor_set *);
- goto out;
- }
-
- if (_mesa_hash_table_num_entries(pool->free_desc_sets)) {
- /* try for an invalidated set first */
- unsigned count = 0;
- hash_table_foreach(pool->free_desc_sets, he) {
- struct zink_descriptor_set *tmp = he->data;
- if ((count++ >= 100 && tmp->reference.count == 1) || get_invalidated_desc_set(he->data)) {
- zds = tmp;
- assert(p_atomic_read(&zds->reference.count) == 1);
- descriptor_set_invalidate(zds);
- _mesa_hash_table_remove(pool->free_desc_sets, he);
- goto out;
+ }
+ /* TODO: make this dynamic so that bindless set id can be 0 if no other descriptors are used? */
+ if (pg->dd.bindless) {
+ unsigned desc_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
+ pg->num_dsl = desc_set + 1;
+ pg->dsl[desc_set] = screen->bindless_layout;
+ /* separate handling for null set injection when only bindless descriptors are used */
+ for (unsigned i = 0; i < desc_set; i++) {
+ if (!pg->dsl[i]) {
+ /* inject a null dsl */
+ pg->dsl[i] = ctx->dd.dummy_dsl->layout;
+ if (i != screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UNIFORMS])
+ pg->dd.binding_usage |= BITFIELD_BIT(i);
}
}
+ /* all lower id sets are guaranteed to be used */
+ pg->dd.binding_usage |= BITFIELD_MASK(ZINK_DESCRIPTOR_BASE_TYPES);
}
- assert(pool->num_sets_allocated < ZINK_DEFAULT_MAX_DESCS);
-
- zds = allocate_desc_set(ctx, pg, type, descs_used, is_compute);
-out:
- if (unlikely(pool->num_sets_allocated >= ZINK_DEFAULT_DESC_CLAMP &&
- _mesa_hash_table_num_entries(pool->free_desc_sets) < ZINK_DEFAULT_MAX_DESCS - ZINK_DEFAULT_DESC_CLAMP))
- ctx->oom_flush = ctx->oom_stall = true;
- zds->hash = hash;
- populate_zds_key(ctx, type, is_compute, &zds->key, pg->dd->push_usage);
- zds->recycled = false;
- _mesa_hash_table_insert_pre_hashed(pool->desc_sets, hash, &zds->key, zds);
-quick_out:
- zds->punted = zds->invalid = false;
- batch_add_desc_set(batch, zds);
- if (push_set)
- ctx->dd->last_set[is_compute] = zds;
- else
- pdd_cached(pg)->last_set[type] = zds;
- simple_mtx_unlock(&pool->mtx);
+ pg->layout = zink_pipeline_layout_create(screen, pg->dsl, pg->num_dsl, pg->is_compute, 0);
+ if (!pg->layout)
+ return false;
+ pg->compat_id = _mesa_hash_data(pg->dsl, pg->num_dsl * sizeof(pg->dsl[0]));
+
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ return true;
- return zds;
+ VkDescriptorUpdateTemplateCreateInfo template[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES] = {0};
+ /* type of template */
+ VkDescriptorUpdateTemplateType types[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES] = {VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET};
+ if (have_push)
+ types[0] = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR;
+
+ /* number of descriptors in template */
+ unsigned wd_count[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES];
+ if (push_count)
+ wd_count[0] = pg->is_compute ? 1 : (ZINK_GFX_SHADER_COUNT + !!ctx->dd.has_fbfetch);
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
+ wd_count[i + 1] = pg->dd.pool_key[i] ? pg->dd.pool_key[i]->layout->num_bindings : 0;
+
+ VkDescriptorUpdateTemplateEntry *push_entries[2] = {
+ ctx->dd.push_entries,
+ &ctx->dd.compute_push_entry,
+ };
+ for (unsigned i = 0; i < pg->num_dsl; i++) {
+ bool is_push = i == 0;
+ /* no need for empty templates */
+ if (pg->dsl[i] == ctx->dd.dummy_dsl->layout ||
+ pg->dsl[i] == screen->bindless_layout ||
+ (!is_push && pg->dd.templates[i]))
+ continue;
+ template[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+ assert(wd_count[i]);
+ template[i].descriptorUpdateEntryCount = wd_count[i];
+ if (is_push)
+ template[i].pDescriptorUpdateEntries = push_entries[pg->is_compute];
+ else
+ template[i].pDescriptorUpdateEntries = entries[i - 1];
+ template[i].templateType = types[i];
+ template[i].descriptorSetLayout = pg->dsl[i];
+ template[i].pipelineBindPoint = pg->is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
+ template[i].pipelineLayout = pg->layout;
+ template[i].set = i;
+ VkDescriptorUpdateTemplate t;
+ if (VKSCR(CreateDescriptorUpdateTemplate)(screen->dev, &template[i], NULL, &t) != VK_SUCCESS)
+ return false;
+ pg->dd.templates[i] = t;
+ }
+ return true;
}
void
-zink_descriptor_set_recycle(struct zink_descriptor_set *zds)
+zink_descriptor_shader_get_binding_offsets(const struct zink_shader *shader, unsigned *offsets)
{
- struct zink_descriptor_pool *pool = zds->pool;
- /* if desc set is still in use by a batch, don't recache */
- uint32_t refcount = p_atomic_read(&zds->reference.count);
- if (refcount != 1)
- return;
- /* this is a null set */
- if (!pool->key.layout->num_descriptors)
- return;
- simple_mtx_lock(&pool->mtx);
- if (zds->punted)
- zds->invalid = true;
- else {
- /* if we've previously punted this set, then it won't have a hash or be in either of the tables */
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pool->desc_sets, zds->hash, &zds->key);
- if (!he) {
- /* desc sets can be used multiple times in the same batch */
- simple_mtx_unlock(&pool->mtx);
- return;
+ offsets[ZINK_DESCRIPTOR_TYPE_UBO] = 0;
+ offsets[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = (shader->num_bindings[ZINK_DESCRIPTOR_TYPE_UBO] ?
+ shader->bindings[ZINK_DESCRIPTOR_TYPE_UBO][shader->num_bindings[ZINK_DESCRIPTOR_TYPE_UBO] - 1].binding + 1 :
+ 1);
+ offsets[ZINK_DESCRIPTOR_TYPE_SSBO] = offsets[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] + (shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] ?
+ shader->bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] - 1].binding + 1 :
+ 1);
+ offsets[ZINK_DESCRIPTOR_TYPE_IMAGE] = offsets[ZINK_DESCRIPTOR_TYPE_SSBO] + (shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SSBO] ?
+ shader->bindings[ZINK_DESCRIPTOR_TYPE_SSBO][shader->num_bindings[ZINK_DESCRIPTOR_TYPE_SSBO] - 1].binding + 1 :
+ 1);
+}
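A worked example of the offsets computed above (purely illustrative binding counts):

/* shader with UBO bindings {0,1}, sampler-view bindings {0,1,2}, no SSBOs:
 *   offsets[ZINK_DESCRIPTOR_TYPE_UBO]          = 0
 *   offsets[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = 1 + 1       = 2
 *   offsets[ZINK_DESCRIPTOR_TYPE_SSBO]         = 2 + (2 + 1) = 5
 *   offsets[ZINK_DESCRIPTOR_TYPE_IMAGE]        = 5 + 1       = 6
 */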
+
+void
+zink_descriptor_shader_init(struct zink_screen *screen, struct zink_shader *shader)
+{
+ VkDescriptorSetLayoutBinding bindings[ZINK_DESCRIPTOR_BASE_TYPES * ZINK_MAX_DESCRIPTORS_PER_TYPE];
+ unsigned num_bindings = 0;
+ VkShaderStageFlagBits stage_flags = mesa_to_vk_shader_stage(clamp_stage(&shader->info));
+
+ unsigned desc_set_size = shader->has_uniforms;
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
+ desc_set_size += shader->num_bindings[i];
+ if (desc_set_size)
+ shader->precompile.db_template = rzalloc_array(shader, struct zink_descriptor_template, desc_set_size);
+
+ if (shader->has_uniforms) {
+ VkDescriptorSetLayoutBinding *binding = &bindings[num_bindings];
+ binding->binding = 0;
+ binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ binding->descriptorCount = 1;
+ binding->stageFlags = stage_flags;
+ binding->pImmutableSamplers = NULL;
+ struct zink_descriptor_template *entry = &shader->precompile.db_template[num_bindings];
+ entry->count = 1;
+ entry->offset = offsetof(struct zink_context, di.db.ubos[clamp_stage(&shader->info)][0]);
+ entry->stride = sizeof(VkDescriptorAddressInfoEXT);
+ entry->db_size = screen->info.db_props.robustUniformBufferDescriptorSize;
+ num_bindings++;
+ }
+ /* sync with zink_shader_compile_separate() */
+ unsigned offsets[4];
+ zink_descriptor_shader_get_binding_offsets(shader, offsets);
+ for (int j = 0; j < ZINK_DESCRIPTOR_BASE_TYPES; j++) {
+ for (int k = 0; k < shader->num_bindings[j]; k++) {
+ VkDescriptorSetLayoutBinding *binding = &bindings[num_bindings];
+ if (j == ZINK_DESCRIPTOR_TYPE_UBO)
+ binding->binding = 1;
+ else
+ binding->binding = shader->bindings[j][k].binding + offsets[j];
+ binding->descriptorType = shader->bindings[j][k].type;
+ binding->descriptorCount = shader->bindings[j][k].size;
+ binding->stageFlags = stage_flags;
+ binding->pImmutableSamplers = NULL;
+
+ unsigned temp = 0;
+ init_db_template_entry(screen, shader, j, k, &shader->precompile.db_template[num_bindings], &temp);
+ num_bindings++;
}
- _mesa_hash_table_remove(pool->desc_sets, he);
}
-
- if (zds->invalid) {
- descriptor_set_invalidate(zds);
- util_dynarray_append(&pool->alloc_desc_sets, struct zink_descriptor_set *, zds);
- } else {
- zds->recycled = true;
- _mesa_hash_table_insert_pre_hashed(pool->free_desc_sets, zds->hash, &zds->key, zds);
+ if (num_bindings) {
+ shader->precompile.dsl = descriptor_layout_create(screen, 0, bindings, num_bindings);
+ shader->precompile.bindings = mem_dup(bindings, num_bindings * sizeof(VkDescriptorSetLayoutBinding));
+ shader->precompile.num_bindings = num_bindings;
+ VkDeviceSize val;
+ VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, shader->precompile.dsl, &val);
+ shader->precompile.db_size = val;
+ shader->precompile.db_offset = rzalloc_array(shader, uint32_t, num_bindings);
+ for (unsigned i = 0; i < num_bindings; i++) {
+ VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, shader->precompile.dsl, bindings[i].binding, &val);
+ shader->precompile.db_offset[i] = val;
+ }
+ }
+ if (screen->info.have_EXT_shader_object)
+ return;
+ VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_ALL_TYPES] = {0};
+ unsigned num_dsl = num_bindings ? 2 : 0;
+ if (shader->bindless)
+ num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
+ if (num_bindings || shader->bindless) {
+ dsl[shader->info.stage == MESA_SHADER_FRAGMENT] = shader->precompile.dsl;
+ if (shader->bindless)
+ dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
}
- simple_mtx_unlock(&pool->mtx);
+ shader->precompile.layout = zink_pipeline_layout_create(screen, dsl, num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
}
-
-static void
-desc_set_ref_add(struct zink_descriptor_set *zds, struct zink_descriptor_refs *refs, void **ref_ptr, void *ptr)
+void
+zink_descriptor_shader_deinit(struct zink_screen *screen, struct zink_shader *shader)
{
- struct zink_descriptor_reference ref = {ref_ptr, &zds->invalid};
- *ref_ptr = ptr;
- if (ptr)
- util_dynarray_append(&refs->refs, struct zink_descriptor_reference, ref);
+ if (shader->precompile.dsl)
+ VKSCR(DestroyDescriptorSetLayout)(screen->dev, shader->precompile.dsl, NULL);
+ if (shader->precompile.layout)
+ VKSCR(DestroyPipelineLayout)(screen->dev, shader->precompile.layout, NULL);
}
-static void
-zink_descriptor_surface_desc_set_add(struct zink_descriptor_surface *dsurf, struct zink_descriptor_set *zds, unsigned idx)
+/* called during program destroy */
+void
+zink_descriptor_program_deinit(struct zink_screen *screen, struct zink_program *pg)
{
- assert(idx < zds->num_resources);
- zds->surfaces[idx].is_buffer = dsurf->is_buffer;
- if (dsurf->is_buffer)
- desc_set_ref_add(zds, &dsurf->bufferview->desc_set_refs, (void**)&zds->surfaces[idx].bufferview, dsurf->bufferview);
- else
- desc_set_ref_add(zds, &dsurf->surface->desc_set_refs, (void**)&zds->surfaces[idx].surface, dsurf->surface);
+ for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ if (pg->dd.pool_key[i]) {
+ pg->dd.pool_key[i]->use_count--;
+ pg->dd.pool_key[i] = NULL;
+ }
+ }
+ for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_NON_BINDLESS_TYPES; i++) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY && pg->dd.templates[i]) {
+ VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, pg->dd.templates[i], NULL);
+ pg->dd.templates[i] = VK_NULL_HANDLE;
+ }
+ }
}
static void
-zink_image_view_desc_set_add(struct zink_image_view *image_view, struct zink_descriptor_set *zds, unsigned idx, bool is_buffer)
+pool_destroy(struct zink_screen *screen, struct zink_descriptor_pool *pool)
{
- assert(idx < zds->num_resources);
- if (is_buffer)
- desc_set_ref_add(zds, &image_view->buffer_view->desc_set_refs, (void**)&zds->surfaces[idx].bufferview, image_view->buffer_view);
- else
- desc_set_ref_add(zds, &image_view->surface->desc_set_refs, (void**)&zds->surfaces[idx].surface, image_view->surface);
+ VKSCR(DestroyDescriptorPool)(screen->dev, pool->pool, NULL);
+ FREE(pool);
}
static void
-zink_sampler_state_desc_set_add(struct zink_sampler_state *sampler_state, struct zink_descriptor_set *zds, unsigned idx)
+multi_pool_destroy(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool)
{
- assert(idx < zds->num_resources);
- if (sampler_state)
- desc_set_ref_add(zds, &sampler_state->desc_set_refs, (void**)&zds->sampler_states[idx], sampler_state);
- else
- zds->sampler_states[idx] = NULL;
+ if (mpool->pool)
+ pool_destroy(screen, mpool->pool);
+ FREE(mpool);
}
-static void
-zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx)
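+/* destroy every pool in an overflow array; returns whether anything was actually freed */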
+static bool
+clear_multi_pool_overflow(struct zink_screen *screen, struct util_dynarray *overflowed_pools)
{
- assert(idx < zds->num_resources);
- desc_set_ref_add(zds, res ? &res->obj->desc_set_refs : NULL, (void**)&zds->res_objs[idx], res ? res->obj : NULL);
+ bool found = false;
+ while (util_dynarray_num_elements(overflowed_pools, struct zink_descriptor_pool*)) {
+ struct zink_descriptor_pool *pool = util_dynarray_pop(overflowed_pools, struct zink_descriptor_pool*);
+ pool_destroy(screen, pool);
+ found = true;
+ }
+ return found;
}
-void
-zink_descriptor_set_refs_clear(struct zink_descriptor_refs *refs, void *ptr)
+static VkDescriptorPool
+create_pool(struct zink_screen *screen, unsigned num_type_sizes, const VkDescriptorPoolSize *sizes, unsigned flags)
{
- util_dynarray_foreach(&refs->refs, struct zink_descriptor_reference, ref) {
- if (*ref->ref == ptr) {
- *ref->invalid = true;
- *ref->ref = NULL;
+ VkDescriptorPool pool;
+ VkDescriptorPoolCreateInfo dpci = {0};
+ dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+ dpci.pPoolSizes = sizes;
+ dpci.poolSizeCount = num_type_sizes;
+ dpci.flags = flags;
+ dpci.maxSets = MAX_LAZY_DESCRIPTORS;
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &pool),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateDescriptorPool failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
}
- }
- util_dynarray_fini(&refs->refs);
+ );
+ return pool;
}
-static inline void
-zink_descriptor_pool_reference(struct zink_screen *screen,
- struct zink_descriptor_pool **dst,
- struct zink_descriptor_pool *src)
-{
- struct zink_descriptor_pool *old_dst = dst ? *dst : NULL;
+static struct zink_descriptor_pool *
+get_descriptor_pool(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_state *bs, bool is_compute);
- if (pipe_reference_described(old_dst ? &old_dst->reference : NULL, &src->reference,
- (debug_reference_descriptor)debug_describe_zink_descriptor_pool))
- descriptor_pool_free(screen, old_dst);
- if (dst) *dst = src;
+/* set a multi-pool to its zink_descriptor_pool_key::id-indexed array element on a given batch state */
+static bool
+set_pool(struct zink_batch_state *bs, struct zink_program *pg, struct zink_descriptor_pool_multi *mpool, enum zink_descriptor_type type)
+{
+ /* push descriptors should never reach this */
+ assert(type != ZINK_DESCRIPTOR_TYPE_UNIFORMS);
+ assert(mpool);
+ const struct zink_descriptor_pool_key *pool_key = pg->dd.pool_key[type];
+ size_t size = bs->dd.pools[type].capacity;
+ /* ensure the pool array is big enough to have an element for this key */
+ if (!util_dynarray_resize(&bs->dd.pools[type], struct zink_descriptor_pool_multi*, pool_key->id + 1))
+ return false;
+ if (size != bs->dd.pools[type].capacity) {
+ /* when resizing, always zero the new data to avoid garbage */
+ uint8_t *data = bs->dd.pools[type].data;
+ memset(data + size, 0, bs->dd.pools[type].capacity - size);
+ }
+ /* dynarray can't track sparse array sizing, so the array size must be manually tracked */
+ bs->dd.pool_size[type] = MAX2(bs->dd.pool_size[type], pool_key->id + 1);
+ struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[type], struct zink_descriptor_pool_multi*, pool_key->id);
+ *mppool = mpool;
+ return true;
}
-static void
-create_descriptor_ref_template(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type)
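+/* allocate a fresh VkDescriptorPool sized according to the multi-pool's pool key */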
+static struct zink_descriptor_pool *
+alloc_new_pool(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool)
{
- struct zink_shader **stages;
- if (pg->is_compute)
- stages = &((struct zink_compute_program*)pg)->shader;
- else
- stages = ((struct zink_gfx_program*)pg)->shaders;
- unsigned num_shaders = pg->is_compute ? 1 : ZINK_SHADER_COUNT;
-
- for (int i = 0; i < num_shaders; i++) {
- struct zink_shader *shader = stages[i];
- if (!shader)
- continue;
-
- for (int j = 0; j < shader->num_bindings[type]; j++) {
- int index = shader->bindings[type][j].index;
- if (type == ZINK_DESCRIPTOR_TYPE_UBO && !index)
- continue;
- pdd_cached(pg)->num_refs[type] += shader->bindings[type][j].size;
- }
+ struct zink_descriptor_pool *pool = CALLOC_STRUCT(zink_descriptor_pool);
+ if (!pool)
+ return NULL;
+ const unsigned num_type_sizes = mpool->pool_key->sizes[1].descriptorCount ? 2 : 1;
+ pool->pool = create_pool(screen, num_type_sizes, mpool->pool_key->sizes, 0);
+ if (!pool->pool) {
+ FREE(pool);
+ return NULL;
}
+ return pool;
+}
- pdd_cached(pg)->refs[type] = ralloc_array(pg->dd, union zink_program_descriptor_refs, pdd_cached(pg)->num_refs[type]);
- if (!pdd_cached(pg)->refs[type])
- return;
-
- unsigned ref_idx = 0;
- for (int i = 0; i < num_shaders; i++) {
- struct zink_shader *shader = stages[i];
- if (!shader)
- continue;
-
- enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage);
- for (int j = 0; j < shader->num_bindings[type]; j++) {
- int index = shader->bindings[type][j].index;
- for (unsigned k = 0; k < shader->bindings[type][j].size; k++) {
- switch (type) {
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- pdd_cached(pg)->refs[type][ref_idx].sampler.sampler_state = (struct zink_sampler_state**)&ctx->sampler_states[stage][index + k];
- pdd_cached(pg)->refs[type][ref_idx].sampler.dsurf = &ctx->di.sampler_surfaces[stage][index + k];
- break;
- case ZINK_DESCRIPTOR_TYPE_IMAGE:
- pdd_cached(pg)->refs[type][ref_idx].dsurf = &ctx->di.image_surfaces[stage][index + k];
- break;
- case ZINK_DESCRIPTOR_TYPE_UBO:
- if (!index)
- continue;
- FALLTHROUGH;
- default:
- pdd_cached(pg)->refs[type][ref_idx].res = &ctx->di.descriptor_res[type][stage][index + k];
- break;
- }
- assert(ref_idx < pdd_cached(pg)->num_refs[type]);
- ref_idx++;
+/* strictly for finding a usable pool in oom scenarios */
+static void
+find_pool(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_descriptor_pool_multi *mpool, bool both)
+{
+ bool found = false;
+ /* worst case: iterate all the pools for the batch until something can be recycled */
+ for (unsigned type = 0; type < ZINK_DESCRIPTOR_BASE_TYPES; type++) {
+ for (unsigned i = 0; i < bs->dd.pool_size[type]; i++) {
+ struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[type], struct zink_descriptor_pool_multi *, i);
+ if (mppool && *mppool && *mppool != mpool) {
+ unsigned idx[] = {!(*mppool)->overflow_idx, (*mppool)->overflow_idx};
+ for (unsigned j = 0; j < 1 + !!both; j++)
+ found |= clear_multi_pool_overflow(screen, &(*mppool)->overflowed_pools[idx[j]]);
}
}
}
+ if (found)
+ mpool->pool = alloc_new_pool(screen, mpool);
}
-bool
-zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg)
+static struct zink_descriptor_pool *
+check_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool_multi *mpool, struct zink_program *pg,
+ enum zink_descriptor_type type, struct zink_batch_state *bs, bool is_compute)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
-
- pg->dd = (void*)rzalloc(pg, struct zink_program_descriptor_data_cached);
- if (!pg->dd)
- return false;
-
- if (!zink_descriptor_program_init_lazy(ctx, pg))
- return false;
-
- /* no descriptors */
- if (!pg->dd)
- return true;
-
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- if (!pg->dd->layout_key[i])
- continue;
-
- unsigned idx = zink_descriptor_type_to_size_idx(i);
- VkDescriptorPoolSize *size = &pg->dd->sizes[idx];
- /* this is a sampler/image set with no images only texels */
- if (!size->descriptorCount)
- size++;
- unsigned num_sizes = zink_descriptor_program_num_sizes(pg, i);
- struct zink_descriptor_pool *pool = descriptor_pool_get(ctx, i, pg->dd->layout_key[i], size, num_sizes);
- if (!pool)
- return false;
- zink_descriptor_pool_reference(screen, &pdd_cached(pg)->pool[i], pool);
-
- if (screen->info.have_KHR_descriptor_update_template &&
- screen->descriptor_mode != ZINK_DESCRIPTOR_MODE_NOTEMPLATES)
- create_descriptor_ref_template(ctx, pg, i);
+ assert(mpool->pool_key == pg->dd.pool_key[type]);
+ /* a current pool may not exist */
+ if (!mpool->pool) {
+ /* first, try to recycle a pool from the idle overflowed sets */
+ if (util_dynarray_contains(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*))
+ mpool->pool = util_dynarray_pop(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*);
+ else
+ /* if none exist, try to create a new one */
+ mpool->pool = alloc_new_pool(screen, mpool);
+ /* OOM: force pool recycling from overflows */
+ if (!mpool->pool) {
+ find_pool(screen, bs, mpool, false);
+ if (!mpool->pool) {
+ /* bad case: iterate unused batches and recycle */
+ for (struct zink_batch_state *state = ctx->free_batch_states; state; state = state->next)
+ find_pool(screen, state, mpool, true);
+ if (!mpool->pool) {
+ /* worst case: iterate in-use batches and recycle (very safe) */
+ for (struct zink_batch_state *state = ctx->batch_states; state; state = state->next)
+ find_pool(screen, state, mpool, false);
+ }
+ }
+ }
+ if (!mpool->pool)
+ unreachable("out of descriptor memory!");
}
-
- return true;
+ struct zink_descriptor_pool *pool = mpool->pool;
+ /* allocate up to $current * 10, e.g., 10 -> 100;
+ * never allocate more than 100 at a time to minimize unused descriptor sets
+ */
+ if (pool->set_idx == pool->sets_alloc) {
+ unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100);
+ if (!sets_to_alloc) {
+ /* overflowed pool: store for reuse */
+ pool->set_idx = 0;
+ util_dynarray_append(&mpool->overflowed_pools[mpool->overflow_idx], struct zink_descriptor_pool*, pool);
+ mpool->pool = NULL;
+ /* call recursively to get recycle/oom handling */
+ return get_descriptor_pool(ctx, pg, type, bs, is_compute);
+ }
+ if (!zink_descriptor_util_alloc_sets(screen, pg->dsl[type + 1],
+ pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc))
+ return NULL;
+ pool->sets_alloc += sets_to_alloc;
+ }
+ return pool;
}
-void
-zink_descriptor_program_deinit(struct zink_screen *screen, struct zink_program *pg)
+static struct zink_descriptor_pool *
+create_push_pool(struct zink_screen *screen, struct zink_batch_state *bs, bool is_compute, bool has_fbfetch)
{
- if (!pg->dd)
- return;
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++)
- zink_descriptor_pool_reference(screen, &pdd_cached(pg)->pool[i], NULL);
-
- zink_descriptor_program_deinit_lazy(screen, pg);
+ struct zink_descriptor_pool *pool = CALLOC_STRUCT(zink_descriptor_pool);
+ VkDescriptorPoolSize sizes[2];
+ sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ if (is_compute)
+ sizes[0].descriptorCount = MAX_LAZY_DESCRIPTORS;
+ else {
+ sizes[0].descriptorCount = ZINK_GFX_SHADER_COUNT * MAX_LAZY_DESCRIPTORS;
+ sizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+ sizes[1].descriptorCount = MAX_LAZY_DESCRIPTORS;
+ }
+ pool->pool = create_pool(screen, !is_compute && has_fbfetch ? 2 : 1, sizes, 0);
+ return pool;
}
-static void
-zink_descriptor_pool_deinit(struct zink_context *ctx)
+static struct zink_descriptor_pool *
+check_push_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool_multi *mpool, struct zink_batch_state *bs, bool is_compute)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- hash_table_foreach(ctx->dd->descriptor_pools[i], entry) {
- struct zink_descriptor_pool *pool = (void*)entry->data;
- zink_descriptor_pool_reference(screen, &pool, NULL);
+ struct zink_descriptor_pool *pool = mpool->pool;
+ /* allocate up to $current * 10, e.g., 10 -> 100; never allocate more than 100 at a time */
+ if (pool->set_idx == pool->sets_alloc || unlikely(ctx->dd.has_fbfetch != bs->dd.has_fbfetch)) {
+ unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100);
+ if (!sets_to_alloc || unlikely(ctx->dd.has_fbfetch != bs->dd.has_fbfetch)) {
+ /* overflowed pool: store for reuse */
+ pool->set_idx = 0;
+ util_dynarray_append(&mpool->overflowed_pools[mpool->overflow_idx], struct zink_descriptor_pool*, pool);
+ if (util_dynarray_contains(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*))
+ bs->dd.push_pool[is_compute].pool = util_dynarray_pop(&mpool->overflowed_pools[!mpool->overflow_idx], struct zink_descriptor_pool*);
+ else
+ bs->dd.push_pool[is_compute].pool = create_push_pool(screen, bs, is_compute, ctx->dd.has_fbfetch);
+ if (unlikely(ctx->dd.has_fbfetch != bs->dd.has_fbfetch))
+ mpool->reinit_overflow = true;
+ bs->dd.has_fbfetch = ctx->dd.has_fbfetch;
+ return check_push_pool_alloc(ctx, &bs->dd.push_pool[is_compute], bs, is_compute);
}
- _mesa_hash_table_destroy(ctx->dd->descriptor_pools[i], NULL);
+ if (!zink_descriptor_util_alloc_sets(screen, ctx->dd.push_dsl[is_compute]->layout,
+ pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc)) {
+ mesa_loge("ZINK: failed to allocate push set!");
+ return NULL;
+ }
+ pool->sets_alloc += sets_to_alloc;
}
+ return pool;
}
-static bool
-zink_descriptor_pool_init(struct zink_context *ctx)
+static struct zink_descriptor_pool *
+get_descriptor_pool(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_state *bs, bool is_compute)
{
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- ctx->dd->descriptor_pools[i] = _mesa_hash_table_create(ctx, hash_descriptor_pool, equals_descriptor_pool);
- if (!ctx->dd->descriptor_pools[i])
- return false;
- }
struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkDescriptorPoolSize sizes[2];
- sizes[0].type = screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
- sizes[0].descriptorCount = ZINK_SHADER_COUNT * ZINK_DEFAULT_MAX_DESCS;
- sizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
- sizes[1].descriptorCount = ZINK_DEFAULT_MAX_DESCS;
- ctx->dd->push_pool[0] = descriptor_pool_get(ctx, 0, ctx->dd->push_layout_keys[0], sizes, ctx->dd->has_fbfetch ? 2 : 1);
- sizes[0].descriptorCount = ZINK_DEFAULT_MAX_DESCS;
- ctx->dd->push_pool[1] = descriptor_pool_get(ctx, 0, ctx->dd->push_layout_keys[1], sizes, 1);
- return ctx->dd->push_pool[0] && ctx->dd->push_pool[1];
+ const struct zink_descriptor_pool_key *pool_key = pg->dd.pool_key[type];
+ struct zink_descriptor_pool_multi **mppool = bs->dd.pool_size[type] > pool_key->id ?
+ util_dynarray_element(&bs->dd.pools[type], struct zink_descriptor_pool_multi *, pool_key->id) :
+ NULL;
+ if (mppool && *mppool)
+ return check_pool_alloc(ctx, *mppool, pg, type, bs, is_compute);
+ struct zink_descriptor_pool_multi *mpool = CALLOC_STRUCT(zink_descriptor_pool_multi);
+ if (!mpool)
+ return NULL;
+ util_dynarray_init(&mpool->overflowed_pools[0], NULL);
+ util_dynarray_init(&mpool->overflowed_pools[1], NULL);
+ mpool->pool_key = pool_key;
+ if (!set_pool(bs, pg, mpool, type)) {
+ multi_pool_destroy(screen, mpool);
+ return NULL;
+ }
+ assert(pool_key->id < bs->dd.pool_size[type]);
+ return check_pool_alloc(ctx, mpool, pg, type, bs, is_compute);
}
-
-static void
-desc_set_res_add(struct zink_descriptor_set *zds, struct zink_resource *res, unsigned int i, bool cache_hit)
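+/* pop the next pre-allocated set from a pool; a NULL pool yields VK_NULL_HANDLE */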
+ALWAYS_INLINE static VkDescriptorSet
+get_descriptor_set(struct zink_descriptor_pool *pool)
{
- /* if we got a cache hit, we have to verify that the cached set is still valid;
- * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a
- * hash table on every resource with the associated descriptor sets that then needs to be iterated through
- * whenever a resource is destroyed
- */
- assert(!cache_hit || zds->res_objs[i] == (res ? res->obj : NULL));
- if (!cache_hit)
- zink_resource_desc_set_add(res, zds, i);
+ if (!pool)
+ return VK_NULL_HANDLE;
+
+ assert(pool->set_idx < pool->sets_alloc);
+ return pool->sets[pool->set_idx++];
}
-static void
-desc_set_sampler_add(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_descriptor_surface *dsurf,
- struct zink_sampler_state *state, unsigned int i, bool cache_hit)
-{
- /* if we got a cache hit, we have to verify that the cached set is still valid;
- * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a
- * hash table on every resource with the associated descriptor sets that then needs to be iterated through
- * whenever a resource is destroyed
- */
-#ifndef NDEBUG
- uint32_t cur_hash = get_descriptor_surface_hash(ctx, &zds->surfaces[i]);
- uint32_t new_hash = get_descriptor_surface_hash(ctx, dsurf);
-#endif
- assert(!cache_hit || cur_hash == new_hash);
- assert(!cache_hit || zds->sampler_states[i] == state);
- if (!cache_hit) {
- zink_descriptor_surface_desc_set_add(dsurf, zds, i);
- zink_sampler_state_desc_set_add(state, zds, i);
+static bool
+populate_sets(struct zink_context *ctx, struct zink_batch_state *bs,
+ struct zink_program *pg, uint8_t changed_sets, VkDescriptorSet *sets)
+{
+ u_foreach_bit(type, changed_sets) {
+ if (pg->dd.pool_key[type]) {
+ struct zink_descriptor_pool *pool = get_descriptor_pool(ctx, pg, type, bs, pg->is_compute);
+ sets[type] = get_descriptor_set(pool);
+ if (!sets[type])
+ return false;
+ } else
+ sets[type] = VK_NULL_HANDLE;
}
+ return true;
}
static void
-desc_set_image_add(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_image_view *image_view,
- unsigned int i, bool is_buffer, bool cache_hit)
+reinit_db(struct zink_screen *screen, struct zink_batch_state *bs)
{
- /* if we got a cache hit, we have to verify that the cached set is still valid;
- * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a
- * hash table on every resource with the associated descriptor sets that then needs to be iterated through
- * whenever a resource is destroyed
- */
-#ifndef NDEBUG
- uint32_t cur_hash = get_descriptor_surface_hash(ctx, &zds->surfaces[i]);
- uint32_t new_hash = zink_get_image_view_hash(ctx, image_view, is_buffer);
-#endif
- assert(!cache_hit || cur_hash == new_hash);
- if (!cache_hit)
- zink_image_view_desc_set_add(image_view, zds, i, is_buffer);
+ zink_batch_descriptor_deinit(screen, bs);
+ zink_batch_descriptor_init(screen, bs);
}
static void
-desc_set_descriptor_surface_add(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_descriptor_surface *dsurf,
- unsigned int i, bool cache_hit)
+enlarge_db(struct zink_context *ctx)
{
- /* if we got a cache hit, we have to verify that the cached set is still valid;
- * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a
- * hash table on every resource with the associated descriptor sets that then needs to be iterated through
- * whenever a resource is destroyed
- */
-#ifndef NDEBUG
- uint32_t cur_hash = get_descriptor_surface_hash(ctx, &zds->surfaces[i]);
- uint32_t new_hash = get_descriptor_surface_hash(ctx, dsurf);
-#endif
- assert(!cache_hit || cur_hash == new_hash);
- if (!cache_hit)
- zink_descriptor_surface_desc_set_add(dsurf, zds, i);
-}
-
-static unsigned
-init_write_descriptor(struct zink_shader *shader, struct zink_descriptor_set *zds, enum zink_descriptor_type type, int idx, VkWriteDescriptorSet *wd, unsigned num_wds)
-{
- wd->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
- wd->pNext = NULL;
- wd->dstBinding = shader ? shader->bindings[type][idx].binding : idx;
- wd->dstArrayElement = 0;
- wd->descriptorCount = shader ? shader->bindings[type][idx].size : 1;
- wd->descriptorType = shader ? shader->bindings[type][idx].type :
- idx == ZINK_FBFETCH_BINDING ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
- wd->dstSet = zds->desc_set;
- return num_wds + 1;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_batch_state *bs = ctx->batch.state;
+ /* ensure the current db survives */
+ zink_batch_reference_resource(&ctx->batch, bs->dd.db);
+ /* rebinding a db mid-batch is extremely costly: start with a growth factor of
+ * 16 and halve it with each new allocation; this shouldn't need to happen
+ * more than twice. */
+ ctx->dd.db.max_db_size *= ctx->dd.db.size_enlarge_scale;
+ ctx->dd.db.size_enlarge_scale = MAX2(ctx->dd.db.size_enlarge_scale >> 1, 4);
+ reinit_db(screen, bs);
}
-static unsigned
-update_push_ubo_descriptors(struct zink_context *ctx, struct zink_descriptor_set *zds,
- bool is_compute, bool cache_hit, uint32_t *dynamic_offsets)
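+/* write the per-shader precompile descriptors of a separable gfx program directly into the
+ * batch's descriptor buffer and bind the resulting offsets
+ */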
+static void
+update_separable(struct zink_context *ctx, struct zink_program *pg)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkWriteDescriptorSet wds[ZINK_SHADER_COUNT + 1];
- VkDescriptorBufferInfo buffer_infos[ZINK_SHADER_COUNT];
- struct zink_shader **stages;
- bool fbfetch = false;
+ struct zink_batch_state *bs = ctx->batch.state;
+
+ unsigned use_buffer = 0;
+ VkDescriptorGetInfoEXT info;
+ info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT;
+ info.pNext = NULL;
+ struct zink_gfx_program *prog = (struct zink_gfx_program *)pg;
+ size_t db_size = 0;
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ if (prog->shaders[i])
+ db_size += prog->shaders[i]->precompile.db_size;
+ }
- unsigned num_stages = is_compute ? 1 : ZINK_SHADER_COUNT;
- struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
- if (is_compute)
- stages = &ctx->curr_compute->shader;
- else
- stages = &ctx->gfx_stages[0];
+ if (bs->dd.db_offset + db_size >= bs->dd.db->base.b.width0)
+ enlarge_db(ctx);
- for (int i = 0; i < num_stages; i++) {
- struct zink_shader *shader = stages[i];
- enum pipe_shader_type pstage = shader ? pipe_shader_type_from_mesa(shader->nir->info.stage) : i;
- VkDescriptorBufferInfo *info = &ctx->di.ubos[pstage][0];
- unsigned dynamic_idx = is_compute ? 0 : tgsi_processor_to_shader_stage(pstage);
-
- /* Values are taken from pDynamicOffsets in an order such that all entries for set N come before set N+1;
- * within a set, entries are ordered by the binding numbers in the descriptor set layouts
- * - vkCmdBindDescriptorSets spec
- *
- * because of this, we have to populate the dynamic offsets by their shader stage to ensure they
- * match what the driver expects
- */
- const bool used = (pg->dd->push_usage & BITFIELD_BIT(pstage)) == BITFIELD_BIT(pstage);
- dynamic_offsets[dynamic_idx] = used ? info->offset : 0;
- if (!cache_hit) {
- init_write_descriptor(NULL, zds, ZINK_DESCRIPTOR_TYPE_UBO, tgsi_processor_to_shader_stage(pstage), &wds[i], 0);
- if (used) {
- desc_set_res_add(zds, ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_UBO][pstage][0], i, cache_hit);
- buffer_infos[i].buffer = info->buffer;
- buffer_infos[i].range = info->range;
+ if (!bs->dd.db_bound)
+ zink_batch_bind_db(ctx);
+
+ for (unsigned j = 0; j < ZINK_GFX_SHADER_COUNT; j++) {
+ struct zink_shader *zs = prog->shaders[j];
+ if (!zs || !zs->precompile.dsl)
+ continue;
+ uint64_t offset = bs->dd.db_offset;
+ assert(bs->dd.db->base.b.width0 > bs->dd.db_offset + zs->precompile.db_size);
+ for (unsigned i = 0; i < zs->precompile.num_bindings; i++) {
+ info.type = zs->precompile.bindings[i].descriptorType;
+ uint64_t desc_offset = offset + zs->precompile.db_offset[i];
+ if (screen->info.db_props.combinedImageSamplerDescriptorSingleArray ||
+ zs->precompile.bindings[i].descriptorType != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+ zs->precompile.bindings[i].descriptorCount == 1) {
+ for (unsigned k = 0; k < zs->precompile.bindings[i].descriptorCount; k++) {
+ /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */
+ info.data.pSampler = (void*)(((uint8_t*)ctx) + zs->precompile.db_template[i].offset + k * zs->precompile.db_template[i].stride);
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, zs->precompile.db_template[i].db_size, bs->dd.db_map + desc_offset + k * zs->precompile.db_template[i].db_size);
+ }
} else {
- desc_set_res_add(zds, NULL, i, cache_hit);
- if (unlikely(!screen->info.rb2_feats.nullDescriptor))
- buffer_infos[i].buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
- else
- buffer_infos[i].buffer = VK_NULL_HANDLE;
- buffer_infos[i].range = VK_WHOLE_SIZE;
+ assert(zs->precompile.bindings[i].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
+ char buf[1024];
+ uint8_t *db = bs->dd.db_map + desc_offset;
+ uint8_t *samplers = db + zs->precompile.bindings[i].descriptorCount * screen->info.db_props.sampledImageDescriptorSize;
+ for (unsigned k = 0; k < zs->precompile.bindings[i].descriptorCount; k++) {
+ /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */
+ info.data.pSampler = (void*)(((uint8_t*)ctx) + zs->precompile.db_template[i].offset +
+ k * zs->precompile.db_template[i].stride);
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, zs->precompile.db_template[i].db_size, buf);
+ /* drivers that don't support combinedImageSamplerDescriptorSingleArray must have sampler arrays written in memory as
+ *
+ * | array_of_samplers[] | array_of_sampled_images[] |
+ *
+ * which means each descriptor's data must be split
+ */
+ memcpy(db, buf, screen->info.db_props.samplerDescriptorSize);
+ memcpy(samplers, &buf[screen->info.db_props.samplerDescriptorSize], screen->info.db_props.sampledImageDescriptorSize);
+ db += screen->info.db_props.sampledImageDescriptorSize;
+ samplers += screen->info.db_props.samplerDescriptorSize;
+ }
}
- /* these are dynamic UBO descriptors, so we have to always set 0 as the descriptor offset */
- buffer_infos[i].offset = 0;
- wds[i].pBufferInfo = &buffer_infos[i];
}
+ bs->dd.cur_db_offset[use_buffer] = bs->dd.db_offset;
+ bs->dd.db_offset += zs->precompile.db_size;
+ /* TODO: maybe compile multiple variants for different set counts for compact mode? */
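+ /* without EXT_shader_object the precompile layout has only two sets: fragment uses set 1, all other stages set 0 */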
+ int set_idx = screen->info.have_EXT_shader_object ? j : j == MESA_SHADER_FRAGMENT;
+ VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pg->layout, set_idx, 1, &use_buffer, &offset);
}
- if (unlikely(!cache_hit && !is_compute && ctx->fbfetch_outputs)) {
- struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture);
- init_write_descriptor(NULL, zds, 0, MESA_SHADER_STAGES, &wds[ZINK_SHADER_COUNT], 0);
- desc_set_res_add(zds, res, ZINK_SHADER_COUNT, cache_hit);
- wds[ZINK_SHADER_COUNT].pImageInfo = &ctx->di.fbfetch;
- fbfetch = true;
- }
-
- if (!cache_hit)
- VKSCR(UpdateDescriptorSets)(screen->dev, num_stages + !!fbfetch, wds, 0, NULL);
- return num_stages;
}
+/* updates the mask of changed_sets and binds the mask of bind_sets */
static void
-set_descriptor_set_refs(struct zink_context *ctx, struct zink_descriptor_set *zds, struct zink_program *pg, bool cache_hit)
+zink_descriptors_update_masked_buffer(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, uint8_t bind_sets)
{
- enum zink_descriptor_type type = zds->pool->type;
- for (unsigned i = 0; i < pdd_cached(pg)->num_refs[type]; i++) {
- switch (type) {
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- desc_set_sampler_add(ctx, zds, pdd_cached(pg)->refs[type][i].sampler.dsurf,
- *pdd_cached(pg)->refs[type][i].sampler.sampler_state, i, cache_hit);
- break;
- case ZINK_DESCRIPTOR_TYPE_IMAGE:
- desc_set_descriptor_surface_add(ctx, zds, pdd_cached(pg)->refs[type][i].dsurf, i, cache_hit);
- break;
- default:
- desc_set_res_add(zds, *pdd_cached(pg)->refs[type][i].res, i, cache_hit);
- break;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_batch_state *bs = ctx->batch.state;
+ struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
+
+ /* skip if no descriptors are updated */
+ if (!pg->dd.binding_usage || (!changed_sets && !bind_sets))
+ return;
+
+ unsigned use_buffer = 0;
+ u_foreach_bit(type, changed_sets | bind_sets) {
+ if (!pg->dd.pool_key[type])
+ continue;
+ assert(type + 1 < pg->num_dsl);
+ assert(type < ZINK_DESCRIPTOR_BASE_TYPES);
+ bool changed = (changed_sets & BITFIELD_BIT(type)) > 0;
+ uint64_t offset = changed ? bs->dd.db_offset : bs->dd.cur_db_offset[type];
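+ /* changed sets get fresh descriptor memory at the current offset; sets that are only being rebound reuse their last-written offset */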
+ if (pg->dd.db_template[type] && changed) {
+ const struct zink_descriptor_layout_key *key = pg->dd.pool_key[type]->layout;
+ VkDescriptorGetInfoEXT info;
+ info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT;
+ info.pNext = NULL;
+ assert(bs->dd.db->base.b.width0 > bs->dd.db_offset + pg->dd.db_size[type]);
+ for (unsigned i = 0; i < key->num_bindings; i++) {
+ info.type = key->bindings[i].descriptorType;
+ uint64_t desc_offset = offset + pg->dd.db_offset[type][i];
+ if (screen->info.db_props.combinedImageSamplerDescriptorSingleArray ||
+ key->bindings[i].descriptorType != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+ key->bindings[i].descriptorCount == 1) {
+ for (unsigned j = 0; j < key->bindings[i].descriptorCount; j++) {
+ /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */
+ info.data.pSampler = (void*)(((uint8_t*)ctx) + pg->dd.db_template[type][i].offset + j * pg->dd.db_template[type][i].stride);
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, pg->dd.db_template[type][i].db_size, bs->dd.db_map + desc_offset + j * pg->dd.db_template[type][i].db_size);
+ }
+ } else {
+ assert(key->bindings[i].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
+ char buf[1024];
+ uint8_t *db = bs->dd.db_map + desc_offset;
+ uint8_t *samplers = db + key->bindings[i].descriptorCount * screen->info.db_props.sampledImageDescriptorSize;
+ for (unsigned j = 0; j < key->bindings[i].descriptorCount; j++) {
+ /* VkDescriptorDataEXT is a union of pointers; the member doesn't matter */
+ info.data.pSampler = (void*)(((uint8_t*)ctx) + pg->dd.db_template[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i].offset +
+ j * pg->dd.db_template[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i].stride);
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, pg->dd.db_template[type][ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW].db_size, buf);
+ /* drivers that don't support combinedImageSamplerDescriptorSingleArray must have sampler arrays written in memory as
+ *
+ * | array_of_samplers[] | array_of_sampled_images[] |
+ *
+ * which means each descriptor's data must be split
+ */
+ memcpy(db, buf, screen->info.db_props.samplerDescriptorSize);
+ memcpy(samplers, &buf[screen->info.db_props.samplerDescriptorSize], screen->info.db_props.sampledImageDescriptorSize);
+ db += screen->info.db_props.sampledImageDescriptorSize;
+ samplers += screen->info.db_props.samplerDescriptorSize;
+ }
+ }
+ }
+ bs->dd.cur_db_offset[type] = bs->dd.db_offset;
+ bs->dd.db_offset += pg->dd.db_size[type];
}
+ zink_flush_dgc_if_enabled(ctx);
+ /* descriptor buffer offsets are indexed by the set id, so increment type by 1
+ * (this is effectively an optimization of indirecting through screen->desc_set_id)
+ */
+ VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf,
+ is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pg->layout,
+ type + 1, 1,
+ &use_buffer,
+ &offset);
}
}
-static void
-update_descriptors_internal(struct zink_context *ctx, enum zink_descriptor_type type, struct zink_descriptor_set *zds, struct zink_program *pg, bool cache_hit)
+/* updates the mask of changed_sets and binds the mask of bind_sets */
+void
+zink_descriptors_update_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, uint8_t bind_sets)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_shader **stages;
-
- unsigned num_stages = pg->is_compute ? 1 : ZINK_SHADER_COUNT;
- if (pg->is_compute)
- stages = &ctx->curr_compute->shader;
- else
- stages = &ctx->gfx_stages[0];
+ struct zink_batch_state *bs = ctx->batch.state;
+ struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
+ VkDescriptorSet desc_sets[ZINK_DESCRIPTOR_BASE_TYPES];
- if (cache_hit || !zds)
+ /* skip if no descriptors are updated */
+ if (!pg->dd.binding_usage || (!changed_sets && !bind_sets))
return;
- if (screen->info.have_KHR_descriptor_update_template &&
- screen->descriptor_mode != ZINK_DESCRIPTOR_MODE_NOTEMPLATES) {
- set_descriptor_set_refs(ctx, zds, pg, cache_hit);
- zink_descriptor_set_update_lazy(ctx, pg, type, zds->desc_set);
+ /* populate usable sets for the changed_sets mask */
+ if (!populate_sets(ctx, bs, pg, changed_sets, desc_sets)) {
+ debug_printf("ZINK: couldn't get descriptor sets!\n");
return;
}
-
- unsigned num_resources = 0;
- ASSERTED unsigned num_bindings = zds->pool->num_resources;
- VkWriteDescriptorSet wds[ZINK_MAX_DESCRIPTORS_PER_TYPE];
- unsigned num_wds = 0;
-
- for (int i = 0; i < num_stages; i++) {
- struct zink_shader *shader = stages[i];
- if (!shader)
- continue;
- enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage);
- for (int j = 0; j < shader->num_bindings[type]; j++) {
- int index = shader->bindings[type][j].index;
- switch (type) {
- case ZINK_DESCRIPTOR_TYPE_UBO:
- if (!index)
- continue;
- FALLTHROUGH;
- case ZINK_DESCRIPTOR_TYPE_SSBO: {
- VkDescriptorBufferInfo *info;
- struct zink_resource *res = ctx->di.descriptor_res[type][stage][index];
- if (type == ZINK_DESCRIPTOR_TYPE_UBO)
- info = &ctx->di.ubos[stage][index];
- else
- info = &ctx->di.ssbos[stage][index];
- assert(num_resources < num_bindings);
- desc_set_res_add(zds, res, num_resources++, cache_hit);
- wds[num_wds].pBufferInfo = info;
- }
- break;
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- case ZINK_DESCRIPTOR_TYPE_IMAGE: {
- VkDescriptorImageInfo *image_info;
- VkBufferView *buffer_info;
- if (type == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW) {
- image_info = &ctx->di.textures[stage][index];
- buffer_info = &ctx->di.tbos[stage][index];
- } else {
- image_info = &ctx->di.images[stage][index];
- buffer_info = &ctx->di.texel_images[stage][index];
- }
- bool is_buffer = zink_shader_descriptor_is_buffer(shader, type, j);
- for (unsigned k = 0; k < shader->bindings[type][j].size; k++) {
- assert(num_resources < num_bindings);
- if (type == ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW) {
- struct zink_sampler_state *sampler = NULL;
- if (!is_buffer && image_info->imageView)
- sampler = ctx->sampler_states[stage][index + k];;
-
- desc_set_sampler_add(ctx, zds, &ctx->di.sampler_surfaces[stage][index + k], sampler, num_resources++, cache_hit);
- } else {
- struct zink_image_view *image_view = &ctx->image_views[stage][index + k];
- desc_set_image_add(ctx, zds, image_view, num_resources++, is_buffer, cache_hit);
- }
- }
- if (is_buffer)
- wds[num_wds].pTexelBufferView = buffer_info;
- else
- wds[num_wds].pImageInfo = image_info;
- }
- break;
- default:
- unreachable("unknown descriptor type");
- }
- num_wds = init_write_descriptor(shader, zds, type, j, &wds[num_wds], num_wds);
+ /* no flushing allowed: sets are allocated to the batch, so this breaks everything */
+ assert(ctx->batch.state == bs);
+
+ u_foreach_bit(type, changed_sets) {
+ assert(type + 1 < pg->num_dsl);
+ if (pg->dd.pool_key[type]) {
+ zink_flush_dgc_if_enabled(ctx);
+ /* templates are indexed by the set id, so increment type by 1
+ * (this is effectively an optimization of indirecting through screen->desc_set_id)
+ */
+ VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, desc_sets[type], pg->dd.templates[type + 1], ctx);
+ VKSCR(CmdBindDescriptorSets)(bs->cmdbuf,
+ is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ /* same set indexing as above */
+ pg->layout, type + 1, 1, &desc_sets[type],
+ 0, NULL);
+ bs->dd.sets[is_compute][type + 1] = desc_sets[type];
}
}
- if (num_wds)
- VKSCR(UpdateDescriptorSets)(screen->dev, num_wds, wds, 0, NULL);
+ /* these are the unchanged sets being rebound across pipeline changes when compat_id changes but the set is the same
+ * also handles binding null sets
+ */
+ u_foreach_bit(type, bind_sets & ~changed_sets) {
+ if (!pg->dd.pool_key[type])
+ continue;
+ /* same set indexing as above */
+ assert(bs->dd.sets[is_compute][type + 1]);
+ zink_flush_dgc_if_enabled(ctx);
+ VKSCR(CmdBindDescriptorSets)(bs->cmdbuf,
+ is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ /* same set indexing as above */
+ pg->layout, type + 1, 1, &bs->dd.sets[is_compute][type + 1],
+ 0, NULL);
+ }
}
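+/* bind the context's bindless descriptor buffer (buffer index 1) at the screen's bindless set id */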
static void
-zink_context_update_descriptor_states(struct zink_context *ctx, struct zink_program *pg);
-
-#define MAX_CACHE_MISSES 50
-
+bind_bindless_db(struct zink_context *ctx, struct zink_program *pg)
+{
+ struct zink_batch_state *bs = ctx->batch.state;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ unsigned index = 1;
+ VkDeviceSize offset = 0;
+ VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf,
+ pg->is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pg->layout,
+ screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS], 1,
+ &index,
+ &offset);
+ ctx->dd.bindless_bound = true;
+}
+
+/* entrypoint for all descriptor updating:
+ * - update push set
+ * - generate masks for updating other sets
+ * - always called from driver thread
+ */
void
zink_descriptors_update(struct zink_context *ctx, bool is_compute)
{
- struct zink_program *pg = is_compute ? (struct zink_program *)ctx->curr_compute : (struct zink_program *)ctx->curr_program;
+ struct zink_batch_state *bs = ctx->batch.state;
+ struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ bool have_KHR_push_descriptor = screen->info.have_KHR_push_descriptor;
- zink_context_update_descriptor_states(ctx, pg);
- bool cache_hit;
- VkDescriptorSet desc_set;
- struct zink_descriptor_set *zds;
+ bool batch_changed = !bs->dd.pg[is_compute];
+ if (batch_changed) {
+ /* update all sets and bind null sets */
+ ctx->dd.state_changed[is_compute] = pg->dd.binding_usage & BITFIELD_MASK(ZINK_DESCRIPTOR_TYPE_UNIFORMS);
+ ctx->dd.push_state_changed[is_compute] = !!pg->dd.push_usage || ctx->dd.has_fbfetch != bs->dd.has_fbfetch;
+ }
- struct zink_batch *batch = &ctx->batch;
- VkPipelineBindPoint bp = is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
+ if (!is_compute) {
+ struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
+ if (prog->is_separable) {
+ /* force all descriptors update on next pass: separables use different layouts */
+ ctx->dd.state_changed[is_compute] = BITFIELD_MASK(ZINK_DESCRIPTOR_TYPE_UNIFORMS);
+ ctx->dd.push_state_changed[is_compute] = true;
+ update_separable(ctx, pg);
+ if (pg->dd.bindless)
+ bind_bindless_db(ctx, pg);
+ return;
+ }
+ }
- {
- uint32_t dynamic_offsets[PIPE_MAX_CONSTANT_BUFFERS];
- unsigned dynamic_offset_idx = 0;
+ if (pg != bs->dd.pg[is_compute]) {
+ /* if we don't already know that we have to update all sets,
+ * check to see if any dsls changed
+ *
+ * also always update the dsl pointers on program change
+ */
+ for (unsigned i = 0; i < ARRAY_SIZE(bs->dd.dsl[is_compute]); i++) {
+ /* push set is already detected, start at 1 */
+ if (bs->dd.dsl[is_compute][i] != pg->dsl[i + 1])
+ ctx->dd.state_changed[is_compute] |= BITFIELD_BIT(i);
+ bs->dd.dsl[is_compute][i] = pg->dsl[i + 1];
+ }
+ ctx->dd.push_state_changed[is_compute] |= bs->dd.push_usage[is_compute] != pg->dd.push_usage;
+ bs->dd.push_usage[is_compute] = pg->dd.push_usage;
+ }
- /* push set is indexed in vulkan as 0 but isn't in the general pool array */
- ctx->dd->changed[is_compute][ZINK_DESCRIPTOR_TYPES] |= ctx->dd->pg[is_compute] != pg;
- if (pg->dd->push_usage) {
- zds = zink_descriptor_set_get(ctx, ZINK_DESCRIPTOR_TYPES, is_compute, &cache_hit);
- } else {
- zds = NULL;
- cache_hit = false;
+ uint8_t changed_sets = pg->dd.binding_usage & ctx->dd.state_changed[is_compute];
+ /*
+ * when binding a pipeline, the pipeline can correctly access any previously bound
+ * descriptor sets which were bound with compatible pipeline layouts
+ * VK 14.2.2
+ */
+ uint8_t bind_sets = bs->dd.pg[is_compute] && bs->dd.compat_id[is_compute] == pg->compat_id ? 0 : pg->dd.binding_usage;
+
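+ /* in db mode, compute how much descriptor memory this update will write so the buffer can be enlarged up front if needed */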
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ size_t check_size = 0;
+ if (pg->dd.push_usage && ctx->dd.push_state_changed[is_compute])
+ check_size += ctx->dd.db_size[is_compute];
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ if (changed_sets & BITFIELD_BIT(i))
+ check_size += pg->dd.db_size[i];
}
- ctx->dd->changed[is_compute][ZINK_DESCRIPTOR_TYPES] = false;
- desc_set = zds ? zds->desc_set : ctx->dd->dummy_set;
-
- if (pg->dd->push_usage) // push set
- dynamic_offset_idx = update_push_ubo_descriptors(ctx, zds,
- is_compute, cache_hit, dynamic_offsets);
- VKCTX(CmdBindDescriptorSets)(batch->state->cmdbuf, bp,
- pg->layout, 0, 1, &desc_set,
- dynamic_offset_idx, dynamic_offsets);
- }
-
- {
- for (int h = 0; h < ZINK_DESCRIPTOR_TYPES; h++) {
- if (pdd_cached(pg)->cache_misses[h] < MAX_CACHE_MISSES) {
- ctx->dd->changed[is_compute][h] |= ctx->dd->pg[is_compute] != pg;
- if (pg->dsl[h + 1]) {
- /* null set has null pool */
- if (pdd_cached(pg)->pool[h]) {
- zds = zink_descriptor_set_get(ctx, h, is_compute, &cache_hit);
- if (cache_hit) {
- pdd_cached(pg)->cache_misses[h] = 0;
- } else if (likely(zink_screen(ctx->base.screen)->descriptor_mode != ZINK_DESCRIPTOR_MODE_NOFALLBACK)) {
- if (++pdd_cached(pg)->cache_misses[h] == MAX_CACHE_MISSES) {
- const char *set_names[] = {
- "UBO",
- "TEXTURES",
- "SSBO",
- "IMAGES",
- };
- debug_printf("zink: descriptor cache exploded for prog %p set %s: getting lazy (not a bug, just lettin you know)\n", pg, set_names[h]);
- }
- }
- } else
- zds = NULL;
- /* reuse dummy set for bind */
- desc_set = zds ? zds->desc_set : ctx->dd->dummy_set;
- update_descriptors_internal(ctx, h, zds, pg, cache_hit);
-
- VKCTX(CmdBindDescriptorSets)(batch->state->cmdbuf, bp,
- pg->layout, h + 1, 1, &desc_set,
- 0, NULL);
+
+ if (bs->dd.db_offset + check_size >= bs->dd.db->base.b.width0) {
+ enlarge_db(ctx);
+ changed_sets = pg->dd.binding_usage;
+ ctx->dd.push_state_changed[is_compute] = true;
+ zink_flush_dgc_if_enabled(ctx);
+ }
+
+ if (!bs->dd.db_bound)
+ zink_batch_bind_db(ctx);
+ }
+
+ if (pg->dd.push_usage && (ctx->dd.push_state_changed[is_compute] || bind_sets)) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ uint32_t index = 0;
+ uint64_t offset = ctx->dd.push_state_changed[is_compute] ?
+ bs->dd.db_offset :
+ bs->dd.cur_db_offset[ZINK_DESCRIPTOR_TYPE_UNIFORMS];
+ if (ctx->dd.push_state_changed[is_compute]) {
+ assert(bs->dd.db->base.b.width0 > bs->dd.db_offset + ctx->dd.db_size[is_compute]);
+ for (unsigned i = 0; i < (is_compute ? 1 : ZINK_GFX_SHADER_COUNT); i++) {
+ VkDescriptorGetInfoEXT info;
+ info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT;
+ info.pNext = NULL;
+ info.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ info.data.pUniformBuffer = &ctx->di.db.ubos[is_compute ? MESA_SHADER_COMPUTE : i][0];
+ uint64_t stage_offset = offset + (is_compute ? 0 : ctx->dd.db_offset[i]);
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.robustUniformBufferDescriptorSize,
+ bs->dd.db_map + stage_offset);
+ }
+ if (!is_compute && ctx->dd.has_fbfetch) {
+ uint64_t stage_offset = offset + ctx->dd.db_offset[MESA_SHADER_FRAGMENT + 1];
+ if (pg->dd.fbfetch && screen->info.db_props.inputAttachmentDescriptorSize) {
+ /* real fbfetch descriptor */
+ VkDescriptorGetInfoEXT info;
+ info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT;
+ info.pNext = NULL;
+ info.type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+ info.data.pInputAttachmentImage = &ctx->di.fbfetch;
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.inputAttachmentDescriptorSize,
+ bs->dd.db_map + stage_offset);
+ } else {
+ /* reuse cached dummy descriptor */
+ memcpy(bs->dd.db_map + stage_offset, ctx->di.fbfetch_db, screen->info.db_props.inputAttachmentDescriptorSize);
+ }
}
+ bs->dd.cur_db_offset[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = bs->dd.db_offset;
+ bs->dd.db_offset += ctx->dd.db_size[is_compute];
+ }
+ zink_flush_dgc_if_enabled(ctx);
+ VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf,
+ is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pg->layout,
+ 0, 1,
+ &index,
+ &offset);
+ } else {
+ if (ctx->dd.push_state_changed[0]) {
+ zink_flush_dgc_if_enabled(ctx);
+ }
+ if (have_KHR_push_descriptor) {
+ if (ctx->dd.push_state_changed[is_compute])
+ VKCTX(CmdPushDescriptorSetWithTemplateKHR)(bs->cmdbuf, pg->dd.templates[0],
+ pg->layout, 0, ctx);
} else {
- zink_descriptors_update_lazy_masked(ctx, is_compute, BITFIELD_BIT(h), false, false);
+ if (ctx->dd.push_state_changed[is_compute]) {
+ struct zink_descriptor_pool *pool = check_push_pool_alloc(ctx, &bs->dd.push_pool[pg->is_compute], bs, pg->is_compute);
+ VkDescriptorSet push_set = get_descriptor_set(pool);
+ if (!push_set)
+ mesa_loge("ZINK: failed to get push descriptor set! prepare to crash!");
+ VKCTX(UpdateDescriptorSetWithTemplate)(screen->dev, push_set, pg->dd.templates[0], ctx);
+ bs->dd.sets[is_compute][0] = push_set;
+ }
+ assert(bs->dd.sets[is_compute][0]);
+ VKCTX(CmdBindDescriptorSets)(bs->cmdbuf,
+ is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pg->layout, 0, 1, &bs->dd.sets[is_compute][0],
+ 0, NULL);
}
- ctx->dd->changed[is_compute][h] = false;
}
}
- ctx->dd->pg[is_compute] = pg;
+ ctx->dd.push_state_changed[is_compute] = false;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ zink_descriptors_update_masked_buffer(ctx, is_compute, changed_sets, bind_sets);
+ else
+ zink_descriptors_update_masked(ctx, is_compute, changed_sets, bind_sets);
+ /* bindless descriptors are context-based and get updated elsewhere */
+ if (pg->dd.bindless && unlikely(!ctx->dd.bindless_bound)) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ bind_bindless_db(ctx, pg);
+ } else {
+ VKCTX(CmdBindDescriptorSets)(ctx->batch.state->cmdbuf, is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pg->layout, screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS], 1, &ctx->dd.t.bindless_set,
+ 0, NULL);
+ }
+ ctx->dd.bindless_bound = true;
+ }
+ bs->dd.pg[is_compute] = pg;
+ ctx->dd.pg[is_compute] = pg;
+ bs->dd.compat_id[is_compute] = pg->compat_id;
+ ctx->dd.state_changed[is_compute] = 0;
}
+/* called from gallium descriptor change hooks, e.g., set_sampler_views */
void
-zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs)
+zink_context_invalidate_descriptor_state(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned start, unsigned count)
{
- if (!bs->dd)
- return;
- _mesa_set_destroy(bs->dd->desc_sets, NULL);
- zink_batch_descriptor_deinit_lazy(screen, bs);
+ if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start)
+ ctx->dd.push_state_changed[shader == MESA_SHADER_COMPUTE] = true;
+ else
+ ctx->dd.state_changed[shader == MESA_SHADER_COMPUTE] |= BITFIELD_BIT(type);
}
-
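+/* compact mode merges SSBO state into UBO and IMAGE state into SAMPLER_VIEW, so higher types are remapped down */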
void
-zink_batch_descriptor_reset(struct zink_screen *screen, struct zink_batch_state *bs)
+zink_context_invalidate_descriptor_state_compact(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned start, unsigned count)
{
- set_foreach(bs->dd->desc_sets, entry) {
- struct zink_descriptor_set *zds = (void*)entry->key;
- zink_batch_usage_unset(&zds->batch_uses, bs);
- /* reset descriptor pools when no bs is using this program to avoid
- * having some inactive program hogging a billion descriptors
- */
- pipe_reference(&zds->reference, NULL);
- zink_descriptor_set_recycle(zds);
- _mesa_set_remove(bs->dd->desc_sets, entry);
+ if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start)
+ ctx->dd.push_state_changed[shader == MESA_SHADER_COMPUTE] = true;
+ else {
+ if (type > ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW)
+ type -= ZINK_DESCRIPTOR_COMPACT;
+ ctx->dd.state_changed[shader == MESA_SHADER_COMPUTE] |= BITFIELD_BIT(type);
}
- zink_batch_descriptor_reset_lazy(screen, bs);
-}
-
-bool
-zink_batch_descriptor_init(struct zink_screen *screen, struct zink_batch_state *bs)
-{
- if (!zink_batch_descriptor_init_lazy(screen, bs))
- return false;
- bs->dd->desc_sets = _mesa_pointer_set_create(bs);
- return !!bs->dd->desc_sets;
-}
-
-static uint32_t
-calc_descriptor_state_hash_ubo(struct zink_context *ctx, enum pipe_shader_type shader, int idx, uint32_t hash, bool need_offset)
-{
- struct zink_resource *res = ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_UBO][shader][idx];
- struct zink_resource_object *obj = res ? res->obj : NULL;
- hash = XXH32(&obj, sizeof(void*), hash);
- void *hash_data = &ctx->di.ubos[shader][idx].range;
- size_t data_size = sizeof(unsigned);
- hash = XXH32(hash_data, data_size, hash);
- if (need_offset)
- hash = XXH32(&ctx->di.ubos[shader][idx].offset, sizeof(unsigned), hash);
- return hash;
}
-static uint32_t
-calc_descriptor_state_hash_ssbo(struct zink_context *ctx, struct zink_shader *zs, enum pipe_shader_type shader, int i, int idx, uint32_t hash)
+static void
+deinit_multi_pool_overflow(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool)
{
- struct zink_resource *res = ctx->di.descriptor_res[ZINK_DESCRIPTOR_TYPE_SSBO][shader][idx];
- struct zink_resource_object *obj = res ? res->obj : NULL;
- hash = XXH32(&obj, sizeof(void*), hash);
- if (obj) {
- struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][idx];
- hash = XXH32(&ssbo->buffer_offset, sizeof(ssbo->buffer_offset), hash);
- hash = XXH32(&ssbo->buffer_size, sizeof(ssbo->buffer_size), hash);
+ for (unsigned i = 0; i < 2; i++) {
+ clear_multi_pool_overflow(screen, &mpool->overflowed_pools[i]);
+ util_dynarray_fini(&mpool->overflowed_pools[i]);
}
- return hash;
}
-static uint32_t
-calc_descriptor_state_hash_sampler(struct zink_context *ctx, struct zink_shader *zs, enum pipe_shader_type shader, int i, int idx, uint32_t hash)
+/* called during batch state destroy */
+void
+zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs)
{
- for (unsigned k = 0; k < zs->bindings[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW][i].size; k++) {
- struct zink_sampler_view *sampler_view = zink_sampler_view(ctx->sampler_views[shader][idx + k]);
- bool is_buffer = zink_shader_descriptor_is_buffer(zs, ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW, i);
- ctx->di.sampler_surfaces[shader][idx + k].is_buffer = is_buffer;
- uint32_t val = zink_get_sampler_view_hash(ctx, sampler_view, is_buffer);
- hash = XXH32(&val, sizeof(uint32_t), hash);
- if (is_buffer)
- continue;
-
- struct zink_sampler_state *sampler_state = ctx->sampler_states[shader][idx + k];
-
- if (sampler_state)
- hash = XXH32(&sampler_state->hash, sizeof(uint32_t), hash);
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ for (unsigned j = 0; j < bs->dd.pools[i].capacity / sizeof(struct zink_descriptor_pool_multi *); j++) {
+ struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[i], struct zink_descriptor_pool_multi *, j);
+ if (mppool && *mppool) {
+ deinit_multi_pool_overflow(screen, *mppool);
+ multi_pool_destroy(screen, *mppool);
+ }
+ }
+ util_dynarray_fini(&bs->dd.pools[i]);
}
- return hash;
-}
-
-static uint32_t
-calc_descriptor_state_hash_image(struct zink_context *ctx, struct zink_shader *zs, enum pipe_shader_type shader, int i, int idx, uint32_t hash)
-{
- for (unsigned k = 0; k < zs->bindings[ZINK_DESCRIPTOR_TYPE_IMAGE][i].size; k++) {
- bool is_buffer = zink_shader_descriptor_is_buffer(zs, ZINK_DESCRIPTOR_TYPE_IMAGE, i);
- uint32_t val = zink_get_image_view_hash(ctx, &ctx->image_views[shader][idx + k], is_buffer);
- ctx->di.image_surfaces[shader][idx + k].is_buffer = is_buffer;
- hash = XXH32(&val, sizeof(uint32_t), hash);
+ for (unsigned i = 0; i < 2; i++) {
+ if (bs->dd.push_pool[i].pool)
+ pool_destroy(screen, bs->dd.push_pool[i].pool);
+ deinit_multi_pool_overflow(screen, &bs->dd.push_pool[i]);
}
- return hash;
+
+ if (bs->dd.db_xfer)
+ zink_screen_buffer_unmap(&screen->base, bs->dd.db_xfer);
+ bs->dd.db_xfer = NULL;
+ if (bs->dd.db)
+ screen->base.resource_destroy(&screen->base, &bs->dd.db->base.b);
+ bs->dd.db = NULL;
+ bs->dd.db_bound = false;
+ bs->dd.db_offset = 0;
+ memset(bs->dd.cur_db_offset, 0, sizeof(bs->dd.cur_db_offset));
}
-static uint32_t
-update_descriptor_stage_state(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type)
+/* ensure the idle/usable overflow set array always has as many members as possible by merging both arrays on batch state reset */
+static void
+consolidate_pool_alloc(struct zink_screen *screen, struct zink_descriptor_pool_multi *mpool)
{
- struct zink_shader *zs = shader == PIPE_SHADER_COMPUTE ? ctx->compute_stage : ctx->gfx_stages[shader];
-
- uint32_t hash = 0;
- for (int i = 0; i < zs->num_bindings[type]; i++) {
- /* skip push set members */
- if (zs->bindings[type][i].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
- continue;
+ unsigned sizes[] = {
+ util_dynarray_num_elements(&mpool->overflowed_pools[0], struct zink_descriptor_pool*),
+ util_dynarray_num_elements(&mpool->overflowed_pools[1], struct zink_descriptor_pool*),
+ };
+ if (!sizes[0] && !sizes[1])
+ return;
+ /* set idx to whichever overflow is smaller */
+ mpool->overflow_idx = sizes[0] > sizes[1];
+ if (!mpool->overflowed_pools[mpool->overflow_idx].size)
+ return;
- int idx = zs->bindings[type][i].index;
- switch (type) {
- case ZINK_DESCRIPTOR_TYPE_UBO:
- hash = calc_descriptor_state_hash_ubo(ctx, shader, idx, hash, true);
- break;
- case ZINK_DESCRIPTOR_TYPE_SSBO:
- hash = calc_descriptor_state_hash_ssbo(ctx, zs, shader, i, idx, hash);
- break;
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- hash = calc_descriptor_state_hash_sampler(ctx, zs, shader, i, idx, hash);
- break;
- case ZINK_DESCRIPTOR_TYPE_IMAGE:
- hash = calc_descriptor_state_hash_image(ctx, zs, shader, i, idx, hash);
- break;
- default:
- unreachable("unknown descriptor type");
- }
- }
- return hash;
+ /* attempt to consolidate all the overflow into one array to maximize reuse */
+ util_dynarray_append_dynarray(&mpool->overflowed_pools[!mpool->overflow_idx], &mpool->overflowed_pools[mpool->overflow_idx]);
+ util_dynarray_clear(&mpool->overflowed_pools[mpool->overflow_idx]);
}
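/* A standalone sketch of the merge performed by consolidate_pool_alloc() above,
 * modeled with plain malloc'd arrays instead of util_dynarray. The names below
 * are illustrative only and not part of zink. */
#include <stdlib.h>
#include <string.h>

struct overflow {
   void **pools[2];    /* two recycling arrays, like mpool->overflowed_pools */
   unsigned counts[2];
   unsigned idx;       /* array that receives new overflow, like overflow_idx */
};

static void
consolidate(struct overflow *o)
{
   if (!o->counts[0] && !o->counts[1])
      return;
   /* point idx at the smaller array; it becomes the target for new overflow */
   o->idx = o->counts[0] > o->counts[1];
   unsigned src = o->idx, dst = !o->idx;
   if (!o->counts[src])
      return;
   /* fold the smaller array into the larger one, then empty it */
   o->pools[dst] = realloc(o->pools[dst],
                           (o->counts[dst] + o->counts[src]) * sizeof(void *));
   memcpy(&o->pools[dst][o->counts[dst]], o->pools[src],
          o->counts[src] * sizeof(void *));
   o->counts[dst] += o->counts[src];
   o->counts[src] = 0;
}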
-static void
-update_descriptor_state(struct zink_context *ctx, enum zink_descriptor_type type, bool is_compute)
+/* called when a batch state is reset, i.e., just before a batch state becomes the current state */
+void
+zink_batch_descriptor_reset(struct zink_screen *screen, struct zink_batch_state *bs)
{
- /* we shouldn't be calling this if we don't have to */
- assert(!ctx->dd->descriptor_states[is_compute].valid[type]);
- bool has_any_usage = false;
-
- if (is_compute) {
- /* just update compute state */
- bool has_usage = zink_program_get_descriptor_usage(ctx, PIPE_SHADER_COMPUTE, type);
- if (has_usage)
- ctx->dd->descriptor_states[is_compute].state[type] = update_descriptor_stage_state(ctx, PIPE_SHADER_COMPUTE, type);
- else
- ctx->dd->descriptor_states[is_compute].state[type] = 0;
- has_any_usage = has_usage;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ bs->dd.db_offset = 0;
+ if (bs->dd.db && bs->dd.db->base.b.width0 < bs->ctx->dd.db.max_db_size * screen->base_descriptor_size)
+ reinit_db(screen, bs);
+ bs->dd.db_bound = false;
} else {
- /* update all gfx states */
- bool first = true;
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- bool has_usage = false;
- /* this is the incremental update for the shader stage */
- if (!ctx->dd->gfx_descriptor_states[i].valid[type]) {
- ctx->dd->gfx_descriptor_states[i].state[type] = 0;
- if (ctx->gfx_stages[i]) {
- has_usage = zink_program_get_descriptor_usage(ctx, i, type);
- if (has_usage)
- ctx->dd->gfx_descriptor_states[i].state[type] = update_descriptor_stage_state(ctx, i, type);
- ctx->dd->gfx_descriptor_states[i].valid[type] = has_usage;
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ struct zink_descriptor_pool_multi **mpools = bs->dd.pools[i].data;
+ for (unsigned j = 0; j < bs->dd.pool_size[i]; j++) {
+ struct zink_descriptor_pool_multi *mpool = mpools[j];
+ if (!mpool)
+ continue;
+ consolidate_pool_alloc(screen, mpool);
+
+ /* if the pool is still in use, reset the current set index */
+ if (mpool->pool_key->use_count)
+ mpool->pool->set_idx = 0;
+ else {
+ /* otherwise destroy it to reclaim memory */
+ multi_pool_destroy(screen, mpool);
+ mpools[j] = NULL;
}
}
- if (ctx->dd->gfx_descriptor_states[i].valid[type]) {
- /* this is the overall state update for the descriptor set hash */
- if (first) {
- /* no need to double hash the first state */
- ctx->dd->descriptor_states[is_compute].state[type] = ctx->dd->gfx_descriptor_states[i].state[type];
- first = false;
- } else {
- ctx->dd->descriptor_states[is_compute].state[type] = XXH32(&ctx->dd->gfx_descriptor_states[i].state[type],
- sizeof(uint32_t),
- ctx->dd->descriptor_states[is_compute].state[type]);
- }
+ }
+ for (unsigned i = 0; i < 2; i++) {
+ if (bs->dd.push_pool[i].reinit_overflow) {
+ /* these don't match current fbfetch usage and can never be used again */
+ clear_multi_pool_overflow(screen, &bs->dd.push_pool[i].overflowed_pools[bs->dd.push_pool[i].overflow_idx]);
+ } else if (bs->dd.push_pool[i].pool) {
+ consolidate_pool_alloc(screen, &bs->dd.push_pool[i]);
}
- has_any_usage |= has_usage;
+ if (bs->dd.push_pool[i].pool)
+ bs->dd.push_pool[i].pool->set_idx = 0;
}
}
- ctx->dd->descriptor_states[is_compute].valid[type] = has_any_usage;
+ memset(bs->dd.pg, 0, sizeof(bs->dd.pg));
}
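/* A generic sketch of the reuse-or-free policy applied above: pools whose key is
 * still referenced by a live program are rewound for reuse, the rest are freed.
 * The types and names below are illustrative placeholders, not zink API. */
#include <stdlib.h>

struct cache_entry {
   unsigned use_count;  /* like zink_descriptor_pool_key::use_count */
   unsigned cursor;     /* like zink_descriptor_pool::set_idx */
};

static void
reset_or_trim(struct cache_entry **entries, unsigned count)
{
   for (unsigned i = 0; i < count; i++) {
      struct cache_entry *e = entries[i];
      if (!e)
         continue;
      if (e->use_count) {
         /* still referenced: rewind the allocation cursor and keep it */
         e->cursor = 0;
      } else {
         /* unreferenced: destroy to reclaim memory */
         free(e);
         entries[i] = NULL;
      }
   }
}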
-static void
-zink_context_update_descriptor_states(struct zink_context *ctx, struct zink_program *pg)
+/* called on batch state creation */
+bool
+zink_batch_descriptor_init(struct zink_screen *screen, struct zink_batch_state *bs)
{
- if (pg->dd->push_usage && (!ctx->dd->push_valid[pg->is_compute] ||
- pg->dd->push_usage != ctx->dd->last_push_usage[pg->is_compute])) {
- uint32_t hash = 0;
- if (pg->is_compute) {
- hash = calc_descriptor_state_hash_ubo(ctx, PIPE_SHADER_COMPUTE, 0, 0, false);
- } else {
- bool first = true;
- u_foreach_bit(stage, pg->dd->push_usage) {
- if (!ctx->dd->gfx_push_valid[stage]) {
- ctx->dd->gfx_push_state[stage] = calc_descriptor_state_hash_ubo(ctx, stage, 0, 0, false);
- ctx->dd->gfx_push_valid[stage] = true;
- }
- if (first)
- hash = ctx->dd->gfx_push_state[stage];
- else
- hash = XXH32(&ctx->dd->gfx_push_state[stage], sizeof(uint32_t), hash);
- first = false;
- }
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
+ util_dynarray_init(&bs->dd.pools[i], bs);
+ if (!screen->info.have_KHR_push_descriptor) {
+ for (unsigned i = 0; i < 2; i++) {
+ bs->dd.push_pool[i].pool = create_push_pool(screen, bs, i, false);
+ util_dynarray_init(&bs->dd.push_pool[i].overflowed_pools[0], bs);
+ util_dynarray_init(&bs->dd.push_pool[i].overflowed_pools[1], bs);
}
- ctx->dd->push_state[pg->is_compute] = hash;
- ctx->dd->push_valid[pg->is_compute] = true;
- ctx->dd->last_push_usage[pg->is_compute] = pg->dd->push_usage;
}
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- if (pdd_cached(pg)->pool[i] && pdd_cached(pg)->cache_misses[i] < MAX_CACHE_MISSES &&
- !ctx->dd->descriptor_states[pg->is_compute].valid[i])
- update_descriptor_state(ctx, i, pg->is_compute);
+
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && !(bs->ctx->flags & ZINK_CONTEXT_COPY_ONLY)) {
+ unsigned bind = ZINK_BIND_DESCRIPTOR;
+ struct pipe_resource *pres = pipe_buffer_create(&screen->base, bind, 0, bs->ctx->dd.db.max_db_size * screen->base_descriptor_size);
+ if (!pres)
+ return false;
+ bs->dd.db = zink_resource(pres);
+ bs->dd.db_map = pipe_buffer_map(&bs->ctx->base, pres, PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT | PIPE_MAP_THREAD_SAFE, &bs->dd.db_xfer);
}
+ return true;
}
-void
-zink_context_invalidate_descriptor_state(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned start, unsigned count)
+static void
+init_push_template_entry(VkDescriptorUpdateTemplateEntry *entry, unsigned i)
{
- zink_context_invalidate_descriptor_state_lazy(ctx, shader, type, start, count);
- if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start) {
- /* ubo 0 is the push set */
- ctx->dd->push_state[shader == PIPE_SHADER_COMPUTE] = 0;
- ctx->dd->push_valid[shader == PIPE_SHADER_COMPUTE] = false;
- if (shader != PIPE_SHADER_COMPUTE) {
- ctx->dd->gfx_push_state[shader] = 0;
- ctx->dd->gfx_push_valid[shader] = false;
- }
- ctx->dd->changed[shader == PIPE_SHADER_COMPUTE][ZINK_DESCRIPTOR_TYPES] = true;
- return;
- }
- if (shader != PIPE_SHADER_COMPUTE) {
- ctx->dd->gfx_descriptor_states[shader].valid[type] = false;
- ctx->dd->gfx_descriptor_states[shader].state[type] = 0;
- }
- ctx->dd->descriptor_states[shader == PIPE_SHADER_COMPUTE].valid[type] = false;
- ctx->dd->descriptor_states[shader == PIPE_SHADER_COMPUTE].state[type] = 0;
- ctx->dd->changed[shader == PIPE_SHADER_COMPUTE][type] = true;
+ entry->dstBinding = i;
+ entry->descriptorCount = 1;
+ entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ entry->offset = offsetof(struct zink_context, di.t.ubos[i][0]);
+ entry->stride = sizeof(VkDescriptorBufferInfo);
}
+/* called on context creation */
bool
zink_descriptors_init(struct zink_context *ctx)
{
- zink_descriptors_init_lazy(ctx);
- if (!ctx->dd)
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.push_entries[i];
+ init_push_template_entry(entry, i);
+ }
+ init_push_template_entry(&ctx->dd.compute_push_entry, MESA_SHADER_COMPUTE);
+ VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.push_entries[ZINK_GFX_SHADER_COUNT]; //fbfetch
+ entry->dstBinding = ZINK_FBFETCH_BINDING;
+ entry->descriptorCount = 1;
+ entry->descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+ entry->offset = offsetof(struct zink_context, di.fbfetch);
+ entry->stride = sizeof(VkDescriptorImageInfo);
+ struct zink_descriptor_layout_key *layout_key;
+ if (!zink_descriptor_util_push_layouts_get(ctx, ctx->dd.push_dsl, ctx->dd.push_layout_keys))
+ return false;
+
+ ctx->dd.dummy_dsl = descriptor_util_layout_get(screen, 0, NULL, 0, &layout_key);
+ if (!ctx->dd.dummy_dsl)
return false;
- return zink_descriptor_pool_init(ctx);
+
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ VkDeviceSize val;
+ for (unsigned i = 0; i < 2; i++) {
+ VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, ctx->dd.push_dsl[i]->layout, &val);
+ ctx->dd.db_size[i] = val;
+ }
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, i, &val);
+ ctx->dd.db_offset[i] = val;
+ }
+ /* start small */
+ ctx->dd.db.max_db_size = 250;
+ ctx->dd.db.size_enlarge_scale = 16;
+ }
+
+ return true;
}
+/* called on context destroy */
void
zink_descriptors_deinit(struct zink_context *ctx)
{
- zink_descriptor_pool_deinit(ctx);
- zink_descriptors_deinit_lazy(ctx);
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (ctx->dd.push_dsl[0])
+ VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[0]->layout, NULL);
+ if (ctx->dd.push_dsl[1])
+ VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[1]->layout, NULL);
}
+/* called on screen creation */
bool
-zink_descriptor_layouts_init(struct zink_context *ctx)
+zink_descriptor_layouts_init(struct zink_screen *screen)
{
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++)
- if (!_mesa_hash_table_init(&ctx->desc_set_layouts[i], ctx, hash_descriptor_layout, equals_descriptor_layout))
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ if (!_mesa_hash_table_init(&screen->desc_set_layouts[i], screen, hash_descriptor_layout, equals_descriptor_layout))
+ return false;
+ if (!_mesa_set_init(&screen->desc_pool_keys[i], screen, hash_descriptor_pool_key, equals_descriptor_pool_key))
return false;
+ }
+ simple_mtx_init(&screen->desc_set_layouts_lock, mtx_plain);
+ simple_mtx_init(&screen->desc_pool_keys_lock, mtx_plain);
return true;
}
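/* A sketch of the lock-protected lookup these screen-level caches enable; the
 * helper shape here is an assumption for illustration, and only the simple_mtx +
 * hash table usage mirrors what is initialized above. */
#include "util/hash_table.h"
#include "util/simple_mtx.h"

struct demo_screen {
   struct hash_table cache;
   simple_mtx_t lock;
};

static void *
demo_cache_get_or_create(struct demo_screen *s, void *key,
                         void *(*create)(void *key))
{
   simple_mtx_lock(&s->lock);
   struct hash_entry *he = _mesa_hash_table_search(&s->cache, key);
   void *data = he ? he->data : NULL;
   if (!data) {
      data = create(key);
      if (data)
         _mesa_hash_table_insert(&s->cache, key, data);
   }
   simple_mtx_unlock(&s->lock);
   return data;
}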
+/* called on screen destroy */
void
-zink_descriptor_layouts_deinit(struct zink_context *ctx)
+zink_descriptor_layouts_deinit(struct zink_screen *screen)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- hash_table_foreach(&ctx->desc_set_layouts[i], he) {
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
+ hash_table_foreach(&screen->desc_set_layouts[i], he) {
struct zink_descriptor_layout *layout = he->data;
VKSCR(DestroyDescriptorSetLayout)(screen->dev, layout->layout, NULL);
- if (layout->desc_template)
- VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, layout->desc_template, NULL);
ralloc_free(layout);
- _mesa_hash_table_remove(&ctx->desc_set_layouts[i], he);
+ _mesa_hash_table_remove(&screen->desc_set_layouts[i], he);
}
}
+ simple_mtx_destroy(&screen->desc_set_layouts_lock);
+ simple_mtx_destroy(&screen->desc_pool_keys_lock);
}
-
+/* fbfetch descriptor is not initialized by default since it is seldom used;
+ * once it is needed, new push layouts/sets are allocated and all previous layouts/sets are destroyed
+ */
void
zink_descriptor_util_init_fbfetch(struct zink_context *ctx)
{
- if (ctx->dd->has_fbfetch)
+ if (ctx->dd.has_fbfetch)
+ return;
+
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[0]->layout, NULL);
+ //don't free these now, let ralloc free on teardown to avoid invalid access
+ //ralloc_free(ctx->dd.push_dsl[0]);
+ //ralloc_free(ctx->dd.push_layout_keys[0]);
+ ctx->dd.push_dsl[0] = create_gfx_layout(ctx, &ctx->dd.push_layout_keys[0], true);
+ ctx->dd.has_fbfetch = true;
+
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ VkDeviceSize val;
+ VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, &val);
+ ctx->dd.db_size[0] = val;
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->dd.db_offset); i++) {
+ VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, i, &val);
+ ctx->dd.db_offset[i] = val;
+ }
+ }
+}
+
+/* called when a shader that uses bindless is created */
+void
+zink_descriptors_init_bindless(struct zink_context *ctx)
+{
+ if (ctx->dd.bindless_init)
return;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ assert(screen->bindless_layout);
+ ctx->dd.bindless_init = true;
+
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ unsigned bind = ZINK_BIND_DESCRIPTOR;
+ VkDeviceSize size;
+ VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, screen->bindless_layout, &size);
+ struct pipe_resource *pres = pipe_buffer_create(&screen->base, bind, 0, size);
+ ctx->dd.db.bindless_db = zink_resource(pres);
+ ctx->dd.db.bindless_db_map = pipe_buffer_map(&ctx->base, pres, PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT, &ctx->dd.db.bindless_db_xfer);
+ zink_batch_bind_db(ctx);
+ for (unsigned i = 0; i < 4; i++) {
+ VkDeviceSize offset;
+ VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, screen->bindless_layout, i, &offset);
+ ctx->dd.db.bindless_db_offsets[i] = offset;
+ }
+ } else {
+ VkDescriptorPoolCreateInfo dpci = {0};
+ VkDescriptorPoolSize sizes[4];
+ for (unsigned i = 0; i < 4; i++) {
+ sizes[i].type = zink_descriptor_type_from_bindless_index(i);
+ sizes[i].descriptorCount = ZINK_MAX_BINDLESS_HANDLES;
+ }
+ dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+ dpci.pPoolSizes = sizes;
+ dpci.poolSizeCount = 4;
+ dpci.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT;
+ dpci.maxSets = 1;
+ VkResult result = VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &ctx->dd.t.bindless_pool);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateDescriptorPool failed (%s)", vk_Result_to_str(result));
+ return;
+ }
+
+ zink_descriptor_util_alloc_sets(screen, screen->bindless_layout, ctx->dd.t.bindless_pool, &ctx->dd.t.bindless_set, 1);
+ }
+}
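/* The four bindings whose offsets are queried above correspond to combined image
 * sampler, uniform texel buffer, storage image, and storage texel buffer (see
 * zink_descriptor_type_from_bindless_index()). A small sketch of how the update
 * path below maps a bindless handle onto a binding and array element;
 * MAX_HANDLES is an illustrative stand-in for ZINK_MAX_BINDLESS_HANDLES. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_HANDLES 1024

int
main(void)
{
   for (unsigned i = 0; i < 2; i++) {              /* 0 = sampled, 1 = storage */
      unsigned handles[] = { 7, MAX_HANDLES + 3 }; /* an image handle and a buffer handle */
      for (unsigned h = 0; h < 2; h++) {
         unsigned handle = handles[h];
         bool is_buffer = handle >= MAX_HANDLES;   /* ZINK_BINDLESS_IS_BUFFER() */
         unsigned binding = i * 2 + !!is_buffer;
         unsigned array_element = is_buffer ? handle - MAX_HANDLES : handle;
         printf("set %u handle %u -> binding %u, element %u\n",
                i, handle, binding, array_element);
      }
   }
   return 0;
}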
+
+/* called on context destroy */
+void
+zink_descriptors_deinit_bindless(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (ctx->dd.db.bindless_db_xfer)
+ pipe_buffer_unmap(&ctx->base, ctx->dd.db.bindless_db_xfer);
+ if (ctx->dd.db.bindless_db) {
+ struct pipe_resource *pres = &ctx->dd.db.bindless_db->base.b;
+ pipe_resource_reference(&pres, NULL);
+ }
+ } else {
+ if (ctx->dd.t.bindless_pool)
+ VKSCR(DestroyDescriptorPool)(screen->dev, ctx->dd.t.bindless_pool, NULL);
+ }
+}
+/* entrypoint for updating bindless descriptors: called from draw/dispatch */
+void
+zink_descriptors_update_bindless(struct zink_context *ctx)
+{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd->push_dsl[0]->layout, NULL);
- ralloc_free(ctx->dd->push_dsl[0]);
- ralloc_free(ctx->dd->push_layout_keys[0]);
- ctx->dd->push_dsl[0] = create_gfx_layout(ctx, &ctx->dd->push_layout_keys[0], true);
- ctx->dd->has_fbfetch = true;
- if (screen->descriptor_mode != ZINK_DESCRIPTOR_MODE_LAZY)
- zink_descriptor_pool_init(ctx);
+ VkDescriptorGetInfoEXT info;
+ info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT;
+ info.pNext = NULL;
+ /* bindless descriptors are split between images and buffers */
+ for (unsigned i = 0; i < 2; i++) {
+ if (!ctx->di.bindless_dirty[i])
+ continue;
+ while (util_dynarray_contains(&ctx->di.bindless[i].updates, uint32_t)) {
+ /* updates are tracked by handle */
+ uint32_t handle = util_dynarray_pop(&ctx->di.bindless[i].updates, uint32_t);
+ bool is_buffer = ZINK_BINDLESS_IS_BUFFER(handle);
+ unsigned binding = i * 2 + !!is_buffer;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (is_buffer) {
+ size_t size = i ? screen->info.db_props.robustStorageTexelBufferDescriptorSize : screen->info.db_props.robustUniformTexelBufferDescriptorSize;
+ info.type = i ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+ info.data.pSampler = (void*)&ctx->di.bindless[i].db.buffer_infos[handle - ZINK_MAX_BINDLESS_HANDLES];
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, size, ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + handle * size);
+ } else {
+ info.type = i ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+ if (screen->info.db_props.combinedImageSamplerDescriptorSingleArray || i) {
+ size_t size = i ? screen->info.db_props.storageImageDescriptorSize : screen->info.db_props.combinedImageSamplerDescriptorSize;
+ info.data.pSampler = (void*)&ctx->di.bindless[i].img_infos[handle];
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, size, ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + handle * size);
+ } else {
+ /* drivers that don't support combinedImageSamplerDescriptorSingleArray must have sampler arrays written in memory as
+ *
+ * | array_of_samplers[] | array_of_sampled_images[] |
+ *
+ * which means each descriptor's data must be split
+ */
+ uint8_t buf[1024];
+ size_t size = screen->info.db_props.combinedImageSamplerDescriptorSize;
+ info.data.pSampler = (void*)&ctx->di.bindless[i].img_infos[handle];
+ VKSCR(GetDescriptorEXT)(screen->dev, &info, size, buf);
+ memcpy(ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + handle * screen->info.db_props.samplerDescriptorSize, buf, screen->info.db_props.samplerDescriptorSize);
+ size_t offset = screen->info.db_props.samplerDescriptorSize * ZINK_MAX_BINDLESS_HANDLES;
+ offset += handle * screen->info.db_props.sampledImageDescriptorSize;
+ memcpy(ctx->dd.db.bindless_db_map + ctx->dd.db.bindless_db_offsets[binding] + offset, &buf[screen->info.db_props.samplerDescriptorSize], screen->info.db_props.sampledImageDescriptorSize);
+ }
+ }
+ } else {
+ VkWriteDescriptorSet wd;
+ wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ wd.pNext = NULL;
+ wd.dstSet = ctx->dd.t.bindless_set;
+ wd.dstBinding = binding;
+ /* buffer handle ids are offset by ZINK_MAX_BINDLESS_HANDLES for internal tracking */
+ wd.dstArrayElement = is_buffer ? handle - ZINK_MAX_BINDLESS_HANDLES : handle;
+ wd.descriptorCount = 1;
+ wd.descriptorType = zink_descriptor_type_from_bindless_index(wd.dstBinding);
+ if (is_buffer)
+ wd.pTexelBufferView = &ctx->di.bindless[i].t.buffer_infos[wd.dstArrayElement];
+ else
+ wd.pImageInfo = &ctx->di.bindless[i].img_infos[handle];
+ /* this sucks, but sets must be singly updated to be handled correctly */
+ VKSCR(UpdateDescriptorSets)(screen->dev, 1, &wd, 0, NULL);
+ }
+ }
+ }
+ ctx->di.any_bindless_dirty = 0;
}
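/* A worked sketch of the descriptor-buffer offset math used by
 * zink_descriptors_update_bindless() above; all sizes are made-up stand-ins for
 * the VK_EXT_descriptor_buffer properties, not real driver values. */
#include <stdio.h>

#define MAX_HANDLES 1024 /* illustrative stand-in for ZINK_MAX_BINDLESS_HANDLES */

int
main(void)
{
   const size_t binding_offset = 4096;   /* from GetDescriptorSetLayoutBindingOffsetEXT */
   const size_t combined_size = 48;      /* combinedImageSamplerDescriptorSize (made up) */
   const size_t sampler_size = 16;       /* samplerDescriptorSize (made up) */
   const size_t sampled_image_size = 32; /* sampledImageDescriptorSize (made up) */
   const unsigned handle = 7;

   /* single-array layout: one combined descriptor per handle */
   printf("combined write at %zu\n", binding_offset + handle * combined_size);

   /* split layout: | samplers[MAX_HANDLES] | sampled_images[MAX_HANDLES] | */
   size_t sampler_dst = binding_offset + handle * sampler_size;
   size_t image_dst = binding_offset + sampler_size * MAX_HANDLES
                      + handle * sampled_image_size;
   printf("split write: sampler at %zu, image at %zu\n", sampler_dst, image_dst);
   return 0;
}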
diff --git a/src/gallium/drivers/zink/zink_descriptors.h b/src/gallium/drivers/zink/zink_descriptors.h
index a2b56da3dbf..8280a05f194 100644
--- a/src/gallium/drivers/zink/zink_descriptors.h
+++ b/src/gallium/drivers/zink/zink_descriptors.h
@@ -26,126 +26,16 @@
#ifndef ZINK_DESCRIPTOR_H
# define ZINK_DESCRIPTOR_H
-#include <vulkan/vulkan.h>
-#include "util/u_dynarray.h"
-#include "util/u_inlines.h"
-#include "util/simple_mtx.h"
-#include "zink_batch.h"
+#include "zink_types.h"
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef ZINK_SHADER_COUNT
-#define ZINK_SHADER_COUNT (PIPE_SHADER_TYPES - 1)
-#endif
-
-enum zink_descriptor_type {
- ZINK_DESCRIPTOR_TYPE_UBO,
- ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW,
- ZINK_DESCRIPTOR_TYPE_SSBO,
- ZINK_DESCRIPTOR_TYPE_IMAGE,
- ZINK_DESCRIPTOR_TYPES,
-};
-
-#define ZINK_MAX_DESCRIPTORS_PER_TYPE (32 * ZINK_SHADER_COUNT)
-
-struct zink_descriptor_refs {
- struct util_dynarray refs;
-};
-
-
-/* hashes of all the named types in a given state */
-struct zink_descriptor_state {
- bool valid[ZINK_DESCRIPTOR_TYPES];
- uint32_t state[ZINK_DESCRIPTOR_TYPES];
-};
-
-enum zink_descriptor_size_index {
- ZDS_INDEX_UBO,
- ZDS_INDEX_COMBINED_SAMPLER,
- ZDS_INDEX_UNIFORM_TEXELS,
- ZDS_INDEX_STORAGE_BUFFER,
- ZDS_INDEX_STORAGE_IMAGE,
- ZDS_INDEX_STORAGE_TEXELS,
-};
-
-struct hash_table;
-
-struct zink_context;
-struct zink_image_view;
-struct zink_program;
-struct zink_resource;
-struct zink_sampler;
-struct zink_sampler_view;
-struct zink_shader;
-struct zink_screen;
-
-
-struct zink_descriptor_state_key {
- bool exists[ZINK_SHADER_COUNT];
- uint32_t state[ZINK_SHADER_COUNT];
-};
-
-struct zink_descriptor_layout_key {
- unsigned num_descriptors;
- VkDescriptorSetLayoutBinding *bindings;
- unsigned use_count;
-};
-
-struct zink_descriptor_layout {
- VkDescriptorSetLayout layout;
- VkDescriptorUpdateTemplateKHR desc_template;
-};
-
-struct zink_descriptor_pool_key {
- struct zink_descriptor_layout_key *layout;
- unsigned num_type_sizes;
- VkDescriptorPoolSize *sizes;
-};
-
-struct zink_descriptor_reference {
- void **ref;
- bool *invalid;
-};
-
-
-struct zink_descriptor_data {
- struct zink_descriptor_state gfx_descriptor_states[ZINK_SHADER_COUNT]; // keep incremental hashes here
- struct zink_descriptor_state descriptor_states[2]; // gfx, compute
- struct hash_table *descriptor_pools[ZINK_DESCRIPTOR_TYPES];
-
- struct zink_descriptor_layout_key *push_layout_keys[2]; //gfx, compute
- struct zink_descriptor_pool *push_pool[2]; //gfx, compute
- struct zink_descriptor_layout *push_dsl[2]; //gfx, compute
- uint8_t last_push_usage[2];
- bool push_valid[2];
- uint32_t push_state[2];
- bool gfx_push_valid[ZINK_SHADER_COUNT];
- uint32_t gfx_push_state[ZINK_SHADER_COUNT];
- struct zink_descriptor_set *last_set[2];
-
- VkDescriptorPool dummy_pool;
- struct zink_descriptor_layout *dummy_dsl;
- VkDescriptorSet dummy_set;
-
- bool changed[2][ZINK_DESCRIPTOR_TYPES + 1];
- bool has_fbfetch;
- struct zink_program *pg[2]; //gfx, compute
-};
+#define ZINK_DESCRIPTOR_COMPACT 2
-struct zink_program_descriptor_data {
- uint8_t push_usage;
- VkDescriptorPoolSize sizes[6]; //zink_descriptor_size_index
- struct zink_descriptor_layout_key *layout_key[ZINK_DESCRIPTOR_TYPES]; //push set doesn't need one
- uint8_t binding_usage;
- struct zink_descriptor_layout *layouts[ZINK_DESCRIPTOR_TYPES + 1];
- VkDescriptorUpdateTemplateKHR push_template;
-};
-struct zink_batch_descriptor_data {
- struct set *desc_sets;
-};
+#define ZINK_BINDLESS_IS_BUFFER(HANDLE) (HANDLE >= ZINK_MAX_BINDLESS_HANDLES)
static inline enum zink_descriptor_size_index
zink_vktype_to_size_idx(VkDescriptorType type)
@@ -154,10 +44,13 @@ zink_vktype_to_size_idx(VkDescriptorType type)
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
return ZDS_INDEX_UBO;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
return ZDS_INDEX_COMBINED_SAMPLER;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
return ZDS_INDEX_UNIFORM_TEXELS;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return ZDS_INDEX_SAMPLER;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
return ZDS_INDEX_STORAGE_BUFFER;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@@ -169,6 +62,31 @@ zink_vktype_to_size_idx(VkDescriptorType type)
unreachable("unknown type");
}
+static inline enum zink_descriptor_size_index_compact
+zink_vktype_to_size_idx_comp(VkDescriptorType type)
+{
+ switch (type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ return ZDS_INDEX_COMP_UBO;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ return ZDS_INDEX_COMP_COMBINED_SAMPLER;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ return ZDS_INDEX_COMP_UNIFORM_TEXELS;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return ZDS_INDEX_COMP_SAMPLER;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ return ZDS_INDEX_COMP_STORAGE_BUFFER;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ return ZDS_INDEX_COMP_STORAGE_IMAGE;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ return ZDS_INDEX_COMP_STORAGE_TEXELS;
+ default: break;
+ }
+ unreachable("unknown type");
+}
+
static inline enum zink_descriptor_size_index
zink_descriptor_type_to_size_idx(enum zink_descriptor_type type)
{
@@ -185,42 +103,63 @@ zink_descriptor_type_to_size_idx(enum zink_descriptor_type type)
}
unreachable("unknown type");
}
-unsigned
-zink_descriptor_program_num_sizes(struct zink_program *pg, enum zink_descriptor_type type);
+
+static inline enum zink_descriptor_size_index_compact
+zink_descriptor_type_to_size_idx_comp(enum zink_descriptor_type type)
+{
+ switch (type) {
+ case ZINK_DESCRIPTOR_TYPE_UBO:
+ return ZDS_INDEX_COMP_UBO;
+ case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
+ return ZDS_INDEX_COMP_COMBINED_SAMPLER;
+ case ZINK_DESCRIPTOR_TYPE_SSBO:
+ case ZINK_DESCRIPTOR_TYPE_IMAGE:
+ default: break;
+ }
+ unreachable("unknown type");
+}
+
+/* bindless descriptor bindings have their own struct indexing */
+ALWAYS_INLINE static VkDescriptorType
+zink_descriptor_type_from_bindless_index(unsigned idx)
+{
+ switch (idx) {
+ case 0: return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+ case 1: return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+ case 2: return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ case 3: return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
+ default:
+ unreachable("unknown index");
+ }
+}
+
bool
-zink_descriptor_layouts_init(struct zink_context *ctx);
+zink_descriptor_layouts_init(struct zink_screen *screen);
void
-zink_descriptor_layouts_deinit(struct zink_context *ctx);
+zink_descriptor_layouts_deinit(struct zink_screen *screen);
-uint32_t
-zink_get_sampler_view_hash(struct zink_context *ctx, struct zink_sampler_view *sampler_view, bool is_buffer);
-uint32_t
-zink_get_image_view_hash(struct zink_context *ctx, struct zink_image_view *image_view, bool is_buffer);
bool
zink_descriptor_util_alloc_sets(struct zink_screen *screen, VkDescriptorSetLayout dsl, VkDescriptorPool pool, VkDescriptorSet *sets, unsigned num_sets);
-struct zink_descriptor_layout *
-zink_descriptor_util_layout_get(struct zink_context *ctx, enum zink_descriptor_type type,
- VkDescriptorSetLayoutBinding *bindings, unsigned num_bindings,
- struct zink_descriptor_layout_key **layout_key);
void
zink_descriptor_util_init_fbfetch(struct zink_context *ctx);
bool
zink_descriptor_util_push_layouts_get(struct zink_context *ctx, struct zink_descriptor_layout **dsls, struct zink_descriptor_layout_key **layout_keys);
-void
-zink_descriptor_util_init_null_set(struct zink_context *ctx, VkDescriptorSet desc_set);
VkImageLayout
-zink_descriptor_util_image_layout_eval(const struct zink_resource *res, bool is_compute);
-
-/* these two can't be called in lazy mode */
+zink_descriptor_util_image_layout_eval(const struct zink_context *ctx, const struct zink_resource *res, bool is_compute);
void
-zink_descriptor_set_refs_clear(struct zink_descriptor_refs *refs, void *ptr);
+zink_descriptors_init_bindless(struct zink_context *ctx);
void
-zink_descriptor_set_recycle(struct zink_descriptor_set *zds);
-
-
-
+zink_descriptors_deinit_bindless(struct zink_context *ctx);
+void
+zink_descriptors_update_bindless(struct zink_context *ctx);
+void
+zink_descriptor_shader_get_binding_offsets(const struct zink_shader *shader, unsigned *offsets);
+void
+zink_descriptor_shader_init(struct zink_screen *screen, struct zink_shader *shader);
+void
+zink_descriptor_shader_deinit(struct zink_screen *screen, struct zink_shader *shader);
bool
zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg);
@@ -233,14 +172,9 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute);
void
-zink_context_invalidate_descriptor_state(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned, unsigned);
-
-uint32_t
-zink_get_sampler_view_hash(struct zink_context *ctx, struct zink_sampler_view *sampler_view, bool is_buffer);
-uint32_t
-zink_get_image_view_hash(struct zink_context *ctx, struct zink_image_view *image_view, bool is_buffer);
-struct zink_resource *
-zink_get_resource_for_descriptor(struct zink_context *ctx, enum zink_descriptor_type type, enum pipe_shader_type shader, int idx);
+zink_context_invalidate_descriptor_state(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned, unsigned);
+void
+zink_context_invalidate_descriptor_state_compact(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned, unsigned);
void
zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs);
@@ -255,37 +189,8 @@ zink_descriptors_init(struct zink_context *ctx);
void
zink_descriptors_deinit(struct zink_context *ctx);
-//LAZY
-bool
-zink_descriptor_program_init_lazy(struct zink_context *ctx, struct zink_program *pg);
-
-void
-zink_descriptor_program_deinit_lazy(struct zink_screen *screen, struct zink_program *pg);
-
-void
-zink_descriptors_update_lazy(struct zink_context *ctx, bool is_compute);
-
-
-void
-zink_context_invalidate_descriptor_state_lazy(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned, unsigned);
-
-void
-zink_batch_descriptor_deinit_lazy(struct zink_screen *screen, struct zink_batch_state *bs);
-void
-zink_batch_descriptor_reset_lazy(struct zink_screen *screen, struct zink_batch_state *bs);
-bool
-zink_batch_descriptor_init_lazy(struct zink_screen *screen, struct zink_batch_state *bs);
-
-bool
-zink_descriptors_init_lazy(struct zink_context *ctx);
-
-void
-zink_descriptors_deinit_lazy(struct zink_context *ctx);
-
-void
-zink_descriptor_set_update_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, VkDescriptorSet set);
void
-zink_descriptors_update_lazy_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, bool need_push, bool update_push);
+zink_descriptors_update_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, uint8_t bind_sets);
#ifdef __cplusplus
}
#endif
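/* The size-index helpers kept in this header exist so per-program
 * VkDescriptorPoolSize arrays can be accumulated per Vulkan descriptor type.
 * A compacted sketch of that aggregation, loosely based on how the removed lazy
 * path used zink_vktype_to_size_idx(); the demo_binding array is a stand-in. */
#include <vulkan/vulkan.h>
#include "zink_descriptors.h"

struct demo_binding { VkDescriptorType type; unsigned count; };

static unsigned
accumulate_pool_sizes(const struct demo_binding *bindings, unsigned num,
                      VkDescriptorPoolSize *sizes /* >= 8 zeroed entries */)
{
   for (unsigned i = 0; i < num; i++) {
      unsigned idx = zink_vktype_to_size_idx(bindings[i].type);
      sizes[idx].type = bindings[i].type;
      sizes[idx].descriptorCount += bindings[i].count;
   }
   /* pack used entries to the front so only nonzero sizes reach vkCreateDescriptorPool */
   unsigned used = 0;
   for (unsigned i = 0; i < 8; i++) {
      if (!sizes[i].descriptorCount)
         continue;
      sizes[used++] = sizes[i];
   }
   return used;
}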
diff --git a/src/gallium/drivers/zink/zink_descriptors_lazy.c b/src/gallium/drivers/zink/zink_descriptors_lazy.c
deleted file mode 100644
index a727d8ae3c5..00000000000
--- a/src/gallium/drivers/zink/zink_descriptors_lazy.c
+++ /dev/null
@@ -1,689 +0,0 @@
-/*
- * Copyright © 2021 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
- */
-#include "tgsi/tgsi_from_mesa.h"
-
-
-
-#include "zink_context.h"
-#include "zink_compiler.h"
-#include "zink_descriptors.h"
-#include "zink_program.h"
-#include "zink_resource.h"
-#include "zink_screen.h"
-
-#define MAX_LAZY_DESCRIPTORS (ZINK_DEFAULT_MAX_DESCS / 10)
-
-struct zink_descriptor_data_lazy {
- struct zink_descriptor_data base;
- VkDescriptorUpdateTemplateEntry push_entries[PIPE_SHADER_TYPES]; //gfx+fbfetch
- VkDescriptorUpdateTemplateEntry compute_push_entry;
- bool push_state_changed[2]; //gfx, compute
- uint8_t state_changed[2]; //gfx, compute
-};
-
-struct zink_descriptor_pool {
- VkDescriptorPool pool;
- VkDescriptorSet sets[MAX_LAZY_DESCRIPTORS];
- unsigned set_idx;
- unsigned sets_alloc;
-};
-
-struct zink_batch_descriptor_data_lazy {
- struct zink_batch_descriptor_data base;
- struct util_dynarray overflowed_pools;
- struct hash_table pools[ZINK_DESCRIPTOR_TYPES];
- struct zink_descriptor_pool *push_pool[2];
- struct zink_program *pg[2]; //gfx, compute
- VkDescriptorSetLayout dsl[2][ZINK_DESCRIPTOR_TYPES];
- unsigned push_usage[2];
- bool has_fbfetch;
-};
-
-ALWAYS_INLINE static struct zink_descriptor_data_lazy *
-dd_lazy(struct zink_context *ctx)
-{
- return (struct zink_descriptor_data_lazy*)ctx->dd;
-}
-
-ALWAYS_INLINE static struct zink_batch_descriptor_data_lazy *
-bdd_lazy(struct zink_batch_state *bs)
-{
- return (struct zink_batch_descriptor_data_lazy*)bs->dd;
-}
-
-static void
-init_template_entry(struct zink_shader *shader, enum zink_descriptor_type type,
- unsigned idx, unsigned offset, VkDescriptorUpdateTemplateEntry *entry, unsigned *entry_idx, bool flatten_dynamic)
-{
- int index = shader->bindings[type][idx].index;
- enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage);
- entry->dstArrayElement = 0;
- entry->dstBinding = shader->bindings[type][idx].binding;
- if (shader->bindings[type][idx].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && flatten_dynamic)
- /* filter out DYNAMIC type here */
- entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- else
- entry->descriptorType = shader->bindings[type][idx].type;
- switch (shader->bindings[type][idx].type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- entry->descriptorCount = 1;
- entry->offset = offsetof(struct zink_context, di.ubos[stage][index + offset]);
- entry->stride = sizeof(VkDescriptorBufferInfo);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- entry->descriptorCount = shader->bindings[type][idx].size;
- entry->offset = offsetof(struct zink_context, di.textures[stage][index + offset]);
- entry->stride = sizeof(VkDescriptorImageInfo);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- entry->descriptorCount = shader->bindings[type][idx].size;
- entry->offset = offsetof(struct zink_context, di.tbos[stage][index + offset]);
- entry->stride = sizeof(VkBufferView);
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- entry->descriptorCount = 1;
- entry->offset = offsetof(struct zink_context, di.ssbos[stage][index + offset]);
- entry->stride = sizeof(VkDescriptorBufferInfo);
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- entry->descriptorCount = shader->bindings[type][idx].size;
- entry->offset = offsetof(struct zink_context, di.images[stage][index + offset]);
- entry->stride = sizeof(VkDescriptorImageInfo);
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- entry->descriptorCount = shader->bindings[type][idx].size;
- entry->offset = offsetof(struct zink_context, di.texel_images[stage][index + offset]);
- entry->stride = sizeof(VkBufferView);
- break;
- default:
- unreachable("unknown type");
- }
- (*entry_idx)++;
-}
-
-bool
-zink_descriptor_program_init_lazy(struct zink_context *ctx, struct zink_program *pg)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- VkDescriptorSetLayoutBinding bindings[ZINK_DESCRIPTOR_TYPES][PIPE_SHADER_TYPES * 32];
- VkDescriptorUpdateTemplateEntry entries[ZINK_DESCRIPTOR_TYPES][PIPE_SHADER_TYPES * 32];
- unsigned num_bindings[ZINK_DESCRIPTOR_TYPES] = {0};
- uint8_t has_bindings = 0;
- unsigned push_count = 0;
-
- struct zink_shader **stages;
- if (pg->is_compute)
- stages = &((struct zink_compute_program*)pg)->shader;
- else {
- stages = ((struct zink_gfx_program*)pg)->shaders;
- if (stages[PIPE_SHADER_FRAGMENT]->nir->info.fs.uses_fbfetch_output) {
- zink_descriptor_util_init_fbfetch(ctx);
- push_count = 1;
- }
- }
-
- if (!pg->dd)
- pg->dd = (void*)rzalloc(pg, struct zink_program_descriptor_data);
- if (!pg->dd)
- return false;
-
- unsigned entry_idx[ZINK_DESCRIPTOR_TYPES] = {0};
-
- unsigned num_shaders = pg->is_compute ? 1 : ZINK_SHADER_COUNT;
- bool have_push = screen->info.have_KHR_push_descriptor;
- for (int i = 0; i < num_shaders; i++) {
- struct zink_shader *shader = stages[i];
- if (!shader)
- continue;
-
- enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage);
- VkShaderStageFlagBits stage_flags = zink_shader_stage(stage);
- for (int j = 0; j < ZINK_DESCRIPTOR_TYPES; j++) {
- for (int k = 0; k < shader->num_bindings[j]; k++) {
- /* dynamic ubos handled in push */
- if (shader->bindings[j][k].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
- pg->dd->push_usage |= BITFIELD64_BIT(stage);
-
- push_count++;
- continue;
- }
-
- assert(num_bindings[j] < ARRAY_SIZE(bindings[j]));
- VkDescriptorSetLayoutBinding *binding = &bindings[j][num_bindings[j]];
- binding->binding = shader->bindings[j][k].binding;
- binding->descriptorType = shader->bindings[j][k].type;
- binding->descriptorCount = shader->bindings[j][k].size;
- binding->stageFlags = stage_flags;
- binding->pImmutableSamplers = NULL;
-
- enum zink_descriptor_size_index idx = zink_vktype_to_size_idx(shader->bindings[j][k].type);
- pg->dd->sizes[idx].descriptorCount += shader->bindings[j][k].size;
- pg->dd->sizes[idx].type = shader->bindings[j][k].type;
- switch (shader->bindings[j][k].type) {
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- init_template_entry(shader, j, k, 0, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- for (unsigned l = 0; l < shader->bindings[j][k].size; l++)
- init_template_entry(shader, j, k, l, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY);
- break;
- default:
- break;
- }
- num_bindings[j]++;
- has_bindings |= BITFIELD_BIT(j);
- }
- }
- }
- pg->dd->binding_usage = has_bindings;
- if (!has_bindings && !push_count) {
- ralloc_free(pg->dd);
- pg->dd = NULL;
-
- pg->layout = zink_pipeline_layout_create(screen, pg);
- return !!pg->layout;
- }
-
- pg->dsl[pg->num_dsl++] = push_count ? ctx->dd->push_dsl[pg->is_compute]->layout : ctx->dd->dummy_dsl->layout;
- if (has_bindings) {
- u_foreach_bit(type, has_bindings) {
- for (unsigned i = 0; i < type; i++) {
- /* push set is always 0 */
- if (!pg->dsl[i + 1]) {
- /* inject a null dsl */
- pg->dsl[pg->num_dsl++] = ctx->dd->dummy_dsl->layout;
- pg->dd->binding_usage |= BITFIELD_BIT(i);
- }
- }
- pg->dd->layouts[pg->num_dsl] = zink_descriptor_util_layout_get(ctx, type, bindings[type], num_bindings[type], &pg->dd->layout_key[type]);
- pg->dd->layout_key[type]->use_count++;
- pg->dsl[pg->num_dsl] = pg->dd->layouts[pg->num_dsl]->layout;
- pg->num_dsl++;
- }
- for (unsigned i = 0; i < ARRAY_SIZE(pg->dd->sizes); i++)
- pg->dd->sizes[i].descriptorCount *= screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY ? MAX_LAZY_DESCRIPTORS : ZINK_DEFAULT_MAX_DESCS;
- }
-
- pg->layout = zink_pipeline_layout_create(screen, pg);
- if (!pg->layout)
- return false;
- if (!screen->info.have_KHR_descriptor_update_template || screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_NOTEMPLATES)
- return true;
-
- VkDescriptorUpdateTemplateCreateInfo template[ZINK_DESCRIPTOR_TYPES + 1] = {0};
- /* type of template */
- VkDescriptorUpdateTemplateType types[ZINK_DESCRIPTOR_TYPES + 1] = {VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET};
- if (have_push && screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY)
- types[0] = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR;
-
- /* number of descriptors in template */
- unsigned wd_count[ZINK_DESCRIPTOR_TYPES + 1];
- if (push_count)
- wd_count[0] = pg->is_compute ? 1 : (ZINK_SHADER_COUNT + !!ctx->dd->has_fbfetch);
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++)
- wd_count[i + 1] = pg->dd->layout_key[i] ? pg->dd->layout_key[i]->num_descriptors : 0;
-
- VkDescriptorUpdateTemplateEntry *push_entries[2] = {
- dd_lazy(ctx)->push_entries,
- &dd_lazy(ctx)->compute_push_entry,
- };
- for (unsigned i = 0; i < pg->num_dsl; i++) {
- bool is_push = i == 0;
- /* no need for empty templates */
- if (pg->dsl[i] == ctx->dd->dummy_dsl->layout ||
- (!is_push && pg->dd->layouts[i]->desc_template))
- continue;
- template[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
- assert(wd_count[i]);
- template[i].descriptorUpdateEntryCount = wd_count[i];
- if (is_push)
- template[i].pDescriptorUpdateEntries = push_entries[pg->is_compute];
- else
- template[i].pDescriptorUpdateEntries = entries[i - 1];
- template[i].templateType = types[i];
- template[i].descriptorSetLayout = pg->dsl[i];
- template[i].pipelineBindPoint = pg->is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
- template[i].pipelineLayout = pg->layout;
- template[i].set = i;
- VkDescriptorUpdateTemplateKHR t;
- if (VKSCR(CreateDescriptorUpdateTemplate)(screen->dev, &template[i], NULL, &t) != VK_SUCCESS)
- return false;
- if (is_push)
- pg->dd->push_template = t;
- else
- pg->dd->layouts[i]->desc_template = t;
- }
- return true;
-}
-
-void
-zink_descriptor_program_deinit_lazy(struct zink_screen *screen, struct zink_program *pg)
-{
- for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_TYPES; i++) {
- if (pg->dd->layout_key[i])
- pg->dd->layout_key[i]->use_count--;
- }
- if (pg->dd && pg->dd->push_template)
- VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, pg->dd->push_template, NULL);
- ralloc_free(pg->dd);
-}
-
-static VkDescriptorPool
-create_pool(struct zink_screen *screen, unsigned num_type_sizes, VkDescriptorPoolSize *sizes, unsigned flags)
-{
- VkDescriptorPool pool;
- VkDescriptorPoolCreateInfo dpci = {0};
- dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
- dpci.pPoolSizes = sizes;
- dpci.poolSizeCount = num_type_sizes;
- dpci.flags = flags;
- dpci.maxSets = MAX_LAZY_DESCRIPTORS;
- if (VKSCR(CreateDescriptorPool)(screen->dev, &dpci, 0, &pool) != VK_SUCCESS) {
- debug_printf("vkCreateDescriptorPool failed\n");
- return VK_NULL_HANDLE;
- }
- return pool;
-}
-
-static struct zink_descriptor_pool *
-get_descriptor_pool_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute);
-
-static struct zink_descriptor_pool *
-check_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool *pool, struct hash_entry *he, struct zink_program *pg,
- enum zink_descriptor_type type, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- /* allocate up to $current * 10, e.g., 10 -> 100 or 100 -> 1000 */
- if (pool->set_idx == pool->sets_alloc) {
- unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100);
- if (!sets_to_alloc) {
- /* overflowed pool: queue for deletion on next reset */
- util_dynarray_append(&bdd->overflowed_pools, struct zink_descriptor_pool*, pool);
- _mesa_hash_table_remove(&bdd->pools[type], he);
- return get_descriptor_pool_lazy(ctx, pg, type, bdd, is_compute);
- }
- if (!zink_descriptor_util_alloc_sets(screen, pg->dsl[type + 1],
- pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc))
- return NULL;
- pool->sets_alloc += sets_to_alloc;
- }
- return pool;
-}
-
-static struct zink_descriptor_pool *
-create_push_pool(struct zink_screen *screen, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute, bool has_fbfetch)
-{
- struct zink_descriptor_pool *pool = rzalloc(bdd, struct zink_descriptor_pool);
- VkDescriptorPoolSize sizes[2];
- sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- if (is_compute)
- sizes[0].descriptorCount = MAX_LAZY_DESCRIPTORS;
- else {
- sizes[0].descriptorCount = ZINK_SHADER_COUNT * MAX_LAZY_DESCRIPTORS;
- sizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
- sizes[1].descriptorCount = MAX_LAZY_DESCRIPTORS;
- }
- pool->pool = create_pool(screen, !is_compute && has_fbfetch ? 2 : 1, sizes, 0);
- return pool;
-}
-
-static struct zink_descriptor_pool *
-check_push_pool_alloc(struct zink_context *ctx, struct zink_descriptor_pool *pool, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- /* allocate up to $current * 10, e.g., 10 -> 100 or 100 -> 1000 */
- if (pool->set_idx == pool->sets_alloc || unlikely(ctx->dd->has_fbfetch != bdd->has_fbfetch)) {
- unsigned sets_to_alloc = MIN2(MIN2(MAX2(pool->sets_alloc * 10, 10), MAX_LAZY_DESCRIPTORS) - pool->sets_alloc, 100);
- if (!sets_to_alloc || unlikely(ctx->dd->has_fbfetch != bdd->has_fbfetch)) {
- /* overflowed pool: queue for deletion on next reset */
- util_dynarray_append(&bdd->overflowed_pools, struct zink_descriptor_pool*, pool);
- bdd->push_pool[is_compute] = create_push_pool(screen, bdd, is_compute, ctx->dd->has_fbfetch);
- return check_push_pool_alloc(ctx, bdd->push_pool[is_compute], bdd, is_compute);
- }
- if (!zink_descriptor_util_alloc_sets(screen, ctx->dd->push_dsl[is_compute]->layout,
- pool->pool, &pool->sets[pool->sets_alloc], sets_to_alloc))
- return NULL;
- pool->sets_alloc += sets_to_alloc;
- }
- return pool;
-}
-
-static struct zink_descriptor_pool *
-get_descriptor_pool_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, struct zink_batch_descriptor_data_lazy *bdd, bool is_compute)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct hash_entry *he = _mesa_hash_table_search(&bdd->pools[type], pg->dd->layout_key[type]);
- struct zink_descriptor_pool *pool;
- if (he) {
- pool = he->data;
- return check_pool_alloc(ctx, pool, he, pg, type, bdd, is_compute);
- }
- pool = rzalloc(bdd, struct zink_descriptor_pool);
- if (!pool)
- return NULL;
- unsigned idx = zink_descriptor_type_to_size_idx(type);
- VkDescriptorPoolSize *size = &pg->dd->sizes[idx];
- /* this is a sampler/image set with no images only texels */
- if (!size->descriptorCount)
- size++;
- pool->pool = create_pool(screen, zink_descriptor_program_num_sizes(pg, type), size, 0);
- if (!pool->pool) {
- ralloc_free(pool);
- return NULL;
- }
- _mesa_hash_table_insert(&bdd->pools[type], pg->dd->layout_key[type], pool);
- return check_pool_alloc(ctx, pool, he, pg, type, bdd, is_compute);
-}
-
-ALWAYS_INLINE static VkDescriptorSet
-get_descriptor_set_lazy(struct zink_descriptor_pool *pool)
-{
- if (!pool)
- return VK_NULL_HANDLE;
-
- assert(pool->set_idx < pool->sets_alloc);
- return pool->sets[pool->set_idx++];
-}
-
-static bool
-populate_sets(struct zink_context *ctx, struct zink_batch_descriptor_data_lazy *bdd,
- struct zink_program *pg, uint8_t *changed_sets, bool need_push, VkDescriptorSet *sets)
-{
- if (need_push && !zink_screen(ctx->base.screen)->info.have_KHR_push_descriptor) {
- struct zink_descriptor_pool *pool = check_push_pool_alloc(ctx, bdd->push_pool[pg->is_compute], bdd, pg->is_compute);
- sets[0] = get_descriptor_set_lazy(pool);
- if (!sets[0])
- return false;
- } else
- sets[0] = VK_NULL_HANDLE;
- u_foreach_bit(type, *changed_sets) {
- if (pg->dd->layout_key[type]) {
- struct zink_descriptor_pool *pool = get_descriptor_pool_lazy(ctx, pg, type, bdd, pg->is_compute);
- sets[type + 1] = get_descriptor_set_lazy(pool);
- } else
- sets[type + 1] = ctx->dd->dummy_set;
- if (!sets[type + 1])
- return false;
- }
- return true;
-}
-
-void
-zink_descriptor_set_update_lazy(struct zink_context *ctx, struct zink_program *pg, enum zink_descriptor_type type, VkDescriptorSet set)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- VKCTX(UpdateDescriptorSetWithTemplate)(screen->dev, set, pg->dd->layouts[type + 1]->desc_template, ctx);
-}
-
-void
-zink_descriptors_update_lazy_masked(struct zink_context *ctx, bool is_compute, uint8_t changed_sets, bool need_push, bool update_push)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_batch *batch = &ctx->batch;
- struct zink_batch_state *bs = ctx->batch.state;
- struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs);
- struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
- VkDescriptorSet desc_sets[5];
- if (!populate_sets(ctx, bdd, pg, &changed_sets, need_push, desc_sets)) {
- debug_printf("ZINK: couldn't get descriptor sets!\n");
- return;
- }
- /* no flushing allowed */
- assert(ctx->batch.state == bs);
-
- if (pg->dd->binding_usage && changed_sets) {
- u_foreach_bit(type, changed_sets) {
- if (pg->dd->layout_key[type])
- VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, desc_sets[type + 1], pg->dd->layouts[type + 1]->desc_template, ctx);
- assert(type + 1 < pg->num_dsl);
- VKSCR(CmdBindDescriptorSets)(bs->cmdbuf,
- is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
- /* set index incremented by 1 to account for push set */
- pg->layout, type + 1, 1, &desc_sets[type + 1],
- 0, NULL);
- }
- dd_lazy(ctx)->state_changed[is_compute] = false;
- }
-
- if (update_push) {
- if (pg->dd->push_usage && dd_lazy(ctx)->push_state_changed[is_compute]) {
- if (screen->info.have_KHR_push_descriptor)
- VKSCR(CmdPushDescriptorSetWithTemplateKHR)(batch->state->cmdbuf, pg->dd->push_template,
- pg->layout, 0, ctx);
- else {
- assert(desc_sets[0]);
- VKSCR(UpdateDescriptorSetWithTemplate)(screen->dev, desc_sets[0], pg->dd->push_template, ctx);
- VKSCR(CmdBindDescriptorSets)(batch->state->cmdbuf,
- is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
- pg->layout, 0, 1, &desc_sets[0],
- 0, NULL);
- }
- dd_lazy(ctx)->push_state_changed[is_compute] = false;
- } else if (dd_lazy(ctx)->push_state_changed[is_compute]) {
- VKSCR(CmdBindDescriptorSets)(bs->cmdbuf,
- is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
- pg->layout, 0, 1, &ctx->dd->dummy_set,
- 0, NULL);
- dd_lazy(ctx)->push_state_changed[is_compute] = false;
- }
- }
- bdd->pg[is_compute] = pg;
- ctx->dd->pg[is_compute] = pg;
-}
-
-void
-zink_descriptors_update_lazy(struct zink_context *ctx, bool is_compute)
-{
- struct zink_batch_state *bs = ctx->batch.state;
- struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs);
- struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
-
- bool batch_changed = !bdd->pg[is_compute];
- if (batch_changed) {
- /* update all sets and bind null sets */
- dd_lazy(ctx)->state_changed[is_compute] = pg->dd->binding_usage;
- dd_lazy(ctx)->push_state_changed[is_compute] = !!pg->dd->push_usage;
- }
-
- if (pg != bdd->pg[is_compute]) {
- /* if we don't already know that we have to update all sets,
- * check to see if any dsls changed
- *
- * also always update the dsl pointers on program change
- */
- for (unsigned i = 0; i < ARRAY_SIZE(bdd->dsl[is_compute]); i++) {
- /* push set is already detected, start at 1 */
- if (bdd->dsl[is_compute][i] != pg->dsl[i + 1])
- dd_lazy(ctx)->state_changed[is_compute] |= BITFIELD_BIT(i);
- bdd->dsl[is_compute][i] = pg->dsl[i + 1];
- }
- dd_lazy(ctx)->push_state_changed[is_compute] |= bdd->push_usage[is_compute] != pg->dd->push_usage;
- bdd->push_usage[is_compute] = pg->dd->push_usage;
- }
- bdd->pg[is_compute] = pg;
-
- uint8_t changed_sets = pg->dd->binding_usage & dd_lazy(ctx)->state_changed[is_compute];
- bool need_push = pg->dd->push_usage &&
- (dd_lazy(ctx)->push_state_changed[is_compute] || batch_changed);
- zink_descriptors_update_lazy_masked(ctx, is_compute, changed_sets, need_push, true);
-}
-
-void
-zink_context_invalidate_descriptor_state_lazy(struct zink_context *ctx, enum pipe_shader_type shader, enum zink_descriptor_type type, unsigned start, unsigned count)
-{
- if (type == ZINK_DESCRIPTOR_TYPE_UBO && !start)
- dd_lazy(ctx)->push_state_changed[shader == PIPE_SHADER_COMPUTE] = true;
- else
- dd_lazy(ctx)->state_changed[shader == PIPE_SHADER_COMPUTE] |= BITFIELD_BIT(type);
-}
-
-void
-zink_batch_descriptor_deinit_lazy(struct zink_screen *screen, struct zink_batch_state *bs)
-{
- if (!bs->dd)
- return;
- struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs);
- if (screen->info.have_KHR_descriptor_update_template) {
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- hash_table_foreach(&bdd->pools[i], entry) {
- struct zink_descriptor_pool *pool = (void*)entry->data;
- VKSCR(DestroyDescriptorPool)(screen->dev, pool->pool, NULL);
- }
- }
- if (bdd->push_pool[0])
- VKSCR(DestroyDescriptorPool)(screen->dev, bdd->push_pool[0]->pool, NULL);
- if (bdd->push_pool[1])
- VKSCR(DestroyDescriptorPool)(screen->dev, bdd->push_pool[1]->pool, NULL);
- }
- ralloc_free(bs->dd);
-}
-
-static void
-pool_destroy(struct zink_screen *screen, struct zink_descriptor_pool *pool)
-{
- VKSCR(DestroyDescriptorPool)(screen->dev, pool->pool, NULL);
- ralloc_free(pool);
-}
-
-void
-zink_batch_descriptor_reset_lazy(struct zink_screen *screen, struct zink_batch_state *bs)
-{
- if (!screen->info.have_KHR_descriptor_update_template)
- return;
- struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs);
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- hash_table_foreach(&bdd->pools[i], entry) {
- const struct zink_descriptor_layout_key *key = entry->key;
- struct zink_descriptor_pool *pool = (void*)entry->data;
- if (key->use_count)
- pool->set_idx = 0;
- else {
- pool_destroy(screen, pool);
- _mesa_hash_table_remove(&bdd->pools[i], entry);
- }
- }
- }
- for (unsigned i = 0; i < 2; i++) {
- bdd->pg[i] = NULL;
- if (bdd->push_pool[i])
- bdd->push_pool[i]->set_idx = 0;
- }
- while (util_dynarray_num_elements(&bdd->overflowed_pools, struct zink_descriptor_pool*)) {
- struct zink_descriptor_pool *pool = util_dynarray_pop(&bdd->overflowed_pools, struct zink_descriptor_pool*);
- pool_destroy(screen, pool);
- }
-}
-
-bool
-zink_batch_descriptor_init_lazy(struct zink_screen *screen, struct zink_batch_state *bs)
-{
- bs->dd = (void*)rzalloc(bs, struct zink_batch_descriptor_data_lazy);
- if (!bs->dd)
- return false;
- if (!screen->info.have_KHR_descriptor_update_template)
- return true;
- struct zink_batch_descriptor_data_lazy *bdd = bdd_lazy(bs);
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++) {
- if (!_mesa_hash_table_init(&bdd->pools[i], bs->dd, _mesa_hash_pointer, _mesa_key_pointer_equal))
- return false;
- }
- util_dynarray_init(&bdd->overflowed_pools, bs->dd);
- if (!screen->info.have_KHR_push_descriptor) {
- bdd->push_pool[0] = create_push_pool(screen, bdd, false, false);
- bdd->push_pool[1] = create_push_pool(screen, bdd, true, false);
- }
- return true;
-}
-
-static void
-init_push_template_entry(VkDescriptorUpdateTemplateEntry *entry, unsigned i)
-{
- entry->dstBinding = tgsi_processor_to_shader_stage(i);
- entry->descriptorCount = 1;
- entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- entry->offset = offsetof(struct zink_context, di.ubos[i][0]);
- entry->stride = sizeof(VkDescriptorBufferInfo);
-}
-
-bool
-zink_descriptors_init_lazy(struct zink_context *ctx)
-{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- ctx->dd = (void*)rzalloc(ctx, struct zink_descriptor_data_lazy);
- if (!ctx->dd)
- return false;
-
- if (screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_NOTEMPLATES)
- printf("ZINK: CACHED/NOTEMPLATES DESCRIPTORS\n");
- else if (screen->info.have_KHR_descriptor_update_template) {
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- VkDescriptorUpdateTemplateEntry *entry = &dd_lazy(ctx)->push_entries[i];
- init_push_template_entry(entry, i);
- }
- init_push_template_entry(&dd_lazy(ctx)->compute_push_entry, PIPE_SHADER_COMPUTE);
- VkDescriptorUpdateTemplateEntry *entry = &dd_lazy(ctx)->push_entries[ZINK_SHADER_COUNT]; //fbfetch
- entry->dstBinding = ZINK_FBFETCH_BINDING;
- entry->descriptorCount = 1;
- entry->descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
- entry->offset = offsetof(struct zink_context, di.fbfetch);
- entry->stride = sizeof(VkDescriptorImageInfo);
- if (screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY)
- printf("ZINK: USING LAZY DESCRIPTORS\n");
- }
- struct zink_descriptor_layout_key *layout_key;
- if (!zink_descriptor_util_push_layouts_get(ctx, ctx->dd->push_dsl, ctx->dd->push_layout_keys))
- return false;
-
- ctx->dd->dummy_dsl = zink_descriptor_util_layout_get(ctx, 0, NULL, 0, &layout_key);
- if (!ctx->dd->dummy_dsl)
- return false;
- VkDescriptorPoolSize null_size = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1};
- ctx->dd->dummy_pool = create_pool(screen, 1, &null_size, 0);
- zink_descriptor_util_alloc_sets(screen, ctx->dd->dummy_dsl->layout,
- ctx->dd->dummy_pool, &ctx->dd->dummy_set, 1);
- zink_descriptor_util_init_null_set(ctx, ctx->dd->dummy_set);
- return true;
-}
-
-void
-zink_descriptors_deinit_lazy(struct zink_context *ctx)
-{
- if (ctx->dd) {
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (ctx->dd->dummy_pool)
- VKSCR(DestroyDescriptorPool)(screen->dev, ctx->dd->dummy_pool, NULL);
- if (ctx->dd->push_dsl[0])
- VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd->push_dsl[0]->layout, NULL);
- if (ctx->dd->push_dsl[1])
- VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd->push_dsl[1]->layout, NULL);
- }
- ralloc_free(ctx->dd);
-}
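
The removed push-template setup above relies on VK_KHR_descriptor_update_template: each VkDescriptorUpdateTemplateEntry records a binding plus an offset/stride into driver memory (here, offsets into struct zink_context), so an entire descriptor set can be written in one call. A minimal sketch of how such entries are typically consumed follows; the helper name and parameters are illustrative, not part of the patch.

#include <vulkan/vulkan_core.h>

/* Hedged sketch, not part of the patch: consuming entries like the
 * push_entries filled in above.  Parameter names are illustrative. */
static VkResult
update_set_with_template(VkDevice dev, VkDescriptorSetLayout dsl,
                         VkDescriptorSet set,
                         const VkDescriptorUpdateTemplateEntry *entries,
                         uint32_t num_entries, const void *ctx_base)
{
   VkDescriptorUpdateTemplateCreateInfo tci = {0};
   tci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
   tci.descriptorUpdateEntryCount = num_entries;
   tci.pDescriptorUpdateEntries = entries;   /* dstBinding/offset/stride as set up above */
   tci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
   tci.descriptorSetLayout = dsl;

   VkDescriptorUpdateTemplate tmpl;
   VkResult res = vkCreateDescriptorUpdateTemplate(dev, &tci, NULL, &tmpl);
   if (res != VK_SUCCESS)
      return res;
   /* Each entry->offset above is an offsetof() into struct zink_context, so the
    * context pointer itself serves as pData and the descriptor data is read
    * straight out of ctx->di.ubos[...] / ctx->di.fbfetch. */
   vkUpdateDescriptorSetWithTemplate(dev, set, tmpl, ctx_base);
   vkDestroyDescriptorUpdateTemplate(dev, tmpl, NULL);
   return VK_SUCCESS;
}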
diff --git a/src/gallium/drivers/zink/zink_device_info.py b/src/gallium/drivers/zink/zink_device_info.py
index 3ac8011336a..ec265deccb9 100644
--- a/src/gallium/drivers/zink/zink_device_info.py
+++ b/src/gallium/drivers/zink/zink_device_info.py
@@ -60,21 +60,63 @@ import sys
# - guard: adds a #if defined(`extension_name`)/#endif guard around the code generated for this Extension.
EXTENSIONS = [
Extension("VK_KHR_maintenance1",
- required=True),
+ required=True),
Extension("VK_KHR_maintenance2"),
Extension("VK_KHR_maintenance3"),
+ Extension("VK_KHR_maintenance4",
+ alias="maint4",
+ features=True),
+ Extension("VK_KHR_maintenance5",
+ alias="maint5",
+ features=True, properties=True),
+ Extension("VK_KHR_maintenance6",
+ alias="maint6",
+ features=True, properties=True),
Extension("VK_KHR_external_memory"),
Extension("VK_KHR_external_memory_fd"),
+ Extension("VK_KHR_vulkan_memory_model"),
+ Extension("VK_KHR_workgroup_memory_explicit_layout", alias="explicit_layout", features=True),
+ Extension("VK_KHR_pipeline_executable_properties",
+ alias="pipestats",
+ features=True),
+ Extension("VK_KHR_external_semaphore_fd"),
+ Extension("VK_KHR_create_renderpass2",
+ required=True),
+ Extension("VK_KHR_synchronization2",
+ alias="sync2",
+ features=True),
+ Extension("VK_KHR_external_memory_win32"),
+ Extension("VK_KHR_external_semaphore_win32"),
Extension("VK_EXT_external_memory_dma_buf"),
+ Extension("VK_KHR_buffer_device_address",
+ alias="bda",
+ features=True),
+ Extension("VK_EXT_external_memory_host", alias="ext_host_mem", properties=True),
Extension("VK_EXT_queue_family_foreign"),
+ Extension("VK_KHR_swapchain_mutable_format"),
+ Extension("VK_KHR_incremental_present"),
Extension("VK_EXT_provoking_vertex",
- alias="pv",
- features=True,
- properties=True,
- conditions=["$feats.provokingVertexLast"]),
+ alias="pv",
+ features=True,
+ properties=True,
+ conditions=["$feats.provokingVertexLast"]),
Extension("VK_EXT_shader_viewport_index_layer"),
Extension("VK_KHR_get_memory_requirements2"),
Extension("VK_EXT_post_depth_coverage"),
+ Extension("VK_EXT_depth_clip_control",
+ alias="clip_control",
+ features=True),
+ Extension("VK_EXT_depth_clamp_zero_one",
+ alias="clamp_01",
+ features=True),
+ Extension("VK_EXT_shader_subgroup_ballot"),
+ Extension("VK_EXT_shader_subgroup_vote"),
+ Extension("VK_EXT_shader_atomic_float",
+ alias="atomic_float",
+ features=True),
+ Extension("VK_KHR_shader_atomic_int64",
+ alias="atomic_int",
+ features=True),
Extension("VK_KHR_8bit_storage",
alias="storage_8bit",
features=True,
@@ -83,126 +125,205 @@ EXTENSIONS = [
alias="storage_16bit",
features=True,
conditions=["$feats.storageBuffer16BitAccess"]),
+ Extension("VK_EXT_image_2d_view_of_3d",
+ alias="view2d",
+ features=True),
Extension("VK_KHR_driver_properties",
- alias="driver",
- properties=True),
+ alias="driver",
+ properties=True),
Extension("VK_EXT_memory_budget"),
+ Extension("VK_EXT_memory_priority", alias="memprio", features=True),
+ Extension("VK_EXT_pageable_device_local_memory", alias="mempage", features=True),
Extension("VK_KHR_draw_indirect_count"),
+ Extension("VK_EXT_dynamic_rendering_unused_attachments", alias="unused", features=True),
+ Extension("VK_EXT_shader_object", alias="shobj", features=True, properties=True),
+ Extension("VK_EXT_attachment_feedback_loop_layout",
+ alias="feedback_loop",
+ features=True),
+ Extension("VK_EXT_attachment_feedback_loop_dynamic_state", alias="feedback_dyn", features=True),
+ Extension("VK_NV_device_generated_commands", alias="nv_dgc", features=True, properties=True),
Extension("VK_EXT_fragment_shader_interlock",
- alias="interlock",
- features=True,
- conditions=["$feats.fragmentShaderSampleInterlock", "$feats.fragmentShaderPixelInterlock"]),
+ alias="interlock",
+ features=True,
+ conditions=["$feats.fragmentShaderSampleInterlock", "$feats.fragmentShaderPixelInterlock"]),
Extension("VK_EXT_sample_locations",
- alias="sample_locations",
- properties=True),
- Extension("VK_EXT_conservative_rasterization",
- alias="cons_raster",
- properties=True,
- conditions=["$props.fullyCoveredFragmentShaderInputVariable"]),
+ alias="sample_locations",
+ properties=True),
Extension("VK_KHR_shader_draw_parameters"),
Extension("VK_KHR_sampler_mirror_clamp_to_edge"),
+ Extension("VK_EXT_descriptor_buffer", alias="db", features=True, properties=True),
Extension("VK_EXT_conditional_rendering",
- alias="cond_render",
- features=True,
- conditions=["$feats.conditionalRendering"]),
+ alias="cond_render",
+ features=True,
+ conditions=["$feats.conditionalRendering"]),
Extension("VK_EXT_transform_feedback",
- alias="tf",
- properties=True,
- features=True,
- conditions=["$feats.transformFeedback"]),
+ alias="tf",
+ properties=True,
+ features=True,
+ conditions=["$feats.transformFeedback"]),
Extension("VK_EXT_index_type_uint8",
- alias="index_uint8",
- features=True,
- conditions=["$feats.indexTypeUint8"]),
+ alias="index_uint8",
+ features=True,
+ conditions=["$feats.indexTypeUint8"]),
+ Extension("VK_KHR_image_format_list"),
+ Extension("VK_KHR_sampler_ycbcr_conversion"),
Extension("VK_KHR_imageless_framebuffer",
- alias="imgless",
- features=True,
- conditions=["$feats.imagelessFramebuffer"]),
+ alias="imgless",
+ features=True,
+ required=True),
Extension("VK_EXT_robustness2",
- alias="rb2",
- properties=True,
- features=True,
- conditions=["$feats.nullDescriptor"]),
+ alias="rb2",
+ properties=True,
+ features=True,
+ conditions=["$feats.nullDescriptor"]),
+ Extension("VK_EXT_image_robustness",
+ alias="rb_image",
+ features=True),
Extension("VK_EXT_image_drm_format_modifier"),
Extension("VK_EXT_vertex_attribute_divisor",
- alias="vdiv",
- properties=True,
- features=True,
- conditions=["$feats.vertexAttributeInstanceRateDivisor"]),
+ alias="vdiv",
+ properties=True,
+ features=True,
+ conditions=["$feats.vertexAttributeInstanceRateDivisor"]),
Extension("VK_EXT_calibrated_timestamps"),
+ Extension("VK_NV_linear_color_attachment",
+ alias="linear_color",
+ features=True),
+ Extension("VK_KHR_dynamic_rendering",
+ alias="dynamic_render",
+ features=True),
+ Extension("VK_KHR_dynamic_rendering_local_read",
+ alias="drlr",
+ features=True),
+ Extension("VK_EXT_multisampled_render_to_single_sampled",
+ alias="msrtss",
+ features=True),
Extension("VK_KHR_shader_clock",
- alias="shader_clock",
- features=True,
- conditions=["$feats.shaderSubgroupClock"]),
+ alias="shader_clock",
+ features=True,
+ conditions=["$feats.shaderSubgroupClock"]),
+ Extension("VK_INTEL_shader_integer_functions2",
+ alias="shader_int_fns2",
+ features=True,
+ conditions=["$feats.shaderIntegerFunctions2"]),
Extension("VK_EXT_sampler_filter_minmax",
- alias="reduction",
- properties=True),
+ alias="reduction",
+ properties=True,
+ conditions=["$props.filterMinmaxSingleComponentFormats"]),
Extension("VK_EXT_custom_border_color",
- alias="border_color",
- properties=True,
- features=True,
- conditions=["$feats.customBorderColors"]),
+ alias="border_color",
+ properties=True,
+ features=True,
+ conditions=["$feats.customBorderColors"]),
+ Extension("VK_EXT_non_seamless_cube_map",
+ alias="nonseamless",
+ features=True),
+ Extension("VK_EXT_border_color_swizzle",
+ alias="border_swizzle",
+ features=True),
Extension("VK_EXT_blend_operation_advanced",
- alias="blend",
- properties=True,
- # TODO: we can probably support non-premul here with some work?
- conditions=["$props.advancedBlendNonPremultipliedSrcColor", "$props.advancedBlendNonPremultipliedDstColor"]),
+ alias="blend",
+ properties=True,
+ # TODO: we can probably support non-premul here with some work?
+ conditions=["$props.advancedBlendNonPremultipliedSrcColor", "$props.advancedBlendNonPremultipliedDstColor"]),
Extension("VK_EXT_extended_dynamic_state",
- alias="dynamic_state",
- features=True,
- conditions=["$feats.extendedDynamicState"]),
+ alias="dynamic_state",
+ features=True,
+ conditions=["$feats.extendedDynamicState"]),
Extension("VK_EXT_extended_dynamic_state2",
- alias="dynamic_state2",
- features=True,
- conditions=["$feats.extendedDynamicState2"]),
+ alias="dynamic_state2",
+ features=True,
+ conditions=["$feats.extendedDynamicState2"]),
+ Extension("VK_EXT_extended_dynamic_state3",
+ alias="dynamic_state3",
+ properties=True,
+ features=True),
Extension("VK_EXT_pipeline_creation_cache_control",
- alias="pipeline_cache_control",
- features=True,
- conditions=["$feats.pipelineCreationCacheControl"]),
+ alias="pipeline_cache_control",
+ features=True,
+ conditions=["$feats.pipelineCreationCacheControl"]),
Extension("VK_EXT_shader_stencil_export",
- alias="stencil_export"),
- Extension("VK_EXTX_portability_subset",
- alias="portability_subset_extx",
- nonstandard=True,
- properties=True,
- features=True,
- guard=True),
- Extension("VK_KHR_timeline_semaphore"),
+ alias="stencil_export"),
+ Extension("VK_KHR_portability_subset",
+ alias="portability_subset",
+ features=True,
+ guard=True),
+ Extension("VK_NV_compute_shader_derivatives",
+ alias="shader_derivs",
+ features=True,
+ conditions=["$feats.computeDerivativeGroupQuads", "$feats.computeDerivativeGroupLinear"]),
+ Extension("VK_KHR_timeline_semaphore",
+ alias="timeline",
+ features=True),
+ Extension("VK_EXT_color_write_enable",
+ alias="cwrite",
+ features=True),
Extension("VK_EXT_4444_formats",
- alias="format_4444",
- features=True),
+ alias="format_4444",
+ features=True),
+ Extension("VK_EXT_host_image_copy",
+ alias="hic",
+ features=True,
+ properties=True),
Extension("VK_EXT_scalar_block_layout",
- alias="scalar_block_layout",
- features=True,
- conditions=["$feats.scalarBlockLayout"]),
+ alias="scalar_block_layout",
+ features=True,
+ conditions=["$feats.scalarBlockLayout"]),
Extension("VK_KHR_swapchain"),
+ Extension("VK_EXT_rasterization_order_attachment_access",
+ alias="rast_order_access",
+ features=True,
+ conditions=["$feats.rasterizationOrderColorAttachmentAccess"]),
Extension("VK_KHR_shader_float16_int8",
alias="shader_float16_int8",
features=True),
Extension("VK_EXT_multi_draw",
alias="multidraw",
- features=True,
- properties=True,
- conditions=["$feats.multiDraw"]),
+ features=True,
+ properties=True,
+ conditions=["$feats.multiDraw"]),
+ Extension("VK_EXT_primitives_generated_query",
+ alias="primgen",
+ features=True),
+ Extension("VK_KHR_pipeline_library"),
+ Extension("VK_EXT_graphics_pipeline_library",
+ alias="gpl",
+ features=True,
+ properties=True),
Extension("VK_KHR_push_descriptor",
- alias="push",
- properties=True),
+ alias="push",
+ properties=True),
Extension("VK_KHR_descriptor_update_template",
- alias="template"),
+ alias="template", required=True),
Extension("VK_EXT_line_rasterization",
- alias="line_rast",
- properties=True,
- features=True),
+ alias="line_rast",
+ properties=True,
+ features=True),
Extension("VK_EXT_vertex_input_dynamic_state",
- alias="vertex_input",
- features=True,
- conditions=["$feats.vertexInputDynamicState"]),
+ alias="vertex_input",
+ features=True,
+ conditions=["$feats.vertexInputDynamicState"]),
Extension("VK_EXT_primitive_topology_list_restart",
- alias="list_restart",
- features=True,
- conditions=["$feats.primitiveTopologyListRestart"]),
+ alias="list_restart",
+ features=True,
+ conditions=["$feats.primitiveTopologyListRestart"]),
Extension("VK_KHR_dedicated_allocation",
- alias="dedicated"),
+ alias="dedicated"),
+ Extension("VK_EXT_descriptor_indexing",
+ alias="desc_indexing",
+ features=True,
+ properties=True,
+ conditions=["$feats.descriptorBindingPartiallyBound"]),
+ Extension("VK_EXT_depth_clip_enable",
+ alias="depth_clip_enable",
+ features=True),
+ Extension("VK_EXT_shader_demote_to_helper_invocation",
+ alias="demote",
+ features=True,
+ conditions=["$feats.shaderDemoteToHelperInvocation"]),
+ Extension("VK_KHR_shader_float_controls",
+ alias="float_controls"),
+ Extension("VK_KHR_format_feature_flags2"),
]
# constructor: Versions(device_version(major, minor, patch), struct_version(major, minor))
@@ -214,14 +335,13 @@ EXTENSIONS = [
VERSIONS = [
Version((1,1,0), (1,1)),
Version((1,2,0), (1,2)),
+ Version((1,3,0), (1,3)),
]
# There exists some inconsistencies regarding the enum constants, fix them.
# This is basically generated_code.replace(key, value).
REPLACEMENTS = {
- "ROBUSTNESS2": "ROBUSTNESS_2",
"PROPERTIES_PROPERTIES": "PROPERTIES",
- "EXTENDED_DYNAMIC_STATE2": "EXTENDED_DYNAMIC_STATE_2",
}
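
Each Extension() entry above feeds the code generator: features=True/properties=True add per-extension feature/property structs to the generated struct zink_device_info, alias= names those fields, conditions= gate the final have_ flag, and required=True marks the extension as mandatory. A hedged consumer-side sketch — field names follow the alias convention used above and are not verbatim generated code:

#include <stdbool.h>
#include "zink_device_info.h"

/* Hedged sketch of how driver code typically consults the generated info. */
static bool
supports_dynamic_sample_mask(const struct zink_device_info *info)
{
   /* have_<vendor>_<name> is emitted for every Extension() entry */
   if (!info->have_EXT_extended_dynamic_state3)
      return false;
   /* features=True adds a VkPhysicalDevice...Features member named from alias= */
   return info->dynamic_state3_feats.extendedDynamicState3SampleMask;
}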
@@ -252,7 +372,16 @@ header_code = """
#include "util/u_memory.h"
-#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_core.h>
+
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+#include <vulkan/vulkan_beta.h>
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#include <vulkan/vulkan_win32.h>
+#endif
struct zink_screen;
@@ -269,6 +398,7 @@ struct zink_device_info {
%endfor
VkPhysicalDeviceFeatures2 feats;
+ VkPhysicalDeviceSubgroupProperties subgroup;
%for version in versions:
VkPhysicalDeviceVulkan${version.struct()}Features feats${version.struct()};
%endfor
@@ -279,6 +409,7 @@ struct zink_device_info {
%endfor
VkPhysicalDeviceMemoryProperties mem_props;
+ VkPhysicalDeviceIDProperties deviceid_props;
%for ext in extensions:
<%helpers:guard ext="${ext}">
@@ -319,6 +450,7 @@ void zink_stub_${cmd.lstrip("vk")}(void);
impl_code = """
<%namespace name="helpers" file="helpers"/>
+#include "vk_enum_to_str.h"
#include "zink_device_info.h"
#include "zink_screen.h"
@@ -334,14 +466,22 @@ zink_get_physical_device_info(struct zink_screen *screen)
uint32_t num_extensions = 0;
// get device memory properties
- vkGetPhysicalDeviceMemoryProperties(screen->pdev, &info->mem_props);
+ screen->vk.GetPhysicalDeviceMemoryProperties(screen->pdev, &info->mem_props);
// enumerate device supported extensions
- if (vkEnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, NULL) == VK_SUCCESS) {
+ VkResult result = screen->vk.EnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, NULL);
+ if (result != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumerateDeviceExtensionProperties failed (%s)", vk_Result_to_str(result));
+ } else {
if (num_extensions > 0) {
VkExtensionProperties *extensions = MALLOC(sizeof(VkExtensionProperties) * num_extensions);
if (!extensions) goto fail;
- vkEnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, extensions);
+ result = screen->vk.EnumerateDeviceExtensionProperties(screen->pdev, NULL, &num_extensions, extensions);
+ if (result != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumerateDeviceExtensionProperties failed (%s)", vk_Result_to_str(result));
+ }
for (uint32_t i = 0; i < num_extensions; ++i) {
%for ext in extensions:
@@ -361,22 +501,6 @@ zink_get_physical_device_info(struct zink_screen *screen)
}
}
- %for version in versions:
- if (${version.version()} <= screen->vk_version) {
- %for ext in extensions:
- %if ext.core_since and ext.core_since.struct_version == version.struct_version:
- <%helpers:guard ext="${ext}">
- %if not (ext.has_features or ext.has_properties):
- info->have_${ext.name_with_vendor()} = true;
- %else:
- support_${ext.name_with_vendor()} = true;
- %endif
- </%helpers:guard>
- %endif
- %endfor
- }
- %endfor
-
// get device features
if (screen->vk.GetPhysicalDeviceFeatures2) {
// check for device extension features
@@ -399,7 +523,11 @@ zink_get_physical_device_info(struct zink_screen *screen)
%for ext in extensions:
%if ext.has_features:
<%helpers:guard ext="${ext}">
+%if ext.features_promoted:
+ if (support_${ext.name_with_vendor()} && !info->have_vulkan${ext.core_since.struct()}) {
+%else:
if (support_${ext.name_with_vendor()}) {
+%endif
info->${ext.field("feats")}.sType = ${ext.stype("FEATURES")};
info->${ext.field("feats")}.pNext = info->feats.pNext;
info->feats.pNext = &info->${ext.field("feats")};
@@ -410,7 +538,7 @@ zink_get_physical_device_info(struct zink_screen *screen)
screen->vk.GetPhysicalDeviceFeatures2(screen->pdev, &info->feats);
} else {
- vkGetPhysicalDeviceFeatures(screen->pdev, &info->feats.features);
+ screen->vk.GetPhysicalDeviceFeatures(screen->pdev, &info->feats.features);
}
// check for device properties
@@ -434,7 +562,11 @@ zink_get_physical_device_info(struct zink_screen *screen)
%for ext in extensions:
%if ext.has_properties:
<%helpers:guard ext="${ext}">
+%if ext.properties_promoted:
+ if (support_${ext.name_with_vendor()} && !info->have_vulkan${ext.core_since.struct()}) {
+%else:
if (support_${ext.name_with_vendor()}) {
+%endif
info->${ext.field("props")}.sType = ${ext.stype("PROPERTIES")};
info->${ext.field("props")}.pNext = props.pNext;
props.pNext = &info->${ext.field("props")};
@@ -443,10 +575,58 @@ zink_get_physical_device_info(struct zink_screen *screen)
%endif
%endfor
+ if (screen->vk_version < VK_MAKE_VERSION(1,2,0) && screen->instance_info.have_KHR_external_memory_capabilities) {
+ info->deviceid_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
+ info->deviceid_props.pNext = props.pNext;
+ props.pNext = &info->deviceid_props;
+ }
+
+ if (screen->vk_version >= VK_MAKE_VERSION(1,1,0)) {
+ info->subgroup.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+ info->subgroup.pNext = props.pNext;
+ props.pNext = &info->subgroup;
+ }
+
// note: setting up local VkPhysicalDeviceProperties2.
screen->vk.GetPhysicalDeviceProperties2(screen->pdev, &props);
}
+ /* We re-apply the fields from the VkPhysicalDeviceVulkanXYFeatures struct
+ * onto their respective fields in the VkPhysicalDeviceExtensionNameFeatures
+ * struct if the former is provided by the VK implementation.
+ *
+ * As for why this is done: the spec mentions that once an extension is
+ * promoted to core and its feature fields are added in VulkanXYFeatures,
+ * including both ExtensionNameFeatures and VulkanXYFeatures at the same
+ * time is prohibited when using vkGetPhysicalDeviceFeatures2.
+ */
+%for ext in extensions:
+%if ext.features_promoted:
+ if (info->have_vulkan${ext.core_since.struct()}) {
+ %for field in registry.get_registry_entry(ext.name).features_fields:
+ info->${ext.field("feats")}.${field} = info->feats${ext.core_since.struct()}.${field};
+ %endfor
+ }
+%endif
+%endfor
+
+ /* See above, but for VulkanXYProperties.
+ * Unlike VulkanXYFeatures with all the booleans, VulkanXYProperties can
+ * contain different types of data, including arrays. The C language hates us
+ * when we assign an array to another array, so we use a memcpy here.
+ */
+%for ext in extensions:
+%if ext.properties_promoted:
+ if (info->have_vulkan${ext.core_since.struct()}) {
+ %for field in registry.get_registry_entry(ext.name).properties_fields:
+ memcpy(&info->${ext.field("props")}.${field},
+ &info->props${ext.core_since.struct()}.${field},
+ sizeof(info->${ext.field("props")}.${field}));
+ %endfor
+ }
+%endif
+%endfor
+
// enable the extensions if they match the conditions given by ext.enable_conds
if (screen->vk.GetPhysicalDeviceProperties2) {
%for ext in extensions:
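
The promotion handling added above follows the rule described in the re-apply comment: when the device exposes the VulkanXY core version, only VkPhysicalDeviceVulkanXYFeatures is chained and the promoted extension's own struct is filled from it afterwards; otherwise the extension struct is chained directly. A minimal sketch of that rule using VK_KHR_timeline_semaphore (promoted in 1.2) — names outside the Vulkan API are illustrative:

#include <stdbool.h>
#include <vulkan/vulkan_core.h>

/* Hedged sketch: chain exactly one of the two structs, never both. */
static VkBool32
query_timeline_semaphore_feature(VkPhysicalDevice pdev, bool have_vulkan12)
{
   VkPhysicalDeviceVulkan12Features v12 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   VkPhysicalDeviceTimelineSemaphoreFeatures timeline = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
   };
   VkPhysicalDeviceFeatures2 feats = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
   };
   feats.pNext = have_vulkan12 ? (void *)&v12 : (void *)&timeline;
   vkGetPhysicalDeviceFeatures2(pdev, &feats);
   /* mirror what the generated code does: read the promoted field from the
    * core struct when that struct was the one chained */
   return have_vulkan12 ? v12.timelineSemaphore : timeline.timelineSemaphore;
}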
@@ -484,6 +664,36 @@ zink_get_physical_device_info(struct zink_screen *screen)
info->num_extensions = num_extensions;
+ info->feats.pNext = NULL;
+
+%for version in versions:
+%if version.device_version < (1,2,0):
+ if (VK_MAKE_VERSION(1,2,0) <= screen->vk_version) {
+ /* VkPhysicalDeviceVulkan11Features was added in 1.2, not 1.1 as one would think */
+%else:
+ if (${version.version()} <= screen->vk_version) {
+%endif
+ info->feats${version.struct()}.pNext = info->feats.pNext;
+ info->feats.pNext = &info->feats${version.struct()};
+ }
+%endfor
+
+%for ext in extensions:
+%if ext.has_features:
+<%helpers:guard ext="${ext}">
+%if ext.features_promoted:
+ if (info->have_${ext.name_with_vendor()} && !info->have_vulkan${ext.core_since.struct()}) {
+%else:
+ if (info->have_${ext.name_with_vendor()}) {
+%endif
+ info->${ext.field("feats")}.sType = ${ext.stype("FEATURES")};
+ info->${ext.field("feats")}.pNext = info->feats.pNext;
+ info->feats.pNext = &info->${ext.field("feats")};
+ }
+</%helpers:guard>
+%endif
+%endfor
+
return true;
fail:
@@ -495,8 +705,12 @@ zink_verify_device_extensions(struct zink_screen *screen)
{
%for ext in extensions:
%if registry.in_registry(ext.name):
+<%helpers:guard ext="${ext}">
if (screen->info.have_${ext.name_with_vendor()}) {
%for cmd in registry.get_registry_entry(ext.name).device_commands:
+%if cmd.find("win32"):
+#ifdef _WIN32
+%endif
if (!screen->vk.${cmd.lstrip("vk")}) {
#ifndef NDEBUG
screen->vk.${cmd.lstrip("vk")} = (PFN_${cmd})zink_stub_${cmd.lstrip("vk")};
@@ -504,8 +718,12 @@ zink_verify_device_extensions(struct zink_screen *screen)
screen->vk.${cmd.lstrip("vk")} = (PFN_${cmd})zink_stub_function_not_loaded;
#endif
}
+%if cmd.find("win32"):
+#endif
+%endif
%endfor
}
+</%helpers:guard>
%endif
%endfor
}
@@ -589,15 +807,21 @@ if __name__ == "__main__":
if not (entry.features_struct and ext.physical_device_struct("Features") == entry.features_struct):
error_count += 1
print("The extension {} does not provide a features struct.".format(ext.name))
+ ext.features_promoted = entry.features_promoted
if ext.has_properties:
if not (entry.properties_struct and ext.physical_device_struct("Properties") == entry.properties_struct):
error_count += 1
print("The extension {} does not provide a properties struct.".format(ext.name))
- print(entry.properties_struct, ext.physical_device_struct("Properties"))
+ ext.properties_promoted = entry.properties_promoted
- if entry.promoted_in:
+ if entry.promoted_in and entry.promoted_in <= versions[-1].struct_version:
ext.core_since = Version((*entry.promoted_in, 0))
+ else:
+ # even if the ext is promoted in a newer VK version, consider it
+ # unpromoted until there's an entry for that VK version in VERSIONS
+ ext.features_promoted = False
+ ext.properties_promoted = False
if error_count > 0:
print("zink_device_info.py: Found {} error(s) in total. Quitting.".format(error_count))
@@ -606,12 +830,12 @@ if __name__ == "__main__":
lookup = TemplateLookup()
lookup.put_string("helpers", include_template)
- with open(header_path, "w") as header_file:
+ with open(header_path, "w", encoding='utf-8') as header_file:
header = Template(header_code, lookup=lookup).render(extensions=extensions, versions=versions, registry=registry).strip()
header = replace_code(header, replacement)
print(header, file=header_file)
- with open(impl_path, "w") as impl_file:
+ with open(impl_path, "w", encoding='utf-8') as impl_file:
impl = Template(impl_code, lookup=lookup).render(extensions=extensions, versions=versions, registry=registry).strip()
impl = replace_code(impl, replacement)
print(impl, file=impl_file)
diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp
index fde03630cb0..0da405ea7b7 100644
--- a/src/gallium/drivers/zink/zink_draw.cpp
+++ b/src/gallium/drivers/zink/zink_draw.cpp
@@ -1,6 +1,9 @@
+#include "zink_batch.h"
#include "zink_compiler.h"
#include "zink_context.h"
+#include "zink_descriptors.h"
#include "zink_program.h"
+#include "zink_program_state.hpp"
#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
@@ -8,57 +11,40 @@
#include "zink_surface.h"
#include "zink_inlines.h"
-#include "tgsi/tgsi_from_mesa.h"
#include "util/hash_table.h"
+#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"
-
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
- /* Between the pause and resume there needs to be a memory barrier for the counter buffers
- * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
- * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
- * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
- * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
- *
- * - from VK_EXT_transform_feedback spec
- */
for (unsigned i = 0; i < ctx->num_so_targets; i++) {
struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
if (!t)
continue;
struct zink_resource *res = zink_resource(t->counter_buffer);
- if (t->counter_buffer_valid)
- zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
- else
- zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
+ VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+ VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
+ if (t->counter_buffer_valid) {
+ /* Between the pause and resume there needs to be a memory barrier for the counter buffers
+ * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
+ * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+ * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
+ * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
+ *
+ * - from VK_EXT_transform_feedback spec
+ */
+ access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
+ stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
+ }
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, stage);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = false;
}
- ctx->xfb_barrier = false;
-}
-
-static void
-zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
-{
- /* A pipeline barrier is required between using the buffers as
- * transform feedback buffers and vertex buffers to
- * ensure all writes to the transform feedback buffers are visible
- * when the data is read as vertex attributes.
- * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
- * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
- * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
- * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
- *
- * - 20.3.1. Drawing Transform Feedback
- */
- zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
}
static void
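
The rewritten zink_emit_xfb_counter_barrier above folds the old read/write split into one combined barrier per counter buffer via screen->buffer_barrier. A hedged sketch of the Vulkan-level barrier this amounts to for a valid counter buffer — the helper and the exact source access/stage are illustrative, since the driver tracks those per resource:

#include <vulkan/vulkan_core.h>

/* Hedged sketch: make a previously written xfb counter visible both to a
 * resumed transform feedback and to an indirect draw that reads the count. */
static void
emit_xfb_counter_barrier_sketch(VkCommandBuffer cmdbuf, VkBuffer counter_buffer)
{
   VkBufferMemoryBarrier bmb = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
      .dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT |
                       VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = counter_buffer,
      .offset = 0,
      .size = VK_WHOLE_SIZE,
   };
   vkCmdPipelineBarrier(cmdbuf,
                        VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,  /* counter written at EndTransformFeedback */
                        VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT |
                        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,           /* resumed xfb + draw-indirect count read */
                        0, 0, NULL, 1, &bmb, 0, NULL);
}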
@@ -66,9 +52,9 @@ zink_emit_stream_output_targets(struct pipe_context *pctx)
{
struct zink_context *ctx = zink_context(pctx);
struct zink_batch *batch = &ctx->batch;
- VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {0};
- VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {0};
- VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {0};
+ VkBuffer buffers[PIPE_MAX_SO_BUFFERS] = {0};
+ VkDeviceSize buffer_offsets[PIPE_MAX_SO_BUFFERS] = {0};
+ VkDeviceSize buffer_sizes[PIPE_MAX_SO_BUFFERS] = {0};
for (unsigned i = 0; i < ctx->num_so_targets; i++) {
struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
@@ -84,12 +70,15 @@ zink_emit_stream_output_targets(struct pipe_context *pctx)
/* resource has been rebound */
t->counter_buffer_valid = false;
buffers[i] = res->obj->buffer;
- zink_resource_buffer_barrier(ctx, zink_resource(t->base.buffer),
- VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
zink_batch_reference_resource_rw(batch, res, true);
buffer_offsets[i] = t->base.buffer_offset;
buffer_sizes[i] = t->base.buffer_size;
res->so_valid = true;
+ if (!ctx->unordered_blitting) {
+ res->obj->unordered_read = res->obj->unordered_write = false;
+ res->obj->access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+ res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
+ }
util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
t->base.buffer_offset + t->base.buffer_size);
}
@@ -104,7 +93,9 @@ ALWAYS_INLINE static void
check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
{
struct zink_resource *res = zink_resource(pres);
- zink_resource_buffer_barrier(ctx, res, flags, pipeline);
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, flags, pipeline);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = false;
}
ALWAYS_INLINE static void
@@ -122,50 +113,70 @@ barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinf
}
}
-template <zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_vertex_input HAS_VERTEX_INPUT>
+static void
+bind_vertex_buffers_dgc(struct zink_context *ctx)
+{
+ struct zink_vertex_elements_state *elems = ctx->element_state;
+
+ ctx->vertex_buffers_dirty = false;
+ if (!elems->hw_state.num_bindings)
+ return;
+ for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
+ struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->hw_state.binding_map[i];
+ assert(vb);
+ VkBindVertexBufferIndirectCommandNV *ptr;
+ VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr);
+ token->vertexBindingUnit = ctx->element_state->hw_state.binding_map[i];
+ if (vb->buffer.resource) {
+ struct zink_resource *res = zink_resource(vb->buffer.resource);
+ assert(res->obj->bda);
+ ptr->bufferAddress = res->obj->bda + vb->buffer_offset;
+ ptr->size = res->base.b.width0;
+ ptr->stride = ctx->element_state->hw_state.b.strides[i];
+ } else {
+ ptr->bufferAddress = 0;
+ ptr->size = 0;
+ ptr->stride = 0;
+ }
+ }
+}
+
+template <zink_dynamic_state DYNAMIC_STATE>
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
VkBuffer buffers[PIPE_MAX_ATTRIBS];
VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
- VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS];
struct zink_vertex_elements_state *elems = ctx->element_state;
struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (!elems->hw_state.num_bindings)
- return;
-
for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
- const unsigned buffer_id = ctx->element_state->binding_map[i];
- struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id;
+ struct pipe_vertex_buffer *vb = ctx->vertex_buffers + elems->hw_state.binding_map[i];
assert(vb);
if (vb->buffer.resource) {
- buffers[i] = ctx->vbufs[buffer_id];
- assert(buffers[i]);
- if (HAS_VERTEX_INPUT)
- elems->hw_state.dynbindings[i].stride = vb->stride;
- buffer_offsets[i] = ctx->vbuf_offsets[buffer_id];
- buffer_strides[i] = vb->stride;
- zink_batch_resource_usage_set(&ctx->batch, zink_resource(vb->buffer.resource), false);
+ struct zink_resource *res = zink_resource(vb->buffer.resource);
+ assert(res->obj->buffer);
+ buffers[i] = res->obj->buffer;
+ buffer_offsets[i] = vb->buffer_offset;
} else {
buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
buffer_offsets[i] = 0;
- buffer_strides[i] = 0;
- if (HAS_VERTEX_INPUT)
- elems->hw_state.dynbindings[i].stride = 0;
}
}
- if (HAS_DYNAMIC_STATE && !HAS_VERTEX_INPUT)
- VKCTX(CmdBindVertexBuffers2EXT)(batch->state->cmdbuf, 0,
- elems->hw_state.num_bindings,
- buffers, buffer_offsets, NULL, buffer_strides);
- else
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE &&
+ DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 &&
+ DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) {
+ if (elems->hw_state.num_bindings)
+ VKCTX(CmdBindVertexBuffers2)(batch->state->cmdbuf, 0,
+ elems->hw_state.num_bindings,
+ buffers, buffer_offsets, NULL, elems->hw_state.b.strides);
+ } else if (elems->hw_state.num_bindings)
VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
elems->hw_state.num_bindings,
buffers, buffer_offsets);
- if (HAS_VERTEX_INPUT)
+ if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
elems->hw_state.num_bindings, elems->hw_state.dynbindings,
elems->hw_state.num_attribs, elems->hw_state.dynattribs);
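
In the ZINK_DYNAMIC_VERTEX_INPUT paths above, the vertex layout is supplied at draw time through vkCmdSetVertexInputEXT instead of being baked into the pipeline. A minimal sketch of the structs it consumes, for a single per-vertex vec3 attribute (helper name and values are illustrative):

#include <vulkan/vulkan_core.h>

/* Hedged sketch of the dynamic vertex input structs; zink fills arrays of
 * these (hw_state.dynbindings/dynattribs) rather than single entries. */
static void
set_single_vec3_attrib(VkCommandBuffer cmdbuf, uint32_t stride)
{
   VkVertexInputBindingDescription2EXT binding = {
      .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
      .binding = 0,
      .stride = stride,
      .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
      .divisor = 1,                      /* must be 1 for per-vertex rate */
   };
   VkVertexInputAttributeDescription2EXT attrib = {
      .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
      .location = 0,
      .binding = 0,
      .format = VK_FORMAT_R32G32B32_SFLOAT,
      .offset = 0,
   };
   vkCmdSetVertexInputEXT(cmdbuf, 1, &binding, 1, &attrib);
}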
@@ -173,72 +184,53 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
ctx->vertex_buffers_dirty = false;
}
-static void
-update_gfx_program(struct zink_context *ctx)
+ALWAYS_INLINE static void
+update_drawid(struct zink_context *ctx, unsigned draw_id)
{
- if (ctx->last_vertex_stage_dirty) {
- enum pipe_shader_type pstage = pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage);
- ctx->dirty_shader_stages |= BITFIELD_BIT(pstage);
- memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
- &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
- sizeof(struct zink_vs_key_base));
- ctx->last_vertex_stage_dirty = false;
- }
- unsigned bits = BITFIELD_MASK(PIPE_SHADER_COMPUTE);
- if (ctx->gfx_dirty) {
- struct zink_gfx_program *prog = NULL;
-
- struct hash_table *ht = &ctx->program_cache[ctx->shader_stages >> 2];
- const uint32_t hash = ctx->gfx_hash;
- struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
- if (entry) {
- prog = (struct zink_gfx_program*)entry->data;
- u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages)
- ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader;
- } else {
- ctx->dirty_shader_stages |= bits;
- prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.vertices_per_patch + 1);
- _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
- }
- zink_update_gfx_program(ctx, prog);
- if (prog && prog != ctx->curr_program)
- zink_batch_reference_program(&ctx->batch, &prog->base);
- if (ctx->curr_program)
- ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
- ctx->curr_program = prog;
- ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
- ctx->gfx_dirty = false;
- } else if (ctx->dirty_shader_stages & bits) {
- zink_update_gfx_program(ctx, ctx->curr_program);
- }
- ctx->dirty_shader_stages &= ~bits;
+ VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
+ &draw_id);
}
-static bool
-line_width_needed(enum pipe_prim_type reduced_prim,
- unsigned polygon_mode)
+static void
+update_drawid_dgc(struct zink_context *ctx, unsigned draw_id)
{
- switch (reduced_prim) {
- case PIPE_PRIM_POINTS:
- return false;
-
- case PIPE_PRIM_LINES:
- return true;
-
- case PIPE_PRIM_TRIANGLES:
- return polygon_mode == VK_POLYGON_MODE_LINE;
-
- default:
- unreachable("unexpected reduced prim");
- }
+ uint32_t *ptr;
+ VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
+ token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_id);
+ token->pushconstantSize = sizeof(unsigned);
+ *ptr = draw_id;
}
ALWAYS_INLINE static void
-update_drawid(struct zink_context *ctx, unsigned draw_id)
+draw_indexed_dgc_need_index_buffer_unref(struct zink_context *ctx,
+ const struct pipe_draw_info *dinfo,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws,
+ unsigned draw_id,
+ bool needs_drawid)
{
- VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
- offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
- &draw_id);
+ if (dinfo->increment_draw_id && needs_drawid) {
+ for (unsigned i = 0; i < num_draws; i++) {
+ update_drawid_dgc(ctx, draw_id);
+ VkDrawIndexedIndirectCommand *ptr, cmd = {
+ draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance
+ };
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
+ *ptr = cmd;
+ draw_id++;
+ }
+ } else {
+ if (needs_drawid)
+ update_drawid_dgc(ctx, draw_id);
+ for (unsigned i = 0; i < num_draws; i++) {
+ VkDrawIndexedIndirectCommand *ptr, cmd = {
+ draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance
+ };
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
+ *ptr = cmd;
+ }
+ }
}
ALWAYS_INLINE static void
@@ -269,6 +261,37 @@ draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
}
}
+ALWAYS_INLINE static void
+draw_indexed_dgc(struct zink_context *ctx,
+ const struct pipe_draw_info *dinfo,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws,
+ unsigned draw_id,
+ bool needs_drawid)
+{
+ if (dinfo->increment_draw_id && needs_drawid) {
+ for (unsigned i = 0; i < num_draws; i++) {
+ update_drawid_dgc(ctx, draw_id);
+ VkDrawIndexedIndirectCommand *ptr, cmd = {
+ draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance
+ };
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
+ *ptr = cmd;
+ draw_id++;
+ }
+ } else {
+ if (needs_drawid)
+ update_drawid_dgc(ctx, draw_id);
+ for (unsigned i = 0; i < num_draws; i++) {
+ VkDrawIndexedIndirectCommand *ptr, cmd = {
+ draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance
+ };
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
+ *ptr = cmd;
+ }
+ }
+}
+
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
@@ -304,6 +327,37 @@ draw_indexed(struct zink_context *ctx,
}
}
+ALWAYS_INLINE static void
+draw_dgc(struct zink_context *ctx,
+ const struct pipe_draw_info *dinfo,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws,
+ unsigned draw_id,
+ bool needs_drawid)
+{
+ if (dinfo->increment_draw_id && needs_drawid) {
+ for (unsigned i = 0; i < num_draws; i++) {
+ update_drawid_dgc(ctx, draw_id);
+ VkDrawIndirectCommand *ptr, cmd = {
+ draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance
+ };
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr);
+ *ptr = cmd;
+ draw_id++;
+ }
+ } else {
+ if (needs_drawid)
+ update_drawid_dgc(ctx, draw_id);
+ for (unsigned i = 0; i < num_draws; i++) {
+ VkDrawIndirectCommand *ptr, cmd = {
+ draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance
+ };
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr);
+ *ptr = cmd;
+ }
+ }
+}
+
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
@@ -335,131 +389,107 @@ draw(struct zink_context *ctx,
}
}
-ALWAYS_INLINE static VkPipelineStageFlags
-find_pipeline_bits(uint32_t *mask)
+template <zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
+static bool
+update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode, bool can_dgc)
{
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
- if (mask[i]) {
- return zink_pipeline_flags_from_pipe_stage((enum pipe_shader_type)i);
- }
+ VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
+ const struct zink_screen *screen = zink_screen(ctx->base.screen);
+ bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
+ if (screen->optimal_keys && !ctx->is_generated_gs_bound)
+ zink_gfx_program_update_optimal(ctx);
+ else
+ zink_gfx_program_update(ctx);
+ bool pipeline_changed = false;
+ VkPipeline pipeline = VK_NULL_HANDLE;
+ if (!ctx->curr_program->base.uses_shobj) {
+ if (screen->info.have_EXT_graphics_pipeline_library)
+ pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+ else
+ pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
}
- return 0;
-}
-
-static void
-update_barriers(struct zink_context *ctx, bool is_compute)
-{
- if (!ctx->need_barriers[is_compute]->entries)
- return;
- struct set *need_barriers = ctx->need_barriers[is_compute];
- ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute];
- ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]];
- set_foreach(need_barriers, he) {
- struct zink_resource *res = (struct zink_resource *)he->key;
- VkPipelineStageFlags pipeline = 0;
- VkAccessFlags access = 0;
- if (res->bind_count[is_compute]) {
- if (res->write_bind_count[is_compute])
- access |= VK_ACCESS_SHADER_WRITE_BIT;
- if (res->write_bind_count[is_compute] != res->bind_count[is_compute]) {
- unsigned bind_count = res->bind_count[is_compute] - res->write_bind_count[is_compute];
- if (res->obj->is_buffer) {
- if (res->ubo_bind_count[is_compute]) {
- access |= VK_ACCESS_UNIFORM_READ_BIT;
- bind_count -= res->ubo_bind_count[is_compute];
- }
- if (!is_compute && res->vbo_bind_mask) {
- access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- pipeline |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
- bind_count -= util_bitcount(res->vbo_bind_mask);
- }
- bind_count -= res->so_bind_count;
- }
- if (bind_count)
- access |= VK_ACCESS_SHADER_READ_BIT;
- }
- if (is_compute)
- pipeline = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
- else if (!pipeline) {
- if (res->ubo_bind_count[0])
- pipeline |= find_pipeline_bits(res->ubo_bind_mask);
- if (!pipeline)
- pipeline |= find_pipeline_bits(res->ssbo_bind_mask);
- if (!pipeline)
- pipeline |= find_pipeline_bits(res->sampler_binds);
- if (!pipeline) //must be a shader image
- pipeline = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
- }
- if (res->base.b.target == PIPE_BUFFER)
- zink_resource_buffer_barrier(ctx, res, access, pipeline);
- else {
- VkImageLayout layout = zink_descriptor_util_image_layout_eval(res, is_compute);
- if (layout != res->layout)
- zink_resource_image_barrier(ctx, res, layout, access, pipeline);
+ if (pipeline) {
+ pipeline_changed = prev_pipeline != pipeline;
+ if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) {
+ ctx->dgc.last_prog = ctx->curr_program;
+ if (unlikely(can_dgc && screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1)) {
+ VkBindShaderGroupIndirectCommandNV *ptr;
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV, (void**)&ptr);
+ util_dynarray_append(&ctx->dgc.pipelines, VkPipeline, pipeline);
+ /* zero-indexed -> base + group + num_pipelines-1 = base + num_pipelines */
+ ptr->groupIndex = util_dynarray_num_elements(&ctx->dgc.pipelines, VkPipeline) + 1;
+ } else {
+ VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
- /* always barrier on draw if this resource has either multiple image write binds or
- * image write binds and image read binds
- */
- if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1)
- _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res);
}
- _mesa_set_remove(need_barriers, he);
- if (!need_barriers->entries)
- break;
+ ctx->shobj_draw = false;
+ } else {
+ if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
+ VkShaderStageFlagBits stages[] = {
+ VK_SHADER_STAGE_VERTEX_BIT,
+ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+ VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+ VK_SHADER_STAGE_GEOMETRY_BIT,
+ VK_SHADER_STAGE_FRAGMENT_BIT,
+ };
+ /* always rebind all stages */
+ VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
+ VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
+ VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT);
+ VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled);
+ VKCTX(CmdSetRasterizationStreamEXT)(bs->cmdbuf, 0);
+ }
+ ctx->shobj_draw = true;
}
+ return pipeline_changed;
}
-template <bool BATCH_CHANGED>
-static bool
-update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum pipe_prim_type mode)
+static enum mesa_prim
+zink_prim_type(const struct zink_context *ctx,
+ const struct pipe_draw_info *dinfo)
{
- VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
- update_gfx_program(ctx);
- VkPipeline pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
- bool pipeline_changed = prev_pipeline != pipeline;
- if (BATCH_CHANGED || pipeline_changed)
- VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- return pipeline_changed;
+ if (ctx->gfx_pipeline_state.shader_rast_prim != MESA_PRIM_COUNT)
+ return ctx->gfx_pipeline_state.shader_rast_prim;
+
+ return u_reduced_prim((enum mesa_prim)dinfo->mode);
}
-static bool
-hack_conditional_render(struct pipe_context *pctx,
- const struct pipe_draw_info *dinfo,
- unsigned drawid_offset,
- const struct pipe_draw_indirect_info *dindirect,
- const struct pipe_draw_start_count_bias *draws,
- unsigned num_draws)
+static enum mesa_prim
+zink_rast_prim(const struct zink_context *ctx,
+ const struct pipe_draw_info *dinfo)
{
- struct zink_context *ctx = zink_context(pctx);
- struct zink_batch_state *bs = ctx->batch.state;
- static bool warned;
- if (!warned) {
- fprintf(stderr, "ZINK: warning, this is cpu-based conditional rendering, say bye-bye to fps\n");
- warned = true;
- }
- if (!zink_check_conditional_render(ctx))
- return false;
- if (bs != ctx->batch.state) {
- bool prev = ctx->render_condition_active;
- ctx->render_condition_active = false;
- zink_select_draw_vbo(ctx);
- pctx->draw_vbo(pctx, dinfo, drawid_offset, dindirect, draws, num_draws);
- ctx->render_condition_active = prev;
- return false;
+ enum mesa_prim prim_type = zink_prim_type(ctx, dinfo);
+ assert(prim_type != MESA_PRIM_COUNT);
+
+ if (prim_type == MESA_PRIM_TRIANGLES &&
+ ctx->rast_state->base.fill_front != PIPE_POLYGON_MODE_FILL) {
+ switch(ctx->rast_state->base.fill_front) {
+ case PIPE_POLYGON_MODE_POINT:
+ return MESA_PRIM_POINTS;
+ case PIPE_POLYGON_MODE_LINE:
+ return MESA_PRIM_LINES;
+ default:
+ unreachable("unexpected polygon mode");
+ }
}
- return true;
+
+ return prim_type;
}
-template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2,
- zink_dynamic_vertex_input HAS_VERTEX_INPUT, bool BATCH_CHANGED>
+template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE>
void
-zink_draw_vbo(struct pipe_context *pctx,
- const struct pipe_draw_info *dinfo,
- unsigned drawid_offset,
- const struct pipe_draw_indirect_info *dindirect,
- const struct pipe_draw_start_count_bias *draws,
- unsigned num_draws)
+zink_draw(struct pipe_context *pctx,
+ const struct pipe_draw_info *dinfo,
+ unsigned drawid_offset,
+ const struct pipe_draw_indirect_info *dindirect,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws,
+ struct pipe_vertex_state *vstate,
+ uint32_t partial_velem_mask)
{
+ if (!dindirect && (!draws[0].count || !dinfo->instance_count))
+ return;
+
struct zink_context *ctx = zink_context(pctx);
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_rasterizer_state *rast_state = ctx->rast_state;
@@ -468,58 +498,31 @@ zink_draw_vbo(struct pipe_context *pctx,
struct zink_so_target *so_target =
dindirect && dindirect->count_from_stream_output ?
zink_so_target(dindirect->count_from_stream_output) : NULL;
- VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
- VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS];
+ VkBuffer counter_buffers[PIPE_MAX_SO_BUFFERS];
+ VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_BUFFERS];
bool need_index_buffer_unref = false;
bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
bool reads_drawid = ctx->shader_reads_drawid;
bool reads_basevertex = ctx->shader_reads_basevertex;
unsigned work_count = ctx->batch.work_count;
- enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode;
-
- if (unlikely(!screen->info.have_EXT_conditional_rendering)) {
- if (!hack_conditional_render(pctx, dinfo, drawid_offset, dindirect, draws, num_draws))
- return;
- }
+ enum mesa_prim mode = (enum mesa_prim)dinfo->mode;
- zink_flush_memory_barrier(ctx, false);
- update_barriers(ctx, false);
+ if (ctx->memory_barrier && !ctx->blitting)
+ zink_flush_memory_barrier(ctx, false);
- if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) {
+ if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter && !ctx->blitting)) {
ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
zink_rebind_all_buffers(ctx);
}
- uint8_t vertices_per_patch = ctx->gfx_pipeline_state.patch_vertices ? ctx->gfx_pipeline_state.patch_vertices - 1 : 0;
- if (ctx->gfx_pipeline_state.vertices_per_patch != vertices_per_patch)
- ctx->gfx_pipeline_state.dirty = true;
- bool drawid_broken = false;
- if (reads_drawid && (!dindirect || !dindirect->buffer))
- drawid_broken = (drawid_offset != 0 ||
- (!HAS_MULTIDRAW && num_draws > 1) ||
- (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
- if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
- zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;
- ctx->gfx_pipeline_state.vertices_per_patch = vertices_per_patch;
- if (mode_changed) {
- bool points_changed = false;
- if (mode == PIPE_PRIM_POINTS) {
- ctx->gfx_pipeline_state.has_points++;
- points_changed = true;
- } else if (ctx->gfx_pipeline_state.gfx_prim_mode == PIPE_PRIM_POINTS) {
- ctx->gfx_pipeline_state.has_points--;
- points_changed = true;
- }
- if (points_changed && ctx->rast_state->base.point_quad_rasterization)
- zink_set_fs_point_coord_key(ctx);
- }
- ctx->gfx_pipeline_state.gfx_prim_mode = mode;
- if (!HAS_DYNAMIC_STATE2) {
- if (ctx->gfx_pipeline_state.primitive_restart != dinfo->primitive_restart)
- ctx->gfx_pipeline_state.dirty = true;
- ctx->gfx_pipeline_state.primitive_restart = dinfo->primitive_restart;
+ if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter && !ctx->blitting)) {
+ ctx->image_rebind_counter = screen->image_rebind_counter;
+ zink_rebind_all_images(ctx);
}
+ if (mode_changed)
+ zink_flush_dgc_if_enabled(ctx);
+
unsigned index_offset = 0;
unsigned index_size = dinfo->index_size;
struct pipe_resource *index_buffer = NULL;
@@ -529,64 +532,190 @@ zink_draw_vbo(struct pipe_context *pctx,
debug_printf("util_upload_index_buffer() failed\n");
return;
}
- zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
+ /* this will have extra refs from tc */
+ if (screen->threaded)
+ zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
+ else
+ zink_batch_reference_resource(batch, zink_resource(index_buffer));
} else {
index_buffer = dinfo->index.resource;
zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false);
}
assert(index_size <= 4 && index_size != 3);
assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
- const VkIndexType index_type[3] = {
- VK_INDEX_TYPE_UINT8_EXT,
- VK_INDEX_TYPE_UINT16,
- VK_INDEX_TYPE_UINT32,
- };
- struct zink_resource *res = zink_resource(index_buffer);
- VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
}
+ ctx->was_line_loop = dinfo->was_line_loop;
+
bool have_streamout = !!ctx->num_so_targets;
if (have_streamout) {
- if (ctx->xfb_barrier)
- zink_emit_xfb_counter_barrier(ctx);
- if (ctx->dirty_so_targets)
- zink_emit_stream_output_targets(pctx);
+ zink_emit_xfb_counter_barrier(ctx);
+ if (ctx->dirty_so_targets) {
+ /* have to loop here and below because barriers must be emitted out of renderpass,
+ * but xfb buffers can't be bound before the renderpass is active to avoid
+ * breaking from recursion
+ */
+ for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+ struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
+ if (t) {
+ struct zink_resource *res = zink_resource(t->base.buffer);
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
+ VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = res->obj->unordered_write = false;
+ }
+ }
+ }
}
- if (so_target)
- zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));
-
barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
+ /* this may re-emit draw buffer barriers, but such synchronization is harmless */
+ if (!ctx->blitting)
+ zink_update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? dindirect->indirect_draw_count : NULL);
+
+ bool can_dgc = false;
+ if (unlikely(zink_debug & ZINK_DEBUG_DGC))
+ can_dgc = !so_target && !ctx->num_so_targets && (!dindirect || !dindirect->buffer);
+
+ /* ensure synchronization between doing streamout with counter buffer
+ * and using counter buffer for indirect draw
+ */
+ if (so_target && so_target->counter_buffer_valid) {
+ struct zink_resource *res = zink_resource(so_target->counter_buffer);
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
+ VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
+ VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = false;
+ }
+
+ zink_query_update_gs_states(ctx);
+
+ if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
+ zink_batch_no_rp(ctx);
+ VkMemoryBarrier mb;
+ mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ mb.pNext = NULL;
+ mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, 1, &mb, 0, NULL, 0, NULL);
+ }
+
+ zink_batch_rp(ctx);
+ /* check dead swapchain */
+ if (unlikely(!ctx->batch.in_rp))
+ return;
if (BATCH_CHANGED)
zink_update_descriptor_refs(ctx, false);
- zink_batch_rp(ctx);
- bool pipeline_changed = false;
- if (!HAS_DYNAMIC_STATE)
- pipeline_changed = update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode);
+ /* these must be after renderpass start to avoid issues with recursion */
+ bool drawid_broken = false;
+ if (reads_drawid && (!dindirect || !dindirect->buffer))
+ drawid_broken = (drawid_offset != 0 ||
+ (!HAS_MULTIDRAW && num_draws > 1) ||
+ (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
+ if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
+ zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;
+
+ bool rast_prim_changed = false;
+ bool prim_changed = false;
+ bool rast_state_changed = ctx->rast_state_changed;
+ if (mode_changed || ctx->gfx_pipeline_state.modules_changed ||
+ rast_state_changed) {
+ enum mesa_prim rast_prim = zink_rast_prim(ctx, dinfo);
+ if (rast_prim != ctx->gfx_pipeline_state.rast_prim) {
+ bool points_changed =
+ (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) !=
+ (rast_prim == MESA_PRIM_POINTS);
+
+ prim_changed = ctx->gfx_pipeline_state.rast_prim != rast_prim;
+
+ static bool rect_warned = false;
+ if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && rast_prim == MESA_PRIM_LINES && !rect_warned &&
+ (VkLineRasterizationModeEXT)rast_state->hw_state.line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT) {
+ if (screen->info.line_rast_feats.rectangularLines)
+ rect_warned = true;
+ else
+ warn_missing_feature(rect_warned, "rectangularLines");
+ }
+
+ ctx->gfx_pipeline_state.rast_prim = rast_prim;
+ rast_prim_changed = true;
+
+ if (points_changed && ctx->rast_state->base.point_quad_rasterization)
+ zink_set_fs_point_coord_key(ctx);
+ }
+ }
+ ctx->gfx_pipeline_state.gfx_prim_mode = mode;
+
+ if ((mode_changed || prim_changed || rast_state_changed || ctx->gfx_pipeline_state.modules_changed)) {
+ zink_set_primitive_emulation_keys(ctx);
+ }
+
+ if (index_size) {
+ const VkIndexType index_type[3] = {
+ VK_INDEX_TYPE_UINT8_EXT,
+ VK_INDEX_TYPE_UINT16,
+ VK_INDEX_TYPE_UINT32,
+ };
+ struct zink_resource *res = zink_resource(index_buffer);
+ if (unlikely(can_dgc)) {
+ VkBindIndexBufferIndirectCommandNV *ptr;
+ zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV, (void**)&ptr);
+ ptr->bufferAddress = res->obj->bda + index_offset;
+ ptr->size = res->base.b.width0;
+ ptr->indexType = index_type[index_size >> 1];
+ } else {
+ VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
+ }
+ }
+ if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) {
+ if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart;
+ }
+
+ if (have_streamout && ctx->dirty_so_targets)
+ zink_emit_stream_output_targets(pctx);
+
+ bool pipeline_changed = update_gfx_pipeline<DYNAMIC_STATE, BATCH_CHANGED>(ctx, batch->state, mode, can_dgc);
- if (BATCH_CHANGED || ctx->vp_state_changed || (!HAS_DYNAMIC_STATE && pipeline_changed)) {
+ if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
VkViewport viewports[PIPE_MAX_VIEWPORTS];
for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
VkViewport viewport = {
ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
- ctx->vp_state.viewport_states[i].scale[0] * 2,
+ MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1),
ctx->vp_state.viewport_states[i].scale[1] * 2,
- ctx->rast_state->base.clip_halfz ?
- ctx->vp_state.viewport_states[i].translate[2] :
- ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
- ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2]
+ CLAMP(ctx->rast_state->base.clip_halfz ?
+ ctx->vp_state.viewport_states[i].translate[2] :
+ ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
+ 0, 1),
+ CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2],
+ 0, 1)
};
+ if (!ctx->rast_state->base.half_pixel_center) {
+ /* magic constant from dxvk: offset the viewport by just under half a pixel to emulate non-half-pixel-center rasterization */
+ float cf = 0.5f - (1.0f / 128.0f);
+ viewport.x += cf;
+ if (viewport.height < 0)
+ viewport.y += cf;
+ else
+ viewport.y -= cf;
+ }
viewports[i] = viewport;
}
- if (HAS_DYNAMIC_STATE)
- VKCTX(CmdSetViewportWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
+ VKCTX(CmdSetViewportWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
else
VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
}
- if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (!HAS_DYNAMIC_STATE && pipeline_changed)) {
+ if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
VkRect2D scissors[PIPE_MAX_VIEWPORTS];
if (ctx->rast_state->base.scissor) {
for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
@@ -603,8 +732,8 @@ zink_draw_vbo(struct pipe_context *pctx,
scissors[i].extent.height = ctx->fb_state.height;
}
}
- if (HAS_DYNAMIC_STATE)
- VKCTX(CmdSetScissorWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
+ VKCTX(CmdSetScissorWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
else
VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
}
@@ -619,30 +748,27 @@ zink_draw_vbo(struct pipe_context *pctx,
ctx->stencil_ref_changed = false;
}
- if (HAS_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
- VKCTX(CmdSetDepthBoundsTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
+ VKCTX(CmdSetDepthBoundsTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
if (dsa_state->hw_state.depth_bounds_test)
VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf,
dsa_state->hw_state.min_depth_bounds,
dsa_state->hw_state.max_depth_bounds);
- VKCTX(CmdSetDepthTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
- if (dsa_state->hw_state.depth_test)
- VKCTX(CmdSetDepthCompareOpEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
- VKCTX(CmdSetDepthWriteEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
- VKCTX(CmdSetStencilTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
+ VKCTX(CmdSetDepthTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
+ VKCTX(CmdSetDepthCompareOp)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
+ VKCTX(CmdSetDepthWriteEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
+ VKCTX(CmdSetStencilTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
if (dsa_state->hw_state.stencil_test) {
- VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
+ VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
dsa_state->hw_state.stencil_front.failOp,
dsa_state->hw_state.stencil_front.passOp,
dsa_state->hw_state.stencil_front.depthFailOp,
dsa_state->hw_state.stencil_front.compareOp);
- VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
+ VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
dsa_state->hw_state.stencil_back.failOp,
dsa_state->hw_state.stencil_back.passOp,
dsa_state->hw_state.stencil_back.depthFailOp,
dsa_state->hw_state.stencil_back.compareOp);
- }
- if (dsa_state->base.stencil[0].enabled) {
if (dsa_state->base.stencil[1].enabled) {
VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
@@ -652,52 +778,97 @@ zink_draw_vbo(struct pipe_context *pctx,
VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
}
+ } else {
+ VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
+ VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
+ VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS);
}
}
ctx->dsa_state_changed = false;
- bool rast_state_changed = ctx->rast_state_changed;
- if (HAS_DYNAMIC_STATE && (BATCH_CHANGED || rast_state_changed))
- VKCTX(CmdSetFrontFaceEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.front_face);
- if ((BATCH_CHANGED || rast_state_changed) &&
- screen->info.have_EXT_line_rasterization && rast_state->base.line_stipple_enable)
- VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);
-
- if (BATCH_CHANGED || ctx->rast_state_changed || mode_changed) {
- enum pipe_prim_type reduced_prim = u_reduced_prim(mode);
-
- bool depth_bias = false;
- switch (reduced_prim) {
- case PIPE_PRIM_POINTS:
- depth_bias = rast_state->offset_point;
- break;
+ if (BATCH_CHANGED || rast_state_changed) {
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
+ VKCTX(CmdSetFrontFace)(batch->state->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face);
+ VKCTX(CmdSetCullMode)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.cull_mode);
+ }
- case PIPE_PRIM_LINES:
- depth_bias = rast_state->offset_line;
- break;
+ if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3) {
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE))
+ VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP))
+ VKCTX(CmdSetDepthClipEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clip);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLAMP))
+ VKCTX(CmdSetDepthClampEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clamp);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_POLYGON))
+ VKCTX(CmdSetPolygonModeEXT)(batch->state->cmdbuf, (VkPolygonMode)rast_state->hw_state.polygon_mode);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_HALFZ))
+ VKCTX(CmdSetDepthClipNegativeOneToOneEXT)(batch->state->cmdbuf, !rast_state->hw_state.clip_halfz);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_PV))
+ VKCTX(CmdSetProvokingVertexModeEXT)(batch->state->cmdbuf,
+ rast_state->hw_state.pv_last ?
+ VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT :
+ VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP))
+ VKCTX(CmdSetLineRasterizationModeEXT)(batch->state->cmdbuf, rast_state->dynamic_line_mode);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON))
+ VKCTX(CmdSetLineStippleEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.line_stipple_enable);
+ }
+ }
+ if ((BATCH_CHANGED || ctx->sample_mask_changed) && screen->have_full_ds3) {
+ VKCTX(CmdSetRasterizationSamplesEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1));
+ VKCTX(CmdSetSampleMaskEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1), &ctx->gfx_pipeline_state.sample_mask);
+ ctx->sample_mask_changed = false;
+ }
+ if ((BATCH_CHANGED || ctx->blend_state_changed)) {
+ if (ctx->gfx_pipeline_state.blend_state) {
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A2C))
+ VKCTX(CmdSetAlphaToCoverageEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
+ ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0));
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A21))
+ VKCTX(CmdSetAlphaToOneEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_one);
+ if (ctx->fb_state.nr_cbufs) {
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_ON))
+ VKCTX(CmdSetColorBlendEnableEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.enables);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_WRITE))
+ VKCTX(CmdSetColorWriteMaskEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.wrmask);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_EQ))
+ VKCTX(CmdSetColorBlendEquationEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.eq);
+ }
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC_ON))
+ VKCTX(CmdSetLogicOpEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_enable);
+ if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC))
+ VKCTX(CmdSetLogicOpEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_func);
+ }
+ }
+ ctx->ds3_states = 0;
- case PIPE_PRIM_TRIANGLES:
- depth_bias = rast_state->offset_tri;
- break;
+ if (BATCH_CHANGED ||
+ /* only re-emit on non-batch change when actually drawing lines */
+ ((ctx->line_width_changed || rast_prim_changed) && ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES)) {
+ VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
+ ctx->line_width_changed = false;
+ }
- default:
- unreachable("unexpected reduced prim");
- }
+ if (BATCH_CHANGED || mode_changed ||
+ ctx->gfx_pipeline_state.modules_changed ||
+ rast_state_changed) {
+ bool depth_bias =
+ zink_prim_type(ctx, dinfo) == MESA_PRIM_TRIANGLES &&
+ rast_state->offset_fill;
- if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
- if (screen->info.feats.features.wideLines || rast_state->line_width == 1.0f)
- VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
- else
- debug_printf("BUG: wide lines not supported, needs fallback!");
- }
- if (depth_bias)
- VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
- else
+ if (depth_bias) {
+ if (rast_state->base.offset_units_unscaled) {
+ VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale);
+ } else {
+ VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
+ }
+ } else {
VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
+ }
}
ctx->rast_state_changed = false;
- if (HAS_DYNAMIC_STATE) {
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
if (ctx->sample_locations_changed) {
VkSampleLocationsInfoEXT loc;
zink_init_vk_sample_locations(ctx, &loc);
@@ -706,46 +877,118 @@ zink_draw_vbo(struct pipe_context *pctx,
ctx->sample_locations_changed = false;
}
- if ((BATCH_CHANGED || ctx->blend_state_changed) &&
- ctx->gfx_pipeline_state.blend_state->need_blend_constants) {
+ if (BATCH_CHANGED || ctx->blend_color_changed) {
VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants);
}
ctx->blend_state_changed = false;
-
- if (BATCH_CHANGED || ctx->vertex_buffers_dirty)
- zink_bind_vertex_buffers<HAS_DYNAMIC_STATE, HAS_VERTEX_INPUT>(batch, ctx);
-
- zink_query_update_gs_states(ctx);
+ ctx->blend_color_changed = false;
+
+ if (!DRAW_STATE) {
+ if (BATCH_CHANGED || ctx->vertex_buffers_dirty) {
+ if (unlikely(can_dgc))
+ bind_vertex_buffers_dgc(ctx);
+ else if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride)
+ zink_bind_vertex_buffers<DYNAMIC_STATE>(batch, ctx);
+ else
+ zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(batch, ctx);
+ }
+ }
if (BATCH_CHANGED) {
ctx->pipeline_changed[0] = false;
zink_select_draw_vbo(ctx);
}
- if (HAS_DYNAMIC_STATE) {
- update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode);
- if (BATCH_CHANGED || mode_changed)
- VKCTX(CmdSetPrimitiveTopologyEXT)(batch->state->cmdbuf, zink_primitive_topology(mode));
- }
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed))
+ VKCTX(CmdSetPrimitiveTopology)(batch->state->cmdbuf, zink_primitive_topology(mode));
- if (HAS_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
- VKCTX(CmdSetPrimitiveRestartEnableEXT)(batch->state->cmdbuf, dinfo->primitive_restart);
+ if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
+ VKCTX(CmdSetPrimitiveRestartEnable)(batch->state->cmdbuf, dinfo->primitive_restart);
ctx->primitive_restart = dinfo->primitive_restart;
}
+ if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) {
+ VKCTX(CmdSetRasterizerDiscardEnable)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard);
+ ctx->rasterizer_discard_changed = false;
+ }
+
if (zink_program_has_descriptors(&ctx->curr_program->base))
- screen->descriptors_update(ctx, false);
+ zink_descriptors_update(ctx, false);
+
+ if (ctx->di.any_bindless_dirty &&
+ /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */
+ zink_program_has_descriptors(&ctx->curr_program->base) &&
+ ctx->curr_program->base.dd.bindless)
+ zink_descriptors_update_bindless(ctx);
if (reads_basevertex) {
unsigned draw_mode_is_indexed = index_size > 0;
- VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
- offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
- &draw_mode_is_indexed);
+ if (unlikely(can_dgc)) {
+ uint32_t *ptr;
+ VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
+ token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
+ token->pushconstantSize = sizeof(unsigned);
+ *ptr = draw_mode_is_indexed;
+ } else {
+ VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
+ &draw_mode_is_indexed);
+ }
+ }
+ if (ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL] &&
+ ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) {
+ if (unlikely(can_dgc)) {
+ float *ptr;
+ VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
+ token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, default_inner_level);
+ token->pushconstantSize = sizeof(float) * 6;
+ memcpy(ptr, &ctx->tess_levels[0], sizeof(float) * 6);
+ } else {
+ VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
+ &ctx->tess_levels[0]);
+ }
+ }
+
+ if (!screen->optimal_keys) {
+ if (zink_get_fs_key(ctx)->lower_line_stipple ||
+ zink_get_gs_key(ctx)->lower_gl_point ||
+ zink_get_fs_key(ctx)->lower_line_smooth) {
+
+ assert(zink_get_gs_key(ctx)->lower_line_stipple ==
+ zink_get_fs_key(ctx)->lower_line_stipple);
+
+ assert(zink_get_gs_key(ctx)->lower_line_smooth ==
+ zink_get_fs_key(ctx)->lower_line_smooth);
+
+ float viewport_scale[2] = {
+ ctx->vp_state.viewport_states[0].scale[0],
+ ctx->vp_state.viewport_states[0].scale[1]
+ };
+ VKCTX(CmdPushConstants)(batch->state->cmdbuf,
+ ctx->curr_program->base.layout,
+ VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, viewport_scale),
+ sizeof(float) * 2, &viewport_scale);
+
+ uint32_t stipple = ctx->rast_state->base.line_stipple_pattern;
+ stipple |= ctx->rast_state->base.line_stipple_factor << 16;
+ VKCTX(CmdPushConstants)(batch->state->cmdbuf,
+ ctx->curr_program->base.layout,
+ VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, line_stipple_pattern),
+ sizeof(uint32_t), &stipple);
+
+ if (ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) {
+ float line_width = ctx->rast_state->base.line_width;
+ VKCTX(CmdPushConstants)(batch->state->cmdbuf,
+ ctx->curr_program->base.layout,
+ VK_SHADER_STAGE_ALL_GRAPHICS,
+ offsetof(struct zink_gfx_push_constant, line_width),
+ sizeof(uint32_t), &line_width);
+ }
+ }
}
- if (ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL] && ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
- VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
- offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
- &ctx->tess_levels[0]);
if (have_streamout) {
for (unsigned i = 0; i < ctx->num_so_targets; i++) {
@@ -753,8 +996,10 @@ zink_draw_vbo(struct pipe_context *pctx,
counter_buffers[i] = VK_NULL_HANDLE;
if (t) {
struct zink_resource *res = zink_resource(t->counter_buffer);
- t->stride = ctx->last_vertex_stage->streamout.so_info.stride[i] * sizeof(uint32_t);
+ t->stride = ctx->last_vertex_stage->sinfo.stride[i];
zink_batch_reference_resource_rw(batch, res, true);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = res->obj->unordered_write = false;
if (t->counter_buffer_valid) {
counter_buffers[i] = res->obj->buffer;
counter_buffer_offsets[i] = t->counter_buffer_offset;
@@ -764,6 +1009,34 @@ zink_draw_vbo(struct pipe_context *pctx,
VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
}
+ bool marker = false;
+ if (unlikely(zink_tracing)) {
+ VkViewport viewport = {
+ ctx->vp_state.viewport_states[0].translate[0] - ctx->vp_state.viewport_states[0].scale[0],
+ ctx->vp_state.viewport_states[0].translate[1] - ctx->vp_state.viewport_states[0].scale[1],
+ MAX2(ctx->vp_state.viewport_states[0].scale[0] * 2, 1),
+ ctx->vp_state.viewport_states[0].scale[1] * 2,
+ CLAMP(ctx->rast_state->base.clip_halfz ?
+ ctx->vp_state.viewport_states[0].translate[2] :
+ ctx->vp_state.viewport_states[0].translate[2] - ctx->vp_state.viewport_states[0].scale[2],
+ 0, 1),
+ CLAMP(ctx->vp_state.viewport_states[0].translate[2] + ctx->vp_state.viewport_states[0].scale[2],
+ 0, 1)
+ };
+ if (ctx->blitting) {
+ bool is_zs = util_format_is_depth_or_stencil(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format);
+ marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "u_blitter(%s->%s, %dx%d)",
+ util_format_short_name(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format),
+ util_format_short_name((is_zs ? ctx->fb_state.zsbuf : ctx->fb_state.cbufs[0])->format),
+ lround(viewport.width), lround(viewport.height));
+ } else {
+ marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "draw(%u cbufs|%s, %dx%d)",
+ ctx->fb_state.nr_cbufs,
+ ctx->fb_state.zsbuf ? "zsbuf" : "",
+ lround(viewport.width), lround(viewport.height));
+ }
+ }
+
bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
work_count += num_draws;
if (index_size > 0) {
@@ -782,20 +1055,32 @@ zink_draw_vbo(struct pipe_context *pctx,
} else
VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
} else {
- if (need_index_buffer_unref)
+ if (unlikely(can_dgc)) {
+ if (need_index_buffer_unref)
+ draw_indexed_dgc_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
+ else
+ draw_indexed_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
+ } else if (need_index_buffer_unref) {
draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
- else
+ } else {
draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
+ }
}
} else {
if (so_target && screen->info.tf_props.transformFeedbackDraw) {
- if (needs_drawid)
- update_drawid(ctx, drawid_offset);
- zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
- zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
- VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
- zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
- MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
+ /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb
+ * draw using a streamout target that has no data;
+ * to avoid hanging the gpu, reject any such draws
+ */
+ if (so_target->counter_buffer_valid) {
+ if (needs_drawid)
+ update_drawid(ctx, drawid_offset);
+ zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
+ zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
+ VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
+ zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
+ MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
+ }
} else if (dindirect && dindirect->buffer) {
assert(num_draws == 1);
if (needs_drawid)
@@ -811,10 +1096,17 @@ zink_draw_vbo(struct pipe_context *pctx,
} else
VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
} else {
- draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
+ if (unlikely(can_dgc))
+ draw_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
+ else
+ draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
}
}
+ if (unlikely(zink_tracing))
+ zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker);
+
+ ctx->dgc.valid = can_dgc;
if (have_streamout) {
for (unsigned i = 0; i < ctx->num_so_targets; i++) {
struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
@@ -826,14 +1118,115 @@ zink_draw_vbo(struct pipe_context *pctx,
}
VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
}
+
batch->has_work = true;
batch->last_was_compute = false;
ctx->batch.work_count = work_count;
/* flush if there's >30k draws */
- if (unlikely(work_count >= 30000) || ctx->oom_flush)
+ if (!ctx->unordered_blitting && (unlikely(work_count >= 30000) || ctx->oom_flush))
pctx->flush(pctx, NULL, 0);
}
+template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
+static void
+zink_draw_vbo(struct pipe_context *pctx,
+ const struct pipe_draw_info *info,
+ unsigned drawid_offset,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws)
+{
+ zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
+}
+
+template <util_popcnt HAS_POPCNT>
+static void
+zink_vertex_state_mask(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
+{
+ struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
+ VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
+
+ if (partial_velem_mask == vstate->input.full_velem_mask) {
+ VKCTX(CmdSetVertexInputEXT)(cmdbuf,
+ zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
+ zstate->velems.hw_state.num_attribs, zstate->velems.hw_state.dynattribs);
+ return;
+ }
+
+ VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS];
+ unsigned num_attribs = 0;
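+ /* repack only the attribs selected by partial_velem_mask, compacting their locations to 0..n-1 */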
+ u_foreach_bit(elem, vstate->input.full_velem_mask & partial_velem_mask) {
+ unsigned idx = util_bitcount_fast<HAS_POPCNT>(vstate->input.full_velem_mask & BITFIELD_MASK(elem));
+ dynattribs[num_attribs] = zstate->velems.hw_state.dynattribs[idx];
+ dynattribs[num_attribs].location = num_attribs;
+ num_attribs++;
+ }
+
+ VKCTX(CmdSetVertexInputEXT)(cmdbuf,
+ zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
+ num_attribs, dynattribs);
+}
+
+template <util_popcnt HAS_POPCNT>
+static void
+zink_bind_vertex_state(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
+{
+ struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
+ VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
+ if (!vstate->input.vbuffer.buffer.resource)
+ return;
+
+ zink_vertex_state_mask<HAS_POPCNT>(ctx, vstate, partial_velem_mask);
+
+ struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
+ zink_batch_resource_usage_set(&ctx->batch, res, false, true);
+ VkDeviceSize offset = vstate->input.vbuffer.buffer_offset;
+ if (unlikely(zink_debug & ZINK_DEBUG_DGC)) {
+ VkBindVertexBufferIndirectCommandNV *ptr;
+ VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr);
+ token->vertexBindingUnit = 0;
+ token->vertexDynamicStride = VK_FALSE;
+ ptr->bufferAddress = res->obj->bda + offset;
+ ptr->size = res->base.b.width0;
+ ptr->stride = 0;
+ } else {
+ VKCTX(CmdBindVertexBuffers)(cmdbuf, 0,
+ zstate->velems.hw_state.num_bindings,
+ &res->obj->buffer, &offset);
+ }
+}
+
+template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, util_popcnt HAS_POPCNT, bool BATCH_CHANGED>
+static void
+zink_draw_vertex_state(struct pipe_context *pctx,
+ struct pipe_vertex_state *vstate,
+ uint32_t partial_velem_mask,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws)
+{
+ struct pipe_draw_info dinfo = {};
+
+ dinfo.mode = info.mode;
+ dinfo.index_size = 4;
+ dinfo.instance_count = 1;
+ dinfo.index.resource = vstate->input.indexbuf;
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
+ if (!ctx->unordered_blitting)
+ res->obj->unordered_read = false;
+ zink_bind_vertex_state<HAS_POPCNT>(ctx, vstate, partial_velem_mask);
+
+ zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
+ /* ensure ctx->vertex_buffers gets rebound on next non-vstate draw */
+ ctx->vertex_buffers_dirty = true;
+
+ if (info.take_vertex_state_ownership)
+ pipe_vertex_state_reference(&vstate, NULL);
+}
+
template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
@@ -842,21 +1235,51 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_batch *batch = &ctx->batch;
- update_barriers(ctx, true);
- zink_flush_memory_barrier(ctx, true);
+ if (ctx->render_condition_active)
+ zink_start_conditional_render(ctx);
- if (zink_program_has_descriptors(&ctx->curr_compute->base))
- screen->descriptors_update(ctx, true);
+ if (info->indirect) {
+ /*
+ VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
+ part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
+ VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.
+
+ - Chapter 7. Synchronization and Cache Control
+ */
+ check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
+ }
- zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info->block);
+ zink_update_barriers(ctx, true, NULL, info->indirect, NULL);
+ if (ctx->memory_barrier)
+ zink_flush_memory_barrier(ctx, true);
+
+ if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
+ zink_batch_no_rp(ctx);
+ VkMemoryBarrier mb;
+ mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ mb.pNext = NULL;
+ mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+ VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, 1, &mb, 0, NULL, 0, NULL);
+ }
+
+ zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info);
VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;
- VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
- &ctx->compute_pipeline_state);
if (BATCH_CHANGED) {
zink_update_descriptor_refs(ctx, true);
- zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
}
+ if (ctx->compute_dirty) {
+ /* update inlinable constants */
+ zink_update_compute_program(ctx);
+ ctx->compute_dirty = false;
+ }
+
+ VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
+ &ctx->compute_pipeline_state);
if (prev_pipeline != pipeline || BATCH_CHANGED)
VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
@@ -865,13 +1288,15 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
zink_select_launch_grid(ctx);
}
- if (BITSET_TEST(ctx->compute_stage->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM))
- VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_compute->base.layout, VK_SHADER_STAGE_COMPUTE_BIT,
- offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t),
- &info->work_dim);
+ if (zink_program_has_descriptors(&ctx->curr_compute->base))
+ zink_descriptors_update(ctx, true);
+ if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd.bindless)
+ zink_descriptors_update_bindless(ctx);
batch->work_count++;
zink_batch_no_rp(ctx);
+ if (!ctx->queries_disabled)
+ zink_resume_cs_query(ctx);
if (info->indirect) {
VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
@@ -880,57 +1305,44 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
batch->has_work = true;
batch->last_was_compute = true;
/* flush if there's >30k computes */
- if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)
+ if (!ctx->unordered_blitting && (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush))
pctx->flush(pctx, NULL, 0);
}
-template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2,
- zink_dynamic_vertex_input HAS_VERTEX_INPUT, bool BATCH_CHANGED>
-static void
-init_batch_changed_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
-{
- draw_vbo_array[HAS_MULTIDRAW][HAS_DYNAMIC_STATE][HAS_DYNAMIC_STATE2][HAS_VERTEX_INPUT][BATCH_CHANGED] =
- zink_draw_vbo<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, BATCH_CHANGED>;
-}
-
-template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2,
- zink_dynamic_vertex_input HAS_VERTEX_INPUT>
+template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
-init_vertex_input_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
+init_batch_changed_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
- init_batch_changed_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, false>(ctx, draw_vbo_array);
- init_batch_changed_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, true>(ctx, draw_vbo_array);
+ draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
+ draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][0][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_NO, BATCH_CHANGED>;
+ draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][1][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_YES, BATCH_CHANGED>;
}
-template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2>
+template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE>
static void
-init_dynamic_state2_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
+init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
- init_vertex_input_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, ZINK_NO_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array);
- init_vertex_input_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array);
-}
-
-template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE>
-static void
-init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
-{
- init_dynamic_state2_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, ZINK_NO_DYNAMIC_STATE2>(ctx, draw_vbo_array);
- init_dynamic_state2_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array);
+ init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array);
+ init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array);
}
template <zink_multidraw HAS_MULTIDRAW>
static void
-init_multidraw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
+init_multidraw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
- init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array);
- init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array);
+ init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
+ init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
+ init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array);
+ init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT2>(ctx, draw_vbo_array, draw_state_array);
+ init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE3>(ctx, draw_vbo_array, draw_state_array);
+ init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, draw_state_array);
}
static void
-init_all_draw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
+init_all_draw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
- init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array);
- init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array);
+ init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
+ init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
}
template <bool BATCH_CHANGED>
@@ -959,32 +1371,50 @@ zink_invalid_draw_vbo(struct pipe_context *pipe,
}
static void
+zink_invalid_draw_vertex_state(struct pipe_context *pipe,
+ struct pipe_vertex_state *vstate,
+ uint32_t partial_velem_mask,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws)
+{
+ unreachable("vertex shader not bound");
+}
+
+static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
unreachable("compute shader not bound");
}
+#define STAGE_BASE 0
+#define STAGE_BASE_GS (BITFIELD_BIT(MESA_SHADER_GEOMETRY) >> 1)
+#define STAGE_BASE_TES (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) >> 1)
+#define STAGE_BASE_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)
+#define STAGE_BASE_TCS_TES ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) >> 1)
+#define STAGE_BASE_TCS_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)
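+/* these masks encode only the optional stages, shifted down one bit: TCS=bit0, TES=bit1, GS=bit2 */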
+
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
const struct zink_shader **shaders = (const struct zink_shader**)key;
- uint32_t base_hash = shaders[PIPE_SHADER_VERTEX]->hash ^ shaders[PIPE_SHADER_FRAGMENT]->hash;
- if (STAGE_MASK == 0) //VS+FS
+ uint32_t base_hash = shaders[MESA_SHADER_VERTEX]->hash ^ shaders[MESA_SHADER_FRAGMENT]->hash;
+ if (STAGE_MASK == STAGE_BASE) //VS+FS
return base_hash;
- if (STAGE_MASK == 1) //VS+GS+FS
- return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash;
+ if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
+ return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash;
/*VS+TCS+FS isn't a thing */
/*VS+TCS+GS+FS isn't a thing */
- if (STAGE_MASK == 4) //VS+TES+FS
- return base_hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
- if (STAGE_MASK == 5) //VS+TES+GS+FS
- return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
- if (STAGE_MASK == 6) //VS+TCS+TES+FS
- return base_hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
+ if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
+ return base_hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
+ if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
+ return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
+ if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
+ return base_hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
/* all stages */
- return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
+ return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
}
template <unsigned STAGE_MASK>
@@ -993,41 +1423,75 @@ equals_gfx_program(const void *a, const void *b)
{
const void **sa = (const void**)a;
const void **sb = (const void**)b;
- if (STAGE_MASK == 0) //VS+FS
- return !memcmp(a, b, sizeof(void*) * 2);
- if (STAGE_MASK == 1) //VS+GS+FS
- return !memcmp(a, b, sizeof(void*) * 3);
+ STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
+ STATIC_ASSERT(MESA_SHADER_TESS_CTRL == 1);
+ STATIC_ASSERT(MESA_SHADER_TESS_EVAL == 2);
+ STATIC_ASSERT(MESA_SHADER_GEOMETRY == 3);
+ STATIC_ASSERT(MESA_SHADER_FRAGMENT == 4);
+ if (STAGE_MASK == STAGE_BASE) //VS+FS
+ return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
+ sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
+ if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
+ return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
+ !memcmp(&sa[MESA_SHADER_GEOMETRY], &sb[MESA_SHADER_GEOMETRY], sizeof(void*) * 2);
/*VS+TCS+FS isn't a thing */
/*VS+TCS+GS+FS isn't a thing */
- if (STAGE_MASK == 4) //VS+TES+FS
- return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 2);
- if (STAGE_MASK == 5) //VS+TES+GS+FS
- return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 3);
- if (STAGE_MASK == 6) //VS+TCS+TES+FS
- return !memcmp(&sa[PIPE_SHADER_TESS_CTRL], &sb[PIPE_SHADER_TESS_CTRL], sizeof(void*) * 2) &&
- !memcmp(a, b, sizeof(void*) * 2);
+ if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
+ return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
+ sa[MESA_SHADER_TESS_EVAL] == sb[MESA_SHADER_TESS_EVAL] &&
+ sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
+ if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
+ return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
+ !memcmp(&sa[MESA_SHADER_TESS_EVAL], &sb[MESA_SHADER_TESS_EVAL], sizeof(void*) * 3);
+ if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
+ return !memcmp(sa, sb, sizeof(void*) * 3) &&
+ sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
/* all stages */
- return !memcmp(a, b, sizeof(void*) * ZINK_SHADER_COUNT);
+ return !memcmp(a, b, sizeof(void*) * ZINK_GFX_SHADER_COUNT);
}
extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
- pipe_draw_vbo_func draw_vbo_array[2][2][2][2] //multidraw, dynamic state, dynamic state2, dynamic vertex input,
- [2]; //batch changed
- init_all_draw_functions(ctx, draw_vbo_array);
+ pipe_draw_func draw_vbo_array[2][6] //multidraw, zink_dynamic_state
+ [2]; //batch changed
+ pipe_draw_vertex_state_func draw_state_array[2][6] //multidraw, zink_dynamic_state
+ [2][2]; //has_popcnt, batch changed
+ zink_dynamic_state dynamic;
+ if (screen->info.have_EXT_extended_dynamic_state) {
+ if (screen->info.have_EXT_extended_dynamic_state2) {
+ if (screen->info.have_EXT_extended_dynamic_state3) {
+ if (screen->info.have_EXT_vertex_input_dynamic_state)
+ dynamic = ZINK_DYNAMIC_VERTEX_INPUT;
+ else
+ dynamic = ZINK_DYNAMIC_STATE3;
+ } else {
+ if (screen->info.have_EXT_vertex_input_dynamic_state)
+ dynamic = ZINK_DYNAMIC_VERTEX_INPUT2;
+ else
+ dynamic = ZINK_DYNAMIC_STATE2;
+ }
+ } else {
+ dynamic = ZINK_DYNAMIC_STATE;
+ }
+ } else {
+ dynamic = ZINK_NO_DYNAMIC_STATE;
+ }
+ init_all_draw_functions(ctx, draw_vbo_array, draw_state_array);
memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw]
- [screen->info.have_EXT_extended_dynamic_state]
- [screen->info.have_EXT_extended_dynamic_state2]
- [screen->info.have_EXT_vertex_input_dynamic_state],
+ [dynamic],
sizeof(ctx->draw_vbo));
+ memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw]
+ [dynamic][util_get_cpu_caps()->has_popcnt],
+ sizeof(ctx->draw_state));
/* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
* initialization of callbacks in upper layers (such as u_threaded_context).
*/
ctx->base.draw_vbo = zink_invalid_draw_vbo;
+ ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state;
_mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
_mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
@@ -1037,6 +1501,8 @@ zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
_mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
_mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
_mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++)
+ simple_mtx_init(&ctx->program_lock[i], mtx_plain);
}
void
@@ -1048,3 +1514,18 @@ zink_init_grid_functions(struct zink_context *ctx)
*/
ctx->base.launch_grid = zink_invalid_launch_grid;
}
+
+void
+zink_init_screen_pipeline_libs(struct zink_screen *screen)
+{
+ _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>);
+ _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>);
+ _mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>);
+ _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>);
+ _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>);
+ _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>);
+ _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>);
+ _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>);
+ for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++)
+ simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain);
+}
diff --git a/src/gallium/drivers/zink/zink_extensions.py b/src/gallium/drivers/zink/zink_extensions.py
index 52c7def4479..31634400ebd 100644
--- a/src/gallium/drivers/zink/zink_extensions.py
+++ b/src/gallium/drivers/zink/zink_extensions.py
@@ -67,13 +67,18 @@ class Extension:
core_since = None
# these are specific to zink_device_info.py:
- has_properties = False
- has_features = False
- guard = False
+ has_properties = False
+ has_features = False
+ guard = False
+ features_promoted = False
+ properties_promoted = False
+
+
+ # these are specific to zink_instance.py:
+ platform_guard = None
def __init__(self, name, alias="", required=False, nonstandard=False,
- properties=False, features=False, conditions=None, guard=False,
- core_since=None):
+ properties=False, features=False, conditions=None, guard=False):
self.name = name
self.alias = alias
self.is_required = required
@@ -82,7 +87,6 @@ class Extension:
self.has_features = features
self.enable_conds = conditions
self.guard = guard
- self.core_since = core_since
if alias == "" and (properties == True or features == True):
raise RuntimeError("alias must be available when properties and/or features are used")
@@ -98,13 +102,39 @@ class Extension:
# e.g.: "VK_EXT_robustness2" -> "Robustness2"
def name_in_camel_case(self):
return "".join([x.title() for x in self.name.split('_')[2:]])
-
- # e.g.: "VK_EXT_robustness2" -> "VK_EXT_ROBUSTNESS2_EXTENSION_NAME"
- # do note that inconsistencies exist, i.e. we have
- # VK_EXT_ROBUSTNESS_2_EXTENSION_NAME defined in the headers, but then
- # we also have VK_KHR_MAINTENANCE1_EXTENSION_NAME
+
+ # e.g.: "VK_EXT_robustness2" -> "VK_EXT_ROBUSTNESS_2"
+ def name_in_snake_uppercase(self):
+ def replace(original):
+ # we do not split the types into two, e.g. INT_32
+ match_types = re.match(".*(int|float)(8|16|32|64)$", original)
+
+ # do not match win32
+ match_os = re.match(".*win32$", original)
+
+ # try to match extensions with alphanumeric names, like robustness2
+ match_alphanumeric = re.match(r"([a-z]+)(\d+)", original)
+
+ if match_types is not None or match_os is not None:
+ return original.upper()
+
+ if match_alphanumeric is not None:
+ return (match_alphanumeric[1].upper()
+ + '_'
+ + match_alphanumeric[2])
+
+ return original.upper()
+
+ replaced = list(map(replace, self.name.split('_')))
+ return '_'.join(replaced)
+
+ # e.g.: "VK_EXT_robustness2" -> "ROBUSTNESS_2"
+ def pure_name_in_snake_uppercase(self):
+ return '_'.join(self.name_in_snake_uppercase().split('_')[2:])
+
+ # e.g.: "VK_EXT_robustness2" -> "VK_EXT_ROBUSTNESS_2_EXTENSION_NAME"
def extension_name(self):
- return self.name.upper() + "_EXTENSION_NAME"
+ return self.name_in_snake_uppercase() + "_EXTENSION_NAME"
# generate a C string literal for the extension
def extension_name_literal(self):
@@ -130,7 +160,7 @@ class Extension:
# for VK_EXT_transform_feedback and struct="FEATURES"
def stype(self, struct: str):
return ("VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_"
- + self.pure_name().upper()
+ + self.pure_name_in_snake_uppercase()
+ '_' + struct + '_'
+ self.vendor())
@@ -152,7 +182,13 @@ class ExtensionRegistryEntry:
instance_commands = None
constants = None
features_struct = None
+ features_fields = None
+ features_promoted = False
properties_struct = None
+ properties_fields = None
+ properties_promoted = False
+ # some instance extensions are locked behind certain platforms
+ platform_guard = ""
class ExtensionRegistry:
# key = extension name, value = registry entry
@@ -162,7 +198,9 @@ class ExtensionRegistry:
vkxml = ElementTree.parse(vkxml_path)
commands_type = dict()
- aliases = dict()
+ command_aliases = dict()
+ platform_guards = dict()
+ struct_aliases = dict()
for cmd in vkxml.findall("commands/command"):
name = cmd.find("./proto/name")
@@ -170,11 +208,26 @@ class ExtensionRegistry:
if name is not None and name.text:
commands_type[name.text] = cmd.find("./param/type").text
elif cmd.get("name") is not None:
- aliases[cmd.get("name")] = cmd.get("alias")
+ command_aliases[cmd.get("name")] = cmd.get("alias")
+
+ for typ in vkxml.findall("types/type"):
+ if typ.get("category") != "struct":
+ continue
- for (cmd, alias) in aliases.items():
+ name = typ.get("name")
+ alias = typ.get("alias")
+
+ if name and alias:
+ struct_aliases[name] = alias
+
+ for (cmd, alias) in command_aliases.items():
commands_type[cmd] = commands_type[alias]
+ for platform in vkxml.findall("platforms/platform"):
+ name = platform.get("name")
+ guard = platform.get("protect")
+ platform_guards[name] = guard
+
for ext in vkxml.findall("extensions/extension"):
# Reserved extensions are marked with `supported="disabled"`
if ext.get("supported") == "disabled":
@@ -189,6 +242,8 @@ class ExtensionRegistry:
entry.device_commands = []
entry.pdevice_commands = []
entry.instance_commands = []
+ entry.features_fields = []
+ entry.properties_fields = []
for cmd in ext.findall("require/command"):
cmd_name = cmd.get("name")
@@ -214,10 +269,51 @@ class ExtensionRegistry:
if (self.is_features_struct(ty_name) and
entry.features_struct is None):
entry.features_struct = ty_name
+
elif (self.is_properties_struct(ty_name) and
entry.properties_struct is None):
entry.properties_struct = ty_name
+ if entry.features_struct:
+ struct_name = entry.features_struct
+ if entry.features_struct in struct_aliases:
+ struct_name = struct_aliases[entry.features_struct]
+ entry.features_promoted = True
+
+ elif entry.promoted_in is not None:
+ # if the extension is promoted but a core-Vulkan alias is not
+ # available for the features, then consider the features struct
+ # non-core-promoted
+ entry.features_promoted = False
+
+ for field in vkxml.findall("./types/type[@name='{}']/member".format(struct_name)):
+ field_name = field.find("name").text
+
+ # we ignore sType and pNext since they are irrelevant
+ if field_name not in ["sType", "pNext"]:
+ entry.features_fields.append(field_name)
+
+ if entry.properties_struct:
+ struct_name = entry.properties_struct
+ if entry.properties_struct in struct_aliases:
+ struct_name = struct_aliases[entry.properties_struct]
+ entry.properties_promoted = True
+
+ elif entry.promoted_in is not None:
+ # if the extension is promoted but a core-Vulkan alias is not
+ # available for the properties, then it is not promoted to core
+ entry.properties_promoted = False
+
+ for field in vkxml.findall("./types/type[@name='{}']/member".format(struct_name)):
+ field_name = field.find("name").text
+
+ # we ignore sType and pNext since they are irrelevant
+ if field_name not in ["sType", "pNext"]:
+ entry.properties_fields.append(field_name)
+
+ if ext.get("platform") is not None:
+ entry.platform_guard = platform_guards[ext.get("platform")]
+
self.registry[name] = entry
def in_registry(self, ext_name: str):
diff --git a/src/gallium/drivers/zink/zink_fence.c b/src/gallium/drivers/zink/zink_fence.c
index b2118618bc0..86bc56cf119 100644
--- a/src/gallium/drivers/zink/zink_fence.c
+++ b/src/gallium/drivers/zink/zink_fence.c
@@ -28,14 +28,24 @@
#include "zink_resource.h"
#include "zink_screen.h"
+#include "util/os_file.h"
#include "util/set.h"
#include "util/u_memory.h"
+#ifdef _WIN32
+#include <windows.h>
+#include <vulkan/vulkan_win32.h>
+#endif
+
static void
destroy_fence(struct zink_screen *screen, struct zink_tc_fence *mfence)
{
+ if (mfence->fence)
+ util_dynarray_delete_unordered(&mfence->fence->mfences, struct zink_tc_fence *, mfence);
mfence->fence = NULL;
tc_unflushed_batch_token_reference(&mfence->tc_token, NULL);
+ if (mfence->sem)
+ VKSCR(DestroySemaphore)(screen->dev, mfence->sem, NULL);
FREE(mfence);
}
@@ -101,13 +111,13 @@ tc_fence_finish(struct zink_context *ctx, struct zink_tc_fence *mfence, uint64_t
/* this is a tc mfence, so we're just waiting on the queue mfence to complete
* after being signaled by the real mfence
*/
- if (*timeout_ns == PIPE_TIMEOUT_INFINITE) {
+ if (*timeout_ns == OS_TIMEOUT_INFINITE) {
util_queue_fence_wait(&mfence->ready);
} else {
if (!util_queue_fence_wait_timeout(&mfence->ready, abs_timeout))
return false;
}
- if (*timeout_ns && *timeout_ns != PIPE_TIMEOUT_INFINITE) {
+ if (*timeout_ns && *timeout_ns != OS_TIMEOUT_INFINITE) {
int64_t time_ns = os_time_get_nano();
*timeout_ns = abs_timeout > time_ns ? abs_timeout - time_ns : 0;
}
@@ -116,8 +126,8 @@ tc_fence_finish(struct zink_context *ctx, struct zink_tc_fence *mfence, uint64_t
return true;
}
-bool
-zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns)
+static bool
+fence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns)
{
if (screen->device_lost)
return true;
@@ -127,14 +137,7 @@ zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t
assert(fence->batch_id);
assert(fence->submitted);
- bool success = false;
-
- VkResult ret;
- if (timeout_ns)
- ret = VKSCR(WaitForFences)(screen->dev, 1, &fence->fence, VK_TRUE, timeout_ns);
- else
- ret = VKSCR(GetFenceStatus)(screen->dev, fence->fence);
- success = zink_screen_handle_vkresult(screen, ret);
+ bool success = zink_screen_timeline_wait(screen, fence->batch_id, timeout_ns);
if (success) {
p_atomic_set(&fence->completed, true);
@@ -175,17 +178,22 @@ zink_fence_finish(struct zink_screen *screen, struct pipe_context *pctx, struct
struct zink_fence *fence = mfence->fence;
- unsigned submit_diff = zink_batch_state(mfence->fence)->submit_count - mfence->submit_count;
+ unsigned submit_diff = zink_batch_state(mfence->fence)->usage.submit_count - mfence->submit_count;
/* this batch is known to have finished because it has been submitted more than 1 time
* since the tc fence last saw it
*/
if (submit_diff > 1)
return true;
- if (fence->submitted && zink_screen_check_last_finished(screen, fence->batch_id))
+ /* - if fence is submitted, batch_id is nonzero and can be checked
+ * - if fence is not submitted here, it must be reset; batch_id will be 0 and submitted is false
+ * in either case, the fence has finished
+ */
+ if ((fence->submitted && zink_screen_check_last_finished(screen, fence->batch_id)) ||
+ (!fence->submitted && submit_diff))
return true;
- return zink_vkfence_wait(screen, fence, timeout_ns);
+ return fence_wait(screen, fence, timeout_ns);
}
static bool
@@ -196,25 +204,174 @@ fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx,
timeout_ns);
}
+static int
+fence_get_fd(struct pipe_screen *pscreen, struct pipe_fence_handle *pfence)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ if (screen->device_lost)
+ return -1;
+
+ struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence;
+ if (!mfence->sem)
+ return -1;
+
+ const VkSemaphoreGetFdInfoKHR sgfi = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+ .semaphore = mfence->sem,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ };
+ int fd = -1;
+ VkResult result = VKSCR(GetSemaphoreFdKHR)(screen->dev, &sgfi, &fd);
+ if (!zink_screen_handle_vkresult(screen, result)) {
+ mesa_loge("ZINK: vkGetSemaphoreFdKHR failed (%s)", vk_Result_to_str(result));
+ return -1;
+ }
+
+ return fd;
+}
+
+void
+zink_fence_server_signal(struct pipe_context *pctx, struct pipe_fence_handle *pfence)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence;
+
+ assert(!ctx->batch.state->signal_semaphore);
+ ctx->batch.state->signal_semaphore = mfence->sem;
+ ctx->batch.has_work = true;
+ struct zink_batch_state *bs = ctx->batch.state;
+ /* this must produce a synchronous flush that completes before the function returns */
+ pctx->flush(pctx, NULL, 0);
+ if (zink_screen(ctx->base.screen)->threaded_submit)
+ util_queue_fence_wait(&bs->flush_completed);
+}
+
void
zink_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *pfence)
{
- struct zink_tc_fence *mfence = zink_tc_fence(pfence);
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence;
- if (mfence->deferred_ctx == pctx)
+ if (mfence->deferred_ctx == pctx || !mfence->sem)
return;
- if (mfence->deferred_ctx) {
- zink_context(pctx)->batch.has_work = true;
- /* this must be the current batch */
- pctx->flush(pctx, NULL, 0);
+ mfence->deferred_ctx = pctx;
+ /* this will be applied on the next submit */
+ VkPipelineStageFlags flag = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ util_dynarray_append(&ctx->batch.state->wait_semaphores, VkSemaphore, mfence->sem);
+ util_dynarray_append(&ctx->batch.state->wait_semaphore_stages, VkPipelineStageFlags, flag);
+ pipe_reference(NULL, &mfence->reference);
+ util_dynarray_append(&ctx->batch.state->fences, struct zink_tc_fence*, mfence);
+
+ /* transfer the external wait semaphore ownership to the next submit */
+ mfence->sem = VK_NULL_HANDLE;
+}
+
+void
+zink_create_fence_fd(struct pipe_context *pctx, struct pipe_fence_handle **pfence, int fd, enum pipe_fd_type type)
+{
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ VkResult result;
+
+ assert(fd >= 0);
+
+ struct zink_tc_fence *mfence = zink_create_tc_fence();
+ if (!mfence)
+ goto fail_tc_fence_create;
+
+ const VkSemaphoreCreateInfo sci = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+ result = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &mfence->sem);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateSemaphore failed (%s)", vk_Result_to_str(result));
+ goto fail_sem_create;
+ }
+
+ int dup_fd = os_dupfd_cloexec(fd);
+ if (dup_fd < 0)
+ goto fail_fd_dup;
+
+ static const VkExternalSemaphoreHandleTypeFlagBits flags[] = {
+ [PIPE_FD_TYPE_NATIVE_SYNC] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ [PIPE_FD_TYPE_SYNCOBJ] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+ };
+ assert(type < ARRAY_SIZE(flags));
+
+ const VkImportSemaphoreFdInfoKHR sdi = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
+ .semaphore = mfence->sem,
+ .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
+ .handleType = flags[type],
+ .fd = dup_fd,
+ };
+ result = VKSCR(ImportSemaphoreFdKHR)(screen->dev, &sdi);
+ if (!zink_screen_handle_vkresult(screen, result)) {
+ mesa_loge("ZINK: vkImportSemaphoreFdKHR failed (%s)", vk_Result_to_str(result));
+ goto fail_sem_import;
+ }
+
+ *pfence = (struct pipe_fence_handle *)mfence;
+ return;
+
+fail_sem_import:
+ close(dup_fd);
+fail_fd_dup:
+ VKSCR(DestroySemaphore)(screen->dev, mfence->sem, NULL);
+fail_sem_create:
+ FREE(mfence);
+fail_tc_fence_create:
+ *pfence = NULL;
+}
+
+#ifdef _WIN32
+void
+zink_create_fence_win32(struct pipe_screen *pscreen, struct pipe_fence_handle **pfence, void *handle, const void *name, enum pipe_fd_type type)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ VkResult ret = VK_ERROR_UNKNOWN;
+ VkSemaphoreCreateInfo sci = {
+ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ NULL,
+ 0
+ };
+ struct zink_tc_fence *mfence = zink_create_tc_fence();
+ VkExternalSemaphoreHandleTypeFlagBits flags[] = {
+ [PIPE_FD_TYPE_NATIVE_SYNC] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+ [PIPE_FD_TYPE_SYNCOBJ] = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+ };
+ VkImportSemaphoreWin32HandleInfoKHR sdi = {0};
+ assert(type < ARRAY_SIZE(flags));
+
+ *pfence = NULL;
+
+ if (VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &mfence->sem) != VK_SUCCESS) {
+ FREE(mfence);
+ return;
}
- zink_fence_finish(zink_screen(pctx->screen), pctx, mfence, PIPE_TIMEOUT_INFINITE);
+
+ sdi.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR;
+ sdi.semaphore = mfence->sem;
+ sdi.handleType = flags[type];
+ sdi.handle = handle;
+ sdi.name = (LPCWSTR)name;
+ ret = VKSCR(ImportSemaphoreWin32HandleKHR)(screen->dev, &sdi);
+
+ if (!zink_screen_handle_vkresult(screen, ret))
+ goto fail;
+ *pfence = (struct pipe_fence_handle *)mfence;
+ return;
+
+fail:
+ VKSCR(DestroySemaphore)(screen->dev, mfence->sem, NULL);
+ FREE(mfence);
}
+#endif
void
zink_screen_fence_init(struct pipe_screen *pscreen)
{
pscreen->fence_reference = fence_reference;
pscreen->fence_finish = fence_finish;
+ pscreen->fence_get_fd = fence_get_fd;
}
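
The old zink_vkfence_wait() blocked on a per-batch VkFence; fence_wait() above instead defers to zink_screen_timeline_wait(), which tracks batch completion through a single timeline semaphore. A minimal sketch of what such a wait looks like in raw Vulkan 1.2 — the helper name and standalone shape are illustrative only, since the real zink_screen_timeline_wait() lives in zink_screen.c and is not part of this hunk:

   /* Hypothetical helper, assuming "timeline" was created with
    * VK_SEMAPHORE_TYPE_TIMELINE and is signaled to batch_id when that batch retires. */
   static bool
   timeline_wait_sketch(VkDevice dev, VkSemaphore timeline, uint64_t batch_id, uint64_t timeout_ns)
   {
      const VkSemaphoreWaitInfo wi = {
         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
         .semaphoreCount = 1,
         .pSemaphores = &timeline,
         .pValues = &batch_id,
      };
      /* timeout_ns == 0 degenerates into a status poll, mirroring the old
       * vkGetFenceStatus() path that this patch removes */
      return vkWaitSemaphores(dev, &wi, timeout_ns) == VK_SUCCESS;
   }
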
diff --git a/src/gallium/drivers/zink/zink_fence.h b/src/gallium/drivers/zink/zink_fence.h
index e900a4c69c0..22faa2e6de7 100644
--- a/src/gallium/drivers/zink/zink_fence.h
+++ b/src/gallium/drivers/zink/zink_fence.h
@@ -24,36 +24,7 @@
#ifndef ZINK_FENCE_H
#define ZINK_FENCE_H
-#include "util/simple_mtx.h"
-#include "util/u_inlines.h"
-#include "util/u_queue.h"
-
-#include <vulkan/vulkan.h>
-
-struct pipe_context;
-struct pipe_screen;
-struct zink_batch;
-struct zink_batch_state;
-struct zink_context;
-struct zink_screen;
-
-struct tc_unflushed_batch_token;
-
-struct zink_tc_fence {
- struct pipe_reference reference;
- uint32_t submit_count;
- struct util_queue_fence ready;
- struct tc_unflushed_batch_token *tc_token;
- struct pipe_context *deferred_ctx;
- struct zink_fence *fence;
-};
-
-struct zink_fence {
- VkFence fence;
- uint32_t batch_id;
- bool submitted;
- bool completed;
-};
+#include "zink_types.h"
static inline struct zink_fence *
zink_fence(void *pfence)
@@ -79,14 +50,19 @@ zink_fence_reference(struct zink_screen *screen,
struct zink_tc_fence *fence);
void
+zink_create_fence_fd(struct pipe_context *pctx, struct pipe_fence_handle **pfence, int fd, enum pipe_fd_type type);
+#if defined(_WIN32)
+void
+zink_create_fence_win32(struct pipe_screen *screen, struct pipe_fence_handle **pfence, void *handle, const void *name, enum pipe_fd_type type);
+#endif
+void
+zink_fence_server_signal(struct pipe_context *pctx, struct pipe_fence_handle *pfence);
+void
zink_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *pfence);
void
zink_screen_fence_init(struct pipe_screen *pscreen);
-bool
-zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns);
-
void
zink_fence_clear_resources(struct zink_screen *screen, struct zink_fence *fence);
#endif
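
Together, the new declarations give zink the full gallium native-fence-fd surface. As an illustration (not part of the patch) of how a frontend could drive these hooks once zink_context.c wires them into pipe_context — the function name and the flush-flag choice are assumptions, and the usual gallium/unistd headers are taken as available:

   static void
   share_fence_sketch(struct pipe_screen *pscreen,
                      struct pipe_context *producer,
                      struct pipe_context *consumer)
   {
      struct pipe_fence_handle *fence = NULL;
      /* ask the producer for an exportable fence on flush */
      producer->flush(producer, &fence, PIPE_FLUSH_FENCE_FD);

      int fd = pscreen->fence_get_fd(pscreen, fence); /* vkGetSemaphoreFdKHR under the hood */
      if (fd >= 0) {
         struct pipe_fence_handle *imported = NULL;
         consumer->create_fence_fd(consumer, &imported, fd, PIPE_FD_TYPE_NATIVE_SYNC);
         /* queued as a wait semaphore on the consumer's next submit */
         consumer->fence_server_sync(consumer, imported);
         pscreen->fence_reference(pscreen, &imported, NULL);
         close(fd); /* create_fence_fd dups the fd, so the caller still owns this one */
      }
      pscreen->fence_reference(pscreen, &fence, NULL);
   }
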
diff --git a/src/gallium/drivers/zink/zink_format.c b/src/gallium/drivers/zink/zink_format.c
index 358f409922e..cf36909d6ca 100644
--- a/src/gallium/drivers/zink/zink_format.c
+++ b/src/gallium/drivers/zink/zink_format.c
@@ -1,152 +1,6 @@
#include "util/format/u_format.h"
#include "zink_format.h"
-
-static const VkFormat formats[PIPE_FORMAT_COUNT] = {
-#define MAP_FORMAT_NORM(FMT) \
- [PIPE_FORMAT_ ## FMT ## _UNORM] = VK_FORMAT_ ## FMT ## _UNORM, \
- [PIPE_FORMAT_ ## FMT ## _SNORM] = VK_FORMAT_ ## FMT ## _SNORM,
-
-#define MAP_FORMAT_SCALED(FMT) \
- [PIPE_FORMAT_ ## FMT ## _USCALED] = VK_FORMAT_ ## FMT ## _USCALED, \
- [PIPE_FORMAT_ ## FMT ## _SSCALED] = VK_FORMAT_ ## FMT ## _SSCALED,
-
-#define MAP_FORMAT_INT(FMT) \
- [PIPE_FORMAT_ ## FMT ## _UINT] = VK_FORMAT_ ## FMT ## _UINT, \
- [PIPE_FORMAT_ ## FMT ## _SINT] = VK_FORMAT_ ## FMT ## _SINT,
-
-#define MAP_FORMAT_SRGB(FMT) \
- [PIPE_FORMAT_ ## FMT ## _SRGB] = VK_FORMAT_ ## FMT ## _SRGB,
-
-#define MAP_FORMAT_FLOAT(FMT) \
- [PIPE_FORMAT_ ## FMT ## _FLOAT] = VK_FORMAT_ ## FMT ## _SFLOAT,
-
- // one component
-
- // 8-bits
- MAP_FORMAT_NORM(R8)
- MAP_FORMAT_SCALED(R8)
- MAP_FORMAT_INT(R8)
- MAP_FORMAT_SRGB(R8)
- // 16-bits
- MAP_FORMAT_NORM(R16)
- MAP_FORMAT_SCALED(R16)
- MAP_FORMAT_INT(R16)
- MAP_FORMAT_FLOAT(R16)
- // 32-bits
- MAP_FORMAT_INT(R32)
- MAP_FORMAT_FLOAT(R32)
-
- // two components
-
- // 8-bits
- MAP_FORMAT_NORM(R8G8)
- MAP_FORMAT_SCALED(R8G8)
- MAP_FORMAT_INT(R8G8)
- MAP_FORMAT_SRGB(R8G8)
- // 16-bits
- MAP_FORMAT_NORM(R16G16)
- MAP_FORMAT_SCALED(R16G16)
- MAP_FORMAT_INT(R16G16)
- MAP_FORMAT_FLOAT(R16G16)
- // 32-bits
- MAP_FORMAT_INT(R32G32)
- MAP_FORMAT_FLOAT(R32G32)
-
- // three components
-
- // 8-bits
- MAP_FORMAT_NORM(R8G8B8)
- MAP_FORMAT_SCALED(R8G8B8)
- MAP_FORMAT_INT(R8G8B8)
- MAP_FORMAT_SRGB(R8G8B8)
- MAP_FORMAT_NORM(B8G8R8)
- MAP_FORMAT_SCALED(B8G8R8)
- MAP_FORMAT_INT(B8G8R8)
- MAP_FORMAT_SRGB(B8G8R8)
- // 16-bits
- MAP_FORMAT_NORM(R16G16B16)
- MAP_FORMAT_SCALED(R16G16B16)
- MAP_FORMAT_INT(R16G16B16)
- MAP_FORMAT_FLOAT(R16G16B16)
- // 32-bits
- MAP_FORMAT_INT(R32G32B32)
- MAP_FORMAT_FLOAT(R32G32B32)
-
- // four components
-
- // 8-bits
- MAP_FORMAT_NORM(R8G8B8A8)
- MAP_FORMAT_SCALED(R8G8B8A8)
- MAP_FORMAT_INT(R8G8B8A8)
- MAP_FORMAT_NORM(B8G8R8A8)
- MAP_FORMAT_SCALED(B8G8R8A8)
- MAP_FORMAT_INT(B8G8R8A8)
- MAP_FORMAT_SRGB(B8G8R8A8)
- [PIPE_FORMAT_RGBA8888_SRGB] = VK_FORMAT_A8B8G8R8_SRGB_PACK32,
- // 16-bits
- MAP_FORMAT_NORM(R16G16B16A16)
- MAP_FORMAT_SCALED(R16G16B16A16)
- MAP_FORMAT_INT(R16G16B16A16)
- MAP_FORMAT_FLOAT(R16G16B16A16)
- // 32-bits
- MAP_FORMAT_INT(R32G32B32A32)
- MAP_FORMAT_FLOAT(R32G32B32A32)
-
- // other color formats
- [PIPE_FORMAT_A4B4G4R4_UNORM] = VK_FORMAT_R4G4B4A4_UNORM_PACK16,
- [PIPE_FORMAT_A4R4G4B4_UNORM] = VK_FORMAT_B4G4R4A4_UNORM_PACK16,
- [PIPE_FORMAT_B4G4R4A4_UNORM] = VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT,
- [PIPE_FORMAT_R4G4B4A4_UNORM] = VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT,
- [PIPE_FORMAT_B5G6R5_UNORM] = VK_FORMAT_R5G6B5_UNORM_PACK16,
- [PIPE_FORMAT_R5G6B5_UNORM] = VK_FORMAT_B5G6R5_UNORM_PACK16,
-
- [PIPE_FORMAT_A1B5G5R5_UNORM] = VK_FORMAT_R5G5B5A1_UNORM_PACK16,
- [PIPE_FORMAT_A1R5G5B5_UNORM] = VK_FORMAT_B5G5R5A1_UNORM_PACK16,
- [PIPE_FORMAT_B5G5R5A1_UNORM] = VK_FORMAT_A1R5G5B5_UNORM_PACK16,
-
- [PIPE_FORMAT_R11G11B10_FLOAT] = VK_FORMAT_B10G11R11_UFLOAT_PACK32,
- [PIPE_FORMAT_R9G9B9E5_FLOAT] = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
- /* ARB_vertex_type_2_10_10_10 */
- [PIPE_FORMAT_R10G10B10A2_UNORM] = VK_FORMAT_A2B10G10R10_UNORM_PACK32,
- [PIPE_FORMAT_R10G10B10A2_SNORM] = VK_FORMAT_A2B10G10R10_SNORM_PACK32,
- [PIPE_FORMAT_B10G10R10A2_UNORM] = VK_FORMAT_A2R10G10B10_UNORM_PACK32,
- [PIPE_FORMAT_B10G10R10A2_SNORM] = VK_FORMAT_A2R10G10B10_SNORM_PACK32,
- [PIPE_FORMAT_R10G10B10A2_USCALED] = VK_FORMAT_A2B10G10R10_USCALED_PACK32,
- [PIPE_FORMAT_R10G10B10A2_SSCALED] = VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
- [PIPE_FORMAT_B10G10R10A2_USCALED] = VK_FORMAT_A2R10G10B10_USCALED_PACK32,
- [PIPE_FORMAT_B10G10R10A2_SSCALED] = VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
- [PIPE_FORMAT_R10G10B10A2_UINT] = VK_FORMAT_A2B10G10R10_UINT_PACK32,
- [PIPE_FORMAT_B10G10R10A2_UINT] = VK_FORMAT_A2R10G10B10_UINT_PACK32,
- [PIPE_FORMAT_B10G10R10A2_SINT] = VK_FORMAT_A2R10G10B10_SINT_PACK32,
-
- // depth/stencil formats
- [PIPE_FORMAT_Z32_FLOAT] = VK_FORMAT_D32_SFLOAT,
- [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = VK_FORMAT_D32_SFLOAT_S8_UINT,
- [PIPE_FORMAT_Z16_UNORM] = VK_FORMAT_D16_UNORM,
- [PIPE_FORMAT_Z16_UNORM_S8_UINT] = VK_FORMAT_D16_UNORM_S8_UINT,
- [PIPE_FORMAT_Z24X8_UNORM] = VK_FORMAT_X8_D24_UNORM_PACK32,
- [PIPE_FORMAT_Z24_UNORM_S8_UINT] = VK_FORMAT_D24_UNORM_S8_UINT,
- [PIPE_FORMAT_S8_UINT] = VK_FORMAT_S8_UINT,
-
- // compressed formats
- [PIPE_FORMAT_DXT1_RGB] = VK_FORMAT_BC1_RGB_UNORM_BLOCK,
- [PIPE_FORMAT_DXT1_RGBA] = VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
- [PIPE_FORMAT_DXT3_RGBA] = VK_FORMAT_BC2_UNORM_BLOCK,
- [PIPE_FORMAT_DXT5_RGBA] = VK_FORMAT_BC3_UNORM_BLOCK,
- [PIPE_FORMAT_DXT1_SRGB] = VK_FORMAT_BC1_RGB_SRGB_BLOCK,
- [PIPE_FORMAT_DXT1_SRGBA] = VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
- [PIPE_FORMAT_DXT3_SRGBA] = VK_FORMAT_BC2_SRGB_BLOCK,
- [PIPE_FORMAT_DXT5_SRGBA] = VK_FORMAT_BC3_SRGB_BLOCK,
-
- [PIPE_FORMAT_RGTC1_UNORM] = VK_FORMAT_BC4_UNORM_BLOCK,
- [PIPE_FORMAT_RGTC1_SNORM] = VK_FORMAT_BC4_SNORM_BLOCK,
- [PIPE_FORMAT_RGTC2_UNORM] = VK_FORMAT_BC5_UNORM_BLOCK,
- [PIPE_FORMAT_RGTC2_SNORM] = VK_FORMAT_BC5_SNORM_BLOCK,
- [PIPE_FORMAT_BPTC_RGBA_UNORM] = VK_FORMAT_BC7_UNORM_BLOCK,
- [PIPE_FORMAT_BPTC_SRGBA] = VK_FORMAT_BC7_SRGB_BLOCK,
- [PIPE_FORMAT_BPTC_RGB_FLOAT] = VK_FORMAT_BC6H_SFLOAT_BLOCK,
- [PIPE_FORMAT_BPTC_RGB_UFLOAT] = VK_FORMAT_BC6H_UFLOAT_BLOCK,
-};
+#include "util/u_math.h"
enum pipe_format
zink_decompose_vertex_format(enum pipe_format format)
@@ -195,12 +49,121 @@ zink_decompose_vertex_format(enum pipe_format format)
return new_format;
}
-VkFormat
-zink_pipe_format_to_vk_format(enum pipe_format format)
+bool
+zink_format_is_red_alpha(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R4A4_UNORM:
+ case PIPE_FORMAT_R8A8_SINT:
+ case PIPE_FORMAT_R8A8_SNORM:
+ case PIPE_FORMAT_R8A8_UINT:
+ case PIPE_FORMAT_R8A8_UNORM:
+ case PIPE_FORMAT_R16A16_SINT:
+ case PIPE_FORMAT_R16A16_SNORM:
+ case PIPE_FORMAT_R16A16_UINT:
+ case PIPE_FORMAT_R16A16_UNORM:
+ case PIPE_FORMAT_R16A16_FLOAT:
+ case PIPE_FORMAT_R32A32_SINT:
+ case PIPE_FORMAT_R32A32_UINT:
+ case PIPE_FORMAT_R32A32_FLOAT:
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+bool
+zink_format_is_emulated_alpha(enum pipe_format format)
+{
+ return util_format_is_alpha(format) ||
+ util_format_is_luminance(format) ||
+ util_format_is_luminance_alpha(format) ||
+ zink_format_is_red_alpha(format);
+}
+
+static enum pipe_format
+emulate_alpha(enum pipe_format format)
{
- return formats[format];
+ if (format == PIPE_FORMAT_A8_UNORM)
+ return PIPE_FORMAT_R8_UNORM;
+ if (format == PIPE_FORMAT_A8_UINT)
+ return PIPE_FORMAT_R8_UINT;
+ if (format == PIPE_FORMAT_A8_SNORM)
+ return PIPE_FORMAT_R8_SNORM;
+ if (format == PIPE_FORMAT_A8_SINT)
+ return PIPE_FORMAT_R8_SINT;
+ if (format == PIPE_FORMAT_A16_UNORM)
+ return PIPE_FORMAT_R16_UNORM;
+ if (format == PIPE_FORMAT_A16_UINT)
+ return PIPE_FORMAT_R16_UINT;
+ if (format == PIPE_FORMAT_A16_SNORM)
+ return PIPE_FORMAT_R16_SNORM;
+ if (format == PIPE_FORMAT_A16_SINT)
+ return PIPE_FORMAT_R16_SINT;
+ if (format == PIPE_FORMAT_A16_FLOAT)
+ return PIPE_FORMAT_R16_FLOAT;
+ if (format == PIPE_FORMAT_A32_UINT)
+ return PIPE_FORMAT_R32_UINT;
+ if (format == PIPE_FORMAT_A32_SINT)
+ return PIPE_FORMAT_R32_SINT;
+ if (format == PIPE_FORMAT_A32_FLOAT)
+ return PIPE_FORMAT_R32_FLOAT;
+ return format;
}
+static enum pipe_format
+emulate_red_alpha(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R8A8_SINT:
+ return PIPE_FORMAT_R8G8_SINT;
+ case PIPE_FORMAT_R8A8_SNORM:
+ return PIPE_FORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8A8_UINT:
+ return PIPE_FORMAT_R8G8_UINT;
+ case PIPE_FORMAT_R8A8_UNORM:
+ return PIPE_FORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R16A16_SINT:
+ return PIPE_FORMAT_R16G16_SINT;
+ case PIPE_FORMAT_R16A16_SNORM:
+ return PIPE_FORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16A16_UINT:
+ return PIPE_FORMAT_R16G16_UINT;
+ case PIPE_FORMAT_R16A16_UNORM:
+ return PIPE_FORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16A16_FLOAT:
+ return PIPE_FORMAT_R16G16_FLOAT;
+ case PIPE_FORMAT_R32A32_SINT:
+ return PIPE_FORMAT_R32G32_SINT;
+ case PIPE_FORMAT_R32A32_UINT:
+ return PIPE_FORMAT_R32G32_UINT;
+ case PIPE_FORMAT_R32A32_FLOAT:
+ return PIPE_FORMAT_R32G32_FLOAT;
+ default: break;
+ }
+ return format;
+}
+
+enum pipe_format
+zink_format_get_emulated_alpha(enum pipe_format format)
+{
+ if (util_format_is_alpha(format))
+ return emulate_alpha(format);
+ if (util_format_is_luminance(format))
+ return util_format_luminance_to_red(format);
+ if (util_format_is_luminance_alpha(format)) {
+ if (util_format_is_srgb(format))
+ return format;
+ if (format == PIPE_FORMAT_LATC2_UNORM)
+ return PIPE_FORMAT_RGTC2_UNORM;
+ if (format == PIPE_FORMAT_LATC2_SNORM)
+ return PIPE_FORMAT_RGTC2_SNORM;
+
+ format = util_format_luminance_to_red(format);
+ }
+
+ return emulate_red_alpha(format);
+}
bool
zink_format_is_voidable_rgba_variant(enum pipe_format format)
@@ -210,7 +173,8 @@ zink_format_is_voidable_rgba_variant(enum pipe_format format)
if(desc->block.width != 1 ||
desc->block.height != 1 ||
- (desc->block.bits != 32 && desc->block.bits != 64))
+ (desc->block.bits != 32 && desc->block.bits != 64 &&
+ desc->block.bits != 128))
return false;
if (desc->nr_channels != 4)
@@ -224,3 +188,71 @@ zink_format_is_voidable_rgba_variant(enum pipe_format format)
return true;
}
+
+void
+zink_format_clamp_channel_color(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i)
+{
+ int non_void = util_format_get_first_non_void_channel(desc->format);
+ unsigned channel = desc->swizzle[i];
+
+ if (channel > PIPE_SWIZZLE_W || desc->channel[channel].type == UTIL_FORMAT_TYPE_VOID) {
+ if (non_void != -1) {
+ if (desc->channel[non_void].type == UTIL_FORMAT_TYPE_FLOAT) {
+ dst->f[i] = uif(UINT32_MAX);
+ } else {
+ if (desc->channel[non_void].normalized)
+ dst->f[i] = 1.0;
+ else if (desc->channel[non_void].type == UTIL_FORMAT_TYPE_SIGNED)
+ dst->i[i] = INT32_MAX;
+ else
+ dst->ui[i] = UINT32_MAX;
+ }
+ } else {
+ dst->ui[i] = src->ui[i];
+ }
+ return;
+ }
+
+ switch (desc->channel[channel].type) {
+ case UTIL_FORMAT_TYPE_VOID:
+ unreachable("handled above");
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (desc->channel[channel].normalized)
+ dst->i[i] = src->i[i];
+ else {
+ dst->i[i] = MAX2(src->i[i], -(1<<(desc->channel[channel].size - 1)));
+ dst->i[i] = MIN2(dst->i[i], (1 << (desc->channel[channel].size - 1)) - 1);
+ }
+ break;
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (desc->channel[channel].normalized)
+ dst->ui[i] = src->ui[i];
+ else
+ dst->ui[i] = MIN2(src->ui[i], BITFIELD_MASK(desc->channel[channel].size));
+ break;
+ case UTIL_FORMAT_TYPE_FIXED:
+ case UTIL_FORMAT_TYPE_FLOAT:
+ dst->ui[i] = src->ui[i];
+ break;
+ }
+}
+
+void
+zink_format_clamp_channel_srgb(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i)
+{
+ unsigned channel = desc->swizzle[i];
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
+ channel <= PIPE_SWIZZLE_W) {
+ switch (desc->channel[channel].type) {
+ case UTIL_FORMAT_TYPE_SIGNED:
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ dst->f[i] = CLAMP(src->f[i], 0.0, 1.0);
+ return;
+ default:
+ break;
+ }
+ }
+
+ dst->ui[i] = src->ui[i];
+}
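
The emulated-alpha helpers reduce alpha-only and luminance formats to the red/red-green formats Vulkan actually exposes; the caller is then responsible for swizzling the data back into place. A small usage illustration (hypothetical function; the actual swizzle setup lives in the surface/sampler-view code, not in this file):

   static void
   emulated_alpha_example(void)
   {
      enum pipe_format fmt = PIPE_FORMAT_A8_UNORM;
      assert(zink_format_is_emulated_alpha(fmt));

      /* the data lands in the red channel of an R8 image... */
      enum pipe_format hw_fmt = zink_format_get_emulated_alpha(fmt);
      assert(hw_fmt == PIPE_FORMAT_R8_UNORM);
      /* ...so a view swizzle along the lines of (ZERO, ZERO, ZERO, R) is needed
       * for shaders to read it back as alpha (assumption: the exact swizzle
       * handling is elsewhere in the driver). */
   }
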
diff --git a/src/gallium/drivers/zink/zink_format.h b/src/gallium/drivers/zink/zink_format.h
index 3324265177d..171c79360ad 100644
--- a/src/gallium/drivers/zink/zink_format.h
+++ b/src/gallium/drivers/zink/zink_format.h
@@ -24,17 +24,39 @@
#ifndef ZINK_FORMAT_H
#define ZINK_FORMAT_H
-#include "pipe/p_format.h"
+#include "util/format/u_formats.h"
+#include "util/format/u_format.h"
#include <stdbool.h>
-#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_core.h>
+
+union pipe_color_union;
enum pipe_format
zink_decompose_vertex_format(enum pipe_format format);
-VkFormat
-zink_pipe_format_to_vk_format(enum pipe_format format);
-
bool
zink_format_is_voidable_rgba_variant(enum pipe_format format);
+bool
+zink_format_is_red_alpha(enum pipe_format format);
+bool
+zink_format_is_emulated_alpha(enum pipe_format format);
+enum pipe_format
+zink_format_get_emulated_alpha(enum pipe_format format);
+void
+zink_format_clamp_channel_color(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i);
+void
+zink_format_clamp_channel_srgb(const struct util_format_description *desc, union pipe_color_union *dst, const union pipe_color_union *src, unsigned i);
+
+static inline bool
+zink_format_needs_mutable(enum pipe_format a, enum pipe_format b)
+{
+ if (a == b)
+ return false;
+ if (util_format_is_srgb(a))
+ return util_format_linear(a) != b;
+ if (util_format_is_srgb(b))
+ return util_format_linear(b) != a;
+ return true;
+}
#endif
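
zink_format_needs_mutable() treats a format as compatible with itself and with its sRGB/linear counterpart, and reports every other pairing as needing a mutable-format image. A few illustrative checks (not part of the patch):

   assert(!zink_format_needs_mutable(PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM));
   assert(!zink_format_needs_mutable(PIPE_FORMAT_R8G8B8A8_SRGB,  PIPE_FORMAT_R8G8B8A8_UNORM));
   assert(zink_format_needs_mutable(PIPE_FORMAT_R8G8B8A8_UNORM,  PIPE_FORMAT_R32_UINT));
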
diff --git a/src/gallium/drivers/zink/zink_format_test.c b/src/gallium/drivers/zink/zink_format_test.c
index 502a2cbb661..3ff587ba346 100644
--- a/src/gallium/drivers/zink/zink_format_test.c
+++ b/src/gallium/drivers/zink/zink_format_test.c
@@ -7,13 +7,18 @@ main(int argc, char *argv[])
int ret = 0;
for (int i = 0; i < PIPE_FORMAT_COUNT; ++i) {
enum pipe_format pipe_fmt = i;
- VkFormat vk_fmt = zink_pipe_format_to_vk_format(i);
+ VkFormat vk_fmt = vk_format_from_pipe_format(i);
/* skip unsupported formats */
if (vk_fmt == VK_FORMAT_UNDEFINED)
continue;
enum pipe_format roundtrip = vk_format_to_pipe_format(vk_fmt);
+
+ /* This one gets aliased to ETC2 rather than round tripping. */
+ if (pipe_fmt == PIPE_FORMAT_ETC1_RGB8 && roundtrip == PIPE_FORMAT_ETC2_RGB8)
+ continue;
+
if (roundtrip != pipe_fmt) {
fprintf(stderr, "Format does not roundtrip\n"
"\tgot: %s\n"
diff --git a/src/gallium/drivers/zink/zink_framebuffer.c b/src/gallium/drivers/zink/zink_framebuffer.c
index ef785c55319..991bd427bcc 100644
--- a/src/gallium/drivers/zink/zink_framebuffer.c
+++ b/src/gallium/drivers/zink/zink_framebuffer.c
@@ -37,7 +37,7 @@ zink_destroy_framebuffer(struct zink_screen *screen,
struct zink_framebuffer *fb)
{
hash_table_foreach(&fb->objects, he) {
-#if defined(_WIN64) || defined(__x86_64__)
+#if VK_USE_64_BIT_PTR_DEFINES
VKSCR(DestroyFramebuffer)(screen->dev, he->data, NULL);
#else
VkFramebuffer *ptr = he->data;
@@ -49,7 +49,7 @@ zink_destroy_framebuffer(struct zink_screen *screen,
}
void
-zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp)
+zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp)
{
VkFramebuffer ret;
@@ -60,7 +60,7 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf
struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&fb->objects, hash, rp);
if (he) {
-#if defined(_WIN64) || defined(__x86_64__)
+#if VK_USE_64_BIT_PTR_DEFINES
ret = (VkFramebuffer)he->data;
#else
VkFramebuffer *ptr = he->data;
@@ -69,6 +69,8 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf
goto out;
}
+ assert(rp->state.num_cbufs + rp->state.have_zsbuf + rp->state.num_cresolves + rp->state.num_zsresolves == fb->state.num_attachments);
+
VkFramebufferCreateInfo fci;
fci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
fci.flags = VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
@@ -88,7 +90,7 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf
if (VKSCR(CreateFramebuffer)(screen->dev, &fci, NULL, &ret) != VK_SUCCESS)
return;
-#if defined(_WIN64) || defined(__x86_64__)
+#if VK_USE_64_BIT_PTR_DEFINES
_mesa_hash_table_insert_pre_hashed(&fb->objects, hash, rp, ret);
#else
VkFramebuffer *ptr = ralloc(fb, VkFramebuffer);
@@ -110,8 +112,8 @@ populate_attachment_info(VkFramebufferAttachmentImageInfo *att, struct zink_surf
att->sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO;
att->pNext = NULL;
memcpy(&att->flags, &info->flags, offsetof(struct zink_surface_info, format));
- att->viewFormatCount = 1;
- att->pViewFormats = &info->format;
+ att->viewFormatCount = 1 + !!info->format[1];
+ att->pViewFormats = info->format;
}
static struct zink_framebuffer *
@@ -136,30 +138,53 @@ fail:
}
struct zink_framebuffer *
-zink_get_framebuffer_imageless(struct zink_context *ctx)
+zink_get_framebuffer(struct zink_context *ctx)
{
assert(zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer);
+ bool have_zsbuf = ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx);
struct zink_framebuffer_state state;
+ state.num_attachments = ctx->fb_state.nr_cbufs;
+
+ const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!have_zsbuf;
+ unsigned num_resolves = 0;
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
- if (!psurf)
- psurf = ctx->dummy_surface[util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples+1)];
+ if (!psurf) {
+ psurf = zink_get_dummy_pipe_surface(ctx, util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples+1));
+ }
struct zink_surface *surface = zink_csurface(psurf);
- memcpy(&state.infos[i], &surface->info, sizeof(surface->info));
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ if (transient) {
+ memcpy(&state.infos[i], &transient->info, sizeof(transient->info));
+ memcpy(&state.infos[cresolve_offset + i], &surface->info, sizeof(surface->info));
+ num_resolves++;
+ } else {
+ memcpy(&state.infos[i], &surface->info, sizeof(surface->info));
+ }
}
- state.num_attachments = ctx->fb_state.nr_cbufs;
- if (ctx->fb_state.zsbuf) {
+ const unsigned zsresolve_offset = cresolve_offset + num_resolves;
+ if (have_zsbuf) {
struct pipe_surface *psurf = ctx->fb_state.zsbuf;
struct zink_surface *surface = zink_csurface(psurf);
- memcpy(&state.infos[state.num_attachments], &surface->info, sizeof(surface->info));
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ if (transient) {
+ memcpy(&state.infos[state.num_attachments], &transient->info, sizeof(transient->info));
+ memcpy(&state.infos[zsresolve_offset], &surface->info, sizeof(surface->info));
+ num_resolves++;
+ } else {
+ memcpy(&state.infos[state.num_attachments], &surface->info, sizeof(surface->info));
+ }
state.num_attachments++;
}
+ /* avoid bitfield explosion */
+ assert(state.num_attachments + num_resolves < 16);
+ state.num_attachments += num_resolves;
state.width = MAX2(ctx->fb_state.width, 1);
state.height = MAX2(ctx->fb_state.height, 1);
- state.layers = MAX2(util_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1;
+ state.layers = MAX2(zink_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1;
state.samples = ctx->fb_state.samples - 1;
struct zink_framebuffer *fb;
@@ -174,140 +199,42 @@ zink_get_framebuffer_imageless(struct zink_context *ctx)
}
void
-zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp)
-{
- VkFramebuffer ret;
-
- if (fb->rp == rp)
- return;
-
- uint32_t hash = _mesa_hash_pointer(rp);
-
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&fb->objects, hash, rp);
- if (he) {
-#if defined(_WIN64) || defined(__x86_64__)
- ret = (VkFramebuffer)he->data;
-#else
- VkFramebuffer *ptr = he->data;
- ret = *ptr;
-#endif
- goto out;
- }
-
- VkFramebufferCreateInfo fci = {0};
- fci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
- fci.renderPass = rp->render_pass;
- fci.attachmentCount = fb->state.num_attachments;
- fci.pAttachments = fb->state.attachments;
- fci.width = fb->state.width;
- fci.height = fb->state.height;
- fci.layers = fb->state.layers + 1;
-
- if (VKSCR(CreateFramebuffer)(screen->dev, &fci, NULL, &ret) != VK_SUCCESS)
- return;
-#if defined(_WIN64) || defined(__x86_64__)
- _mesa_hash_table_insert_pre_hashed(&fb->objects, hash, rp, ret);
-#else
- VkFramebuffer *ptr = ralloc(fb, VkFramebuffer);
- if (!ptr) {
- VKSCR(DestroyFramebuffer)(screen->dev, ret, NULL);
- return;
- }
- *ptr = ret;
- _mesa_hash_table_insert_pre_hashed(&fb->objects, hash, rp, ptr);
-#endif
-out:
- fb->rp = rp;
- fb->fb = ret;
-}
-
-static struct zink_framebuffer *
-create_framebuffer(struct zink_context *ctx,
- struct zink_framebuffer_state *state,
- struct pipe_surface **attachments)
+debug_describe_zink_framebuffer(char* buf, const struct zink_framebuffer *ptr)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_framebuffer *fb = rzalloc(NULL, struct zink_framebuffer);
- if (!fb)
- return NULL;
-
- unsigned num_attachments = 0;
- for (int i = 0; i < state->num_attachments; i++) {
- struct zink_surface *surf;
- if (state->attachments[i]) {
- surf = zink_csurface(attachments[i]);
- /* no ref! */
- fb->surfaces[i] = attachments[i];
- num_attachments++;
- util_dynarray_append(&surf->framebuffer_refs, struct zink_framebuffer*, fb);
- } else {
- surf = zink_csurface(ctx->dummy_surface[util_logbase2_ceil(state->samples+1)]);
- state->attachments[i] = surf->image_view;
- }
- }
- pipe_reference_init(&fb->reference, 1 + num_attachments);
-
- if (!_mesa_hash_table_init(&fb->objects, fb, _mesa_hash_pointer, _mesa_key_pointer_equal))
- goto fail;
- memcpy(&fb->state, state, sizeof(struct zink_framebuffer_state));
-
- return fb;
-fail:
- zink_destroy_framebuffer(screen, fb);
- return NULL;
+ sprintf(buf, "zink_framebuffer");
}
void
-debug_describe_zink_framebuffer(char* buf, const struct zink_framebuffer *ptr)
+zink_update_framebuffer_state(struct zink_context *ctx)
{
- sprintf(buf, "zink_framebuffer");
+ /* get_framebuffer adds a ref if the fb is reused or created;
+ * always do get_framebuffer first to avoid deleting the same fb
+ * we're about to use
+ */
+ struct zink_framebuffer *fb = zink_get_framebuffer(ctx);
+ ctx->fb_changed |= ctx->framebuffer != fb;
+ ctx->framebuffer = fb;
}
-struct zink_framebuffer *
-zink_get_framebuffer(struct zink_context *ctx)
+/* same as u_framebuffer_get_num_layers, but clamp to lowest layer count */
+unsigned
+zink_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
-
- assert(!screen->info.have_KHR_imageless_framebuffer);
-
- struct pipe_surface *attachments[PIPE_MAX_COLOR_BUFS + 1] = {0};
-
- struct zink_framebuffer_state state = {0};
- for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
- struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
- state.attachments[i] = psurf ? zink_csurface(psurf)->image_view : VK_NULL_HANDLE;
- attachments[i] = psurf;
- }
-
- state.num_attachments = ctx->fb_state.nr_cbufs;
- if (ctx->fb_state.zsbuf) {
- struct pipe_surface *psurf = ctx->fb_state.zsbuf;
- state.attachments[state.num_attachments] = psurf ? zink_csurface(psurf)->image_view : VK_NULL_HANDLE;
- attachments[state.num_attachments++] = psurf;
+ unsigned i, num_layers = UINT32_MAX;
+ if (!(fb->nr_cbufs || fb->zsbuf))
+ return MAX2(fb->layers, 1);
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if (fb->cbufs[i]) {
+ unsigned num = fb->cbufs[i]->u.tex.last_layer -
+ fb->cbufs[i]->u.tex.first_layer + 1;
+ num_layers = MIN2(num_layers, num);
+ }
}
-
- state.width = MAX2(ctx->fb_state.width, 1);
- state.height = MAX2(ctx->fb_state.height, 1);
- state.layers = MAX2(util_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1;
- state.samples = ctx->fb_state.samples - 1;
-
- struct zink_framebuffer *fb;
- simple_mtx_lock(&screen->framebuffer_mtx);
- struct hash_entry *entry = _mesa_hash_table_search(&screen->framebuffer_cache, &state);
- if (entry) {
- fb = (void*)entry->data;
- struct zink_framebuffer *fb_ref = NULL;
- /* this gains 1 ref every time we reuse it */
- zink_framebuffer_reference(screen, &fb_ref, fb);
- } else {
- /* this adds 1 extra ref on creation because all newly-created framebuffers are
- * going to be bound; necessary to handle framebuffers which have no "real" attachments
- * and are only using null surfaces since the only ref they get is the extra one here
- */
- fb = create_framebuffer(ctx, &state, attachments);
- _mesa_hash_table_insert(&screen->framebuffer_cache, &fb->state, fb);
+ if (fb->zsbuf) {
+ unsigned num = fb->zsbuf->u.tex.last_layer -
+ fb->zsbuf->u.tex.first_layer + 1;
+ num_layers = MIN2(num_layers, num);
}
- simple_mtx_unlock(&screen->framebuffer_mtx);
-
- return fb;
+ return MAX2(num_layers, 1);
}
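
With the legacy path removed, every framebuffer is imageless: creation records only per-attachment metadata (including both view formats when a transient/resolve attachment or a mutable sRGB pairing is involved), and the concrete VkImageViews are supplied when rendering begins. A condensed Vulkan-level sketch of the two halves — the sizes, formats, and standalone shape here are placeholders, not the driver's actual values:

   VkFramebufferAttachmentImageInfo att = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO,
      .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
      .width = 256, .height = 256, .layerCount = 1,
      .viewFormatCount = 2, /* e.g. UNORM + sRGB, as in populate_attachment_info() */
      .pViewFormats = (VkFormat[]){ VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB },
   };
   VkFramebufferAttachmentsCreateInfo atts = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO,
      .attachmentImageInfoCount = 1,
      .pAttachmentImageInfos = &att,
   };
   /* chained into VkFramebufferCreateInfo.pNext together with
    * VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT at creation time; later, at
    * vkCmdBeginRenderPass time, a VkRenderPassAttachmentBeginInfo on the
    * VkRenderPassBeginInfo.pNext chain supplies the real VkImageViews. */
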
diff --git a/src/gallium/drivers/zink/zink_framebuffer.h b/src/gallium/drivers/zink/zink_framebuffer.h
index 4fb8bf67b90..246fb486df7 100644
--- a/src/gallium/drivers/zink/zink_framebuffer.h
+++ b/src/gallium/drivers/zink/zink_framebuffer.h
@@ -24,47 +24,10 @@
#ifndef ZINK_FRAMEBUFFER_H
#define ZINK_FRAMEBUFFER_H
-#include "pipe/p_state.h"
-#include <vulkan/vulkan.h>
-
-#include "util/hash_table.h"
-#include "util/u_inlines.h"
-
-struct zink_context;
-struct zink_screen;
-struct zink_render_pass;
-
-struct zink_framebuffer_state {
- uint32_t width;
- uint16_t height;
- uint32_t layers:6;
- uint32_t samples:6;
- uint32_t num_attachments:4;
- union {
- VkImageView attachments[PIPE_MAX_COLOR_BUFS + 1];
- struct zink_surface_info infos[PIPE_MAX_COLOR_BUFS + 1];
- };
-};
-
-struct zink_framebuffer {
- struct pipe_reference reference;
-
- /* current objects */
- VkFramebuffer fb;
- struct zink_render_pass *rp;
-
- struct zink_framebuffer_state state;
- union {
- struct pipe_surface *surfaces[PIPE_MAX_COLOR_BUFS + 1];
- VkFramebufferAttachmentImageInfo infos[PIPE_MAX_COLOR_BUFS + 1];
- };
- struct hash_table objects;
-};
+#include "zink_types.h"
void
zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp);
-void
-zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuffer *fb, struct zink_render_pass *rp);
void
zink_destroy_framebuffer(struct zink_screen *screen,
@@ -91,8 +54,10 @@ zink_framebuffer_reference(struct zink_screen *screen,
}
struct zink_framebuffer *
-zink_get_framebuffer_imageless(struct zink_context *ctx);
-
-struct zink_framebuffer *
zink_get_framebuffer(struct zink_context *ctx);
+
+void
+zink_update_framebuffer_state(struct zink_context *ctx);
+unsigned
+zink_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb);
#endif
diff --git a/src/gallium/drivers/zink/zink_inlines.h b/src/gallium/drivers/zink/zink_inlines.h
index fe873828423..44d4474d99a 100644
--- a/src/gallium/drivers/zink/zink_inlines.h
+++ b/src/gallium/drivers/zink/zink_inlines.h
@@ -6,7 +6,9 @@ static inline void
zink_select_draw_vbo(struct zink_context *ctx)
{
ctx->base.draw_vbo = ctx->draw_vbo[ctx->pipeline_changed[0]];
+ ctx->base.draw_vertex_state = ctx->draw_state[ctx->pipeline_changed[0]];
assert(ctx->base.draw_vbo);
+ assert(ctx->base.draw_vertex_state);
}
static inline void
diff --git a/src/gallium/drivers/zink/zink_instance.py b/src/gallium/drivers/zink/zink_instance.py
index 831be322d7b..b9c3c5a6ae0 100644
--- a/src/gallium/drivers/zink/zink_instance.py
+++ b/src/gallium/drivers/zink/zink_instance.py
@@ -28,6 +28,7 @@ from os import path
from xml.etree import ElementTree
from zink_extensions import Extension,Layer,ExtensionRegistry,Version
import sys
+import platform
# constructor: Extension(name, conditions=[], nonstandard=False)
# The attributes:
@@ -37,11 +38,24 @@ import sys
EXTENSIONS = [
Extension("VK_EXT_debug_utils"),
Extension("VK_KHR_get_physical_device_properties2"),
+ Extension("VK_KHR_external_memory_capabilities"),
+ Extension("VK_KHR_external_semaphore_capabilities"),
Extension("VK_MVK_moltenvk",
nonstandard=True),
Extension("VK_KHR_surface"),
+ Extension("VK_EXT_headless_surface"),
+ Extension("VK_KHR_wayland_surface",
+ conditions=["!display_dev"]),
+ Extension("VK_KHR_xcb_surface",
+ conditions=["!display_dev"]),
+ Extension("VK_KHR_win32_surface"),
]
+if platform.system() == "Darwin":
+ EXTENSIONS += [
+ Extension("VK_KHR_portability_enumeration"),
+ ]
+
# constructor: Layer(name, conditions=[])
# - conditions: See documentation of EXTENSIONS.
LAYERS = [
@@ -60,15 +74,17 @@ header_code = """
#ifndef ZINK_INSTANCE_H
#define ZINK_INSTANCE_H
-#include "os/os_process.h"
+#include "util/u_process.h"
-#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_core.h>
-#if defined(__APPLE__)
+#ifdef __APPLE__
+#include "MoltenVK/mvk_vulkan.h"
// Source of MVK_VERSION
-#include "MoltenVK/vk_mvk_moltenvk.h"
-#endif
+#include "MoltenVK/mvk_config.h"
+#endif /* __APPLE__ */
+struct pipe_screen;
struct zink_screen;
struct zink_instance_info {
@@ -83,8 +99,8 @@ struct zink_instance_info {
%endfor
};
-VkInstance
-zink_create_instance(struct zink_instance_info *instance_info);
+bool
+zink_create_instance(struct zink_screen *screen, bool display_dev);
void
zink_verify_instance_extensions(struct zink_screen *screen);
@@ -103,16 +119,22 @@ void zink_stub_${cmd.lstrip("vk")}(void);
%endif
%endfor
+struct pipe_screen;
+struct pipe_resource;
+
#endif
"""
impl_code = """
+#include "vk_enum_to_str.h"
#include "zink_instance.h"
#include "zink_screen.h"
-VkInstance
-zink_create_instance(struct zink_instance_info *instance_info)
+bool
+zink_create_instance(struct zink_screen *screen, bool display_dev)
{
+ struct zink_instance_info *instance_info = &screen->instance_info;
+
/* reserve one slot for MoltenVK */
const char *layers[${len(layers) + 1}] = {0};
uint32_t num_layers = 0;
@@ -132,12 +154,24 @@ zink_create_instance(struct zink_instance_info *instance_info)
bool have_moltenvk_layer = false;
#endif
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, EnumerateInstanceExtensionProperties);
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, EnumerateInstanceLayerProperties);
+ if (!vk_EnumerateInstanceExtensionProperties ||
+ !vk_EnumerateInstanceLayerProperties)
+ return false;
+
// Build up the extensions from the reported ones but only for the unnamed layer
uint32_t extension_count = 0;
- if (vkEnumerateInstanceExtensionProperties(NULL, &extension_count, NULL) == VK_SUCCESS) {
+ if (vk_EnumerateInstanceExtensionProperties(NULL, &extension_count, NULL) != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumerateInstanceExtensionProperties failed");
+ } else {
VkExtensionProperties *extension_props = malloc(extension_count * sizeof(VkExtensionProperties));
if (extension_props) {
- if (vkEnumerateInstanceExtensionProperties(NULL, &extension_count, extension_props) == VK_SUCCESS) {
+ if (vk_EnumerateInstanceExtensionProperties(NULL, &extension_count, extension_props) != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumerateInstanceExtensionProperties failed");
+ } else {
for (uint32_t i = 0; i < extension_count; i++) {
%for ext in extensions:
if (!strcmp(extension_props[i].extensionName, ${ext.extension_name_literal()})) {
@@ -153,10 +187,16 @@ zink_create_instance(struct zink_instance_info *instance_info)
// Build up the layers from the reported ones
uint32_t layer_count = 0;
- if (vkEnumerateInstanceLayerProperties(&layer_count, NULL) == VK_SUCCESS) {
+ if (vk_EnumerateInstanceLayerProperties(&layer_count, NULL) != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumerateInstanceLayerProperties failed");
+ } else {
VkLayerProperties *layer_props = malloc(layer_count * sizeof(VkLayerProperties));
if (layer_props) {
- if (vkEnumerateInstanceLayerProperties(&layer_count, layer_props) == VK_SUCCESS) {
+ if (vk_EnumerateInstanceLayerProperties(&layer_count, layer_props) != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumerateInstanceLayerProperties failed");
+ } else {
for (uint32_t i = 0; i < layer_count; i++) {
%for layer in layers:
if (!strcmp(layer_props[i].layerName, ${layer.extension_name_literal()})) {
@@ -206,29 +246,36 @@ zink_create_instance(struct zink_instance_info *instance_info)
VkApplicationInfo ai = {0};
ai.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
- char proc_name[128];
- if (os_get_process_name(proc_name, ARRAY_SIZE(proc_name)))
- ai.pApplicationName = proc_name;
- else
- ai.pApplicationName = "unknown";
+ const char *proc_name = util_get_process_name();
+ if (!proc_name)
+ proc_name = "unknown";
+ ai.pApplicationName = proc_name;
ai.pEngineName = "mesa zink";
ai.apiVersion = instance_info->loader_version;
VkInstanceCreateInfo ici = {0};
ici.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+#ifdef __APPLE__
+ ici.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
+#endif
ici.pApplicationInfo = &ai;
ici.ppEnabledExtensionNames = extensions;
ici.enabledExtensionCount = num_extensions;
ici.ppEnabledLayerNames = layers;
ici.enabledLayerCount = num_layers;
- VkInstance instance = VK_NULL_HANDLE;
- VkResult err = vkCreateInstance(&ici, NULL, &instance);
- if (err != VK_SUCCESS)
- return VK_NULL_HANDLE;
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, CreateInstance);
+ assert(vk_CreateInstance);
+
+ VkResult err = vk_CreateInstance(&ici, NULL, &screen->instance);
+ if (err != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkCreateInstance failed (%s)", vk_Result_to_str(err));
+ return false;
+ }
- return instance;
+ return true;
}
void
@@ -236,6 +283,9 @@ zink_verify_instance_extensions(struct zink_screen *screen)
{
%for ext in extensions:
%if registry.in_registry(ext.name):
+%if ext.platform_guard:
+#ifdef ${ext.platform_guard}
+%endif
if (screen->instance_info.have_${ext.name_with_vendor()}) {
%for cmd in registry.get_registry_entry(ext.name).instance_commands:
if (!screen->vk.${cmd.lstrip("vk")}) {
@@ -257,6 +307,9 @@ zink_verify_instance_extensions(struct zink_screen *screen)
%endfor
}
%endif
+%if ext.platform_guard:
+#endif
+%endif
%endfor
}
@@ -273,12 +326,18 @@ zink_verify_instance_extensions(struct zink_screen *screen)
%else:
<% generated_funcs.add(cmd) %>
%endif
+%if ext.platform_guard:
+#ifdef ${ext.platform_guard}
+%endif
void
zink_stub_${cmd.lstrip("vk")}()
{
mesa_loge("ZINK: ${cmd} is not loaded properly!");
abort();
}
+%if ext.platform_guard:
+#endif
+%endif
%endfor
%endif
%endfor
@@ -335,16 +394,19 @@ if __name__ == "__main__":
if entry.promoted_in:
ext.core_since = Version((*entry.promoted_in, 0))
+ if entry.platform_guard:
+ ext.platform_guard = entry.platform_guard
+
if error_count > 0:
print("zink_instance.py: Found {} error(s) in total. Quitting.".format(error_count))
exit(1)
- with open(header_path, "w") as header_file:
+ with open(header_path, "w", encoding='utf-8') as header_file:
header = Template(header_code).render(extensions=extensions, layers=layers, registry=registry).strip()
header = replace_code(header, replacement)
print(header, file=header_file)
- with open(impl_path, "w") as impl_file:
+ with open(impl_path, "w", encoding='utf-8') as impl_file:
impl = Template(impl_code).render(extensions=extensions, layers=layers, registry=registry).strip()
impl = replace_code(impl, replacement)
print(impl, file=impl_file)
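
The generated code no longer calls the statically linked vkCreateInstance/vkEnumerate* symbols; every pre-instance entry point is resolved through GET_PROC_ADDR_INSTANCE_LOCAL so the driver keeps working when the Vulkan loader is found at runtime. Roughly what that resolution amounts to for the NULL-instance case — the actual macro definition is in the zink headers and presumably routes through a loader pointer stored on the screen rather than the symbol shown here:

   PFN_vkEnumerateInstanceExtensionProperties vk_EnumerateInstanceExtensionProperties =
      (PFN_vkEnumerateInstanceExtensionProperties)
         vkGetInstanceProcAddr(NULL, "vkEnumerateInstanceExtensionProperties");
   if (!vk_EnumerateInstanceExtensionProperties)
      return false; /* loader missing or too old */
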
diff --git a/src/gallium/drivers/zink/zink_kopper.c b/src/gallium/drivers/zink/zink_kopper.c
new file mode 100644
index 00000000000..d508e2e06d7
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_kopper.c
@@ -0,0 +1,1162 @@
+/*
+ * Copyright 2020 Red Hat, Inc.
+ * Copyright © 2021 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/detect_os.h"
+#include "driver_trace/tr_screen.h"
+
+#include "zink_context.h"
+#include "zink_screen.h"
+#include "zink_surface.h"
+#include "zink_resource.h"
+#include "zink_kopper.h"
+
+static void
+zink_kopper_set_present_mode_for_interval(struct kopper_displaytarget *cdt, int interval)
+{
+#if DETECT_OS_WINDOWS
+ // not hooked up yet so let's not sabotage benchmarks
+ cdt->present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+#else
+ assert(interval >= 0); /* TODO: VK_PRESENT_MODE_FIFO_RELAXED_KHR */
+ if (interval == 0) {
+ if (cdt->present_modes & BITFIELD_BIT(VK_PRESENT_MODE_IMMEDIATE_KHR))
+ cdt->present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+ else
+ cdt->present_mode = VK_PRESENT_MODE_MAILBOX_KHR;
+ } else if (interval > 0) {
+ cdt->present_mode = VK_PRESENT_MODE_FIFO_KHR;
+ }
+ assert(cdt->present_modes & BITFIELD_BIT(cdt->present_mode));
+#endif
+}
+
+static void
+init_dt_type(struct kopper_displaytarget *cdt)
+{
+ VkStructureType type = cdt->info.bos.sType;
+ switch (type) {
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ case VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR:
+ cdt->type = KOPPER_X11;
+ break;
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ case VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR:
+ cdt->type = KOPPER_WAYLAND;
+ break;
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ case VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR:
+ cdt->type = KOPPER_WIN32;
+ break;
+#endif
+ default:
+ unreachable("unsupported!");
+ }
+}
+
+static VkSurfaceKHR
+kopper_CreateSurface(struct zink_screen *screen, struct kopper_displaytarget *cdt)
+{
+ VkSurfaceKHR surface = VK_NULL_HANDLE;
+ VkResult error = VK_SUCCESS;
+
+ init_dt_type(cdt);
+ VkStructureType type = cdt->info.bos.sType;
+ switch (type) {
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ case VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR: {
+#ifdef GLX_USE_APPLE
+ error = VK_INCOMPLETE;
+#else
+ VkXcbSurfaceCreateInfoKHR *xcb = (VkXcbSurfaceCreateInfoKHR *)&cdt->info.bos;
+ error = VKSCR(CreateXcbSurfaceKHR)(screen->instance, xcb, NULL, &surface);
+#endif
+ break;
+ }
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ case VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR: {
+ VkWaylandSurfaceCreateInfoKHR *wlsci = (VkWaylandSurfaceCreateInfoKHR *)&cdt->info.bos;
+ error = VKSCR(CreateWaylandSurfaceKHR)(screen->instance, wlsci, NULL, &surface);
+ break;
+ }
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ case VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR: {
+ VkWin32SurfaceCreateInfoKHR *win32 = (VkWin32SurfaceCreateInfoKHR *)&cdt->info.bos;
+ error = VKSCR(CreateWin32SurfaceKHR)(screen->instance, win32, NULL, &surface);
+ break;
+ }
+#endif
+ default:
+ unreachable("unsupported!");
+ }
+ if (error != VK_SUCCESS) {
+ return VK_NULL_HANDLE;
+ }
+
+ VkBool32 supported;
+ error = VKSCR(GetPhysicalDeviceSurfaceSupportKHR)(screen->pdev, screen->gfx_queue, surface, &supported);
+ if (!zink_screen_handle_vkresult(screen, error) || !supported)
+ goto fail;
+
+ unsigned count = 10;
+ VkPresentModeKHR modes[10];
+ error = VKSCR(GetPhysicalDeviceSurfacePresentModesKHR)(screen->pdev, surface, &count, modes);
+ if (!zink_screen_handle_vkresult(screen, error))
+ goto fail;
+
+ for (unsigned i = 0; i < count; i++) {
+ /* VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR and VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR
+ * are not handled
+ */
+ assert(modes[i] <= VK_PRESENT_MODE_FIFO_RELAXED_KHR);
+ if (modes[i] <= VK_PRESENT_MODE_FIFO_RELAXED_KHR)
+ cdt->present_modes |= BITFIELD_BIT(modes[i]);
+ }
+
+ zink_kopper_set_present_mode_for_interval(cdt, cdt->info.initial_swap_interval);
+
+ return surface;
+fail:
+ VKSCR(DestroySurfaceKHR)(screen->instance, surface, NULL);
+ return VK_NULL_HANDLE;
+}
+
+static void
+destroy_swapchain(struct zink_screen *screen, struct kopper_swapchain *cswap)
+{
+ if (!cswap)
+ return;
+ util_queue_fence_destroy(&cswap->present_fence);
+ for (unsigned i = 0; i < cswap->num_images; i++) {
+ simple_mtx_lock(&screen->semaphores_lock);
+ util_dynarray_append(&screen->semaphores, VkSemaphore, cswap->images[i].acquire);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ pipe_resource_reference(&cswap->images[i].readback, NULL);
+ }
+ free(cswap->images);
+ hash_table_foreach(cswap->presents, he) {
+ struct util_dynarray *arr = he->data;
+ simple_mtx_lock(&screen->semaphores_lock);
+ util_dynarray_append_dynarray(&screen->semaphores, arr);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ util_dynarray_fini(arr);
+ free(arr);
+ }
+ _mesa_hash_table_destroy(cswap->presents, NULL);
+ VKSCR(DestroySwapchainKHR)(screen->dev, cswap->swapchain, NULL);
+ free(cswap);
+}
+
+static void
+prune_old_swapchains(struct zink_screen *screen, struct kopper_displaytarget *cdt, bool wait)
+{
+ while (cdt->old_swapchain) {
+ struct kopper_swapchain *cswap = cdt->old_swapchain;
+ if (cswap->async_presents) {
+ if (wait)
+ continue;
+ return;
+ }
+ struct zink_batch_usage *u = cswap->batch_uses;
+ if (!zink_screen_usage_check_completion(screen, u)) {
+ /* these can't ever be pruned */
+ if (!wait || zink_batch_usage_is_unflushed(u))
+ return;
+
+ zink_screen_timeline_wait(screen, u->usage, UINT64_MAX);
+ cswap->batch_uses = NULL;
+ }
+ cdt->old_swapchain = cswap->next;
+ destroy_swapchain(screen, cswap);
+ }
+}
+
+static struct hash_entry *
+find_dt_entry(struct zink_screen *screen, const struct kopper_displaytarget *cdt)
+{
+ struct hash_entry *he = NULL;
+ switch (cdt->type) {
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ case KOPPER_X11: {
+ VkXcbSurfaceCreateInfoKHR *xcb = (VkXcbSurfaceCreateInfoKHR *)&cdt->info.bos;
+ he = _mesa_hash_table_search_pre_hashed(&screen->dts, xcb->window, (void*)(uintptr_t)xcb->window);
+ break;
+ }
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ case KOPPER_WAYLAND: {
+ VkWaylandSurfaceCreateInfoKHR *wlsci = (VkWaylandSurfaceCreateInfoKHR *)&cdt->info.bos;
+ he = _mesa_hash_table_search(&screen->dts, wlsci->surface);
+ break;
+ }
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ case KOPPER_WIN32: {
+ VkWin32SurfaceCreateInfoKHR *win32 = (VkWin32SurfaceCreateInfoKHR *)&cdt->info.bos;
+ he = _mesa_hash_table_search(&screen->dts, win32->hwnd);
+ break;
+ }
+#endif
+ default:
+ unreachable("unsupported!");
+ }
+ return he;
+}
+
+void
+zink_kopper_deinit_displaytarget(struct zink_screen *screen, struct kopper_displaytarget *cdt)
+{
+ if (!cdt->surface)
+ return;
+ simple_mtx_lock(&screen->dt_lock);
+ struct hash_entry *he = find_dt_entry(screen, cdt);
+ assert(he);
+ /* this deinits the registered entry, which should always be the "right" entry */
+ cdt = he->data;
+ _mesa_hash_table_remove(&screen->dts, he);
+ simple_mtx_unlock(&screen->dt_lock);
+ destroy_swapchain(screen, cdt->swapchain);
+ prune_old_swapchains(screen, cdt, true);
+ VKSCR(DestroySurfaceKHR)(screen->instance, cdt->surface, NULL);
+ cdt->swapchain = cdt->old_swapchain = NULL;
+ cdt->surface = VK_NULL_HANDLE;
+}
+
+static struct kopper_swapchain *
+kopper_CreateSwapchain(struct zink_screen *screen, struct kopper_displaytarget *cdt, unsigned w, unsigned h, VkResult *result)
+{
+ VkResult error = VK_SUCCESS;
+ struct kopper_swapchain *cswap = CALLOC_STRUCT(kopper_swapchain);
+ if (!cswap) {
+ *result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return NULL;
+ }
+ cswap->last_present_prune = 1;
+ util_queue_fence_init(&cswap->present_fence);
+
+ bool has_alpha = cdt->info.has_alpha && (cdt->caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR);
+ if (cdt->swapchain) {
+ cswap->scci = cdt->swapchain->scci;
+ /* avoid UAF if async present needs to-be-retired swapchain */
+ if (cdt->type == KOPPER_WAYLAND && cdt->swapchain->swapchain)
+ util_queue_fence_wait(&cdt->swapchain->present_fence);
+ cswap->scci.oldSwapchain = cdt->swapchain->swapchain;
+ } else {
+ cswap->scci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
+ cswap->scci.pNext = NULL;
+ cswap->scci.surface = cdt->surface;
+ cswap->scci.flags = zink_kopper_has_srgb(cdt) ? VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR : 0;
+ cswap->scci.imageFormat = cdt->formats[0];
+ cswap->scci.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
+ // TODO: This is where you'd hook up stereo
+ cswap->scci.imageArrayLayers = 1;
+ cswap->scci.imageUsage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
+ if (cdt->caps.supportedUsageFlags & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
+ cswap->scci.imageUsage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+ cswap->scci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
+ cswap->scci.queueFamilyIndexCount = 0;
+ cswap->scci.pQueueFamilyIndices = NULL;
+ cswap->scci.compositeAlpha = has_alpha ? VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR : VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+ cswap->scci.clipped = VK_TRUE;
+ }
+ cswap->scci.presentMode = cdt->present_mode;
+ cswap->scci.minImageCount = cdt->caps.minImageCount;
+ cswap->scci.preTransform = cdt->caps.currentTransform;
+ if (cdt->formats[1])
+ cswap->scci.pNext = &cdt->format_list;
+
+ /* different display platforms have, by vulkan spec, different sizing methodologies */
+ switch (cdt->type) {
+ case KOPPER_X11:
+ case KOPPER_WIN32:
+ /* With Xcb, minImageExtent, maxImageExtent, and currentExtent must always equal the window size.
+ * ...
+ * Due to above restrictions, it is only possible to create a new swapchain on this
+ * platform with imageExtent being equal to the current size of the window.
+ */
+ cswap->scci.imageExtent.width = cdt->caps.currentExtent.width;
+ cswap->scci.imageExtent.height = cdt->caps.currentExtent.height;
+ break;
+ case KOPPER_WAYLAND:
+ /* On Wayland, currentExtent is the special value (0xFFFFFFFF, 0xFFFFFFFF), indicating that the
+ * surface size will be determined by the extent of a swapchain targeting the surface. Whatever the
+ * application sets a swapchain’s imageExtent to will be the size of the window, after the first image is
+ * presented.
+ */
+ cswap->scci.imageExtent.width = w;
+ cswap->scci.imageExtent.height = h;
+ break;
+ default:
+ unreachable("unknown display platform");
+ }
+
+ error = VKSCR(CreateSwapchainKHR)(screen->dev, &cswap->scci, NULL,
+ &cswap->swapchain);
+ if (error == VK_ERROR_NATIVE_WINDOW_IN_USE_KHR) {
+ if (util_queue_is_initialized(&screen->flush_queue))
+ util_queue_finish(&screen->flush_queue);
+ simple_mtx_lock(&screen->queue_lock);
+ VkResult wait_result = VKSCR(QueueWaitIdle)(screen->queue);
+ simple_mtx_unlock(&screen->queue_lock);
+ if (wait_result != VK_SUCCESS)
+ mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(wait_result));
+ error = VKSCR(CreateSwapchainKHR)(screen->dev, &cswap->scci, NULL,
+ &cswap->swapchain);
+ }
+ if (error != VK_SUCCESS) {
+ mesa_loge("CreateSwapchainKHR failed with %s\n", vk_Result_to_str(error));
+ free(cswap);
+ *result = error;
+ return NULL;
+ }
+ cswap->last_present = UINT32_MAX;
+
+ *result = VK_SUCCESS;
+ return cswap;
+}
+
+static VkResult
+kopper_GetSwapchainImages(struct zink_screen *screen, struct kopper_swapchain *cswap)
+{
+ VkResult error = VKSCR(GetSwapchainImagesKHR)(screen->dev, cswap->swapchain, &cswap->num_images, NULL);
+ zink_screen_handle_vkresult(screen, error);
+ if (error != VK_SUCCESS)
+ return error;
+ cswap->images = calloc(cswap->num_images, sizeof(struct kopper_swapchain_image));
+ if (!cswap->images) {
+ mesa_loge("ZINK: failed to allocate cswap->images!");
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+ cswap->presents = _mesa_hash_table_create_u32_keys(NULL);
+ VkImage images[32];
+ error = VKSCR(GetSwapchainImagesKHR)(screen->dev, cswap->swapchain, &cswap->num_images, images);
+ assert(cswap->num_images <= ARRAY_SIZE(images));
+ if (zink_screen_handle_vkresult(screen, error)) {
+ for (unsigned i = 0; i < cswap->num_images; i++)
+ cswap->images[i].image = images[i];
+ }
+ cswap->max_acquires = cswap->num_images - cswap->scci.minImageCount + 1;
+ return error;
+}
+
+static VkResult
+update_caps(struct zink_screen *screen, struct kopper_displaytarget *cdt)
+{
+ VkResult error = VKSCR(GetPhysicalDeviceSurfaceCapabilitiesKHR)(screen->pdev, cdt->surface, &cdt->caps);
+ zink_screen_handle_vkresult(screen, error);
+ return error;
+}
+
+static VkResult
+update_swapchain(struct zink_screen *screen, struct kopper_displaytarget *cdt, unsigned w, unsigned h)
+{
+ VkResult error = update_caps(screen, cdt);
+ if (error != VK_SUCCESS)
+ return error;
+ struct kopper_swapchain *cswap = kopper_CreateSwapchain(screen, cdt, w, h, &error);
+ if (!cswap)
+ return error;
+ prune_old_swapchains(screen, cdt, false);
+ struct kopper_swapchain **pswap = &cdt->old_swapchain;
+ while (*pswap)
+ pswap = &(*pswap)->next;
+ *pswap = cdt->swapchain;
+ cdt->swapchain = cswap;
+
+ return kopper_GetSwapchainImages(screen, cdt->swapchain);
+}
+
+struct kopper_displaytarget *
+zink_kopper_displaytarget_create(struct zink_screen *screen, unsigned tex_usage,
+ enum pipe_format format, unsigned width,
+ unsigned height, unsigned alignment,
+ const void *loader_private, unsigned *stride)
+{
+ struct kopper_displaytarget *cdt;
+ const struct kopper_loader_info *info = loader_private;
+
+ {
+ struct kopper_displaytarget k;
+ struct hash_entry *he = NULL;
+ k.info = *info;
+ init_dt_type(&k);
+ simple_mtx_lock(&screen->dt_lock);
+ if (unlikely(!screen->dts.table)) {
+ switch (k.type) {
+ case KOPPER_X11:
+ _mesa_hash_table_init(&screen->dts, screen, NULL, _mesa_key_pointer_equal);
+ break;
+ case KOPPER_WAYLAND:
+ case KOPPER_WIN32:
+ _mesa_hash_table_init(&screen->dts, screen, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ break;
+ default:
+ unreachable("unknown kopper type");
+ }
+ } else {
+ he = find_dt_entry(screen, &k);
+ }
+ simple_mtx_unlock(&screen->dt_lock);
+ if (he) {
+ cdt = he->data;
+ p_atomic_inc(&cdt->refcount);
+ *stride = cdt->stride;
+ return cdt;
+ }
+ }
+
+ cdt = CALLOC_STRUCT(kopper_displaytarget);
+ if (!cdt)
+ return NULL;
+
+ cdt->refcount = 1;
+ cdt->loader_private = (void*)loader_private;
+ cdt->info = *info;
+
+ enum pipe_format srgb = PIPE_FORMAT_NONE;
+ if (screen->info.have_KHR_swapchain_mutable_format) {
+ srgb = util_format_is_srgb(format) ? util_format_linear(format) : util_format_srgb(format);
+ /* why do these helpers have different default return values? */
+ if (srgb == format)
+ srgb = PIPE_FORMAT_NONE;
+ }
+ cdt->formats[0] = zink_get_format(screen, format);
+ if (srgb) {
+ cdt->format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO;
+ cdt->format_list.pNext = NULL;
+ cdt->format_list.viewFormatCount = 2;
+ cdt->format_list.pViewFormats = cdt->formats;
+
+ cdt->formats[1] = zink_get_format(screen, srgb);
+ }
+
+ cdt->surface = kopper_CreateSurface(screen, cdt);
+ if (!cdt->surface)
+ goto out;
+
+ if (update_swapchain(screen, cdt, width, height) != VK_SUCCESS)
+ goto out;
+
+ simple_mtx_lock(&screen->dt_lock);
+ switch (cdt->type) {
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ case KOPPER_X11: {
+ VkXcbSurfaceCreateInfoKHR *xcb = (VkXcbSurfaceCreateInfoKHR *)&cdt->info.bos;
+ _mesa_hash_table_insert_pre_hashed(&screen->dts, xcb->window, (void*)(uintptr_t)xcb->window, cdt);
+ break;
+ }
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ case KOPPER_WAYLAND: {
+ VkWaylandSurfaceCreateInfoKHR *wlsci = (VkWaylandSurfaceCreateInfoKHR *)&cdt->info.bos;
+ _mesa_hash_table_insert(&screen->dts, wlsci->surface, cdt);
+ break;
+ }
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ case KOPPER_WIN32: {
+ VkWin32SurfaceCreateInfoKHR *win32 = (VkWin32SurfaceCreateInfoKHR *)&cdt->info.bos;
+ _mesa_hash_table_insert(&screen->dts, win32->hwnd, cdt);
+ break;
+ }
+#endif
+ default:
+ unreachable("unsupported!");
+ }
+ simple_mtx_unlock(&screen->dt_lock);
+
+ *stride = cdt->stride;
+ return cdt;
+
+//TODO: more thorough cleanup
+out:
+ FREE(cdt);
+ return NULL;
+}
+
+void
+zink_kopper_displaytarget_destroy(struct zink_screen *screen, struct kopper_displaytarget *cdt)
+{
+ if (!p_atomic_dec_zero(&cdt->refcount))
+ return;
+ zink_kopper_deinit_displaytarget(screen, cdt);
+ FREE(cdt);
+}
+
+static VkResult
+kopper_acquire(struct zink_screen *screen, struct zink_resource *res, uint64_t timeout)
+{
+ struct kopper_displaytarget *cdt = res->obj->dt;
+
+ /* if:
+ * - we don't need a new image
+ * - we have a swapchain image
+ * - that image is either acquired or acquiring
+ *
+ * then this is a no-op
+ */
+ if (!res->obj->new_dt && res->obj->dt_idx != UINT32_MAX &&
+ (cdt->swapchain->images[res->obj->dt_idx].acquire || cdt->swapchain->images[res->obj->dt_idx].acquired))
+ return VK_SUCCESS;
+ VkSemaphore acquire = VK_NULL_HANDLE;
+
+ while (true) {
+ if (res->obj->new_dt) {
+ VkResult error = update_swapchain(screen, cdt, res->base.b.width0, res->base.b.height0);
+ zink_screen_handle_vkresult(screen, error);
+ if (error != VK_SUCCESS)
+ return error;
+ res->obj->new_dt = false;
+ res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ res->obj->access = 0;
+ res->obj->access_stage = 0;
+ }
+ if (timeout == UINT64_MAX && util_queue_is_initialized(&screen->flush_queue) &&
+ p_atomic_read_relaxed(&cdt->swapchain->num_acquires) >= cdt->swapchain->max_acquires) {
+ util_queue_fence_wait(&cdt->swapchain->present_fence);
+      /* With a sequence of
+           glDrawBuffer(GL_FRONT_AND_BACK);
+           glClearBufferfv(GL_COLOR, 0, purple);
+           glReadBuffer(GL_FRONT);
+           glReadPixels(...);
+         kopper_present is never called, but glReadPixels flushes the
+         pipeline, and since we draw to both the front and the back buffer,
+         two swapchain images are acquired one after the other. This can
+         acquire too many images at once, and with "timeout == UINT64_MAX"
+         forward progress of vkAcquireNextImageKHR can no longer be
+         guaranteed, i.e. the call may block indefinitely;
+         VUID-vkAcquireNextImageKHR-surface-07783 is raised to warn
+         about exceeding the acquire limit.
+
+         So check whether the number of acquired images is still too
+         large after the fence was signalled, and if so clear the timeout.
+       */
+ if (p_atomic_read_relaxed(&cdt->swapchain->num_acquires) >= cdt->swapchain->max_acquires)
+ timeout = 0;
+ }
+ VkResult ret;
+ if (!acquire) {
+ acquire = zink_create_semaphore(screen);
+ assert(acquire);
+ if (!acquire)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+ ret = VKSCR(AcquireNextImageKHR)(screen->dev, cdt->swapchain->swapchain, timeout, acquire, VK_NULL_HANDLE, &res->obj->dt_idx);
+ if (ret != VK_SUCCESS && ret != VK_SUBOPTIMAL_KHR) {
+ if (ret == VK_ERROR_OUT_OF_DATE_KHR) {
+ res->obj->new_dt = true;
+ continue;
+ }
+ if (ret == VK_NOT_READY || ret == VK_TIMEOUT) {
+ if (timeout > 1000000)
+            unreachable("kopper_acquire: updated timeout after failure has become unreasonably large");
+ timeout += 4000;
+ continue;
+ }
+ VKSCR(DestroySemaphore)(screen->dev, acquire, NULL);
+ return ret;
+ }
+ break;
+ }
+
+ cdt->swapchain->images[res->obj->dt_idx].acquire = acquire;
+ if (cdt->swapchain->images[res->obj->dt_idx].readback)
+ zink_resource(cdt->swapchain->images[res->obj->dt_idx].readback)->valid = false;
+ res->obj->image = cdt->swapchain->images[res->obj->dt_idx].image;
+ if (!cdt->age_locked)
+ zink_kopper_update_last_written(res);
+ cdt->swapchain->images[res->obj->dt_idx].acquired = NULL;
+ if (!cdt->swapchain->images[res->obj->dt_idx].init) {
+ /* swapchain images are initially in the UNDEFINED layout */
+ res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ cdt->swapchain->images[res->obj->dt_idx].init = true;
+ }
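+   /* a blocking acquire counts toward the swapchain's acquire limit until the image is presented */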
+ if (timeout == UINT64_MAX) {
+ res->obj->indefinite_acquire = true;
+ p_atomic_inc(&cdt->swapchain->num_acquires);
+ }
+ cdt->swapchain->images[res->obj->dt_idx].dt_has_data = false;
+ return VK_SUCCESS;
+}
+
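+/* replace a dead swapchain's backing storage with a plain resource so rendering can continue without presentation */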
+static void
+kill_swapchain(struct zink_context *ctx, struct zink_resource *res)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ /* dead swapchain */
+ mesa_loge("zink: swapchain killed %p\n", res);
+ zink_batch_reference_resource(&ctx->batch, res);
+ struct pipe_resource *pres = screen->base.resource_create(&screen->base, &res->base.b);
+ zink_resource_object_reference(screen, &res->obj, zink_resource(pres)->obj);
+ res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ res->swapchain = false;
+ pipe_resource_reference(&pres, NULL);
+}
+
+static bool
+is_swapchain_kill(VkResult ret)
+{
+ return ret != VK_SUCCESS &&
+ ret != VK_TIMEOUT &&
+ ret != VK_NOT_READY &&
+ ret != VK_SUBOPTIMAL_KHR;
+}
+
+bool
+zink_kopper_acquire(struct zink_context *ctx, struct zink_resource *res, uint64_t timeout)
+{
+ assert(zink_is_swapchain(res));
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (!cdt)
+ /* dead swapchain */
+ return false;
+ if (cdt->is_kill) {
+ kill_swapchain(ctx, res);
+ return false;
+ }
+ const struct kopper_swapchain *cswap = cdt->swapchain;
+ res->obj->new_dt |= res->base.b.width0 != cswap->scci.imageExtent.width ||
+ res->base.b.height0 != cswap->scci.imageExtent.height;
+ VkResult ret = kopper_acquire(zink_screen(trace_screen_unwrap(ctx->base.screen)), res, timeout);
+ if (ret == VK_SUCCESS || ret == VK_SUBOPTIMAL_KHR) {
+ if (cswap != cdt->swapchain) {
+ ctx->swapchain_size = cdt->swapchain->scci.imageExtent;
+ res->base.b.width0 = ctx->swapchain_size.width;
+ res->base.b.height0 = ctx->swapchain_size.height;
+ }
+ } else if (is_swapchain_kill(ret)) {
+ kill_swapchain(ctx, res);
+ }
+ bool is_kill = is_swapchain_kill(ret);
+ zink_batch_usage_set(&cdt->swapchain->batch_uses, ctx->batch.state);
+ return !is_kill;
+}
+
+VkSemaphore
+zink_kopper_acquire_submit(struct zink_screen *screen, struct zink_resource *res)
+{
+ assert(res->obj->dt);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ assert(res->obj->dt_idx != UINT32_MAX);
+ if (cdt->swapchain->images[res->obj->dt_idx].dt_has_data)
+ return VK_NULL_HANDLE;
+ assert(res->obj->dt_idx != UINT32_MAX);
+ if (cdt->swapchain->images[res->obj->dt_idx].acquired) {
+ assert(!cdt->swapchain->images[res->obj->dt_idx].acquire);
+ return VK_NULL_HANDLE;
+ }
+ assert(cdt->swapchain->images[res->obj->dt_idx].acquire);
+ cdt->swapchain->images[res->obj->dt_idx].acquired = res;
+ /* this is now owned by the batch */
+ VkSemaphore acquire = cdt->swapchain->images[res->obj->dt_idx].acquire;
+ cdt->swapchain->images[res->obj->dt_idx].acquire = VK_NULL_HANDLE;
+ cdt->swapchain->images[res->obj->dt_idx].dt_has_data = true;
+ return acquire;
+}
+
+VkSemaphore
+zink_kopper_present(struct zink_screen *screen, struct zink_resource *res)
+{
+ assert(res->obj->dt);
+ assert(!res->obj->present);
+ assert(zink_kopper_acquired(res->obj->dt, res->obj->dt_idx));
+ res->obj->present = zink_create_semaphore(screen);
+ return res->obj->present;
+}
+
+static void
+kopper_present(void *data, void *gdata, int thread_idx)
+{
+ struct zink_kopper_present_info *cpi = data;
+ struct kopper_displaytarget *cdt = cpi->res->obj->dt;
+ struct kopper_swapchain *swapchain = cpi->swapchain;
+ struct zink_screen *screen = gdata;
+ VkResult error = VK_SUCCESS;
+ cpi->info.pResults = &error;
+
+ simple_mtx_lock(&screen->queue_lock);
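+   /* implicit-sync workaround: flush the wait semaphore with an empty submit and a CPU fence wait so the present itself carries no wait semaphores */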
+ if (screen->driver_workarounds.implicit_sync && cdt->type != KOPPER_WIN32) {
+ if (!screen->fence) {
+ VkFenceCreateInfo fci = {0};
+ fci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+ VKSCR(CreateFence)(screen->dev, &fci, NULL, &screen->fence);
+ }
+ VKSCR(ResetFences)(screen->dev, 1, &screen->fence);
+ VkSubmitInfo si = {0};
+ si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ si.waitSemaphoreCount = 1;
+ si.pWaitSemaphores = cpi->info.pWaitSemaphores;
+ VkPipelineStageFlags stages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ si.pWaitDstStageMask = &stages;
+
+ error = VKSCR(QueueSubmit)(screen->queue, 1, &si, screen->fence);
+ if (!zink_screen_handle_vkresult(screen, error)) {
+ simple_mtx_unlock(&screen->queue_lock);
+ VKSCR(DestroySemaphore)(screen->dev, cpi->sem, NULL);
+ goto out;
+ }
+ error = VKSCR(WaitForFences)(screen->dev, 1, &screen->fence, VK_TRUE, UINT64_MAX);
+ if (!zink_screen_handle_vkresult(screen, error)) {
+ simple_mtx_unlock(&screen->queue_lock);
+ VKSCR(DestroySemaphore)(screen->dev, cpi->sem, NULL);
+ goto out;
+ }
+ cpi->info.pWaitSemaphores = NULL;
+ cpi->info.waitSemaphoreCount = 0;
+ }
+ VkResult error2 = VKSCR(QueuePresentKHR)(screen->queue, &cpi->info);
+ zink_screen_debug_marker_end(screen, screen->frame_marker_emitted);
+ zink_screen_debug_marker_begin(screen, "frame");
+ simple_mtx_unlock(&screen->queue_lock);
+ swapchain->last_present = cpi->image;
+ if (cpi->indefinite_acquire)
+ p_atomic_dec(&swapchain->num_acquires);
+ if (error2 == VK_SUBOPTIMAL_KHR && cdt->swapchain == swapchain)
+ cpi->res->obj->new_dt = true;
+
+ /* it's illegal to destroy semaphores if they're in use by a cmdbuf.
+ * but what does "in use" actually mean?
+ * in truth, when using timelines, nobody knows. especially not VVL.
+ *
+ * thus, to avoid infinite error spam and thread-related races,
+ * present semaphores need their own free queue based on the
+ * last-known completed timeline id so that the semaphore persists through
+ * normal cmdbuf submit/signal and then also exists here when it's needed for the present operation
+ */
+ struct util_dynarray *arr;
+ for (; screen->last_finished && swapchain->last_present_prune != screen->last_finished; swapchain->last_present_prune++) {
+ struct hash_entry *he = _mesa_hash_table_search(swapchain->presents,
+ (void*)(uintptr_t)swapchain->last_present_prune);
+ if (he) {
+ arr = he->data;
+ simple_mtx_lock(&screen->semaphores_lock);
+ util_dynarray_append_dynarray(&screen->semaphores, arr);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ util_dynarray_fini(arr);
+ free(arr);
+ _mesa_hash_table_remove(swapchain->presents, he);
+ }
+ }
+ /* queue this wait semaphore for deletion on completion of the next batch */
+ assert(screen->curr_batch > 0);
+ uint32_t next = (uint32_t)screen->curr_batch + 1;
+ /* handle overflow */
+ next = MAX2(next + 1, 1);
+ struct hash_entry *he = _mesa_hash_table_search(swapchain->presents, (void*)(uintptr_t)next);
+ if (he)
+ arr = he->data;
+ else {
+ arr = malloc(sizeof(struct util_dynarray));
+ if (!arr) {
+ mesa_loge("ZINK: failed to allocate arr!");
+ return;
+ }
+
+ util_dynarray_init(arr, NULL);
+ _mesa_hash_table_insert(swapchain->presents, (void*)(uintptr_t)next, arr);
+ }
+ util_dynarray_append(arr, VkSemaphore, cpi->sem);
+out:
+ if (thread_idx != -1) {
+ p_atomic_dec(&swapchain->async_presents);
+ struct pipe_resource *pres = &cpi->res->base.b;
+ pipe_resource_reference(&pres, NULL);
+ }
+ slab_free_st(&screen->present_mempool, cpi);
+}
+
+void
+zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res, unsigned nrects, struct pipe_box *boxes)
+{
+ assert(res->obj->dt);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ assert(zink_kopper_acquired(res->obj->dt, res->obj->dt_idx));
+ assert(res->obj->present);
+
+ /* always try to prune if the current swapchain has seen presents */
+ if (cdt->swapchain->last_present != UINT32_MAX)
+ prune_old_swapchains(screen, cdt, false);
+
+ struct zink_kopper_present_info *cpi = slab_alloc_st(&screen->present_mempool);
+ if (!cpi) {
+ mesa_loge("ZINK: failed to allocate cpi!");
+ return;
+ }
+
+ cpi->sem = res->obj->present;
+ cpi->res = res;
+ cpi->swapchain = cdt->swapchain;
+ cpi->indefinite_acquire = res->obj->indefinite_acquire;
+ cpi->image = res->obj->dt_idx;
+ cpi->info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
+ cpi->info.pNext = NULL;
+ cpi->info.waitSemaphoreCount = 1;
+ cpi->info.pWaitSemaphores = &cpi->sem;
+ cpi->info.swapchainCount = 1;
+ cpi->info.pSwapchains = &cdt->swapchain->swapchain;
+ cpi->info.pImageIndices = &cpi->image;
+ cpi->info.pResults = NULL;
+ res->obj->present = VK_NULL_HANDLE;
+ if (nrects) {
+ cpi->rinfo.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
+ cpi->rinfo.pNext = NULL;
+ cpi->rinfo.swapchainCount = 1;
+ cpi->rinfo.pRegions = &cpi->region;
+ cpi->region.rectangleCount = nrects;
+ cpi->region.pRectangles = cpi->regions;
+ for (unsigned i = 0; i < nrects; i++) {
+ cpi->regions[i].offset.x = boxes[i].x;
+ /*
+ 2) Where is the origin of the VkRectLayerKHR?
+
+ RESOLVED: The upper left corner of the presentable image(s) of the swapchain, per the definition of framebuffer coordinates.
+ */
+ cpi->regions[i].offset.y = cdt->swapchain->scci.imageExtent.height - boxes[i].y - boxes[i].height;
+ cpi->regions[i].extent.width = boxes[i].width;
+ cpi->regions[i].extent.height = boxes[i].height;
+ cpi->regions[i].extent.width = MIN2(cpi->regions[i].extent.width, cpi->swapchain->scci.imageExtent.width - cpi->regions[i].offset.x);
+ cpi->regions[i].extent.height = MIN2(cpi->regions[i].extent.height, cpi->swapchain->scci.imageExtent.height - cpi->regions[i].offset.y);
+ cpi->regions[i].layer = boxes[i].z;
+ }
+ cpi->info.pNext = &cpi->rinfo;
+ }
+   /* From GLX_EXT_buffer_age:
+ *
+ * Buffers' ages are initialized to 0 at buffer creation time.
+ * When a frame boundary is reached, the following occurs before
+ * any exchanging or copying of color buffers:
+ *
+ * * The current back buffer's age is set to 1.
+ * * Any other color buffers' ages are incremented by 1 if
+ * their age was previously greater than 0.
+ */
+ if (!cdt->age_locked) {
+ for (int i = 0; i < cdt->swapchain->num_images; i++) {
+ if (i == res->obj->dt_idx)
+ cdt->swapchain->images[i].age = 1;
+ else if (cdt->swapchain->images[i].age > 0)
+ cdt->swapchain->images[i].age += 1;
+ }
+ }
+ if (util_queue_is_initialized(&screen->flush_queue)) {
+ p_atomic_inc(&cpi->swapchain->async_presents);
+ struct pipe_resource *pres = NULL;
+ pipe_resource_reference(&pres, &res->base.b);
+ util_queue_add_job(&screen->flush_queue, cpi, &cdt->swapchain->present_fence,
+ kopper_present, NULL, 0);
+ } else {
+ kopper_present(cpi, screen, -1);
+ }
+ res->obj->indefinite_acquire = false;
+ cdt->swapchain->images[res->obj->dt_idx].acquired = NULL;
+ res->obj->dt_idx = UINT32_MAX;
+}
+
+void
+zink_kopper_update_last_written(struct zink_resource *res)
+{
+ res->obj->last_dt_idx = res->obj->dt_idx;
+}
+
+void
+zink_kopper_set_readback_needs_update(struct zink_resource *res)
+{
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ struct kopper_swapchain *cswap = cdt->swapchain;
+ cswap->images[res->obj->dt_idx].readback_needs_update = true;
+}
+
+static bool
+kopper_ensure_readback(struct zink_screen *screen, struct zink_resource *res)
+{
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ struct kopper_swapchain *cswap = cdt->swapchain;
+
+ for (unsigned i = 0; i < cswap->num_images; i++) {
+ if (cswap->images[i].readback)
+ return false;
+ struct pipe_resource templ = res->base.b;
+ templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ cswap->images[i].readback = screen->base.resource_create(&screen->base, &templ);
+ }
+ return true;
+}
+
+bool
+zink_kopper_acquire_readback(struct zink_context *ctx, struct zink_resource *res, struct zink_resource **readback)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ assert(res->obj->dt);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ const struct kopper_swapchain *cswap = cdt->swapchain;
+ uint32_t last_dt_idx = res->obj->last_dt_idx;
+ VkResult ret = VK_SUCCESS;
+
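+   /* only materialize per-image readback resources after repeated readbacks (ZINK_READBACK_THRESHOLD) */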
+ if (++cdt->readback_counter >= ZINK_READBACK_THRESHOLD) {
+ if (kopper_ensure_readback(screen, res) &&
+ res->obj->dt_idx != UINT32_MAX && cswap->images[res->obj->dt_idx].readback_needs_update)
+ zink_kopper_readback_update(ctx, res);
+ }
+ /* if this hasn't been presented or if it has data, use this as the readback target */
+ if (res->obj->last_dt_idx == UINT32_MAX ||
+ (res->obj->dt_idx != UINT32_MAX && cdt->swapchain->images[res->obj->dt_idx].age)) {
+ *readback = res;
+ return false;
+ }
+ if (cswap->images[last_dt_idx].acquired) {
+ struct zink_resource *rb = cswap->images[last_dt_idx].acquired;
+ *readback = rb;
+ return false;
+ }
+ if (cswap->images[last_dt_idx].readback) {
+ struct zink_resource *rb = zink_resource(cswap->images[res->obj->last_dt_idx].readback);
+ if (!cswap->images[last_dt_idx].readback_needs_update) {
+ *readback = rb;
+ return false;
+ }
+ }
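+   /* cycle the swapchain by presenting and reacquiring until the last-written image is acquired again */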
+ while (res->obj->dt_idx != last_dt_idx) {
+ cdt->age_locked = true;
+ if (res->obj->dt_idx != UINT32_MAX && !zink_kopper_present_readback(ctx, res))
+ break;
+ cdt->age_locked = true;
+ do {
+ ret = kopper_acquire(screen, res, 0);
+ } while (!is_swapchain_kill(ret) && (ret == VK_NOT_READY || ret == VK_TIMEOUT));
+ if (is_swapchain_kill(ret)) {
+ kill_swapchain(ctx, res);
+ *readback = NULL;
+ cdt->age_locked = false;
+ return false;
+ }
+ }
+ if (cswap != cdt->swapchain) {
+ ctx->swapchain_size = cdt->swapchain->scci.imageExtent;
+ res->base.b.width0 = ctx->swapchain_size.width;
+ res->base.b.height0 = ctx->swapchain_size.height;
+ }
+ zink_batch_usage_set(&cdt->swapchain->batch_uses, ctx->batch.state);
+ *readback = res;
+ return true;
+}
+
+bool
+zink_kopper_present_readback(struct zink_context *ctx, struct zink_resource *res)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ VkSubmitInfo si = {0};
+ assert(zink_is_swapchain(res));
+ if (res->obj->last_dt_idx == UINT32_MAX)
+ return true;
+ if (res->layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR) {
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+ ctx->base.flush(&ctx->base, NULL, 0);
+ }
+ si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ si.signalSemaphoreCount = 1;
+ VkPipelineStageFlags mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ si.pWaitDstStageMask = &mask;
+ VkSemaphore acquire = zink_kopper_acquire_submit(screen, res);
+ VkSemaphore present = res->obj->present ? res->obj->present : zink_kopper_present(screen, res);
+ if (screen->threaded_submit)
+ util_queue_finish(&screen->flush_queue);
+ si.waitSemaphoreCount = !!acquire;
+ si.pWaitSemaphores = &acquire;
+ si.pSignalSemaphores = &present;
+ simple_mtx_lock(&screen->queue_lock);
+ VkResult error = VKSCR(QueueSubmit)(screen->queue, 1, &si, VK_NULL_HANDLE);
+ simple_mtx_unlock(&screen->queue_lock);
+ if (!zink_screen_handle_vkresult(screen, error))
+ return false;
+
+ zink_kopper_present_queue(screen, res, 0, NULL);
+ if (util_queue_is_initialized(&screen->flush_queue)) {
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ util_queue_fence_wait(&cdt->swapchain->present_fence);
+ }
+
+ simple_mtx_lock(&screen->queue_lock);
+ error = VKSCR(QueueWaitIdle)(screen->queue);
+ simple_mtx_unlock(&screen->queue_lock);
+
+ simple_mtx_lock(&screen->semaphores_lock);
+ util_dynarray_append(&screen->semaphores, VkSemaphore, acquire);
+ simple_mtx_unlock(&screen->semaphores_lock);
+
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ cdt->age_locked = false;
+
+ return zink_screen_handle_vkresult(screen, error);
+}
+
+void
+zink_kopper_readback_update(struct zink_context *ctx, struct zink_resource *res)
+{
+ assert(res->obj->dt);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ struct kopper_swapchain *cswap = cdt->swapchain;
+ assert(res->obj->dt_idx != UINT32_MAX);
+ struct pipe_resource *readback = cswap->images[res->obj->dt_idx].readback;
+ struct pipe_box box;
+ u_box_3d(0, 0, 0, res->base.b.width0, res->base.b.height0, res->base.b.depth0, &box);
+
+ if (cswap->images[res->obj->dt_idx].readback_needs_update && readback)
+ ctx->base.resource_copy_region(&ctx->base, readback, 0, 0, 0, 0, &res->base.b, 0, &box);
+ cswap->images[res->obj->dt_idx].readback_needs_update = false;
+}
+
+bool
+zink_kopper_update(struct pipe_screen *pscreen, struct pipe_resource *pres, int *w, int *h)
+{
+ struct zink_resource *res = zink_resource(pres);
+ struct zink_screen *screen = zink_screen(pscreen);
+ if (!res->obj->dt)
+ return false;
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (cdt->type != KOPPER_X11) {
+ *w = res->base.b.width0;
+ *h = res->base.b.height0;
+ return true;
+ }
+ VkResult ret = update_caps(screen, cdt);
+ if (ret != VK_SUCCESS) {
+ mesa_loge("zink: failed to update swapchain capabilities: %s", vk_Result_to_str(ret));
+ cdt->is_kill = true;
+ return false;
+ }
+ *w = cdt->caps.currentExtent.width;
+ *h = cdt->caps.currentExtent.height;
+ return true;
+}
+
+bool
+zink_kopper_is_cpu(const struct pipe_screen *pscreen)
+{
+ const struct zink_screen *screen = (const struct zink_screen*)pscreen;
+ return screen->is_cpu;
+}
+
+void
+zink_kopper_fixup_depth_buffer(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (!ctx->fb_state.zsbuf)
+ return;
+
+ assert(ctx->fb_state.zsbuf->texture->bind & PIPE_BIND_DISPLAY_TARGET);
+
+ struct zink_resource *res = zink_resource(ctx->fb_state.zsbuf->texture);
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf);
+ struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.zsbuf;
+ if (surf->info.width == ctx->fb_state.width &&
+ surf->info.height == ctx->fb_state.height)
+ return;
+
+ struct pipe_resource templ = *ctx->fb_state.zsbuf->texture;
+ templ.width0 = ctx->fb_state.width;
+ templ.height0 = ctx->fb_state.height;
+ struct pipe_resource *pz = screen->base.resource_create(&screen->base, &templ);
+ struct zink_resource *z = zink_resource(pz);
+ zink_resource_object_reference(screen, &res->obj, z->obj);
+ res->base.b.width0 = ctx->fb_state.width;
+ res->base.b.height0 = ctx->fb_state.height;
+ pipe_resource_reference(&pz, NULL);
+
+ ctx->fb_state.zsbuf->width = ctx->fb_state.width;
+ ctx->fb_state.zsbuf->height = ctx->fb_state.height;
+ struct pipe_surface *psurf = ctx->base.create_surface(&ctx->base, &res->base.b, ctx->fb_state.zsbuf);
+ struct zink_ctx_surface *cz = (struct zink_ctx_surface*)psurf;
+
+ /* oh god why */
+ zink_surface_reference(screen, &csurf->surf, cz->surf);
+ pipe_surface_release(&ctx->base, &psurf);
+}
+
+bool
+zink_kopper_check(struct pipe_resource *pres)
+{
+ struct zink_resource *res = zink_resource(pres);
+ assert(pres->bind & PIPE_BIND_DISPLAY_TARGET);
+ if (!res->obj->dt)
+ return false;
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ return !cdt->is_kill;
+}
+
+void
+zink_kopper_set_swap_interval(struct pipe_screen *pscreen, struct pipe_resource *pres, int interval)
+{
+ struct zink_resource *res = zink_resource(pres);
+ struct zink_screen *screen = zink_screen(pscreen);
+ assert(res->obj->dt);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ VkPresentModeKHR old_present_mode = cdt->present_mode;
+
+ zink_kopper_set_present_mode_for_interval(cdt, interval);
+
+ if (old_present_mode != cdt->present_mode)
+ update_swapchain(screen, cdt, cdt->caps.currentExtent.width, cdt->caps.currentExtent.height);
+}
+
+int
+zink_kopper_query_buffer_age(struct pipe_context *pctx, struct pipe_resource *pres)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_resource *res = zink_resource(pres);
+ assert(res->obj->dt);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+
+ ctx = zink_tc_context_unwrap(pctx, zink_screen(pctx->screen)->threaded);
+
+ /* Returning 0 here isn't ideal (yes, the buffer is undefined, because you
+ * lost it) but threading the error up is more hassle than it's worth.
+ */
+ if (!zink_kopper_acquired(res->obj->dt, res->obj->dt_idx))
+ if (!zink_kopper_acquire(ctx, res, UINT64_MAX))
+ return 0;
+
+ return cdt->swapchain->images[res->obj->dt_idx].age;
+}
+
+static void
+swapchain_prune_batch_usage(struct kopper_swapchain *cswap, const struct zink_batch_usage *u)
+{
+ if (cswap->batch_uses == u)
+ cswap->batch_uses = NULL;
+}
+
+void
+zink_kopper_prune_batch_usage(struct kopper_displaytarget *cdt, const struct zink_batch_usage *u)
+{
+ struct kopper_swapchain *cswap = cdt->swapchain;
+ swapchain_prune_batch_usage(cswap, u);
+ for (cswap = cdt->old_swapchain; cswap; cswap = cswap->next)
+ swapchain_prune_batch_usage(cswap, u);
+}
diff --git a/src/gallium/drivers/zink/zink_kopper.h b/src/gallium/drivers/zink/zink_kopper.h
new file mode 100644
index 00000000000..89106975266
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_kopper.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2021 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+ */
+
+#ifndef ZINK_KOPPER_H
+#define ZINK_KOPPER_H
+
+#include "kopper_interface.h"
+#include "util/u_queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct zink_batch_usage;
+
+/* number of times a swapchain can be read without forcing readback mode */
+#define ZINK_READBACK_THRESHOLD 3
+
+struct kopper_swapchain_image {
+ bool init;
+ bool readback_needs_update;
+ bool dt_has_data;
+ int age;
+ VkImage image;
+ struct zink_resource *acquired;
+ struct pipe_resource *readback;
+ VkSemaphore acquire;
+ VkImageLayout layout;
+};
+
+struct kopper_swapchain {
+ struct kopper_swapchain *next;
+ VkSwapchainKHR swapchain;
+
+ unsigned last_present;
+ unsigned num_images;
+ uint32_t last_present_prune;
+ struct hash_table *presents;
+ VkSwapchainCreateInfoKHR scci;
+ unsigned num_acquires;
+ unsigned max_acquires;
+ unsigned async_presents;
+ struct util_queue_fence present_fence;
+ struct zink_batch_usage *batch_uses;
+ struct kopper_swapchain_image *images;
+};
+
+enum kopper_type {
+ KOPPER_X11,
+ KOPPER_WAYLAND,
+ KOPPER_WIN32
+};
+
+struct kopper_displaytarget
+{
+ unsigned refcount;
+ VkFormat formats[2];
+ unsigned width;
+ unsigned height;
+ unsigned stride;
+ void *loader_private;
+
+ VkSurfaceKHR surface;
+ uint32_t present_modes; //VkPresentModeKHR bitmask
+ struct kopper_swapchain *swapchain;
+ struct kopper_swapchain *old_swapchain;
+
+ struct kopper_loader_info info;
+
+ VkSurfaceCapabilitiesKHR caps;
+ VkImageFormatListCreateInfo format_list;
+ enum kopper_type type;
+ bool is_kill;
+ VkPresentModeKHR present_mode;
+ unsigned readback_counter;
+
+ bool age_locked; //disables buffer age during readback
+};
+
+struct zink_kopper_present_info {
+ VkPresentInfoKHR info;
+ VkPresentRegionsKHR rinfo;
+ VkPresentRegionKHR region;
+ VkRectLayerKHR regions[64];
+ uint32_t image;
+ struct kopper_swapchain *swapchain;
+ struct zink_resource *res;
+ VkSemaphore sem;
+ bool indefinite_acquire;
+};
+
+struct zink_context;
+struct zink_screen;
+struct zink_resource;
+
+static inline bool
+zink_kopper_has_srgb(const struct kopper_displaytarget *cdt)
+{
+ return cdt->formats[1] != VK_FORMAT_UNDEFINED;
+}
+
+static inline bool
+zink_kopper_last_present_eq(const struct kopper_displaytarget *cdt, uint32_t idx)
+{
+ return cdt->swapchain->last_present == idx;
+}
+
+static inline bool
+zink_kopper_acquired(const struct kopper_displaytarget *cdt, uint32_t idx)
+{
+ return idx != UINT32_MAX && cdt->swapchain->images[idx].acquired;
+}
+
+void
+zink_kopper_update_last_written(struct zink_resource *res);
+
+struct kopper_displaytarget *
+zink_kopper_displaytarget_create(struct zink_screen *screen, unsigned tex_usage,
+ enum pipe_format format, unsigned width,
+ unsigned height, unsigned alignment,
+ const void *loader_private, unsigned *stride);
+void
+zink_kopper_displaytarget_destroy(struct zink_screen *screen, struct kopper_displaytarget *cdt);
+
+
+bool
+zink_kopper_acquire(struct zink_context *ctx, struct zink_resource *res, uint64_t timeout);
+VkSemaphore
+zink_kopper_acquire_submit(struct zink_screen *screen, struct zink_resource *res);
+VkSemaphore
+zink_kopper_present(struct zink_screen *screen, struct zink_resource *res);
+void
+zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res, unsigned nrects, struct pipe_box *boxes);
+bool
+zink_kopper_acquire_readback(struct zink_context *ctx, struct zink_resource *res, struct zink_resource **readback);
+bool
+zink_kopper_present_readback(struct zink_context *ctx, struct zink_resource *res);
+void
+zink_kopper_readback_update(struct zink_context *ctx, struct zink_resource *res);
+void
+zink_kopper_deinit_displaytarget(struct zink_screen *screen, struct kopper_displaytarget *cdt);
+bool
+zink_kopper_update(struct pipe_screen *pscreen, struct pipe_resource *pres, int *w, int *h);
+bool
+zink_kopper_is_cpu(const struct pipe_screen *pscreen);
+void
+zink_kopper_fixup_depth_buffer(struct zink_context *ctx);
+bool
+zink_kopper_check(struct pipe_resource *pres);
+void
+zink_kopper_set_swap_interval(struct pipe_screen *pscreen, struct pipe_resource *pres, int interval);
+int
+zink_kopper_query_buffer_age(struct pipe_context *pctx, struct pipe_resource *pres);
+void
+zink_kopper_prune_batch_usage(struct kopper_displaytarget *cdt, const struct zink_batch_usage *u);
+void
+zink_kopper_set_readback_needs_update(struct zink_resource *res);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/zink/zink_lower_cubemap_to_array.c b/src/gallium/drivers/zink/zink_lower_cubemap_to_array.c
new file mode 100644
index 00000000000..55a8425f130
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_lower_cubemap_to_array.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright © Microsoft Corporation
+ * Copyright © 2022 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+
+static const struct glsl_type *
+make_2darray_sampler_from_cubemap(const struct glsl_type *type)
+{
+ return glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ?
+ glsl_sampler_type(
+ GLSL_SAMPLER_DIM_2D,
+ false, true,
+ glsl_get_sampler_result_type(type)) : type;
+}
+
+static const struct glsl_type *
+make_2darray_from_cubemap_with_array(const struct glsl_type *type)
+{
+ if (glsl_type_is_array(type)) {
+ const struct glsl_type *new_type = glsl_without_array(type);
+ return new_type != type ? glsl_array_type(make_2darray_from_cubemap_with_array(glsl_without_array(type)),
+ glsl_get_length(type), 0) : type;
+ }
+ return make_2darray_sampler_from_cubemap(type);
+}
+
+static bool
+lower_cubemap_to_array_filter(const nir_instr *instr, const void *mask)
+{
+ const uint32_t *nonseamless_cube_mask = mask;
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+ return false;
+
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txd:
+ case nir_texop_txl:
+ case nir_texop_txs:
+ case nir_texop_lod:
+ case nir_texop_tg4:
+ break;
+ default:
+ return false;
+ }
+ return (BITFIELD_BIT(tex->sampler_index) & (*nonseamless_cube_mask)) != 0;
+ }
+
+ return false;
+}
+
+typedef struct {
+ nir_def *rx;
+ nir_def *ry;
+ nir_def *rz;
+ nir_def *arx;
+ nir_def *ary;
+ nir_def *arz;
+ nir_def *array;
+} coord_t;
+
+
+/* This is taken from sp_tex_sample:convert_cube */
+static nir_def *
+evaluate_face_x(nir_builder *b, coord_t *coord)
+{
+ nir_def *sign = nir_fsign(b, coord->rx);
+ nir_def *positive = nir_fge_imm(b, coord->rx, 0.0);
+ nir_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arx);
+
+ nir_def *x = nir_fadd_imm(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), 0.5);
+ nir_def *y = nir_fadd_imm(b, nir_fmul(b, ima, coord->ry), 0.5);
+ nir_def *face = nir_bcsel(b, positive, nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
+
+ if (coord->array)
+ face = nir_fadd(b, face, coord->array);
+
+   return nir_vec3(b, x, y, face);
+}
+
+static nir_def *
+evaluate_face_y(nir_builder *b, coord_t *coord)
+{
+ nir_def *sign = nir_fsign(b, coord->ry);
+ nir_def *positive = nir_fge_imm(b, coord->ry, 0.0);
+ nir_def *ima = nir_fdiv(b, nir_imm_float(b, 0.5), coord->ary);
+
+ nir_def *x = nir_fadd_imm(b, nir_fmul(b, ima, coord->rx), 0.5);
+ nir_def *y = nir_fadd_imm(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), 0.5);
+ nir_def *face = nir_bcsel(b, positive, nir_imm_float(b, 2.0), nir_imm_float(b, 3.0));
+
+ if (coord->array)
+ face = nir_fadd(b, face, coord->array);
+
+   return nir_vec3(b, x, y, face);
+}
+
+static nir_def *
+evaluate_face_z(nir_builder *b, coord_t *coord)
+{
+ nir_def *sign = nir_fsign(b, coord->rz);
+ nir_def *positive = nir_fge_imm(b, coord->rz, 0.0);
+ nir_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arz);
+
+ nir_def *x = nir_fadd_imm(b, nir_fmul(b, nir_fmul(b, sign, ima), nir_fneg(b, coord->rx)), 0.5);
+ nir_def *y = nir_fadd_imm(b, nir_fmul(b, ima, coord->ry), 0.5);
+ nir_def *face = nir_bcsel(b, positive, nir_imm_float(b, 4.0), nir_imm_float(b, 5.0));
+
+ if (coord->array)
+ face = nir_fadd(b, face, coord->array);
+
+   return nir_vec3(b, x, y, face);
+}
+
+static nir_def *
+create_array_tex_from_cube_tex(nir_builder *b, nir_tex_instr *tex, nir_def *coord, nir_texop op)
+{
+ nir_tex_instr *array_tex;
+
+ unsigned num_srcs = tex->num_srcs;
+ if (op == nir_texop_txf && nir_tex_instr_src_index(tex, nir_tex_src_comparator) != -1)
+ num_srcs--;
+ array_tex = nir_tex_instr_create(b->shader, num_srcs);
+ array_tex->op = op;
+ array_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ array_tex->is_array = true;
+ array_tex->is_shadow = tex->is_shadow;
+ array_tex->is_sparse = tex->is_sparse;
+ array_tex->is_new_style_shadow = tex->is_new_style_shadow;
+ array_tex->texture_index = tex->texture_index;
+ array_tex->sampler_index = tex->sampler_index;
+ array_tex->dest_type = tex->dest_type;
+ array_tex->coord_components = 3;
+
+ nir_src coord_src = nir_src_for_ssa(coord);
+ unsigned s = 0;
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (op == nir_texop_txf && tex->src[i].src_type == nir_tex_src_comparator)
+ continue;
+ nir_src *psrc = (tex->src[i].src_type == nir_tex_src_coord) ?
+ &coord_src : &tex->src[i].src;
+
+ array_tex->src[s].src_type = tex->src[i].src_type;
+ if (psrc->ssa->num_components != nir_tex_instr_src_size(array_tex, s)) {
+ nir_def *c = nir_trim_vector(b, psrc->ssa,
+ nir_tex_instr_src_size(array_tex, s));
+ array_tex->src[s].src = nir_src_for_ssa(c);
+ } else
+ array_tex->src[s].src = nir_src_for_ssa(psrc->ssa);
+ s++;
+ }
+
+ nir_def_init(&array_tex->instr, &array_tex->def,
+ nir_tex_instr_dest_size(array_tex),
+ tex->def.bit_size);
+ nir_builder_instr_insert(b, &array_tex->instr);
+ return &array_tex->def;
+}
+
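+/* remap a texel that falls off its cube face onto the adjacent face using per-face lookup tables */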
+static nir_def *
+handle_cube_edge(nir_builder *b, nir_def *x, nir_def *y, nir_def *face, nir_def *array_slice_cube_base, nir_def *tex_size)
+{
+ enum cube_remap
+ {
+ cube_remap_zero = 0,
+ cube_remap_x,
+ cube_remap_y,
+ cube_remap_tex_size,
+ cube_remap_tex_size_minus_x,
+ cube_remap_tex_size_minus_y,
+
+ cube_remap_size,
+ };
+
+ struct cube_remap_table
+ {
+ enum cube_remap remap_x;
+ enum cube_remap remap_y;
+ uint32_t remap_face;
+ };
+
+ static const struct cube_remap_table cube_remap_neg_x[6] =
+ {
+ {cube_remap_tex_size, cube_remap_y, 4},
+ {cube_remap_tex_size, cube_remap_y, 5},
+ {cube_remap_y, cube_remap_zero, 1},
+ {cube_remap_tex_size_minus_y, cube_remap_tex_size, 1},
+ {cube_remap_tex_size, cube_remap_y, 1},
+ {cube_remap_tex_size, cube_remap_y, 0},
+ };
+
+ static const struct cube_remap_table cube_remap_pos_x[6] =
+ {
+ {cube_remap_zero, cube_remap_y, 5},
+ {cube_remap_zero, cube_remap_y, 4},
+ {cube_remap_tex_size_minus_y, cube_remap_zero, 0},
+ {cube_remap_y, cube_remap_tex_size, 0},
+ {cube_remap_zero, cube_remap_y, 0},
+ {cube_remap_zero, cube_remap_y, 1},
+ };
+
+ static const struct cube_remap_table cube_remap_neg_y[6] =
+ {
+ {cube_remap_tex_size, cube_remap_tex_size_minus_x, 2},
+ {cube_remap_zero, cube_remap_x, 2},
+ {cube_remap_tex_size_minus_x, cube_remap_zero, 5},
+ {cube_remap_x, cube_remap_tex_size, 4},
+ {cube_remap_x, cube_remap_tex_size, 2},
+ {cube_remap_tex_size_minus_x, cube_remap_zero, 2},
+ };
+
+ static const struct cube_remap_table cube_remap_pos_y[6] =
+ {
+ {cube_remap_tex_size, cube_remap_x, 3},
+ {cube_remap_zero, cube_remap_tex_size_minus_x, 3},
+ {cube_remap_x, cube_remap_zero, 4},
+ {cube_remap_tex_size_minus_x, cube_remap_tex_size, 5},
+ {cube_remap_x, cube_remap_zero, 3},
+ {cube_remap_tex_size_minus_x, cube_remap_tex_size, 3},
+ };
+
+ static const struct cube_remap_table* remap_tables[4] = {
+ cube_remap_neg_x,
+ cube_remap_pos_x,
+ cube_remap_neg_y,
+ cube_remap_pos_y
+ };
+
+ nir_def *zero = nir_imm_int(b, 0);
+
+ /* Doesn't matter since the texture is square */
+ tex_size = nir_channel(b, tex_size, 0);
+
+ nir_def *x_on = nir_iand(b, nir_ige(b, x, zero), nir_ige(b, tex_size, x));
+ nir_def *y_on = nir_iand(b, nir_ige(b, y, zero), nir_ige(b, tex_size, y));
+ nir_def *one_on = nir_ixor(b, x_on, y_on);
+
+ /* If the sample did not fall off the face in either dimension, then set output = input */
+ nir_def *x_result = x;
+ nir_def *y_result = y;
+ nir_def *face_result = face;
+
+ /* otherwise, if the sample fell off the face in either the X or the Y direction, remap to the new face */
+ nir_def *remap_predicates[4] =
+ {
+ nir_iand(b, one_on, nir_ilt(b, x, zero)),
+ nir_iand(b, one_on, nir_ilt(b, tex_size, x)),
+ nir_iand(b, one_on, nir_ilt(b, y, zero)),
+ nir_iand(b, one_on, nir_ilt(b, tex_size, y)),
+ };
+
+ nir_def *remap_array[cube_remap_size];
+
+ remap_array[cube_remap_zero] = zero;
+ remap_array[cube_remap_x] = x;
+ remap_array[cube_remap_y] = y;
+ remap_array[cube_remap_tex_size] = tex_size;
+ remap_array[cube_remap_tex_size_minus_x] = nir_isub(b, tex_size, x);
+ remap_array[cube_remap_tex_size_minus_y] = nir_isub(b, tex_size, y);
+
+ /* For each possible way the sample could have fallen off */
+ for (unsigned i = 0; i < 4; i++) {
+ const struct cube_remap_table* remap_table = remap_tables[i];
+
+ /* For each possible original face */
+ for (unsigned j = 0; j < 6; j++) {
+ nir_def *predicate = nir_iand(b, remap_predicates[i], nir_ieq_imm(b, face, j));
+
+ x_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_x], x_result);
+ y_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_y], y_result);
+ face_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_face], face_result);
+ }
+ }
+
+ return nir_vec3(b, x_result, y_result, nir_iadd(b, face_result, array_slice_cube_base));
+}
+
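+/* emulate tg4 on the 2D-array view: compute the four gathered texel coordinates, remap any that cross a face edge, and fetch each with txf */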
+static nir_def *
+handle_cube_gather(nir_builder *b, nir_tex_instr *tex, nir_def *coord)
+{
+ tex->is_array = true;
+ nir_def *tex_size = nir_get_texture_size(b, tex);
+
+ /* nir_get_texture_size puts the cursor before the tex op */
+ b->cursor = nir_after_instr(coord->parent_instr);
+
+ nir_def *const_05 = nir_imm_float(b, 0.5f);
+ nir_def *texel_coords = nir_fmul(b, nir_trim_vector(b, coord, 2),
+ nir_i2f32(b, nir_trim_vector(b, tex_size, 2)));
+
+ nir_def *x_orig = nir_channel(b, texel_coords, 0);
+ nir_def *y_orig = nir_channel(b, texel_coords, 1);
+
+ nir_def *x_pos = nir_f2i32(b, nir_fadd(b, x_orig, const_05));
+ nir_def *x_neg = nir_f2i32(b, nir_fsub(b, x_orig, const_05));
+ nir_def *y_pos = nir_f2i32(b, nir_fadd(b, y_orig, const_05));
+ nir_def *y_neg = nir_f2i32(b, nir_fsub(b, y_orig, const_05));
+ nir_def *coords[4][2] = {
+ { x_neg, y_pos },
+ { x_pos, y_pos },
+ { x_pos, y_neg },
+ { x_neg, y_neg },
+ };
+
+ nir_def *array_slice_2d = nir_f2i32(b, nir_channel(b, coord, 2));
+ nir_def *face = nir_imod_imm(b, array_slice_2d, 6);
+ nir_def *array_slice_cube_base = nir_isub(b, array_slice_2d, face);
+
+ nir_def *channels[4];
+ for (unsigned i = 0; i < 4; ++i) {
+ nir_def *final_coord = handle_cube_edge(b, coords[i][0], coords[i][1], face, array_slice_cube_base, tex_size);
+ nir_def *sampled_val = create_array_tex_from_cube_tex(b, tex, final_coord, nir_texop_txf);
+ channels[i] = nir_channel(b, sampled_val, tex->component);
+ }
+
+ return nir_vec(b, channels, 4);
+}
+
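+/* pick the dominant axis of the cube direction and compute the per-face 2D coordinates plus face index (and array offset) */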
+static nir_def *
+lower_cube_coords(nir_builder *b, nir_def *coord, bool is_array)
+{
+ coord_t coords;
+ coords.rx = nir_channel(b, coord, 0);
+ coords.ry = nir_channel(b, coord, 1);
+ coords.rz = nir_channel(b, coord, 2);
+ coords.arx = nir_fabs(b, coords.rx);
+ coords.ary = nir_fabs(b, coords.ry);
+ coords.arz = nir_fabs(b, coords.rz);
+ coords.array = NULL;
+ if (is_array)
+ coords.array = nir_fmul_imm(b, nir_channel(b, coord, 3), 6.0f);
+
+ nir_def *use_face_x = nir_iand(b,
+ nir_fge(b, coords.arx, coords.ary),
+ nir_fge(b, coords.arx, coords.arz));
+
+ nir_if *use_face_x_if = nir_push_if(b, use_face_x);
+ nir_def *face_x_coord = evaluate_face_x(b, &coords);
+ nir_if *use_face_x_else = nir_push_else(b, use_face_x_if);
+
+ nir_def *use_face_y = nir_iand(b,
+ nir_fge(b, coords.ary, coords.arx),
+ nir_fge(b, coords.ary, coords.arz));
+
+ nir_if *use_face_y_if = nir_push_if(b, use_face_y);
+ nir_def *face_y_coord = evaluate_face_y(b, &coords);
+ nir_if *use_face_y_else = nir_push_else(b, use_face_y_if);
+
+ nir_def *face_z_coord = evaluate_face_z(b, &coords);
+
+ nir_pop_if(b, use_face_y_else);
+ nir_def *face_y_or_z_coord = nir_if_phi(b, face_y_coord, face_z_coord);
+ nir_pop_if(b, use_face_x_else);
+
+   /* xy holds the normalized sample coordinates, z the face index */
+ nir_def *coord_and_face = nir_if_phi(b, face_x_coord, face_y_or_z_coord);
+
+ return coord_and_face;
+}
+
+static void
+rewrite_cube_var_type(nir_builder *b, nir_tex_instr *tex)
+{
+ unsigned index = tex->texture_index;
+ nir_variable *sampler = NULL;
+ int highest = -1;
+ nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
+ if (!glsl_type_is_sampler(glsl_without_array(var->type)))
+ continue;
+ unsigned size = glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
+ if (var->data.driver_location == index ||
+ (var->data.driver_location < index && var->data.driver_location + size > index)) {
+ sampler = var;
+ break;
+ }
+ /* handle array sampler access: use the next-closest sampler */
+ if (var->data.driver_location > highest && var->data.driver_location < index) {
+ highest = var->data.driver_location;
+ sampler = var;
+ }
+ }
+ assert(sampler);
+ sampler->type = make_2darray_from_cubemap_with_array(sampler->type);
+}
+
+/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
+/* tex(s, coord) = txl(s, coord, lod(s, coord).x) */
+static nir_tex_instr *
+lower_tex_to_txl(nir_builder *b, nir_tex_instr *tex)
+{
+ b->cursor = nir_after_instr(&tex->instr);
+ int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
+ unsigned num_srcs = bias_idx >= 0 ? tex->num_srcs : tex->num_srcs + 1;
+ nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);
+
+ txl->op = nir_texop_txl;
+ txl->sampler_dim = tex->sampler_dim;
+ txl->dest_type = tex->dest_type;
+ txl->coord_components = tex->coord_components;
+ txl->texture_index = tex->texture_index;
+ txl->sampler_index = tex->sampler_index;
+ txl->is_array = tex->is_array;
+ txl->is_shadow = tex->is_shadow;
+ txl->is_sparse = tex->is_sparse;
+ txl->is_new_style_shadow = tex->is_new_style_shadow;
+
+ unsigned s = 0;
+ for (int i = 0; i < tex->num_srcs; i++) {
+ if (i == bias_idx)
+ continue;
+ txl->src[s].src = nir_src_for_ssa(tex->src[i].src.ssa);
+ txl->src[s].src_type = tex->src[i].src_type;
+ s++;
+ }
+ nir_def *lod = nir_get_texture_lod(b, tex);
+
+ if (bias_idx >= 0)
+ lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa);
+ lod = nir_fadd_imm(b, lod, -1.0);
+ txl->src[s] = nir_tex_src_for_ssa(nir_tex_src_lod, lod);
+
+ b->cursor = nir_before_instr(&tex->instr);
+ nir_def_init(&txl->instr, &txl->def,
+ tex->def.num_components,
+ tex->def.bit_size);
+ nir_builder_instr_insert(b, &txl->instr);
+ nir_def_rewrite_uses(&tex->def, &txl->def);
+ return txl;
+}
+
+static nir_def *
+lower_cube_sample(nir_builder *b, nir_tex_instr *tex)
+{
+ if (!tex->is_shadow && (tex->op == nir_texop_txb || tex->op == nir_texop_tex)) {
+ tex = lower_tex_to_txl(b, tex);
+ }
+
+ int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ assert(coord_index >= 0);
+
+ /* Evaluate the face and the xy coordinates for a 2D tex op */
+ nir_def *coord = tex->src[coord_index].src.ssa;
+ nir_def *coord_and_face = lower_cube_coords(b, coord, tex->is_array);
+
+ rewrite_cube_var_type(b, tex);
+
+ if (tex->op == nir_texop_tg4 && !tex->is_shadow)
+ return handle_cube_gather(b, tex, coord_and_face);
+ else
+ return create_array_tex_from_cube_tex(b, tex, coord_and_face, tex->op);
+}
+
+static nir_def *
+lower_cube_txs(nir_builder *b, nir_tex_instr *tex)
+{
+ b->cursor = nir_after_instr(&tex->instr);
+
+ rewrite_cube_var_type(b, tex);
+ unsigned num_components = tex->def.num_components;
+ /* force max components to unbreak textureSize().xy */
+ tex->def.num_components = 3;
+ tex->is_array = true;
+ nir_def *array_dim = nir_channel(b, &tex->def, 2);
+ nir_def *cube_array_dim = nir_idiv(b, array_dim, nir_imm_int(b, 6));
+ nir_def *size = nir_vec3(b, nir_channel(b, &tex->def, 0),
+ nir_channel(b, &tex->def, 1),
+ cube_array_dim);
+ return nir_trim_vector(b, size, num_components);
+}
+
+static nir_def *
+lower_cubemap_to_array_tex(nir_builder *b, nir_tex_instr *tex)
+{
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txd:
+ case nir_texop_txl:
+ case nir_texop_lod:
+ case nir_texop_tg4:
+ return lower_cube_sample(b, tex);
+ case nir_texop_txs:
+ return lower_cube_txs(b, tex);
+ default:
+      unreachable("Unsupported cube map texture operation");
+ }
+}
+
+static nir_def *
+lower_cubemap_to_array_impl(nir_builder *b, nir_instr *instr,
+ UNUSED void *_options)
+{
+ if (instr->type == nir_instr_type_tex)
+ return lower_cubemap_to_array_tex(b, nir_instr_as_tex(instr));
+ return NULL;
+}
+
+bool
+zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
+bool
+zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask)
+{
+ return nir_shader_lower_instructions(s,
+ lower_cubemap_to_array_filter,
+ lower_cubemap_to_array_impl,
+ &nonseamless_cube_mask);
+}
diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c
index b16b64701dc..80d2f5479ce 100644
--- a/src/gallium/drivers/zink/zink_pipeline.c
+++ b/src/gallium/drivers/zink/zink_pipeline.c
@@ -26,6 +26,7 @@
#include "zink_pipeline.h"
#include "zink_compiler.h"
+#include "nir_to_spirv/nir_to_spirv.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_render_pass.h"
@@ -35,35 +36,37 @@
#include "util/u_debug.h"
#include "util/u_prim.h"
-static VkBlendFactor
-clamp_void_blend_factor(VkBlendFactor f)
-{
- if (f == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA)
- return VK_BLEND_FACTOR_ZERO;
- if (f == VK_BLEND_FACTOR_DST_ALPHA)
- return VK_BLEND_FACTOR_ONE;
- return f;
-}
-
VkPipeline
zink_create_gfx_pipeline(struct zink_screen *screen,
struct zink_gfx_program *prog,
+ struct zink_shader_object *objs,
struct zink_gfx_pipeline_state *state,
- VkPrimitiveTopology primitive_topology)
+ const uint8_t *binding_map,
+ VkPrimitiveTopology primitive_topology,
+ bool optimize,
+ struct util_dynarray *dgc)
{
- struct zink_rasterizer_hw_state *hw_rast_state = (void*)state;
+ struct zink_rasterizer_hw_state *hw_rast_state = (void*)&state->dyn_state3;
VkPipelineVertexInputStateCreateInfo vertex_input_state;
- if (!screen->info.have_EXT_vertex_input_dynamic_state) {
+ bool needs_vi = !screen->info.have_EXT_vertex_input_dynamic_state;
+ if (needs_vi) {
memset(&vertex_input_state, 0, sizeof(vertex_input_state));
vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vertex_input_state.pVertexBindingDescriptions = state->element_state->b.bindings;
vertex_input_state.vertexBindingDescriptionCount = state->element_state->num_bindings;
vertex_input_state.pVertexAttributeDescriptions = state->element_state->attribs;
vertex_input_state.vertexAttributeDescriptionCount = state->element_state->num_attribs;
+ if (!screen->info.have_EXT_extended_dynamic_state || !state->uses_dynamic_stride) {
+ for (int i = 0; i < state->element_state->num_bindings; ++i) {
+ const unsigned buffer_id = binding_map[i];
+ VkVertexInputBindingDescription *binding = &state->element_state->b.bindings[i];
+ binding->stride = state->vertex_strides[buffer_id];
+ }
+ }
}
VkPipelineVertexInputDivisorStateCreateInfoEXT vdiv_state;
- if (!screen->info.have_EXT_vertex_input_dynamic_state && state->element_state->b.divisors_present) {
+ if (needs_vi && state->element_state->b.divisors_present) {
memset(&vdiv_state, 0, sizeof(vdiv_state));
vertex_input_state.pNext = &vdiv_state;
vdiv_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
@@ -78,72 +81,87 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
switch (primitive_topology) {
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ if (screen->info.have_EXT_primitive_topology_list_restart) {
+ primitive_state.primitiveRestartEnable = state->dyn_state2.primitive_restart ? VK_TRUE : VK_FALSE;
+ break;
+ }
+ FALLTHROUGH;
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- if (state->primitive_restart)
- debug_printf("restart_index set with unsupported primitive topology %u\n", primitive_topology);
+ if (state->dyn_state2.primitive_restart)
+ mesa_loge("zink: restart_index set with unsupported primitive topology %s\n", vk_PrimitiveTopology_to_str(primitive_topology));
primitive_state.primitiveRestartEnable = VK_FALSE;
break;
default:
- primitive_state.primitiveRestartEnable = state->primitive_restart ? VK_TRUE : VK_FALSE;
+ primitive_state.primitiveRestartEnable = state->dyn_state2.primitive_restart ? VK_TRUE : VK_FALSE;
}
}
- VkPipelineColorBlendAttachmentState blend_att[PIPE_MAX_COLOR_BUFS];
VkPipelineColorBlendStateCreateInfo blend_state = {0};
blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
if (state->blend_state) {
- unsigned num_attachments = state->render_pass->state.num_rts;
- if (state->render_pass->state.have_zsbuf)
+ unsigned num_attachments = state->render_pass ?
+ state->render_pass->state.num_rts :
+ state->rendering_info.colorAttachmentCount;
+ if (state->render_pass && state->render_pass->state.have_zsbuf)
num_attachments--;
- if (state->void_alpha_attachments) {
- for (unsigned i = 0; i < num_attachments; i++) {
- blend_att[i] = state->blend_state->attachments[i];
- if (state->void_alpha_attachments & BITFIELD_BIT(i)) {
- blend_att[i].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
- blend_att[i].srcColorBlendFactor = clamp_void_blend_factor(blend_att[i].srcColorBlendFactor);
- blend_att[i].dstColorBlendFactor = clamp_void_blend_factor(blend_att[i].dstColorBlendFactor);
- }
- }
- blend_state.pAttachments = blend_att;
- } else
- blend_state.pAttachments = state->blend_state->attachments;
+ blend_state.pAttachments = state->blend_state->attachments;
blend_state.attachmentCount = num_attachments;
blend_state.logicOpEnable = state->blend_state->logicop_enable;
blend_state.logicOp = state->blend_state->logicop_func;
}
+ if (state->rast_attachment_order)
+ blend_state.flags |= VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_EXT;
VkPipelineMultisampleStateCreateInfo ms_state = {0};
ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
ms_state.rasterizationSamples = state->rast_samples + 1;
if (state->blend_state) {
ms_state.alphaToCoverageEnable = state->blend_state->alpha_to_coverage;
- if (state->blend_state->alpha_to_one && !screen->info.feats.features.alphaToOne)
- warn_missing_feature("alphaToOne");
+ if (state->blend_state->alpha_to_one && !screen->info.feats.features.alphaToOne) {
+ static bool warned = false;
+ warn_missing_feature(warned, "alphaToOne");
+ }
ms_state.alphaToOneEnable = state->blend_state->alpha_to_one;
}
- ms_state.pSampleMask = state->sample_mask ? &state->sample_mask : NULL;
- if (hw_rast_state->force_persample_interp) {
+ /* "If pSampleMask is NULL, it is treated as if the mask has all bits set to 1."
+ * - Chapter 27. Rasterization
+ *
+ * thus it never makes sense to leave this as NULL since gallium will provide correct
+ * data here as long as sample_mask is initialized on context creation
+ */
+ ms_state.pSampleMask = &state->sample_mask;
+ if (state->force_persample_interp) {
ms_state.sampleShadingEnable = VK_TRUE;
ms_state.minSampleShading = 1.0;
+ } else if (state->min_samples > 0) {
+ ms_state.sampleShadingEnable = VK_TRUE;
+ ms_state.minSampleShading = (float)(state->rast_samples + 1) / (state->min_samples + 1);
}
VkPipelineViewportStateCreateInfo viewport_state = {0};
+ VkPipelineViewportDepthClipControlCreateInfoEXT clip = {
+ VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT,
+ NULL,
+ VK_TRUE
+ };
viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewport_state.viewportCount = screen->info.have_EXT_extended_dynamic_state ? 0 : state->dyn_state1.num_viewports;
viewport_state.pViewports = NULL;
viewport_state.scissorCount = screen->info.have_EXT_extended_dynamic_state ? 0 : state->dyn_state1.num_viewports;
viewport_state.pScissors = NULL;
+ if (screen->info.have_EXT_depth_clip_control && !hw_rast_state->clip_halfz)
+ viewport_state.pNext = &clip;
VkPipelineRasterizationStateCreateInfo rast_state = {0};
rast_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rast_state.depthClampEnable = hw_rast_state->depth_clamp;
- rast_state.rasterizerDiscardEnable = hw_rast_state->rasterizer_discard;
+ rast_state.rasterizerDiscardEnable = state->dyn_state2.rasterizer_discard;
rast_state.polygonMode = hw_rast_state->polygon_mode;
- rast_state.cullMode = hw_rast_state->cull_mode;
+ rast_state.cullMode = state->dyn_state1.cull_mode;
rast_state.frontFace = state->dyn_state1.front_face;
rast_state.depthBiasEnable = VK_TRUE;
@@ -152,6 +170,17 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
rast_state.depthBiasSlopeFactor = 0.0;
rast_state.lineWidth = 1.0f;
+ VkPipelineRasterizationDepthClipStateCreateInfoEXT depth_clip_state = {0};
+ depth_clip_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT;
+ depth_clip_state.depthClipEnable = hw_rast_state->depth_clip;
+ if (screen->info.have_EXT_depth_clip_enable) {
+ depth_clip_state.pNext = rast_state.pNext;
+ rast_state.pNext = &depth_clip_state;
+ } else {
+ static bool warned = false;
+ warn_missing_feature(warned, "VK_EXT_depth_clip_enable");
+ }
+
VkPipelineRasterizationProvokingVertexStateCreateInfoEXT pv_state;
pv_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT;
pv_state.provokingVertexMode = hw_rast_state->pv_last ?
@@ -174,7 +203,7 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
depth_stencil_state.back = state->dyn_state1.depth_stencil_alpha_state->stencil_back;
depth_stencil_state.depthWriteEnable = state->dyn_state1.depth_stencil_alpha_state->depth_write;
- VkDynamicState dynamicStateEnables[30] = {
+ VkDynamicState dynamicStateEnables[80] = {
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
@@ -182,19 +211,20 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
};
unsigned state_count = 4;
if (screen->info.have_EXT_extended_dynamic_state) {
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT;
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_COMPARE_OP;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE;
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_OP_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE_EXT;
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_OP;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_CULL_MODE;
if (state->sample_locations_enabled)
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT;
} else {
@@ -203,35 +233,134 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
}
if (screen->info.have_EXT_vertex_input_dynamic_state)
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_EXT;
- else if (screen->info.have_EXT_extended_dynamic_state)
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT;
- if (screen->info.have_EXT_extended_dynamic_state2)
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT;
+ else if (screen->info.have_EXT_extended_dynamic_state && state->uses_dynamic_stride && state->element_state->num_attribs)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE;
+ if (screen->info.have_EXT_extended_dynamic_state2) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE;
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT;
+ }
+ if (screen->info.have_EXT_extended_dynamic_state3) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_POLYGON_MODE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT;
+ if (!screen->driver_workarounds.no_linestipple) {
+ if (screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT;
+ }
+ if (screen->have_full_ds3) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_MASK_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT;
+ if (state->blend_state) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT;
+ if (screen->info.feats.features.alphaToOne)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT;
+ if (state->rendering_info.colorAttachmentCount) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT;
+ }
+ }
+ }
+ }
+ if (screen->info.have_EXT_color_write_enable)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT;
+
+ assert(state->rast_prim != MESA_PRIM_COUNT || zink_debug & ZINK_DEBUG_SHADERDB);
VkPipelineRasterizationLineStateCreateInfoEXT rast_line_state;
- if (screen->info.have_EXT_line_rasterization) {
+ if (screen->info.have_EXT_line_rasterization &&
+ !state->shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) {
rast_line_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT;
rast_line_state.pNext = rast_state.pNext;
rast_line_state.stippledLineEnable = VK_FALSE;
- rast_line_state.lineRasterizationMode = hw_rast_state->line_mode;
+ rast_line_state.lineRasterizationMode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
+
+ if (state->rast_prim == MESA_PRIM_LINES) {
+ const char *features[4][2] = {
+ [VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT] = {"",""},
+ [VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT] = {"rectangularLines", "stippledRectangularLines"},
+ [VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT] = {"bresenhamLines", "stippledBresenhamLines"},
+ [VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT] = {"smoothLines", "stippledSmoothLines"},
+ };
+ static bool warned[6] = {0};
+ const VkPhysicalDeviceLineRasterizationFeaturesEXT *line_feats = &screen->info.line_rast_feats;
+ /* line features can be represented as an array VkBool32[6],
+ * with the 3 base features preceding the 3 (matching) stippled features
+ */
+ const VkBool32 *feat = &line_feats->rectangularLines;
+ unsigned mode_idx = hw_rast_state->line_mode - VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
+ /* add base mode index, add 3 if stippling is enabled */
+ mode_idx += hw_rast_state->line_stipple_enable * 3;
+ if (*(feat + mode_idx))
+ rast_line_state.lineRasterizationMode = hw_rast_state->line_mode;
+ else if (hw_rast_state->line_stipple_enable &&
+ screen->driver_workarounds.no_linestipple) {
+ /* drop line stipple, we can emulate it */
+ mode_idx -= hw_rast_state->line_stipple_enable * 3;
+ if (*(feat + mode_idx))
+ rast_line_state.lineRasterizationMode = hw_rast_state->line_mode;
+            /* non-strictLines default lines are either parallelogram or Bresenham; neither matches
+             * what the GL spec describes, but in practice both fall within the GL spec's
+             * two-pixel exception for line rasterization.
+             */
+ else if ((mode_idx != 1) || screen->info.props.limits.strictLines)
+ warn_missing_feature(warned[mode_idx], features[hw_rast_state->line_mode][0]);
+ } else if ((mode_idx != 1) || screen->info.props.limits.strictLines)
+ warn_missing_feature(warned[mode_idx], features[hw_rast_state->line_mode][hw_rast_state->line_stipple_enable]);
+ }
if (hw_rast_state->line_stipple_enable) {
- dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT;
+ if (!screen->info.have_EXT_extended_dynamic_state3)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT;
rast_line_state.stippledLineEnable = VK_TRUE;
}
+
rast_state.pNext = &rast_line_state;
}
+ assert(state_count < ARRAY_SIZE(dynamicStateEnables));
VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0};
pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables;
- pipelineDynamicStateCreateInfo.dynamicStateCount = state_count;
VkGraphicsPipelineCreateInfo pci = {0};
pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+ if (zink_debug & ZINK_DEBUG_SHADERDB)
+ pci.flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
+ if (!optimize)
+ pci.flags |= VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
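+   /* prefer the dynamic feedback-loop state when available; otherwise bake feedback-loop usage into the pipeline create flags */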
+ if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT;
+ } else {
+ static bool feedback_warn = false;
+ if (state->feedback_loop) {
+ if (screen->info.have_EXT_attachment_feedback_loop_layout)
+ pci.flags |= VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+ else
+ warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout");
+ }
+ if (state->feedback_loop_zs) {
+ if (screen->info.have_EXT_attachment_feedback_loop_layout)
+ pci.flags |= VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+ else
+ warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout");
+ }
+ }
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
pci.layout = prog->base.layout;
- pci.renderPass = state->render_pass->render_pass;
- if (!screen->info.have_EXT_vertex_input_dynamic_state)
+ if (state->render_pass)
+ pci.renderPass = state->render_pass->render_pass;
+ else
+ pci.pNext = &state->rendering_info;
+ if (needs_vi)
pci.pVertexInputState = &vertex_input_state;
pci.pInputAssemblyState = &primitive_state;
pci.pRasterizationState = &rast_state;
@@ -240,29 +369,39 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
pci.pViewportState = &viewport_state;
pci.pDepthStencilState = &depth_stencil_state;
pci.pDynamicState = &pipelineDynamicStateCreateInfo;
+ pipelineDynamicStateCreateInfo.dynamicStateCount = state_count;
VkPipelineTessellationStateCreateInfo tci = {0};
VkPipelineTessellationDomainOriginStateCreateInfo tdci = {0};
- if (prog->shaders[PIPE_SHADER_TESS_CTRL] && prog->shaders[PIPE_SHADER_TESS_EVAL]) {
+ unsigned tess_bits = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
+ if ((prog->stages_present & tess_bits) == tess_bits) {
tci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
- tci.patchControlPoints = state->vertices_per_patch + 1;
+ tci.patchControlPoints = state->dyn_state2.vertices_per_patch;
pci.pTessellationState = &tci;
tci.pNext = &tdci;
tdci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO;
tdci.domainOrigin = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
}
- VkPipelineShaderStageCreateInfo shader_stages[ZINK_SHADER_COUNT];
+ VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT];
+ VkShaderModuleCreateInfo smci[ZINK_GFX_SHADER_COUNT] = {0};
uint32_t num_stages = 0;
- for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
- if (!prog->modules[i])
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
+ if (!(prog->stages_present & BITFIELD_BIT(i)))
continue;
VkPipelineShaderStageCreateInfo stage = {0};
stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage.stage = zink_shader_stage(i);
- stage.module = prog->modules[i]->shader;
+ stage.stage = mesa_to_vk_shader_stage(i);
stage.pName = "main";
+ if (objs[i].mod) {
+ stage.module = objs[i].mod;
+ } else {
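+         /* no precompiled VkShaderModule: chain the SPIR-V through a VkShaderModuleCreateInfo on the stage instead */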
+ smci[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+ stage.pNext = &smci[i];
+ smci[i].codeSize = objs[i].spirv->num_words * sizeof(uint32_t);
+ smci[i].pCode = objs[i].spirv->words;
+ }
shader_stages[num_stages++] = stage;
}
assert(num_stages > 0);
@@ -270,13 +409,39 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
pci.pStages = shader_stages;
pci.stageCount = num_stages;
- VkPipeline pipeline;
- if (vkCreateGraphicsPipelines(screen->dev, prog->base.pipeline_cache, 1, &pci,
- NULL, &pipeline) != VK_SUCCESS) {
- debug_printf("vkCreateGraphicsPipelines failed\n");
- return VK_NULL_HANDLE;
+ VkGraphicsShaderGroupCreateInfoNV gci = {
+ VK_STRUCTURE_TYPE_GRAPHICS_SHADER_GROUP_CREATE_INFO_NV,
+ NULL,
+ pci.stageCount,
+ pci.pStages,
+ pci.pVertexInputState,
+ pci.pTessellationState
+ };
+ VkGraphicsPipelineShaderGroupsCreateInfoNV dgci = {
+ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV,
+ pci.pNext,
+ 1,
+ &gci,
+ dgc ? util_dynarray_num_elements(dgc, VkPipeline) : 0,
+ dgc ? dgc->data : NULL
+ };
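+   /* ZINK_DEBUG_DGC: create the pipeline as an indirectly bindable NV shader group for device-generated commands */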
+ if (zink_debug & ZINK_DEBUG_DGC) {
+ pci.flags |= VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV;
+ pci.pNext = &dgci;
}
+ VkPipeline pipeline;
+ u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
+ VkResult result;
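+   /* VRAM_ALLOC_LOOP retries the call if the driver reports device-memory exhaustion */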
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline),
+ u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
+ }
+ );
+
return pipeline;
}
@@ -286,38 +451,533 @@ zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_pro
VkComputePipelineCreateInfo pci = {0};
pci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
pci.layout = comp->base.layout;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
VkPipelineShaderStageCreateInfo stage = {0};
stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
- stage.module = comp->module->shader;
+ stage.module = comp->curr->obj.mod;
stage.pName = "main";
VkSpecializationInfo sinfo = {0};
- VkSpecializationMapEntry me[3];
- if (state->use_local_size) {
- stage.pSpecializationInfo = &sinfo;
- sinfo.mapEntryCount = 3;
- sinfo.pMapEntries = &me[0];
- sinfo.dataSize = sizeof(state->local_size);
- sinfo.pData = &state->local_size[0];
- uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
- for (int i = 0; i < 3; i++) {
+ VkSpecializationMapEntry me[4];
+ uint32_t data[4];
+ if (state) {
+ int i = 0;
+
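+      /* specialization constants: workgroup size (when not compiled into the shader) and variable shared memory size */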
+ if (comp->use_local_size) {
+ sinfo.mapEntryCount += 3;
+ sinfo.dataSize += sizeof(state->local_size);
+
+ uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
+ for (int l = 0; l < 3; l++, i++) {
+ data[i] = state->local_size[l];
+ me[i].size = sizeof(uint32_t);
+ me[i].constantID = ids[l];
+ me[i].offset = i * sizeof(uint32_t);
+ }
+ }
+
+ if (comp->has_variable_shared_mem) {
+ sinfo.mapEntryCount += 1;
+ sinfo.dataSize += sizeof(uint32_t);
+ data[i] = state->variable_shared_mem;
me[i].size = sizeof(uint32_t);
- me[i].constantID = ids[i];
+ me[i].constantID = ZINK_VARIABLE_SHARED_MEM;
me[i].offset = i * sizeof(uint32_t);
+ i++;
}
+
+ if (sinfo.dataSize) {
+ stage.pSpecializationInfo = &sinfo;
+ sinfo.pData = data;
+ sinfo.pMapEntries = me;
+ }
+
+ assert(i <= ARRAY_SIZE(data));
+ STATIC_ASSERT(ARRAY_SIZE(data) == ARRAY_SIZE(me));
}
pci.stage = stage;
VkPipeline pipeline;
- if (vkCreateComputePipelines(screen->dev, comp->base.pipeline_cache, 1, &pci,
- NULL, &pipeline) != VK_SUCCESS) {
- debug_printf("vkCreateComputePipelines failed\n");
- return VK_NULL_HANDLE;
+ VkResult result;
+ u_rwlock_wrlock(&comp->base.pipeline_cache_lock);
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, 1, &pci, NULL, &pipeline),
+ u_rwlock_wrunlock(&comp->base.pipeline_cache_lock);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
+ }
+ );
+
+ return pipeline;
+}
+
+VkPipeline
+zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipeline_state *state)
+{
+ VkGraphicsPipelineLibraryCreateInfoEXT gplci = {
+ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
+ &state->rendering_info,
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT,
+ };
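+   /* fragment output interface library: only blend, multisample, and attachment format state is baked in here */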
+
+ VkPipelineColorBlendStateCreateInfo blend_state = {0};
+ blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
+ if (state->rast_attachment_order)
+ blend_state.flags |= VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_EXT;
+
+ VkPipelineMultisampleStateCreateInfo ms_state = {0};
+ ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
+ if (state->force_persample_interp) {
+ ms_state.sampleShadingEnable = VK_TRUE;
+ ms_state.minSampleShading = 1.0;
+ } else if (state->min_samples > 0) {
+ ms_state.sampleShadingEnable = VK_TRUE;
+      ms_state.minSampleShading = (float)(state->min_samples + 1) / (state->rast_samples + 1);
+ }
+
+ VkDynamicState dynamicStateEnables[30] = {
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ };
+ unsigned state_count = 1;
+ if (screen->info.have_EXT_extended_dynamic_state) {
+ if (state->sample_locations_enabled)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT;
+ }
+ if (screen->info.have_EXT_color_write_enable)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT;
+
+ if (screen->have_full_ds3) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SAMPLE_MASK_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT;
+ if (state->blend_state) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT;
+ if (screen->info.feats.features.alphaToOne)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT;
+ if (state->rendering_info.colorAttachmentCount) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT;
+ }
+ }
+ } else {
+ if (state->blend_state) {
+ blend_state.pAttachments = state->blend_state->attachments;
+ blend_state.attachmentCount = state->rendering_info.colorAttachmentCount;
+ blend_state.logicOpEnable = state->blend_state->logicop_enable;
+ blend_state.logicOp = state->blend_state->logicop_func;
+
+ ms_state.alphaToCoverageEnable = state->blend_state->alpha_to_coverage;
+ if (state->blend_state->alpha_to_one && !screen->info.feats.features.alphaToOne) {
+ static bool warned = false;
+ warn_missing_feature(warned, "alphaToOne");
+ }
+ ms_state.alphaToOneEnable = state->blend_state->alpha_to_one;
+ }
+ ms_state.rasterizationSamples = state->rast_samples + 1;
+ /* "If pSampleMask is NULL, it is treated as if the mask has all bits set to 1."
+ * - Chapter 27. Rasterization
+ *
+       * Thus it never makes sense to leave this NULL: gallium always provides valid data
+       * here as long as sample_mask is initialized at context creation.
+ */
+ ms_state.pSampleMask = &state->sample_mask;
+ }
+ assert(state_count < ARRAY_SIZE(dynamicStateEnables));
+
+ VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0};
+ pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+ pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables;
+
+ VkGraphicsPipelineCreateInfo pci = {0};
+ pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+ pci.pNext = &gplci;
+ pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
+ if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) {
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT;
+ } else {
+ static bool feedback_warn = false;
+ if (state->feedback_loop) {
+ if (screen->info.have_EXT_attachment_feedback_loop_layout)
+ pci.flags |= VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+ else
+ warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout");
+ }
+ if (state->feedback_loop_zs) {
+ if (screen->info.have_EXT_attachment_feedback_loop_layout)
+ pci.flags |= VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+ else
+ warn_missing_feature(feedback_warn, "EXT_attachment_feedback_loop_layout");
+ }
+ }
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
+ pipelineDynamicStateCreateInfo.dynamicStateCount = state_count;
+ if (!screen->have_full_ds3)
+ pci.pColorBlendState = &blend_state;
+ pci.pMultisampleState = &ms_state;
+ pci.pDynamicState = &pipelineDynamicStateCreateInfo;
+
+ VkPipeline pipeline;
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
+ }
+ );
+
+ return pipeline;
+}
+
+VkPipeline
+zink_create_gfx_pipeline_input(struct zink_screen *screen,
+ struct zink_gfx_pipeline_state *state,
+ const uint8_t *binding_map,
+ VkPrimitiveTopology primitive_topology)
+{
+ VkGraphicsPipelineLibraryCreateInfoEXT gplci = {
+ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
+ NULL,
+ VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT
+ };
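+   /* vertex input interface library: only vertex input and input assembly state is baked in here */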
+
+ VkPipelineVertexInputStateCreateInfo vertex_input_state;
+ memset(&vertex_input_state, 0, sizeof(vertex_input_state));
+ vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
+ if (!screen->info.have_EXT_vertex_input_dynamic_state || !state->uses_dynamic_stride) {
+ vertex_input_state.pVertexBindingDescriptions = state->element_state->b.bindings;
+ vertex_input_state.vertexBindingDescriptionCount = state->element_state->num_bindings;
+ vertex_input_state.pVertexAttributeDescriptions = state->element_state->attribs;
+ vertex_input_state.vertexAttributeDescriptionCount = state->element_state->num_attribs;
+ if (!state->uses_dynamic_stride) {
+ for (int i = 0; i < state->element_state->num_bindings; ++i) {
+ const unsigned buffer_id = binding_map[i];
+ VkVertexInputBindingDescription *binding = &state->element_state->b.bindings[i];
+ binding->stride = state->vertex_strides[buffer_id];
+ }
+ }
+ }
+
+ VkPipelineVertexInputDivisorStateCreateInfoEXT vdiv_state;
+ if (!screen->info.have_EXT_vertex_input_dynamic_state && state->element_state->b.divisors_present) {
+ memset(&vdiv_state, 0, sizeof(vdiv_state));
+ vertex_input_state.pNext = &vdiv_state;
+ vdiv_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
+ vdiv_state.vertexBindingDivisorCount = state->element_state->b.divisors_present;
+ vdiv_state.pVertexBindingDivisors = state->element_state->b.divisors;
+ }
+
+ VkPipelineInputAssemblyStateCreateInfo primitive_state = {0};
+ primitive_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
+ primitive_state.topology = primitive_topology;
+ assert(screen->info.have_EXT_extended_dynamic_state2);
+
+ VkDynamicState dynamicStateEnables[30];
+ unsigned state_count = 0;
+ if (screen->info.have_EXT_vertex_input_dynamic_state)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_EXT;
+ else if (state->uses_dynamic_stride && state->element_state->num_attribs)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE;
+ assert(state_count < ARRAY_SIZE(dynamicStateEnables));
+
+ VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0};
+ pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+ pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables;
+ pipelineDynamicStateCreateInfo.dynamicStateCount = state_count;
+
+ VkGraphicsPipelineCreateInfo pci = {0};
+ pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+ pci.pNext = &gplci;
+ pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
+ pci.pVertexInputState = &vertex_input_state;
+ pci.pInputAssemblyState = &primitive_state;
+ pci.pDynamicState = &pipelineDynamicStateCreateInfo;
+
+ VkPipeline pipeline;
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline),
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
+ }
+ );
+
+ return pipeline;
+}
+
+static VkPipeline
+create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_object *objs, unsigned stage_mask, VkPipelineLayout layout, VkPipelineCache pipeline_cache)
+{
+ assert(screen->info.have_EXT_extended_dynamic_state && screen->info.have_EXT_extended_dynamic_state2);
+ VkPipelineRenderingCreateInfo rendering_info;
+ rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO;
+ rendering_info.pNext = NULL;
+ rendering_info.viewMask = 0;
+ VkGraphicsPipelineLibraryCreateInfoEXT gplci = {
+ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
+ &rendering_info,
+ 0
+ };
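+   /* a shader-stage library can contain the pre-rasterization stages, the fragment stage, or both */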
+ if (stage_mask & BITFIELD_BIT(MESA_SHADER_VERTEX))
+ gplci.flags |= VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT;
+ if (stage_mask & BITFIELD_BIT(MESA_SHADER_FRAGMENT))
+ gplci.flags |= VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
+
+ VkPipelineViewportStateCreateInfo viewport_state = {0};
+ viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
+ viewport_state.viewportCount = 0;
+ viewport_state.pViewports = NULL;
+ viewport_state.scissorCount = 0;
+ viewport_state.pScissors = NULL;
+
+ VkPipelineRasterizationStateCreateInfo rast_state = {0};
+ rast_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
+ rast_state.depthBiasEnable = VK_TRUE;
+
+ VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {0};
+ depth_stencil_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
+
+ VkDynamicState dynamicStateEnables[64] = {
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ };
+ unsigned state_count = 3;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_COMPARE_OP;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_OP;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_CULL_MODE;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE;
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT;
+
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_POLYGON_MODE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT;
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT;
+ if (screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT;
+ if (!screen->driver_workarounds.no_linestipple)
+ dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_LINE_STIPPLE_EXT;
+ assert(state_count < ARRAY_SIZE(dynamicStateEnables));
+
+ VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo = {0};
+ pipelineDynamicStateCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+ pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables;
+ pipelineDynamicStateCreateInfo.dynamicStateCount = state_count;
+
+ VkGraphicsPipelineCreateInfo pci = {0};
+ pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+ pci.pNext = &gplci;
+ pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
+ pci.layout = layout;
+ pci.pRasterizationState = &rast_state;
+ pci.pViewportState = &viewport_state;
+ pci.pDepthStencilState = &depth_stencil_state;
+ pci.pDynamicState = &pipelineDynamicStateCreateInfo;
+
+ VkPipelineTessellationStateCreateInfo tci = {0};
+ VkPipelineTessellationDomainOriginStateCreateInfo tdci = {0};
+ unsigned tess_bits = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
+ if ((stage_mask & tess_bits) == tess_bits) {
+ tci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
+ //this is a wild guess; pray for extendedDynamicState2PatchControlPoints
+ if (!screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) {
+ static bool warned = false;
+ warn_missing_feature(warned, "extendedDynamicState2PatchControlPoints");
+ }
+ tci.patchControlPoints = 32;
+ pci.pTessellationState = &tci;
+ tci.pNext = &tdci;
+ tdci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO;
+ tdci.domainOrigin = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
+ }
+
+ VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT];
+ uint32_t num_stages = 0;
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
+ if (!(stage_mask & BITFIELD_BIT(i)))
+ continue;
+
+ VkPipelineShaderStageCreateInfo stage = {0};
+ stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ stage.stage = mesa_to_vk_shader_stage(i);
+ stage.module = objs[i].mod;
+ stage.pName = "main";
+ shader_stages[num_stages++] = stage;
}
- zink_screen_update_pipeline_cache(screen, &comp->base);
+ assert(num_stages > 0);
+
+ pci.pStages = shader_stages;
+ pci.stageCount = num_stages;
+   /* Only keep LTO information for full pipeline libs. Separable shaders will only ever be
+    * used with fast linking, and to optimize them a new pipeline lib is created with full
+    * link-time information for the full set of shader stages (rather than linking in these
+    * single-stage libs).
+    */
+ if (num_stages > 1)
+ pci.flags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
+
+ VkPipeline pipeline;
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline),
+ if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
+ }
+ );
return pipeline;
}
+
+VkPipeline
+zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_gfx_program *prog)
+{
+ u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
+ VkPipeline pipeline = create_gfx_pipeline_library(screen, prog->objs, prog->stages_present, prog->base.layout, prog->base.pipeline_cache);
+ u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
+ return pipeline;
+}
+
+VkPipeline
+zink_create_gfx_pipeline_separate(struct zink_screen *screen, struct zink_shader_object *objs, VkPipelineLayout layout, gl_shader_stage stage)
+{
+ return create_gfx_pipeline_library(screen, objs, BITFIELD_BIT(stage), layout, VK_NULL_HANDLE);
+}
+
+VkPipeline
+zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_program *prog, VkPipeline input, VkPipeline *library, unsigned libcount, VkPipeline output, bool optimized, bool testonly)
+{
+ VkPipeline libraries[4];
+ VkPipelineLibraryCreateInfoKHR libstate = {0};
+ libstate.sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR;
+ if (input)
+ libraries[libstate.libraryCount++] = input;
+ for (unsigned i = 0; i < libcount; i++)
+ libraries[libstate.libraryCount++] = library[i];
+ if (output)
+ libraries[libstate.libraryCount++] = output;
+ libstate.pLibraries = libraries;
+
+ VkGraphicsPipelineCreateInfo pci = {0};
+ pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+ pci.layout = prog->base.layout;
+ if (optimized)
+ pci.flags = VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT;
+ else
+ pci.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+ if (testonly)
+ pci.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
+ pci.pNext = &libstate;
+
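+   /* with no input/output libs this produces another (shaders-only) library rather than an executable pipeline */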
+ if (!input && !output)
+ pci.flags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
+
+ VkPipeline pipeline;
+ u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
+ VkResult result;
+ VRAM_ALLOC_LOOP(result,
+ VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline),
+ u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
+ if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
+ return VK_NULL_HANDLE;
+ }
+ );
+
+ return pipeline;
+}
+
+
+/* vertex input pipeline library states with dynamic vertex input: only the topology matters */
+struct zink_gfx_input_key *
+zink_find_or_create_input_dynamic(struct zink_context *ctx, VkPrimitiveTopology vkmode)
+{
+ uint32_t hash = hash_gfx_input_dynamic(&ctx->gfx_pipeline_state.input);
+ struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_inputs, hash, &ctx->gfx_pipeline_state.input);
+ if (!he) {
+ struct zink_gfx_input_key *ikey = rzalloc(ctx, struct zink_gfx_input_key);
+ ikey->idx = ctx->gfx_pipeline_state.idx;
+ ikey->pipeline = zink_create_gfx_pipeline_input(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state, NULL, vkmode);
+ he = _mesa_set_add_pre_hashed(&ctx->gfx_inputs, hash, ikey);
+ }
+ return (struct zink_gfx_input_key *)he->key;
+}
+
+/* vertex input pipeline library states without dynamic vertex input: everything is hashed */
+struct zink_gfx_input_key *
+zink_find_or_create_input(struct zink_context *ctx, VkPrimitiveTopology vkmode)
+{
+ uint32_t hash = hash_gfx_input(&ctx->gfx_pipeline_state.input);
+ struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_inputs, hash, &ctx->gfx_pipeline_state.input);
+ if (!he) {
+ struct zink_gfx_input_key *ikey = rzalloc(ctx, struct zink_gfx_input_key);
+ if (ctx->gfx_pipeline_state.uses_dynamic_stride) {
+ memcpy(ikey, &ctx->gfx_pipeline_state.input, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
+ ikey->element_state = ctx->gfx_pipeline_state.element_state;
+ } else {
+ memcpy(ikey, &ctx->gfx_pipeline_state.input, offsetof(struct zink_gfx_input_key, pipeline));
+ }
+ ikey->pipeline = zink_create_gfx_pipeline_input(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state, ikey->element_state->binding_map, vkmode);
+ he = _mesa_set_add_pre_hashed(&ctx->gfx_inputs, hash, ikey);
+ }
+ return (struct zink_gfx_input_key*)he->key;
+}
+
+/* fragment output pipeline library states with dynamic state3 */
+struct zink_gfx_output_key *
+zink_find_or_create_output_ds3(struct zink_context *ctx)
+{
+ uint32_t hash = hash_gfx_output_ds3(&ctx->gfx_pipeline_state);
+ struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_outputs, hash, &ctx->gfx_pipeline_state);
+ if (!he) {
+ struct zink_gfx_output_key *okey = rzalloc(ctx, struct zink_gfx_output_key);
+ memcpy(okey, &ctx->gfx_pipeline_state, sizeof(uint32_t));
+ okey->pipeline = zink_create_gfx_pipeline_output(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state);
+ he = _mesa_set_add_pre_hashed(&ctx->gfx_outputs, hash, okey);
+ }
+ return (struct zink_gfx_output_key*)he->key;
+}
+
+/* fragment output pipeline library states without dynamic state3 */
+struct zink_gfx_output_key *
+zink_find_or_create_output(struct zink_context *ctx)
+{
+ uint32_t hash = hash_gfx_output(&ctx->gfx_pipeline_state);
+ struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_outputs, hash, &ctx->gfx_pipeline_state);
+ if (!he) {
+ struct zink_gfx_output_key *okey = rzalloc(ctx, struct zink_gfx_output_key);
+ memcpy(okey, &ctx->gfx_pipeline_state, offsetof(struct zink_gfx_output_key, pipeline));
+ okey->pipeline = zink_create_gfx_pipeline_output(zink_screen(ctx->base.screen), &ctx->gfx_pipeline_state);
+ he = _mesa_set_add_pre_hashed(&ctx->gfx_outputs, hash, okey);
+ }
+ return (struct zink_gfx_output_key*)he->key;
+}
diff --git a/src/gallium/drivers/zink/zink_pipeline.h b/src/gallium/drivers/zink/zink_pipeline.h
index 4acc6c44285..355670e9d46 100644
--- a/src/gallium/drivers/zink/zink_pipeline.h
+++ b/src/gallium/drivers/zink/zink_pipeline.h
@@ -24,87 +24,49 @@
#ifndef ZINK_PIPELINE_H
#define ZINK_PIPELINE_H
-#include <vulkan/vulkan.h>
-#include "pipe/p_state.h"
-#include "zink_shader_keys.h"
-#include "zink_state.h"
+#include "zink_types.h"
-struct zink_blend_state;
-struct zink_depth_stencil_alpha_state;
-struct zink_gfx_program;
-struct zink_compute_program;
-struct zink_rasterizer_state;
-struct zink_render_pass;
-struct zink_screen;
-struct zink_vertex_elements_state;
-
-struct zink_gfx_pipeline_state {
- uint32_t rast_state : ZINK_RAST_HW_STATE_SIZE; //zink_rasterizer_hw_state
- uint32_t vertices_per_patch:5;
- uint32_t rast_samples:7;
- uint32_t void_alpha_attachments:PIPE_MAX_COLOR_BUFS;
- VkSampleMask sample_mask;
-
- unsigned rp_state;
- uint32_t blend_id;
-
- /* Pre-hashed value for table lookup, invalid when zero.
- * Members after this point are not included in pipeline state hash key */
- uint32_t hash;
- bool dirty;
-
- struct {
- struct zink_depth_stencil_alpha_hw_state *depth_stencil_alpha_state; //non-dynamic state
- VkFrontFace front_face;
- unsigned num_viewports;
- } dyn_state1;
-
- bool primitive_restart; //dynamic state2
-
- VkShaderModule modules[PIPE_SHADER_TYPES - 1];
- bool modules_changed;
-
- struct zink_vertex_elements_hw_state *element_state;
- uint32_t vertex_hash;
-
- uint32_t final_hash;
-
- uint32_t vertex_buffers_enabled_mask;
- uint32_t vertex_strides[PIPE_MAX_ATTRIBS];
- bool sample_locations_enabled;
- bool have_EXT_extended_dynamic_state;
- bool have_EXT_extended_dynamic_state2;
- uint8_t has_points; //either gs outputs points or prim type is points
- struct {
- struct zink_shader_key key[5];
- struct zink_shader_key last_vertex;
- } shader_keys;
- struct zink_blend_state *blend_state;
- struct zink_render_pass *render_pass;
- VkPipeline pipeline;
- uint8_t patch_vertices;
- unsigned idx : 8;
- enum pipe_prim_type gfx_prim_mode; //pending mode
-};
-
-struct zink_compute_pipeline_state {
- /* Pre-hashed value for table lookup, invalid when zero.
- * Members after this point are not included in pipeline state hash key */
- uint32_t hash;
- bool dirty;
- bool use_local_size;
- uint32_t local_size[3];
+#ifdef __cplusplus
+extern "C" {
+#endif
- VkPipeline pipeline;
-};
+struct zink_gfx_output_key *
+zink_find_or_create_output(struct zink_context *ctx);
+struct zink_gfx_output_key *
+zink_find_or_create_output_ds3(struct zink_context *ctx);
+struct zink_gfx_input_key *
+zink_find_or_create_input(struct zink_context *ctx, VkPrimitiveTopology vkmode);
+struct zink_gfx_input_key *
+zink_find_or_create_input_dynamic(struct zink_context *ctx, VkPrimitiveTopology vkmode);
VkPipeline
zink_create_gfx_pipeline(struct zink_screen *screen,
struct zink_gfx_program *prog,
+ struct zink_shader_object *objs,
struct zink_gfx_pipeline_state *state,
- VkPrimitiveTopology primitive_topology);
+ const uint8_t *binding_map,
+ VkPrimitiveTopology primitive_topology,
+ bool optimize,
+ struct util_dynarray *dgc);
VkPipeline
zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_program *comp, struct zink_compute_pipeline_state *state);
+
+VkPipeline
+zink_create_gfx_pipeline_input(struct zink_screen *screen,
+ struct zink_gfx_pipeline_state *state,
+ const uint8_t *binding_map,
+ VkPrimitiveTopology primitive_topology);
+VkPipeline
+zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_gfx_program *prog);
+VkPipeline
+zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipeline_state *state);
+VkPipeline
+zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_program *prog, VkPipeline input, VkPipeline *library, unsigned libcount, VkPipeline output, bool optimized, bool testonly);
+VkPipeline
+zink_create_gfx_pipeline_separate(struct zink_screen *screen, struct zink_shader_object *objs, VkPipelineLayout layout, gl_shader_stage stage);
+#ifdef __cplusplus
+}
+#endif
#endif
diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c
index 0aedff77d4e..4034713cbde 100644
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -27,32 +27,28 @@
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_helpers.h"
+#include "zink_pipeline.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_inlines.h"
-#include "util/hash_table.h"
-#include "util/set.h"
+#include "util/memstream.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
-#include "tgsi/tgsi_from_mesa.h"
+#include "nir_serialize.h"
+#include "nir/nir_draw_helpers.h"
/* for pipeline cache */
#define XXH_INLINE_ALL
#include "util/xxhash.h"
-struct gfx_pipeline_cache_entry {
- struct zink_gfx_pipeline_state state;
- VkPipeline pipeline;
-};
-
-struct compute_pipeline_cache_entry {
- struct zink_compute_pipeline_state state;
- VkPipeline pipeline;
-};
+static void
+precompile_job(void *data, void *gdata, int thread_index);
+struct zink_gfx_program *
+create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);
void
debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
@@ -66,298 +62,1099 @@ debug_describe_zink_compute_program(char *buf, const struct zink_compute_program
sprintf(buf, "zink_compute_program");
}
-static bool
-shader_key_matches(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
+ALWAYS_INLINE static bool
+shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
{
- if (zm->key_size != key->size || zm->num_uniforms != num_uniforms)
+ if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
+ zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
return false;
- return !memcmp(zm->key, key, zm->key_size) &&
- (!num_uniforms || !memcmp(zm->key + zm->key_size, key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
+ const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
+ return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
+ (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
+ key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
+}
+
+ALWAYS_INLINE static bool
+shader_key_matches(const struct zink_shader_module *zm,
+ const struct zink_shader_key *key, unsigned num_uniforms,
+ bool has_inline, bool has_nonseamless)
+{
+ const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
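+   /* the nonseamless cube mask is only appended to the module key when the driver lacks the nonseamless extension */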
+ if (has_inline) {
+ if (zm->num_uniforms != num_uniforms ||
+ (num_uniforms &&
+ memcmp(zm->key + zm->key_size + nonseamless_size,
+ key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
+ return false;
+ }
+ if (!has_nonseamless) {
+ if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
+ (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
+ return false;
+ }
+ if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
+ return false;
+ return !memcmp(zm->key, key, zm->key_size);
}
static uint32_t
shader_module_hash(const struct zink_shader_module *zm)
{
- unsigned key_size = zm->key_size + zm->num_uniforms * sizeof(uint32_t);
+ const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
+ unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
return _mesa_hash_data(zm->key, key_size);
}
-static struct zink_shader_module *
-get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
- struct zink_shader *zs, struct zink_gfx_program *prog,
- struct zink_gfx_pipeline_state *state)
+ALWAYS_INLINE static void
+gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
+ struct zink_shader *zs, struct zink_gfx_program *prog,
+ struct zink_gfx_pipeline_state *state,
+ bool has_inline, //is inlining enabled?
+ bool has_nonseamless, //is nonseamless ext present?
+ unsigned *inline_size, unsigned *nonseamless_size)
{
- gl_shader_stage stage = zs->nir->info.stage;
- enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage);
- VkShaderModule mod;
- struct zink_shader_module *zm = NULL;
- unsigned base_size = 0;
- struct zink_shader_key *key = &state->shader_keys.key[pstage];
-
- if (ctx && zs->nir->info.num_inlinable_uniforms &&
- ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(pstage)) {
- if (prog->inlined_variant_count[pstage] < ZINK_MAX_INLINED_VARIANTS)
- base_size = zs->nir->info.num_inlinable_uniforms;
+ gl_shader_stage stage = zs->info.stage;
+ struct zink_shader_key *key = &state->shader_keys.key[stage];
+ if (has_inline && ctx && zs->info.num_inlinable_uniforms &&
+ ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
+ if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
+ *inline_size = zs->info.num_inlinable_uniforms;
else
key->inline_uniforms = false;
}
+ if (!has_nonseamless && key->base.nonseamless_cube_mask)
+ *nonseamless_size = sizeof(uint32_t);
+}
- struct zink_shader_module *iter, *next;
- LIST_FOR_EACH_ENTRY_SAFE(iter, next, &prog->shader_cache[pstage][!!base_size], list) {
- if (!shader_key_matches(iter, key, base_size))
- continue;
- list_delinit(&iter->list);
- zm = iter;
- break;
- }
-
+ALWAYS_INLINE static struct zink_shader_module *
+create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
+ struct zink_shader *zs, struct zink_gfx_program *prog,
+ gl_shader_stage stage,
+ struct zink_gfx_pipeline_state *state,
+ unsigned inline_size, unsigned nonseamless_size,
+ bool has_inline, //is inlining enabled?
+ bool has_nonseamless) //is nonseamless ext present?
+{
+ struct zink_shader_module *zm;
+ const struct zink_shader_key *key = &state->shader_keys.key[stage];
+ /* non-generated tcs won't use the shader key */
+ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+ const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
+ (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
+ zm = malloc(sizeof(struct zink_shader_module) + key->size +
+ (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
+ (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
if (!zm) {
- zm = malloc(sizeof(struct zink_shader_module) + key->size + base_size * sizeof(uint32_t));
- if (!zm) {
- return NULL;
- }
- mod = zink_shader_compile(screen, zs, prog->nir[stage], key);
- if (!mod) {
- FREE(zm);
- return NULL;
- }
- zm->shader = mod;
- list_inithead(&zm->list);
- zm->num_uniforms = base_size;
+ return NULL;
+ }
+ unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
+ if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
+ assert(ctx); //TODO async
+ zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
+ } else {
+ zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
+ }
+ if (!zm->obj.mod) {
+ FREE(zm);
+ return NULL;
+ }
+ zm->shobj = prog->base.uses_shobj;
+ zm->num_uniforms = inline_size;
+ if (!is_nongenerated_tcs) {
zm->key_size = key->size;
memcpy(zm->key, key, key->size);
- if (base_size)
- memcpy(zm->key + key->size, &key->base, base_size * sizeof(uint32_t));
+ } else {
+ zm->key_size = 0;
+ memset(zm->key, 0, key->size);
+ }
+ if (!has_nonseamless && nonseamless_size) {
+ /* nonseamless mask gets added to base key if it exists */
+ memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
+ }
+ zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
+ zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
+ if (inline_size)
+ memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
+ if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
+ zm->hash = patch_vertices;
+ else
zm->hash = shader_module_hash(zm);
- zm->default_variant = !base_size && list_is_empty(&prog->shader_cache[pstage][0]);
- if (base_size)
- prog->inlined_variant_count[pstage]++;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
+ zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
+ }
+ zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
+ if (inline_size)
+ prog->inlined_variant_count[stage]++;
+ util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
+ return zm;
+}
+
+ALWAYS_INLINE static struct zink_shader_module *
+get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
+ struct zink_shader *zs, struct zink_gfx_program *prog,
+ gl_shader_stage stage,
+ struct zink_gfx_pipeline_state *state,
+ unsigned inline_size, unsigned nonseamless_size,
+ bool has_inline, //is inlining enabled?
+ bool has_nonseamless) //is nonseamless ext present?
+{
+ const struct zink_shader_key *key = &state->shader_keys.key[stage];
+ /* non-generated tcs won't use the shader key */
+ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+ const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
+ (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));
+
+ struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
+ unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
+ struct zink_shader_module **pzm = shader_cache->data;
+ for (unsigned i = 0; i < count; i++) {
+ struct zink_shader_module *iter = pzm[i];
+ if (is_nongenerated_tcs) {
+ if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
+ continue;
+ } else {
+ if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
+ continue;
+ if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
+ continue;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ /* shadow swizzle data needs a manual compare since it's so fat */
+ if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
+ &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
+ continue;
+ }
+ }
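+      /* move the hit to slot 0 so the most recently used variant is found first next time */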
+ if (i > 0) {
+ struct zink_shader_module *zero = pzm[0];
+ pzm[0] = iter;
+ pzm[i] = zero;
+ }
+ return iter;
+ }
+
+ return NULL;
+}
+
+ALWAYS_INLINE static struct zink_shader_module *
+create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
+ struct zink_shader *zs, struct zink_gfx_program *prog,
+ gl_shader_stage stage,
+ struct zink_gfx_pipeline_state *state)
+{
+ struct zink_shader_module *zm;
+ uint16_t *key;
+ unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
+ bool shadow_needs_shader_swizzle = false;
+ if (zs == prog->last_vertex_stage) {
+ key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
+ } else if (stage == MESA_SHADER_FRAGMENT) {
+ key = (uint16_t*)&state->shader_keys_optimal.key.fs;
+ shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
+ } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
+ key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
+ } else {
+ key = NULL;
+ }
+ size_t key_size = sizeof(uint16_t);
+ zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
+ if (!zm) {
+ return NULL;
+ }
+ if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
+ assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints);
+ unsigned patch_vertices = 3;
+ if (ctx) {
+ struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
+ patch_vertices = tcs->patch_vertices;
+ }
+ zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
+ } else {
+ zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
+ (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
}
- list_add(&zm->list, &prog->shader_cache[pstage][!!base_size]);
+ if (!zm->obj.mod) {
+ FREE(zm);
+ return NULL;
+ }
+ zm->shobj = prog->base.uses_shobj;
+ /* non-generated tcs won't use the shader key */
+ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+ if (key && !is_nongenerated_tcs) {
+ zm->key_size = key_size;
+ uint16_t *data = (uint16_t*)zm->key;
+ /* sanitize actual key bits */
+ *data = (*key) & mask;
+ if (unlikely(shadow_needs_shader_swizzle))
+ memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
+ }
+ zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
+ util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
return zm;
}
+ALWAYS_INLINE static struct zink_shader_module *
+get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
+ struct zink_shader *zs, struct zink_gfx_program *prog,
+ gl_shader_stage stage,
+ struct zink_gfx_pipeline_state *state)
+{
+ /* non-generated tcs won't use the shader key */
+ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+ bool shadow_needs_shader_swizzle = false;
+ uint16_t *key;
+ unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
+ if (zs == prog->last_vertex_stage) {
+ key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
+ } else if (stage == MESA_SHADER_FRAGMENT) {
+ key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
+ shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
+ } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
+ key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
+ } else {
+ key = NULL;
+ }
+ struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
+ unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
+ struct zink_shader_module **pzm = shader_cache->data;
+ for (unsigned i = 0; i < count; i++) {
+ struct zink_shader_module *iter = pzm[i];
+ if (is_nongenerated_tcs) {
+ /* always match */
+ } else if (key) {
+ uint16_t val = (*key) & mask;
+ /* no key is bigger than uint16_t */
+ if (memcmp(iter->key, &val, sizeof(uint16_t)))
+ continue;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ /* shadow swizzle data needs a manual compare since it's so fat */
+ if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
+ continue;
+ }
+ }
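+ /* descriptive note: move-to-front so the most recently used variant stays at index 0 */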
+ if (i > 0) {
+ struct zink_shader_module *zero = pzm[0];
+ pzm[0] = iter;
+ pzm[i] = zero;
+ }
+ return iter;
+ }
+
+ return NULL;
+}
+
static void
zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
{
- VKSCR(DestroyShaderModule)(screen->dev, zm->shader, NULL);
+ if (zm->shobj)
+ VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL);
+ else
+ VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL);
+ ralloc_free(zm->obj.spirv);
free(zm);
}
static void
-destroy_shader_cache(struct zink_screen *screen, struct list_head *sc)
+destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
{
- struct zink_shader_module *zm, *next;
- LIST_FOR_EACH_ENTRY_SAFE(zm, next, sc, list) {
- list_delinit(&zm->list);
+ while (util_dynarray_contains(sc, void*)) {
+ struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
zink_destroy_shader_module(screen, zm);
}
}
-static void
-update_shader_modules(struct zink_context *ctx,
+ALWAYS_INLINE static void
+update_gfx_shader_modules(struct zink_context *ctx,
struct zink_screen *screen,
struct zink_gfx_program *prog, uint32_t mask,
- struct zink_gfx_pipeline_state *state)
+ struct zink_gfx_pipeline_state *state,
+ bool has_inline, //is inlining enabled?
+ bool has_nonseamless) //is nonseamless ext present?
{
bool hash_changed = false;
bool default_variants = true;
- bool first = !prog->modules[PIPE_SHADER_VERTEX];
+ assert(prog->objs[MESA_SHADER_VERTEX].mod);
uint32_t variant_hash = prog->last_variant_hash;
- u_foreach_bit(pstage, mask) {
- assert(prog->shaders[pstage]);
- struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[pstage], prog, state);
- if (prog->modules[pstage] == zm)
+ prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!(mask & BITFIELD_BIT(i)))
continue;
- if (prog->modules[pstage])
- variant_hash ^= prog->modules[pstage]->hash;
+
+ assert(prog->shaders[i]);
+
+ unsigned inline_size = 0, nonseamless_size = 0;
+ gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
+ struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
+ inline_size, nonseamless_size, has_inline, has_nonseamless);
+ if (!zm)
+ zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
+ inline_size, nonseamless_size, has_inline, has_nonseamless);
+ state->modules[i] = zm->obj.mod;
+ if (prog->objs[i].mod == zm->obj.mod)
+ continue;
+ prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
+ variant_hash ^= prog->module_hash[i];
hash_changed = true;
default_variants &= zm->default_variant;
- prog->modules[pstage] = zm;
- variant_hash ^= prog->modules[pstage]->hash;
- state->modules[pstage] = zm->shader;
+ prog->objs[i] = zm->obj;
+ prog->objects[i] = zm->obj.obj;
+ prog->module_hash[i] = zm->hash;
+ if (has_inline) {
+ if (zm->num_uniforms)
+ prog->inline_variants |= BITFIELD_BIT(i);
+ else
+ prog->inline_variants &= ~BITFIELD_BIT(i);
+ }
+ variant_hash ^= prog->module_hash[i];
}
if (hash_changed && state) {
- if (!first && likely(state->pipeline)) //avoid on first hash
- state->final_hash ^= prog->last_variant_hash;
-
- if (default_variants && !first)
+ if (default_variants)
prog->last_variant_hash = prog->default_variant_hash;
- else {
+ else
prog->last_variant_hash = variant_hash;
- if (first) {
- p_atomic_dec(&prog->base.reference.count);
- prog->default_variant_hash = prog->last_variant_hash;
- }
- }
- state->final_hash ^= prog->last_variant_hash;
state->modules_changed = true;
}
}
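+/* descriptive note: build the initial per-stage modules for a newly created program using the
+ * current shader keys (non-optimal-keys path)
+ */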
+static void
+generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
+{
+ assert(!prog->objs[MESA_SHADER_VERTEX].mod);
+ uint32_t variant_hash = 0;
+ bool default_variants = true;
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!(prog->stages_present & BITFIELD_BIT(i)))
+ continue;
+
+ assert(prog->shaders[i]);
+
+ unsigned inline_size = 0, nonseamless_size = 0;
+ gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
+ screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
+ &inline_size, &nonseamless_size);
+ struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
+ inline_size, nonseamless_size,
+ screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
+ state->modules[i] = zm->obj.mod;
+ prog->objs[i] = zm->obj;
+ prog->objects[i] = zm->obj.obj;
+ prog->module_hash[i] = zm->hash;
+ if (zm->num_uniforms)
+ prog->inline_variants |= BITFIELD_BIT(i);
+ default_variants &= zm->default_variant;
+ variant_hash ^= prog->module_hash[i];
+ }
+
+ state->modules_changed = true;
+
+ prog->last_variant_hash = variant_hash;
+ if (default_variants)
+ prog->default_variant_hash = prog->last_variant_hash;
+}
+
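+/* descriptive note: optimal-keys counterpart of the above: create one module per present stage keyed on the current optimal key */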
+static void
+generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
+{
+ assert(!prog->objs[MESA_SHADER_VERTEX].mod);
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!(prog->stages_present & BITFIELD_BIT(i)))
+ continue;
+
+ assert(prog->shaders[i]);
+
+ struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
+ prog->objs[i] = zm->obj;
+ prog->objects[i] = zm->obj.obj;
+ }
+
+ state->modules_changed = true;
+ prog->last_variant_hash = state->optimal_key;
+}
+
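+/* descriptive note: hash/equals callbacks for the fast-link pipeline-library sets, keyed on the optimal shader key */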
static uint32_t
-hash_gfx_pipeline_state(const void *key)
+hash_pipeline_lib_generated_tcs(const void *key)
{
- const struct zink_gfx_pipeline_state *state = key;
- uint32_t hash = _mesa_hash_data(key, offsetof(struct zink_gfx_pipeline_state, hash));
- if (!state->have_EXT_extended_dynamic_state2)
- hash = XXH32(&state->primitive_restart, 1, hash);
- if (state->have_EXT_extended_dynamic_state)
- return hash;
- return XXH32(&state->dyn_state1, sizeof(state->dyn_state1), hash);
+ const struct zink_gfx_library_key *gkey = key;
+ return gkey->optimal_key;
}
+
static bool
-equals_gfx_pipeline_state(const void *a, const void *b)
+equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
- const struct zink_gfx_pipeline_state *sa = a;
- const struct zink_gfx_pipeline_state *sb = b;
- if (!sa->have_EXT_extended_dynamic_state) {
- if (sa->vertex_buffers_enabled_mask != sb->vertex_buffers_enabled_mask)
- return false;
- /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */
- uint32_t mask_a = sa->vertex_buffers_enabled_mask;
- uint32_t mask_b = sb->vertex_buffers_enabled_mask;
- while (mask_a || mask_b) {
- unsigned idx_a = u_bit_scan(&mask_a);
- unsigned idx_b = u_bit_scan(&mask_b);
- if (sa->vertex_strides[idx_a] != sb->vertex_strides[idx_b])
- return false;
- }
- if (sa->dyn_state1.front_face != sb->dyn_state1.front_face)
- return false;
- if (!!sa->dyn_state1.depth_stencil_alpha_state != !!sb->dyn_state1.depth_stencil_alpha_state ||
- (sa->dyn_state1.depth_stencil_alpha_state &&
- memcmp(sa->dyn_state1.depth_stencil_alpha_state, sb->dyn_state1.depth_stencil_alpha_state,
- sizeof(struct zink_depth_stencil_alpha_hw_state))))
- return false;
+ return !memcmp(a, b, sizeof(uint32_t));
+}
+
+static uint32_t
+hash_pipeline_lib(const void *key)
+{
+ const struct zink_gfx_library_key *gkey = key;
+ /* remove generated tcs bits */
+ return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
+}
+
+static bool
+equals_pipeline_lib(const void *a, const void *b)
+{
+ const struct zink_gfx_library_key *ak = a;
+ const struct zink_gfx_library_key *bk = b;
+ /* remove generated tcs bits */
+ uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
+ uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
+ return val_a == val_b;
+}
+
+uint32_t
+hash_gfx_input_dynamic(const void *key)
+{
+ const struct zink_gfx_input_key *ikey = key;
+ return ikey->idx;
+}
+
+static bool
+equals_gfx_input_dynamic(const void *a, const void *b)
+{
+ const struct zink_gfx_input_key *ikey_a = a;
+ const struct zink_gfx_input_key *ikey_b = b;
+ return ikey_a->idx == ikey_b->idx;
+}
+
+uint32_t
+hash_gfx_input(const void *key)
+{
+ const struct zink_gfx_input_key *ikey = key;
+ if (ikey->uses_dynamic_stride)
+ return ikey->input;
+ return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
+}
+
+static bool
+equals_gfx_input(const void *a, const void *b)
+{
+ const struct zink_gfx_input_key *ikey_a = a;
+ const struct zink_gfx_input_key *ikey_b = b;
+ if (ikey_a->uses_dynamic_stride)
+ return ikey_a->element_state == ikey_b->element_state &&
+ !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
+ return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
+}
+
+uint32_t
+hash_gfx_output_ds3(const void *key)
+{
+ const uint8_t *data = key;
+ return _mesa_hash_data(data, sizeof(uint32_t));
+}
+
+static bool
+equals_gfx_output_ds3(const void *a, const void *b)
+{
+ const uint8_t *da = a;
+ const uint8_t *db = b;
+ return !memcmp(da, db, sizeof(uint32_t));
+}
+
+uint32_t
+hash_gfx_output(const void *key)
+{
+ const uint8_t *data = key;
+ return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
+}
+
+static bool
+equals_gfx_output(const void *a, const void *b)
+{
+ const uint8_t *da = a;
+ const uint8_t *db = b;
+ return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
+}
+
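+/* descriptive note: the runtime checks below pass constant bools into the ALWAYS_INLINE module-update
+ * helpers, so each inline/nonseamless combination gets its own specialized inlined copy
+ */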
+ALWAYS_INLINE static void
+update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (screen->driconf.inline_uniforms || prog->needs_inlining)
+ update_gfx_shader_modules(ctx, screen, prog,
+ ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
+ true, has_nonseamless);
+ else
+ update_gfx_shader_modules(ctx, screen, prog,
+ ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
+ false, has_nonseamless);
+}
+
+static void
+update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (screen->info.have_EXT_non_seamless_cube_map)
+ update_gfx_program_nonseamless(ctx, prog, true);
+ else
+ update_gfx_program_nonseamless(ctx, prog, false);
+}
+
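+/* descriptive note: draw-time update: find or create the gfx program for the bound shader stages,
+ * then refresh any per-stage modules whose keys have changed
+ */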
+void
+zink_gfx_program_update(struct zink_context *ctx)
+{
+ if (ctx->last_vertex_stage_dirty) {
+ gl_shader_stage pstage = ctx->last_vertex_stage->info.stage;
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
+ memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
+ &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
+ sizeof(struct zink_vs_key_base));
+ ctx->last_vertex_stage_dirty = false;
}
- if (!sa->have_EXT_extended_dynamic_state2) {
- if (sa->primitive_restart != sb->primitive_restart)
- return false;
+ if (ctx->gfx_dirty) {
+ struct zink_gfx_program *prog = NULL;
+
+ simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
+ struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
+ const uint32_t hash = ctx->gfx_hash;
+ struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
+ /* this must be done before prog is updated */
+ if (ctx->curr_program)
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ if (entry) {
+ prog = (struct zink_gfx_program*)entry->data;
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
+ ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod;
+ }
+ /* ensure variants are always updated if keys have changed since last use */
+ ctx->dirty_gfx_stages |= prog->stages_present;
+ update_gfx_program(ctx, prog);
+ } else {
+ ctx->dirty_gfx_stages |= ctx->shader_stages;
+ prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
+ zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
+ _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
+ prog->base.removed = false;
+ generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
+ }
+ simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
+ if (prog && prog != ctx->curr_program)
+ zink_batch_reference_program(&ctx->batch, &prog->base);
+ ctx->curr_program = prog;
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ ctx->gfx_dirty = false;
+ } else if (ctx->dirty_gfx_stages) {
+ /* remove old hash */
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ update_gfx_program(ctx, ctx->curr_program);
+ /* apply new hash */
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
}
- return !memcmp(sa->modules, sb->modules, sizeof(sa->modules)) &&
- !memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash));
+ ctx->dirty_gfx_stages = 0;
+}
+
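+/* descriptive note: swap in the module matching the current optimal key for one stage; returns true if the module changed */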
+ALWAYS_INLINE static bool
+update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (screen->info.have_EXT_graphics_pipeline_library)
+ util_queue_fence_wait(&prog->base.cache_fence);
+ struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
+ if (!zm) {
+ zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
+ perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage));
+ }
+
+ bool changed = prog->objs[pstage].mod != zm->obj.mod;
+ prog->objs[pstage] = zm->obj;
+ prog->objects[pstage] = zm->obj.obj;
+ return changed;
+}
+
+static void
+update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
+{
+ const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key;
+ const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
+ if (key->vs_bits != last_prog_key->vs_bits) {
+ assert(!prog->is_separable);
+ bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage);
+ ctx->gfx_pipeline_state.modules_changed |= changed;
+ }
+ const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
+ if (key->fs_bits != last_prog_key->fs_bits ||
+ /* always recheck shadow swizzles since they aren't directly part of the key */
+ unlikely(shadow_needs_shader_swizzle)) {
+ assert(!prog->is_separable);
+ bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
+ ctx->gfx_pipeline_state.modules_changed |= changed;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
+ ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)(pzm[0]->key + sizeof(uint16_t));
+ }
+ }
+ if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
+ key->tcs_bits != last_prog_key->tcs_bits) {
+ assert(!prog->is_separable);
+ bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
+ ctx->gfx_pipeline_state.modules_changed |= changed;
+ }
+ prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
+}
+
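+/* descriptive note: swap the fully-linked program into the cache entry that previously held the fast-linked separable program */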
+static struct zink_gfx_program *
+replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_gfx_program *real = prog->full_prog ?
+ prog->full_prog :
+ /* this will be NULL with ZINK_DEBUG_NOOPT */
+ zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash);
+ entry->data = real;
+ entry->key = real->shaders;
+ real->base.removed = false;
+ zink_gfx_program_reference(screen, &prog->full_prog, NULL);
+ prog->base.removed = true;
+ return real;
}
void
-zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
+zink_gfx_program_update_optimal(struct zink_context *ctx)
{
- update_shader_modules(ctx, zink_screen(ctx->base.screen), prog, ctx->dirty_shader_stages & prog->stages_present, &ctx->gfx_pipeline_state);
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (ctx->gfx_dirty) {
+ struct zink_gfx_program *prog = NULL;
+ ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
+ struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
+ const uint32_t hash = ctx->gfx_hash;
+ simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
+ struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
+
+ if (ctx->curr_program)
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ if (entry) {
+ prog = (struct zink_gfx_program*)entry->data;
+ bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx));
+ if (prog->is_separable) {
+ /* shader variants can't be handled by separable programs: sync and compile */
+ if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)
+ util_queue_fence_wait(&prog->base.cache_fence);
+ /* If the optimized linked pipeline is done compiling, swap it into place. */
+ if (util_queue_fence_is_signalled(&prog->base.cache_fence) &&
+ /* but only if needed for ZINK_DEBUG=noopt */
+ (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) {
+ prog = replace_separable_prog(ctx, entry, prog);
+ }
+ }
+ update_gfx_program_optimal(ctx, prog);
+ } else {
+ ctx->dirty_gfx_stages |= ctx->shader_stages;
+ prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
+ prog->base.removed = false;
+ _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
+ if (!prog->is_separable) {
+ zink_screen_get_pipeline_cache(screen, &prog->base, false);
+ perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n");
+ generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
+ }
+ }
+ simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
+ if (prog && prog != ctx->curr_program)
+ zink_batch_reference_program(&ctx->batch, &prog->base);
+ ctx->curr_program = prog;
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ } else if (ctx->dirty_gfx_stages) {
+ /* remove old hash */
+ ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+
+ bool must_replace = ctx->curr_program->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx));
+ if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) {
+ struct zink_gfx_program *prog = ctx->curr_program;
+
+ util_queue_fence_wait(&prog->base.cache_fence);
+ /* shader variants can't be handled by separable programs: sync and compile */
+ perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n");
+ struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
+ const uint32_t hash = ctx->gfx_hash;
+ simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
+ struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
+ ctx->curr_program = replace_separable_prog(ctx, entry, prog);
+ simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
+ }
+ update_gfx_program_optimal(ctx, ctx->curr_program);
+ /* apply new hash */
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ }
+ ctx->dirty_gfx_stages = 0;
+ ctx->gfx_dirty = false;
+ ctx->last_vertex_stage_dirty = false;
}
-VkPipelineLayout
-zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg)
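+/* descriptive note: background job: compile the optimized pipeline (GPL-combined or monolithic) that replaces the
+ * fast-linked one; the previous pipeline is kept as gpl.unoptimized_pipeline
+ */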
+static void
+optimized_compile_job(void *data, void *gdata, int thread_index)
{
- VkPipelineLayoutCreateInfo plci = {0};
- plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+ struct zink_gfx_pipeline_cache_entry *pc_entry = data;
+ struct zink_screen *screen = gdata;
+ VkPipeline pipeline;
+ if (pc_entry->gpl.gkey)
+ pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false);
+ else
+ pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL);
+ if (pipeline) {
+ pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline;
+ pc_entry->pipeline = pipeline;
+ }
+}
- plci.pSetLayouts = pg->dsl;
- plci.setLayoutCount = pg->num_dsl;
+static void
+optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
+{
+ struct zink_gfx_pipeline_cache_entry *pc_entry = data;
+ struct zink_screen *screen = gdata;
- VkPushConstantRange pcr[2] = {0};
- if (pg->is_compute) {
- if (((struct zink_compute_program*)pg)->shader->nir->info.stage == MESA_SHADER_KERNEL) {
- pcr[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- pcr[0].offset = 0;
- pcr[0].size = sizeof(struct zink_cs_push_constant);
- plci.pushConstantRangeCount = 1;
+ struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ objs[i].mod = VK_NULL_HANDLE;
+ objs[i].spirv = pc_entry->shobjs[i].spirv;
+ }
+ pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL);
+ /* no unoptimized_pipeline dance */
+}
+
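+/* descriptive note: queue the optimized pipeline compile on the background thread; runs inline with ZINK_DEBUG=nobgc
+ * and is skipped entirely when the driver workaround disables optimized compiles
+ */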
+void
+zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (screen->driver_workarounds.disable_optimized_compile)
+ return;
+ if (zink_debug & ZINK_DEBUG_NOBGC) {
+ if (pc_entry->prog->base.uses_shobj)
+ optimized_shobj_compile_job(pc_entry, screen, 0);
+ else
+ optimized_compile_job(pc_entry, screen, 0);
+ } else {
+ util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
+ pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
+ }
+}
+
+void
+zink_program_finish(struct zink_context *ctx, struct zink_program *pg)
+{
+ util_queue_fence_wait(&pg->cache_fence);
+ if (pg->is_compute)
+ return;
+ struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
+ for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
+ for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
+ hash_table_foreach(&prog->pipelines[r][i], entry) {
+ struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
+ util_queue_fence_wait(&pc_entry->fence);
+ }
+ }
+ }
+}
+
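+/* descriptive note: compute analogue of the gfx variant lookup: find or compile a variant for the current key
+ * (inlined uniforms, nonseamless cubes, robustness, zs swizzle)
+ */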
+static void
+update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_shader *zs = comp->shader;
+ struct zink_shader_module *zm = NULL;
+ unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
+ struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
+ ASSERTED bool check_robustness = screen->driver_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
+ assert(zink_cs_key(key)->robust_access == check_robustness);
+
+ if (ctx && zs->info.num_inlinable_uniforms &&
+ ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
+ if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
+ inline_size = zs->info.num_inlinable_uniforms;
+ else
+ key->inline_uniforms = false;
+ }
+ if (key->base.nonseamless_cube_mask)
+ nonseamless_size = sizeof(uint32_t);
+ if (key->base.needs_zs_shader_swizzle)
+ zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);
+
+ if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
+ struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
+ unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
+ struct zink_shader_module **pzm = shader_cache->data;
+ for (unsigned i = 0; i < count; i++) {
+ struct zink_shader_module *iter = pzm[i];
+ if (!shader_key_matches(iter, key, inline_size,
+ screen->driconf.inline_uniforms,
+ screen->info.have_EXT_non_seamless_cube_map))
+ continue;
+ if (unlikely(zs_swizzle_size)) {
+ /* zs swizzle data needs a manual compare since it's so fat */
+ if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
+ &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
+ continue;
+ }
+ if (i > 0) {
+ struct zink_shader_module *zero = pzm[0];
+ pzm[0] = iter;
+ pzm[i] = zero;
+ }
+ zm = iter;
}
} else {
- pcr[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
- pcr[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
- pcr[0].size = 2 * sizeof(unsigned);
- pcr[1].stageFlags = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
- pcr[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
- pcr[1].size = sizeof(float) * 6;
- plci.pushConstantRangeCount = 2;
+ zm = comp->module;
+ }
+
+ if (!zm) {
+ zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
+ if (!zm) {
+ return;
+ }
+ zm->shobj = false;
+ zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
+ if (!zm->obj.spirv) {
+ FREE(zm);
+ return;
+ }
+ zm->num_uniforms = inline_size;
+ zm->key_size = key->size;
+ memcpy(zm->key, key, key->size);
+ zm->has_nonseamless = !!nonseamless_size;
+ zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
+ assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
+ if (nonseamless_size)
+ memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
+ if (inline_size)
+ memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
+ if (zs_swizzle_size)
+ memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);
+
+ zm->hash = shader_module_hash(zm);
+ zm->default_variant = false;
+ if (inline_size)
+ comp->inlined_variant_count++;
+
+ /* this is otherwise the default variant, which is stored as comp->module */
+ if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
+ util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
+ }
+ if (comp->curr == zm)
+ return;
+ ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
+ comp->curr = zm;
+ ctx->compute_pipeline_state.module_hash = zm->hash;
+ ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
+ ctx->compute_pipeline_state.module_changed = true;
+}
+
+void
+zink_update_compute_program(struct zink_context *ctx)
+{
+ util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
+ update_cs_shader_module(ctx, ctx->curr_compute);
+}
+
+VkPipelineLayout
+zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
+{
+ VkPipelineLayoutCreateInfo plci = {0};
+ plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+ plci.flags = flags;
+
+ plci.pSetLayouts = dsl;
+ plci.setLayoutCount = num_dsl;
+
+ VkPushConstantRange pcr;
+ if (!is_compute) {
+ pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
+ pcr.offset = 0;
+ pcr.size = sizeof(struct zink_gfx_push_constant);
+ plci.pushConstantRangeCount = 1;
+ plci.pPushConstantRanges = &pcr;
}
- plci.pPushConstantRanges = &pcr[0];
VkPipelineLayout layout;
- if (VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout) != VK_SUCCESS) {
- debug_printf("vkCreatePipelineLayout failed!\n");
+ VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
+ if (result != VK_SUCCESS) {
+ mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
return layout;
}
-static void
-assign_io(struct zink_gfx_program *prog, struct zink_shader *stages[ZINK_SHADER_COUNT])
+static void *
+create_program(struct zink_context *ctx, bool is_compute)
{
- struct zink_shader *shaders[PIPE_SHADER_TYPES];
-
- /* build array in pipeline order */
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++)
- shaders[tgsi_processor_to_shader_stage(i)] = stages[i];
+ struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
+ if (!pg)
+ return NULL;
+
+ pipe_reference_init(&pg->reference, 1);
+ u_rwlock_init(&pg->pipeline_cache_lock);
+ util_queue_fence_init(&pg->cache_fence);
+ pg->is_compute = is_compute;
+ pg->ctx = ctx;
+ return (void*)pg;
+}
+static void
+assign_io(struct zink_screen *screen,
+ nir_shader *shaders[ZINK_GFX_SHADER_COUNT])
+{
for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
- nir_shader *producer = shaders[i]->nir;
- for (unsigned j = i + 1; j < ZINK_SHADER_COUNT; i++, j++) {
- struct zink_shader *consumer = shaders[j];
+ nir_shader *producer = shaders[i];
+ for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
+ nir_shader *consumer = shaders[j];
if (!consumer)
continue;
- if (!prog->nir[producer->info.stage])
- prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
- if (!prog->nir[j])
- prog->nir[j] = nir_shader_clone(prog, consumer->nir);
- zink_compiler_assign_io(prog->nir[producer->info.stage], prog->nir[j]);
+ zink_compiler_assign_io(screen, producer, consumer);
i = j;
break;
}
}
}
+void
+zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
+{
+ if (!p_atomic_dec_zero(&libs->refcount))
+ return;
+
+ simple_mtx_destroy(&libs->lock);
+ set_foreach_remove(&libs->libs, he) {
+ struct zink_gfx_library_key *gkey = (void*)he->key;
+ VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
+ FREE(gkey);
+ }
+ ralloc_free(libs->libs.table);
+ FREE(libs);
+}
+
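+/* descriptive note: zink_gfx_lib_cache is a screen-level, refcounted cache of fast-link pipeline libraries
+ * shared by programs built from the same shaders (generated tcs excluded)
+ */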
+static struct zink_gfx_lib_cache *
+create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
+{
+ struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
+ libs->stages_present = prog->stages_present;
+ if (generated_tcs)
+ libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ simple_mtx_init(&libs->lock, mtx_plain);
+ if (generated_tcs)
+ _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
+ else
+ _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib);
+ return libs;
+}
+
+static struct zink_gfx_lib_cache *
+find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
+{
+ unsigned stages_present = prog->stages_present;
+ bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
+ if (generated_tcs)
+ stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ unsigned idx = zink_program_cache_stages(stages_present);
+ struct set *ht = &screen->pipeline_libs[idx];
+ const uint32_t hash = prog->gfx_hash;
+
+ simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
+ bool found = false;
+ struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
+ struct zink_gfx_lib_cache *libs;
+ if (found) {
+ libs = (void*)entry->key;
+ } else {
+ libs = create_lib_cache(prog, generated_tcs);
+ memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
+ entry->key = libs;
+ unsigned refs = 0;
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
+ simple_mtx_lock(&prog->shaders[i]->lock);
+ util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
+ simple_mtx_unlock(&prog->shaders[i]->lock);
+ refs++;
+ }
+ }
+ p_atomic_set(&libs->refcount, refs);
+ }
+ simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
+ return libs;
+}
+
struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
- struct zink_shader *stages[ZINK_SHADER_COUNT],
- unsigned vertices_per_patch)
+ struct zink_shader **stages,
+ unsigned vertices_per_patch,
+ uint32_t gfx_hash)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_gfx_program *prog = rzalloc(NULL, struct zink_gfx_program);
+ struct zink_gfx_program *prog = create_program(ctx, false);
if (!prog)
goto fail;
- pipe_reference_init(&prog->base.reference, 1);
+ prog->gfx_hash = gfx_hash;
+ prog->base.removed = true;
+ prog->optimal_keys = screen->optimal_keys;
+
+ nir_shader *nir[ZINK_GFX_SHADER_COUNT];
- for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
- list_inithead(&prog->shader_cache[i][0]);
- list_inithead(&prog->shader_cache[i][1]);
+ prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
+ prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
+ util_dynarray_init(&prog->shader_cache[i][0][0], prog);
+ util_dynarray_init(&prog->shader_cache[i][0][1], prog);
+ util_dynarray_init(&prog->shader_cache[i][1][0], prog);
+ util_dynarray_init(&prog->shader_cache[i][1][1], prog);
if (stages[i]) {
prog->shaders[i] = stages[i];
prog->stages_present |= BITFIELD_BIT(i);
+ if (i != MESA_SHADER_FRAGMENT)
+ prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
+ prog->needs_inlining |= prog->shaders[i]->needs_inlining;
+ nir[i] = zink_shader_deserialize(screen, stages[i]);
+ } else {
+ nir[i] = NULL;
}
}
- if (stages[PIPE_SHADER_TESS_EVAL] && !stages[PIPE_SHADER_TESS_CTRL]) {
- prog->shaders[PIPE_SHADER_TESS_EVAL]->generated =
- prog->shaders[PIPE_SHADER_TESS_CTRL] =
- zink_shader_tcs_create(screen, stages[PIPE_SHADER_VERTEX], vertices_per_patch);
- prog->stages_present |= BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
+ if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
+ prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
+ prog->shaders[MESA_SHADER_TESS_CTRL] =
+ zink_shader_tcs_create(screen, nir[MESA_SHADER_TESS_EVAL], vertices_per_patch, &nir[MESA_SHADER_TESS_CTRL]);
+ prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
}
+ prog->stages_remaining = prog->stages_present;
- assign_io(prog, prog->shaders);
+ assign_io(screen, nir);
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ if (nir[i])
+ zink_shader_serialize_blob(nir[i], &prog->blobs[i]);
+ ralloc_free(nir[i]);
+ }
- if (stages[PIPE_SHADER_GEOMETRY])
- prog->last_vertex_stage = stages[PIPE_SHADER_GEOMETRY];
- else if (stages[PIPE_SHADER_TESS_EVAL])
- prog->last_vertex_stage = stages[PIPE_SHADER_TESS_EVAL];
- else
- prog->last_vertex_stage = stages[PIPE_SHADER_VERTEX];
- for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) {
- _mesa_hash_table_init(&prog->pipelines[i], prog, NULL, equals_gfx_pipeline_state);
- /* only need first 3/4 for point/line/tri/patch */
- if (screen->info.have_EXT_extended_dynamic_state &&
- i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
- break;
+ if (stages[MESA_SHADER_GEOMETRY])
+ prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
+ else if (stages[MESA_SHADER_TESS_EVAL])
+ prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
+ else
+ prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
+
+ for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
+ for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
+ _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
+ /* only need first 3/4 for point/line/tri/patch */
+ if (screen->info.have_EXT_extended_dynamic_state &&
+ i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
+ break;
+ }
}
+ if (screen->optimal_keys)
+ prog->libs = find_or_create_lib_cache(screen, prog);
+ if (prog->libs)
+ p_atomic_inc(&prog->libs->refcount);
+
struct mesa_sha1 sctx;
_mesa_sha1_init(&sctx);
- for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
if (prog->shaders[i]) {
simple_mtx_lock(&prog->shaders[i]->lock);
_mesa_set_add(prog->shaders[i]->programs, prog);
@@ -367,11 +1164,11 @@ zink_create_gfx_program(struct zink_context *ctx,
}
}
_mesa_sha1_final(&sctx, prog->base.sha1);
+ p_atomic_dec(&prog->base.reference.count);
- if (!screen->descriptor_program_init(ctx, &prog->base))
+ if (!zink_descriptor_program_init(ctx, &prog->base))
goto fail;
- zink_screen_get_pipeline_cache(screen, &prog->base);
return prog;
fail:
@@ -380,130 +1177,260 @@ fail:
return NULL;
}
+/* Creates a replacement, optimized zink_gfx_program for this set of separate shaders, which will
+ * be swapped in to replace the fast-linked separable program once it's done compiling.
+ */
+static void
+create_linked_separable_job(void *data, void *gdata, int thread_index)
+{
+ struct zink_gfx_program *prog = data;
+ prog->full_prog = zink_create_gfx_program(prog->base.ctx, prog->shaders, 0, prog->gfx_hash);
+ /* add an ownership ref */
+ zink_gfx_program_reference(zink_screen(prog->base.ctx->base.screen), NULL, prog->full_prog);
+ precompile_job(prog->full_prog, gdata, thread_index);
+}
+
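+/* descriptive note: fast path for separate shaders: reuse the per-stage precompiled libraries/objects to
+ * fast-link a separable program, falling back to zink_create_gfx_program() when shader
+ * variants are needed or the precompiles aren't usable
+ */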
+struct zink_gfx_program *
+create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ bool is_separate = true;
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
+ is_separate &= !stages[i] || stages[i]->info.separate_shader;
+ /* filter cases that need real pipelines */
+ if (!is_separate ||
+ /* TODO: maybe try variants? grimace */
+ !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
+ !zink_can_use_pipeline_libs(ctx))
+ return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ /* ensure async shader creation is done */
+ if (stages[i]) {
+ util_queue_fence_wait(&stages[i]->precompile.fence);
+ if (!stages[i]->precompile.obj.mod)
+ return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
+ }
+ }
+
+ struct zink_gfx_program *prog = create_program(ctx, false);
+ if (!prog)
+ goto fail;
+
+ prog->is_separable = true;
+ prog->gfx_hash = ctx->gfx_hash;
+ prog->base.uses_shobj = screen->info.have_EXT_shader_object;
+
+ prog->stages_remaining = prog->stages_present = ctx->shader_stages;
+ memcpy(prog->shaders, stages, sizeof(prog->shaders));
+ prog->last_vertex_stage = ctx->last_vertex_stage;
+
+ if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
+ prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs;
+ prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ }
+
+ if (!screen->info.have_EXT_shader_object) {
+ prog->libs = create_lib_cache(prog, false);
+ /* this libs cache is owned by the program */
+ p_atomic_set(&prog->libs->refcount, 1);
+ }
+
+ unsigned refs = 0;
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
+ if (prog->shaders[i]) {
+ simple_mtx_lock(&prog->shaders[i]->lock);
+ _mesa_set_add(prog->shaders[i]->programs, prog);
+ simple_mtx_unlock(&prog->shaders[i]->lock);
+ if (screen->info.have_EXT_shader_object) {
+ if (!prog->objects[i])
+ prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
+ }
+ refs++;
+ }
+ }
+ /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
+ * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here.
+ */
+ p_atomic_add(&prog->base.reference.count, refs - 1);
+
+ for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
+ for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
+ _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
+ /* only need first 3/4 for point/line/tri/patch */
+ if (screen->info.have_EXT_extended_dynamic_state &&
+ i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
+ break;
+ }
+ }
+
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
+ if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl)
+ continue;
+ int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1;
+ prog->base.dd.binding_usage |= BITFIELD_BIT(idx);
+ prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl;
+ /* guarantee a null dsl if previous stages don't have descriptors */
+ if (prog->shaders[i]->precompile.dsl)
+ prog->base.num_dsl = idx + 1;
+ prog->base.dd.bindless |= prog->shaders[i]->bindless;
+ }
+ if (prog->base.dd.bindless) {
+ prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
+ prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
+ }
+ prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
+
+ prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
+
+ if (!screen->info.have_EXT_shader_object) {
+ VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
+ struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
+ if (!gkey) {
+ mesa_loge("ZINK: failed to allocate gkey!");
+ goto fail;
+ }
+ gkey->optimal_key = prog->last_variant_hash;
+ assert(gkey->optimal_key);
+ gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false);
+ _mesa_set_add(&prog->libs->libs, gkey);
+ }
+
+ if (!(zink_debug & ZINK_DEBUG_NOOPT))
+ util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
+
+ return prog;
+fail:
+ if (prog)
+ zink_destroy_gfx_program(screen, prog);
+ return NULL;
+}
+
static uint32_t
-hash_compute_pipeline_state(const void *key)
+hash_compute_pipeline_state_local_size(const void *key)
{
const struct zink_compute_pipeline_state *state = key;
uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
- if (state->use_local_size)
- hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
+ hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
return hash;
}
-void
-zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3])
+static uint32_t
+hash_compute_pipeline_state(const void *key)
{
- struct zink_shader *zs = comp->shader;
- bool use_local_size = !(zs->nir->info.workgroup_size[0] ||
- zs->nir->info.workgroup_size[1] ||
- zs->nir->info.workgroup_size[2]);
- if (ctx->compute_pipeline_state.use_local_size != use_local_size)
- ctx->compute_pipeline_state.dirty = true;
- ctx->compute_pipeline_state.use_local_size = use_local_size;
+ const struct zink_compute_pipeline_state *state = key;
+ return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
+}
- if (ctx->compute_pipeline_state.use_local_size) {
+void
+zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info)
+{
+ if (comp->use_local_size) {
for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
- if (ctx->compute_pipeline_state.local_size[i] != block[i])
+ if (ctx->compute_pipeline_state.local_size[i] != info->block[i])
ctx->compute_pipeline_state.dirty = true;
- ctx->compute_pipeline_state.local_size[i] = block[i];
+ ctx->compute_pipeline_state.local_size[i] = info->block[i];
}
- } else
- ctx->compute_pipeline_state.local_size[0] =
- ctx->compute_pipeline_state.local_size[1] =
- ctx->compute_pipeline_state.local_size[2] = 0;
+ }
+ if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) {
+ ctx->compute_pipeline_state.dirty = true;
+ ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem;
+ }
}
static bool
equals_compute_pipeline_state(const void *a, const void *b)
{
- return memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) == 0;
+ const struct zink_compute_pipeline_state *sa = a;
+ const struct zink_compute_pipeline_state *sb = b;
+ return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
+ sa->module == sb->module;
}
-struct zink_compute_program *
-zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader)
+static bool
+equals_compute_pipeline_state_local_size(const void *a, const void *b)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- struct zink_compute_program *comp = rzalloc(NULL, struct zink_compute_program);
- if (!comp)
- goto fail;
+ const struct zink_compute_pipeline_state *sa = a;
+ const struct zink_compute_pipeline_state *sb = b;
+ return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
+ !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
+ sa->module == sb->module;
+}
- pipe_reference_init(&comp->base.reference, 1);
- comp->base.is_compute = true;
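+/* descriptive note: background job: create the zink_shader, compile the default module, and precompile a base pipeline when possible */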
+static void
+precompile_compute_job(void *data, void *gdata, int thread_index)
+{
+ struct zink_compute_program *comp = data;
+ struct zink_screen *screen = gdata;
- comp->module = CALLOC_STRUCT(zink_shader_module);
+ comp->shader = zink_shader_create(screen, comp->nir);
+ comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
assert(comp->module);
- comp->module->shader = zink_shader_compile(screen, shader, shader->nir, NULL);
- assert(comp->module->shader);
-
- comp->pipelines = _mesa_hash_table_create(NULL, hash_compute_pipeline_state,
- equals_compute_pipeline_state);
-
- _mesa_set_add(shader->programs, comp);
- comp->shader = shader;
- memcpy(comp->base.sha1, shader->base.sha1, sizeof(shader->base.sha1));
-
- if (!screen->descriptor_program_init(ctx, &comp->base))
- goto fail;
-
- zink_screen_get_pipeline_cache(screen, &comp->base);
- return comp;
-
-fail:
- if (comp)
- zink_destroy_compute_program(screen, comp);
- return NULL;
+ comp->module->shobj = false;
+ comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
+ /* comp->nir will be freed by zink_shader_compile */
+ comp->nir = NULL;
+ assert(comp->module->obj.spirv);
+ util_dynarray_init(&comp->shader_cache[0], comp);
+ util_dynarray_init(&comp->shader_cache[1], comp);
+
+ struct mesa_sha1 sha1_ctx;
+ _mesa_sha1_init(&sha1_ctx);
+ _mesa_sha1_update(&sha1_ctx, comp->shader->blob.data, comp->shader->blob.size);
+ _mesa_sha1_final(&sha1_ctx, comp->base.sha1);
+
+ zink_descriptor_program_init(comp->base.ctx, &comp->base);
+
+ zink_screen_get_pipeline_cache(screen, &comp->base, true);
+ if (comp->base.can_precompile)
+ comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
+ if (comp->base_pipeline)
+ zink_screen_update_pipeline_cache(screen, &comp->base, true);
}
-uint32_t
-zink_program_get_descriptor_usage(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type)
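+/* descriptive note: compute programs keep their NIR and are precompiled on the background thread unless ZINK_DEBUG=nobgc */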
+static struct zink_compute_program *
+create_compute_program(struct zink_context *ctx, nir_shader *nir)
{
- struct zink_shader *zs = NULL;
- switch (stage) {
- case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_TESS_CTRL:
- case PIPE_SHADER_TESS_EVAL:
- case PIPE_SHADER_GEOMETRY:
- case PIPE_SHADER_FRAGMENT:
- zs = ctx->gfx_stages[stage];
- break;
- case PIPE_SHADER_COMPUTE: {
- zs = ctx->compute_stage;
- break;
- }
- default:
- unreachable("unknown shader type");
- }
- if (!zs)
- return 0;
- switch (type) {
- case ZINK_DESCRIPTOR_TYPE_UBO:
- return zs->ubos_used;
- case ZINK_DESCRIPTOR_TYPE_SSBO:
- return zs->ssbos_used;
- case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
- return BITSET_TEST_RANGE(zs->nir->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1);
- case ZINK_DESCRIPTOR_TYPE_IMAGE:
- return zs->nir->info.images_used;
- default:
- unreachable("unknown descriptor type!");
- }
- return 0;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_compute_program *comp = create_program(ctx, true);
+ if (!comp)
+ return NULL;
+ simple_mtx_init(&comp->cache_lock, mtx_plain);
+ comp->scratch_size = nir->scratch_size;
+ comp->nir = nir;
+ comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms;
+
+ comp->use_local_size = !(nir->info.workgroup_size[0] ||
+ nir->info.workgroup_size[1] ||
+ nir->info.workgroup_size[2]);
+ comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem;
+ comp->base.can_precompile = !comp->use_local_size &&
+ (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
+ (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
+ _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
+ equals_compute_pipeline_state_local_size :
+ equals_compute_pipeline_state);
+ if (zink_debug & ZINK_DEBUG_NOBGC)
+ precompile_compute_job(comp, screen, 0);
+ else
+ util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
+ precompile_compute_job, NULL, 0);
+ return comp;
}
bool
-zink_program_descriptor_is_buffer(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned i)
+zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
{
struct zink_shader *zs = NULL;
switch (stage) {
- case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_TESS_CTRL:
- case PIPE_SHADER_TESS_EVAL:
- case PIPE_SHADER_GEOMETRY:
- case PIPE_SHADER_FRAGMENT:
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ case MESA_SHADER_FRAGMENT:
zs = ctx->gfx_stages[stage];
break;
- case PIPE_SHADER_COMPUTE: {
- zs = ctx->compute_stage;
+ case MESA_SHADER_COMPUTE: {
+ zs = ctx->curr_compute->shader;
break;
}
default:
@@ -518,6 +1445,8 @@ static unsigned
get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
{
switch (type) {
+ case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
+ return !!zs->has_uniforms;
case ZINK_DESCRIPTOR_TYPE_UBO:
case ZINK_DESCRIPTOR_TYPE_SSBO:
return zs->num_bindings[type];
@@ -531,15 +1460,15 @@ get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
}
unsigned
-zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type, bool is_compute)
+zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
{
unsigned num_bindings = 0;
- if (is_compute) {
+ if (pg->is_compute) {
struct zink_compute_program *comp = (void*)pg;
return get_num_bindings(comp->shader, type);
}
struct zink_gfx_program *prog = (void*)pg;
- for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
if (prog->shaders[i])
num_bindings += get_num_bindings(prog->shaders[i], type);
}
@@ -547,182 +1476,109 @@ zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descrip
}
unsigned
-zink_program_num_bindings(const struct zink_program *pg, bool is_compute)
+zink_program_num_bindings(const struct zink_program *pg)
{
unsigned num_bindings = 0;
- for (unsigned i = 0; i < ZINK_DESCRIPTOR_TYPES; i++)
- num_bindings += zink_program_num_bindings_typed(pg, i, is_compute);
+ for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
+ num_bindings += zink_program_num_bindings_typed(pg, i);
return num_bindings;
}
+static void
+deinit_program(struct zink_screen *screen, struct zink_program *pg)
+{
+ util_queue_fence_wait(&pg->cache_fence);
+ if (pg->layout)
+ VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);
+
+ if (pg->pipeline_cache)
+ VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
+ u_rwlock_destroy(&pg->pipeline_cache_lock);
+ zink_descriptor_program_deinit(screen, pg);
+}
+
void
zink_destroy_gfx_program(struct zink_screen *screen,
struct zink_gfx_program *prog)
{
- if (prog->base.layout)
- VKSCR(DestroyPipelineLayout)(screen->dev, prog->base.layout, NULL);
-
- for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
- if (prog->shaders[i]) {
- _mesa_set_remove_key(prog->shaders[i]->programs, prog);
- prog->shaders[i] = NULL;
- }
- destroy_shader_cache(screen, &prog->shader_cache[i][0]);
- destroy_shader_cache(screen, &prog->shader_cache[i][1]);
- ralloc_free(prog->nir[i]);
- }
-
- unsigned max_idx = ARRAY_SIZE(prog->pipelines);
+ unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
if (screen->info.have_EXT_extended_dynamic_state) {
/* only need first 3/4 for point/line/tri/patch */
if ((prog->stages_present &
- (BITFIELD_BIT(PIPE_SHADER_TESS_EVAL) | BITFIELD_BIT(PIPE_SHADER_GEOMETRY))) ==
- BITFIELD_BIT(PIPE_SHADER_TESS_EVAL))
+ (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
+ BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
max_idx = 4;
else
max_idx = 3;
max_idx++;
}
- for (int i = 0; i < max_idx; ++i) {
- hash_table_foreach(&prog->pipelines[i], entry) {
- struct gfx_pipeline_cache_entry *pc_entry = entry->data;
+ if (prog->is_separable)
+ zink_gfx_program_reference(screen, &prog->full_prog, NULL);
+ for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
+ for (int i = 0; i < max_idx; ++i) {
+ hash_table_foreach(&prog->pipelines[r][i], entry) {
+ struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
+
+ util_queue_fence_wait(&pc_entry->fence);
+ VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
+ VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL);
+ free(pc_entry);
+ }
+ }
+ }
+
+ deinit_program(screen, &prog->base);
- VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
- free(pc_entry);
+ for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
+ if (prog->shaders[i]) {
+ _mesa_set_remove_key(prog->shaders[i]->programs, prog);
+ prog->shaders[i] = NULL;
+ }
+ if (!prog->is_separable) {
+ destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
+ destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
+ destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
+ destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
+ blob_finish(&prog->blobs[i]);
}
}
- if (prog->base.pipeline_cache)
- VKSCR(DestroyPipelineCache)(screen->dev, prog->base.pipeline_cache, NULL);
- screen->descriptor_program_deinit(screen, &prog->base);
+ if (prog->libs)
+ zink_gfx_lib_cache_unref(screen, prog->libs);
ralloc_free(prog);
}
void
zink_destroy_compute_program(struct zink_screen *screen,
- struct zink_compute_program *comp)
+ struct zink_compute_program *comp)
{
- if (comp->base.layout)
- VKSCR(DestroyPipelineLayout)(screen->dev, comp->base.layout, NULL);
+ deinit_program(screen, &comp->base);
+
+ assert(comp->shader);
+ assert(!comp->shader->spirv);
+
+ zink_shader_free(screen, comp->shader);
- if (comp->shader)
- _mesa_set_remove_key(comp->shader->programs, comp);
+ destroy_shader_cache(screen, &comp->shader_cache[0]);
+ destroy_shader_cache(screen, &comp->shader_cache[1]);
- hash_table_foreach(comp->pipelines, entry) {
+ hash_table_foreach(&comp->pipelines, entry) {
struct compute_pipeline_cache_entry *pc_entry = entry->data;
VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
free(pc_entry);
}
- _mesa_hash_table_destroy(comp->pipelines, NULL);
- VKSCR(DestroyShaderModule)(screen->dev, comp->module->shader, NULL);
- free(comp->module);
- if (comp->base.pipeline_cache)
- VKSCR(DestroyPipelineCache)(screen->dev, comp->base.pipeline_cache, NULL);
- screen->descriptor_program_deinit(screen, &comp->base);
+ VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
+ zink_destroy_shader_module(screen, comp->module);
ralloc_free(comp);
}
-static unsigned
-get_pipeline_idx(bool have_EXT_extended_dynamic_state, enum pipe_prim_type mode, VkPrimitiveTopology vkmode)
-{
- /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT specifies that the topology state in
- * VkPipelineInputAssemblyStateCreateInfo only specifies the topology class,
- * and the specific topology order and adjacency must be set dynamically
- * with vkCmdSetPrimitiveTopologyEXT before any drawing commands.
- */
- if (have_EXT_extended_dynamic_state) {
- if (mode == PIPE_PRIM_PATCHES)
- return 3;
- switch (u_reduced_prim(mode)) {
- case PIPE_PRIM_POINTS:
- return 0;
- case PIPE_PRIM_LINES:
- return 1;
- default:
- return 2;
- }
- }
- return vkmode;
-}
-
-
-VkPipeline
-zink_get_gfx_pipeline(struct zink_context *ctx,
- struct zink_gfx_program *prog,
- struct zink_gfx_pipeline_state *state,
- enum pipe_prim_type mode)
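+/* a compute program with no local-size specialization, no inlined uniforms, and no
+ * nonseamless-cube workaround can reuse a single "base" pipeline for every dispatch
+ */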
+ALWAYS_INLINE static bool
+compute_can_shortcut(const struct zink_compute_program *comp)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- const bool have_EXT_vertex_input_dynamic_state = screen->info.have_EXT_vertex_input_dynamic_state;
- const bool have_EXT_extended_dynamic_state = screen->info.have_EXT_extended_dynamic_state;
-
- VkPrimitiveTopology vkmode = zink_primitive_topology(mode);
- const unsigned idx = get_pipeline_idx(screen->info.have_EXT_extended_dynamic_state, mode, vkmode);
- assert(idx <= ARRAY_SIZE(prog->pipelines));
- if (!state->dirty && !state->modules_changed &&
- (have_EXT_vertex_input_dynamic_state || !ctx->vertex_state_changed) &&
- idx == state->idx)
- return state->pipeline;
-
- struct hash_entry *entry = NULL;
-
- if (state->dirty) {
- if (state->pipeline) //avoid on first hash
- state->final_hash ^= state->hash;
- state->hash = hash_gfx_pipeline_state(state);
- state->final_hash ^= state->hash;
- state->dirty = false;
- }
- if (!have_EXT_vertex_input_dynamic_state && ctx->vertex_state_changed) {
- if (state->pipeline)
- state->final_hash ^= state->vertex_hash;
- if (!have_EXT_extended_dynamic_state) {
- uint32_t hash = 0;
- /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */
- uint32_t vertex_buffers_enabled_mask = state->vertex_buffers_enabled_mask;
- hash = XXH32(&vertex_buffers_enabled_mask, sizeof(uint32_t), hash);
-
- for (unsigned i = 0; i < state->element_state->num_bindings; i++) {
- struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
- state->vertex_strides[i] = vb->buffer.resource ? vb->stride : 0;
- hash = XXH32(&state->vertex_strides[i], sizeof(uint32_t), hash);
- }
- state->vertex_hash = hash ^ state->element_state->hash;
- } else
- state->vertex_hash = state->element_state->hash;
- state->final_hash ^= state->vertex_hash;
- }
- state->modules_changed = false;
- ctx->vertex_state_changed = false;
-
- entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[idx], state->final_hash, state);
-
- if (!entry) {
- util_queue_fence_wait(&prog->base.cache_fence);
- VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog,
- state, vkmode);
- if (pipeline == VK_NULL_HANDLE)
- return VK_NULL_HANDLE;
-
- zink_screen_update_pipeline_cache(screen, &prog->base);
- struct gfx_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(gfx_pipeline_cache_entry);
- if (!pc_entry)
- return VK_NULL_HANDLE;
-
- memcpy(&pc_entry->state, state, sizeof(*state));
- pc_entry->pipeline = pipeline;
-
- entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[idx], state->final_hash, pc_entry, pc_entry);
- assert(entry);
- }
-
- struct gfx_pipeline_cache_entry *cache_entry = entry->data;
- state->pipeline = cache_entry->pipeline;
- state->idx = idx;
- return state->pipeline;
+ return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
}
VkPipeline
@@ -731,98 +1587,213 @@ zink_get_compute_pipeline(struct zink_screen *screen,
struct zink_compute_pipeline_state *state)
{
struct hash_entry *entry = NULL;
+ struct compute_pipeline_cache_entry *cache_entry;
- if (!state->dirty)
+ if (!state->dirty && !state->module_changed)
return state->pipeline;
if (state->dirty) {
- state->hash = hash_compute_pipeline_state(state);
+ if (state->pipeline) //avoid on first hash
+ state->final_hash ^= state->hash;
+ if (comp->use_local_size)
+ state->hash = hash_compute_pipeline_state_local_size(state);
+ else
+ state->hash = hash_compute_pipeline_state(state);
state->dirty = false;
+ state->final_hash ^= state->hash;
}
- entry = _mesa_hash_table_search_pre_hashed(comp->pipelines, state->hash, state);
+
+ util_queue_fence_wait(&comp->base.cache_fence);
+ if (comp->base_pipeline && compute_can_shortcut(comp)) {
+ state->pipeline = comp->base_pipeline;
+ return state->pipeline;
+ }
+ entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
if (!entry) {
- util_queue_fence_wait(&comp->base.cache_fence);
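+ /* take the cache lock and re-check: another thread may have inserted this state already */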
+ simple_mtx_lock(&comp->cache_lock);
+ entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
+ if (entry) {
+ simple_mtx_unlock(&comp->cache_lock);
+ goto out;
+ }
VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);
- if (pipeline == VK_NULL_HANDLE)
+ if (pipeline == VK_NULL_HANDLE) {
+ simple_mtx_unlock(&comp->cache_lock);
return VK_NULL_HANDLE;
+ }
+
+ zink_screen_update_pipeline_cache(screen, &comp->base, false);
+ if (compute_can_shortcut(comp)) {
+ simple_mtx_unlock(&comp->cache_lock);
+ /* don't add base pipeline to cache */
+ state->pipeline = comp->base_pipeline = pipeline;
+ return state->pipeline;
+ }
struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
- if (!pc_entry)
+ if (!pc_entry) {
+ simple_mtx_unlock(&comp->cache_lock);
return VK_NULL_HANDLE;
+ }
memcpy(&pc_entry->state, state, sizeof(*state));
pc_entry->pipeline = pipeline;
- entry = _mesa_hash_table_insert_pre_hashed(comp->pipelines, state->hash, pc_entry, pc_entry);
+ entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
assert(entry);
+ simple_mtx_unlock(&comp->cache_lock);
}
-
- struct compute_pipeline_cache_entry *cache_entry = entry->data;
+out:
+ cache_entry = entry->data;
state->pipeline = cache_entry->pipeline;
return state->pipeline;
}
-static inline void
-bind_stage(struct zink_context *ctx, enum pipe_shader_type stage,
- struct zink_shader *shader)
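+/* bind/unbind a shader for a graphics stage and update the program hash, stage masks,
+ * and pipeline-state dirty flags
+ */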
+static void
+bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
{
- if (shader && shader->nir->info.num_inlinable_uniforms)
+ /* RADV doesn't support binding pipelines in DGC */
+ if (zink_screen(ctx->base.screen)->info.nv_dgc_props.maxGraphicsShaderGroupCount == 0)
+ zink_flush_dgc_if_enabled(ctx);
+ if (shader && shader->info.num_inlinable_uniforms)
ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
else
ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);
- if (stage == PIPE_SHADER_COMPUTE) {
- if (shader && shader != ctx->compute_stage) {
- struct hash_entry *entry = _mesa_hash_table_search(&ctx->compute_program_cache, shader);
- if (entry) {
- ctx->compute_pipeline_state.dirty = true;
- ctx->curr_compute = entry->data;
- } else {
- struct zink_compute_program *comp = zink_create_compute_program(ctx, shader);
- _mesa_hash_table_insert(&ctx->compute_program_cache, comp->shader, comp);
- ctx->compute_pipeline_state.dirty = true;
- ctx->curr_compute = comp;
- zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
- }
- } else if (!shader)
- ctx->curr_compute = NULL;
- ctx->compute_stage = shader;
- zink_select_launch_grid(ctx);
+ if (ctx->gfx_stages[stage])
+ ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
+
+ if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) {
+ ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
+ ctx->is_generated_gs_bound = false;
+ }
+
+ ctx->gfx_stages[stage] = shader;
+ ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
+ ctx->gfx_pipeline_state.modules_changed = true;
+ if (shader) {
+ ctx->shader_stages |= BITFIELD_BIT(stage);
+ ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
} else {
- if (ctx->gfx_stages[stage])
- ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
- ctx->gfx_stages[stage] = shader;
- ctx->gfx_dirty = ctx->gfx_stages[PIPE_SHADER_FRAGMENT] && ctx->gfx_stages[PIPE_SHADER_VERTEX];
- ctx->gfx_pipeline_state.modules_changed = true;
- if (shader) {
- ctx->shader_stages |= BITFIELD_BIT(stage);
- ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
- } else {
- ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
- if (ctx->curr_program)
- ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
- ctx->curr_program = NULL;
- ctx->shader_stages &= ~BITFIELD_BIT(stage);
+ ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
+ if (ctx->curr_program)
+ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
+ ctx->curr_program = NULL;
+ ctx->shader_stages &= ~BITFIELD_BIT(stage);
+ }
+}
+
+static enum mesa_prim
+gs_output_to_reduced_prim_type(struct shader_info *info)
+{
+ switch (info->gs.output_primitive) {
+ case MESA_PRIM_POINTS:
+ return MESA_PRIM_POINTS;
+
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_LINE_LOOP:
+ case MESA_PRIM_LINE_STRIP:
+ case MESA_PRIM_LINES_ADJACENCY:
+ case MESA_PRIM_LINE_STRIP_ADJACENCY:
+ return MESA_PRIM_LINES;
+
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_FAN:
+ case MESA_PRIM_TRIANGLES_ADJACENCY:
+ case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ return MESA_PRIM_TRIANGLES;
+
+ default:
+ unreachable("unexpected output primitive type");
+ }
+}
+
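+/* compute the reduced primitive type produced by the last pre-rasterization stage
+ * (GS or TES), or MESA_PRIM_COUNT if it cannot be determined
+ */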
+static enum mesa_prim
+update_rast_prim(struct zink_shader *shader)
+{
+ struct shader_info *info = &shader->info;
+ if (info->stage == MESA_SHADER_GEOMETRY)
+ return gs_output_to_reduced_prim_type(info);
+ else if (info->stage == MESA_SHADER_TESS_EVAL) {
+ if (info->tess.point_mode)
+ return MESA_PRIM_POINTS;
+ else {
+ switch (info->tess._primitive_mode) {
+ case TESS_PRIMITIVE_ISOLINES:
+ return MESA_PRIM_LINES;
+ case TESS_PRIMITIVE_TRIANGLES:
+ case TESS_PRIMITIVE_QUADS:
+ return MESA_PRIM_TRIANGLES;
+ default:
+ return MESA_PRIM_COUNT;
+ }
}
}
+ return MESA_PRIM_COUNT;
+}
+
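+/* if the outgoing shader owns the currently-bound generated passthrough GS,
+ * unbind that GS and invalidate its inlined uniforms
+ */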
+static void
+unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
+{
+ if (prev_shader->non_fs.is_generated)
+ ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
+
+ if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
+ ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent ==
+ prev_shader) {
+ bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
+ }
}
static void
-bind_last_vertex_stage(struct zink_context *ctx)
+bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
{
- enum pipe_shader_type old = ctx->last_vertex_stage ? pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage) : PIPE_SHADER_TYPES;
- if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
- ctx->last_vertex_stage = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
- else if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
- ctx->last_vertex_stage = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
+ if (prev_shader && stage < MESA_SHADER_GEOMETRY)
+ unbind_generated_gs(ctx, stage, prev_shader);
+
+ gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES;
+ if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
+ ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
+ else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
+ ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
else
- ctx->last_vertex_stage = ctx->gfx_stages[PIPE_SHADER_VERTEX];
- enum pipe_shader_type current = ctx->last_vertex_stage ? pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage) : PIPE_SHADER_VERTEX;
+ ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
+ gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX;
+
+ /* update rast_prim */
+ ctx->gfx_pipeline_state.shader_rast_prim =
+ ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
+ MESA_PRIM_COUNT;
+
if (old != current) {
- if (old != PIPE_SHADER_TYPES) {
- memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
- ctx->dirty_shader_stages |= BITFIELD_BIT(old);
+ if (!zink_screen(ctx->base.screen)->optimal_keys) {
+ if (old != MESA_SHADER_STAGES) {
+ memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
+ } else {
+ /* always unset vertex shader values when changing to a non-vs last stage */
+ memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
+ }
+ }
+
+ unsigned num_viewports = ctx->vp_state.num_viewports;
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ /* number of enabled viewports is based on whether last vertex stage writes viewport index */
+ if (ctx->last_vertex_stage) {
+ if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
+ ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
+ else
+ ctx->vp_state.num_viewports = 1;
+ } else {
+ ctx->vp_state.num_viewports = 1;
+ }
+ ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
+ if (!screen->info.have_EXT_extended_dynamic_state) {
+ if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
}
ctx->last_vertex_stage_dirty = true;
}
@@ -833,21 +1804,42 @@ zink_bind_vs_state(struct pipe_context *pctx,
void *cso)
{
struct zink_context *ctx = zink_context(pctx);
- if (!cso && !ctx->gfx_stages[PIPE_SHADER_VERTEX])
+ if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
return;
- void *prev = ctx->gfx_stages[PIPE_SHADER_VERTEX];
- bind_stage(ctx, PIPE_SHADER_VERTEX, cso);
+ struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX];
+ bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
+ bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader);
if (cso) {
struct zink_shader *zs = cso;
- ctx->shader_reads_drawid = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
- ctx->shader_reads_basevertex = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
+ ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
+ ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
} else {
ctx->shader_reads_drawid = false;
ctx->shader_reads_basevertex = false;
}
- if (ctx->last_vertex_stage == prev)
- ctx->last_vertex_stage = cso;
+}
+
+/* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
+ * in GL, samples==1 means ignore gl_SampleMask[]
+ * in VK, gl_SampleMask[] is never ignored
+ */
+void
+zink_update_fs_key_samples(struct zink_context *ctx)
+{
+ if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
+ return;
+ if (zink_shader_uses_samples(ctx->gfx_stages[MESA_SHADER_FRAGMENT])) {
+ bool samples = zink_get_fs_base_key(ctx)->samples;
+ if (samples != (ctx->fb_state.samples > 1))
+ zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
+ }
+}
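+/* keep the GS key's line_rectangular flag in sync with the current rasterizer state */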
+void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
+{
+ bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
+ if (line_rectangular != ctx->rast_state->base.line_rectangular)
+ zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
}
static void
@@ -855,18 +1847,42 @@ zink_bind_fs_state(struct pipe_context *pctx,
void *cso)
{
struct zink_context *ctx = zink_context(pctx);
- if (!cso && !ctx->gfx_stages[PIPE_SHADER_FRAGMENT])
+ if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
+ return;
+ if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) {
+ ctx->saved_fs = cso;
+ zink_set_null_fs(ctx);
return;
- bind_stage(ctx, PIPE_SHADER_FRAGMENT, cso);
+ }
+ bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true;
+ unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
+ bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
ctx->fbfetch_outputs = 0;
if (cso) {
- nir_shader *nir = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir;
- if (nir->info.fs.uses_fbfetch_output) {
- nir_foreach_shader_out_variable(var, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir) {
- if (var->data.fb_fetch_output)
- ctx->fbfetch_outputs |= BITFIELD_BIT(var->data.location - FRAG_RESULT_DATA0);
- }
+ shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
+ bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0;
+ if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
+ writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) {
+ ctx->blend_state_changed = true;
+ ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C);
+ }
+ if (info->fs.uses_fbfetch_output) {
+ if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL)))
+ ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
+ ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0;
}
+ zink_update_fs_key_samples(ctx);
+ if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
+ if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output)
+ ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output;
+ }
+ zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
+ if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
+ !zink_screen(pctx->screen)->driver_workarounds.needs_zs_shader_swizzle)
+ zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
+ if (!ctx->track_renderpasses && !ctx->blitting)
+ ctx->rp_tc_info_updated = true;
}
zink_update_fbfetch(ctx);
}
@@ -876,25 +1892,17 @@ zink_bind_gs_state(struct pipe_context *pctx,
void *cso)
{
struct zink_context *ctx = zink_context(pctx);
- if (!cso && !ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
+ if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
return;
- bool had_points = ctx->gfx_stages[PIPE_SHADER_GEOMETRY] ? ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->nir->info.gs.output_primitive == GL_POINTS : false;
- bind_stage(ctx, PIPE_SHADER_GEOMETRY, cso);
- bind_last_vertex_stage(ctx);
- if (cso) {
- if (!had_points && ctx->last_vertex_stage->nir->info.gs.output_primitive == GL_POINTS)
- ctx->gfx_pipeline_state.has_points++;
- } else {
- if (had_points)
- ctx->gfx_pipeline_state.has_points--;
- }
+ bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
+ bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
}
static void
zink_bind_tcs_state(struct pipe_context *pctx,
void *cso)
{
- bind_stage(zink_context(pctx), PIPE_SHADER_TESS_CTRL, cso);
+ bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
}
static void
@@ -902,17 +1910,18 @@ zink_bind_tes_state(struct pipe_context *pctx,
void *cso)
{
struct zink_context *ctx = zink_context(pctx);
- if (!cso && !ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
+ if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
return;
- if (!!ctx->gfx_stages[PIPE_SHADER_TESS_EVAL] != !!cso) {
+ if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
if (!cso) {
/* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
- if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]->generated)
- ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = NULL;
+ if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
+ ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
}
}
- bind_stage(ctx, PIPE_SHADER_TESS_EVAL, cso);
- bind_last_vertex_stage(ctx);
+ struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
+ bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
+ bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader);
}
static void *
@@ -925,32 +1934,324 @@ zink_create_cs_state(struct pipe_context *pctx,
else
nir = (struct nir_shader *)shader->prog;
- return zink_shader_create(zink_screen(pctx->screen), nir, NULL);
+ if (nir->info.uses_bindless)
+ zink_descriptors_init_bindless(zink_context(pctx));
+
+ return create_compute_program(zink_context(pctx), nir);
}
static void
zink_bind_cs_state(struct pipe_context *pctx,
void *cso)
{
- bind_stage(zink_context(pctx), PIPE_SHADER_COMPUTE, cso);
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_compute_program *comp = cso;
+ if (comp && comp->num_inlinable_uniforms)
+ ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
+ else
+ ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);
+
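+ /* back out the previous program's module hash from the pipeline state before switching */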
+ if (ctx->curr_compute) {
+ zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
+ ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
+ ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
+ ctx->compute_pipeline_state.module_hash = 0;
+ }
+ ctx->compute_pipeline_state.dirty = true;
+ ctx->curr_compute = comp;
+ if (comp && comp != ctx->curr_compute) {
+ ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
+ if (util_queue_fence_is_signalled(&comp->base.cache_fence))
+ ctx->compute_pipeline_state.module = ctx->curr_compute->curr->obj.mod;
+ ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
+ if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
+ ctx->compute_dirty = true;
+ }
+ zink_select_launch_grid(ctx);
+}
+
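+/* fill pipe_compute_state_object_info from Vulkan limits; the subgroup size is only
+ * reported when the driver exposes one
+ */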
+static void
+zink_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info)
+{
+ struct zink_compute_program *comp = cso;
+ struct zink_screen *screen = zink_screen(pctx->screen);
+
+ info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations;
+ info->private_memory = comp->scratch_size;
+ if (screen->info.props11.subgroupSize) {
+ info->preferred_simd_size = screen->info.props11.subgroupSize;
+ info->simd_sizes = info->preferred_simd_size;
+ } else {
+ // just guess it
+ info->preferred_simd_size = 64;
+ // only used for actual subgroup support
+ info->simd_sizes = 0;
+ }
+}
+
+static void
+zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
+{
+ struct zink_compute_program *comp = cso;
+ zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
+}
+
+/* caller must lock prog->libs->lock */
+struct zink_gfx_library_key *
+zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
+{
+ struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
+ if (!gkey) {
+ mesa_loge("ZINK: failed to allocate gkey!");
+ return NULL;
+ }
+
+ gkey->optimal_key = state->optimal_key;
+ assert(gkey->optimal_key);
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
+ gkey->modules[i] = prog->objs[i].mod;
+ gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
+ _mesa_set_add(&prog->libs->libs, gkey);
+ return gkey;
+}
+
+static const char *
+print_exe_stages(VkShaderStageFlags stages)
+{
+ if (stages == VK_SHADER_STAGE_VERTEX_BIT)
+ return "VS";
+ if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
+ return "VS+GS";
+ if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
+ return "VS+TCS+TES";
+ if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
+ return "VS+TCS+TES+GS";
+ if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+ return "TCS";
+ if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+ return "TES";
+ if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
+ return "GS";
+ if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
+ return "FS";
+ if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
+ return "CS";
+ unreachable("unhandled combination of stages!");
+}
+
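+/* log VK_KHR_pipeline_executable_properties statistics for every executable in the
+ * pipeline; used by the shaderdb debug path
+ */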
+static void
+print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline, struct util_debug_callback *debug)
+{
+ VkPipelineInfoKHR pinfo = {
+ VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
+ NULL,
+ pipeline
+ };
+ unsigned exe_count = 0;
+ VkPipelineExecutablePropertiesKHR props[10] = {0};
+ for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
+ props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
+ props[i].pNext = NULL;
+ }
+ VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
+ VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
+ for (unsigned e = 0; e < exe_count; e++) {
+ VkPipelineExecutableInfoKHR info = {
+ VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
+ NULL,
+ pipeline,
+ e
+ };
+ unsigned count = 0;
+
+ struct u_memstream stream;
+ char *print_buf;
+ size_t print_buf_sz;
+
+ if (!u_memstream_open(&stream, &print_buf, &print_buf_sz)) {
+ mesa_loge("ZINK: failed to open memstream!");
+ return;
+ }
+
+ FILE *f = u_memstream_get(&stream);
+ fprintf(f, "type: %s", props[e].name);
+ VkPipelineExecutableStatisticKHR *stats = NULL;
+ VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
+ stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
+ if (!stats) {
+ mesa_loge("ZINK: failed to allocate stats!");
+ return;
+ }
+
+ for (unsigned i = 0; i < count; i++)
+ stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
+ VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);
+
+ for (unsigned i = 0; i < count; i++) {
+ fprintf(f, ", ");
+ switch (stats[i].format) {
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
+ fprintf(f, "%s: %u", stats[i].name, stats[i].value.b32);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
+ fprintf(f, "%s: %" PRIi64, stats[i].name, stats[i].value.i64);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
+ fprintf(f, "%s: %" PRIu64, stats[i].name, stats[i].value.u64);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
+ fprintf(f, "%s: %g", stats[i].name, stats[i].value.f64);
+ break;
+ default:
+ unreachable("unknown statistic");
+ }
+ }
+
+ /* print_buf is only valid after flushing. */
+ fflush(f);
+ util_debug_message(debug, SHADER_INFO, "%s\n", print_buf);
+
+ u_memstream_close(&stream);
+ free(print_buf);
+ }
+}
+
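+/* background-thread job: build default shader modules and (without EXT_shader_object)
+ * a graphics pipeline library so later draw-time compiles hit warm caches
+ */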
+static void
+precompile_job(void *data, void *gdata, int thread_index)
+{
+ struct zink_screen *screen = gdata;
+ struct zink_gfx_program *prog = data;
+
+ struct zink_gfx_pipeline_state state = {0};
+ state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
+ state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
+ state.optimal_key = state.shader_keys_optimal.key.val;
+ generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
+ zink_screen_get_pipeline_cache(screen, &prog->base, true);
+ if (!screen->info.have_EXT_shader_object) {
+ simple_mtx_lock(&prog->libs->lock);
+ zink_create_pipeline_lib(screen, prog, &state);
+ simple_mtx_unlock(&prog->libs->lock);
+ }
+ zink_screen_update_pipeline_cache(screen, &prog->base, true);
+}
+
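+/* background-thread job: compile a separate-shader variant (shader object or GPL library)
+ * for a standalone shader
+ */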
+static void
+precompile_separate_shader_job(void *data, void *gdata, int thread_index)
+{
+ struct zink_screen *screen = gdata;
+ struct zink_shader *zs = data;
+
+ zs->precompile.obj = zink_shader_compile_separate(screen, zs);
+ if (!screen->info.have_EXT_shader_object) {
+ struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0};
+ objs[zs->info.stage].mod = zs->precompile.obj.mod;
+ zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage);
+ }
+}
+
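+/* pipe_context::link_shader hook: create (and queue precompilation of) a gfx program
+ * for a known-good combination of shaders
+ */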
+static void
+zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
+{
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_shader **zshaders = (struct zink_shader **)shaders;
+ if (shaders[MESA_SHADER_COMPUTE])
+ return;
+ /* explicitly block sample shading: it always requires full pipelines */
+ if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
+ return;
+ /* can't precompile fixedfunc */
+ if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
+ /* handled directly from shader create */
+ return;
+ }
+ unsigned hash = 0;
+ unsigned shader_stages = 0;
+ for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+ if (zshaders[i]) {
+ hash ^= zshaders[i]->hash;
+ shader_stages |= BITFIELD_BIT(i);
+ }
+ }
+ unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
+ unsigned tess = shader_stages & tess_stages;
+ /* can't do fixedfunc tes either */
+ if (tess && !shaders[MESA_SHADER_TESS_EVAL])
+ return;
+ struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
+ simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
+ /* link can be called repeatedly with the same shaders: ignore */
+ if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
+ simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
+ return;
+ }
+ struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3, hash);
+ u_foreach_bit(i, shader_stages)
+ assert(prog->shaders[i]);
+ _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
+ prog->base.removed = false;
+ simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
+ if (zink_debug & ZINK_DEBUG_SHADERDB) {
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ if (screen->optimal_keys)
+ generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
+ else
+ generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state);
+ VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state,
+ ctx->gfx_pipeline_state.element_state->binding_map,
+ shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true, NULL);
+ print_pipeline_stats(screen, pipeline, &ctx->dbg);
+ VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL);
+ } else {
+ if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
+ prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
+ if (zink_debug & ZINK_DEBUG_NOBGC)
+ precompile_job(prog, pctx->screen, 0);
+ else
+ util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
+ }
}
void
zink_delete_shader_state(struct pipe_context *pctx, void *cso)
{
- zink_shader_free(zink_context(pctx), cso);
+ zink_gfx_shader_free(zink_screen(pctx->screen), cso);
}
void *
zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
+ struct zink_screen *screen = zink_screen(pctx->screen);
nir_shader *nir;
if (shader->type != PIPE_SHADER_IR_NIR)
nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
else
nir = (struct nir_shader *)shader->ir.nir;
- return zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
+ zink_descriptor_util_init_fbfetch(zink_context(pctx));
+ if (nir->info.uses_bindless)
+ zink_descriptors_init_bindless(zink_context(pctx));
+
+ void *ret = zink_shader_create(zink_screen(pctx->screen), nir);
+
+ if (!(zink_debug & ZINK_DEBUG_NOPC)) {
+ if (nir->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
+ (screen->info.have_EXT_shader_object ||
+ (screen->info.have_EXT_graphics_pipeline_library && (nir->info.stage == MESA_SHADER_FRAGMENT || nir->info.stage == MESA_SHADER_VERTEX)))) {
+ struct zink_shader *zs = ret;
+ /* sample shading can't be precompiled */
+ if (nir->info.stage != MESA_SHADER_FRAGMENT || !nir->info.fs.uses_sample_shading) {
+ if (zink_debug & ZINK_DEBUG_NOBGC)
+ precompile_separate_shader_job(zs, screen, 0);
+ else
+ util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, precompile_separate_shader_job, NULL, 0);
+ }
+ }
+ }
+ ralloc_free(nir);
+
+ return ret;
}
static void
@@ -968,6 +2269,7 @@ zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_sha
return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
}
+
void
zink_program_init(struct zink_context *ctx)
{
@@ -993,5 +2295,257 @@ zink_program_init(struct zink_context *ctx)
ctx->base.create_compute_state = zink_create_cs_state;
ctx->base.bind_compute_state = zink_bind_cs_state;
- ctx->base.delete_compute_state = zink_delete_shader_state;
+ ctx->base.get_compute_state_info = zink_get_compute_state_info;
+ ctx->base.delete_compute_state = zink_delete_cs_shader_state;
+
+ if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
+ _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
+ else
+ _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
+ if (zink_screen(ctx->base.screen)->have_full_ds3)
+ _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
+ else
+ _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
+ /* validate struct packing */
+ STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
+ STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
+ offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
+ STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
+ offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
+ STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
+ offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
+
+ STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
+
+ /* no precompile at all */
+ if (zink_debug & ZINK_DEBUG_NOPC)
+ return;
+
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
+ ctx->base.link_shader = zink_link_gfx_shader;
+}
+
+bool
+zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
+{
+ bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
+ bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
+ ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
+ if (!changed)
+ return false;
+ if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
+ ctx->gfx_pipeline_state.dirty |= true;
+ ctx->rasterizer_discard_changed = true;
+ return true;
+}
+
+void
+zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
+ struct util_queue_fence *fence,
+ pipe_driver_thread_func execute,
+ pipe_driver_thread_func cleanup,
+ const size_t job_size)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
+}
+
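+/* edge flags only matter for triangle/quad/polygon list prims rasterized as lines or
+ * points when the vertex shader writes edge flags
+ */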
+static bool
+has_edge_flags(struct zink_context *ctx)
+{
+ switch(ctx->gfx_pipeline_state.gfx_prim_mode) {
+ case MESA_PRIM_POINTS:
+ case MESA_PRIM_LINE_STRIP:
+ case MESA_PRIM_LINE_STRIP_ADJACENCY:
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_LINE_LOOP:
+ case MESA_PRIM_LINES_ADJACENCY:
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_FAN:
+ case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ case MESA_PRIM_QUAD_STRIP:
+ case MESA_PRIM_PATCHES:
+ return false;
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLES_ADJACENCY:
+ case MESA_PRIM_QUADS:
+ case MESA_PRIM_POLYGON:
+ case MESA_PRIM_COUNT:
+ default:
+ break;
+ }
+ return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES ||
+ ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) &&
+ ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
+}
+
+static enum zink_rast_prim
+zink_rast_prim_for_pipe(enum mesa_prim prim)
+{
+ switch (prim) {
+ case MESA_PRIM_POINTS:
+ return ZINK_PRIM_POINTS;
+ case MESA_PRIM_LINES:
+ return ZINK_PRIM_LINES;
+ case MESA_PRIM_TRIANGLES:
+ default:
+ return ZINK_PRIM_TRIANGLES;
+ }
+}
+
+static enum mesa_prim
+zink_tess_prim_type(struct zink_shader *tess)
+{
+ if (tess->info.tess.point_mode)
+ return MESA_PRIM_POINTS;
+ else {
+ switch (tess->info.tess._primitive_mode) {
+ case TESS_PRIMITIVE_ISOLINES:
+ return MESA_PRIM_LINES;
+ case TESS_PRIMITIVE_TRIANGLES:
+ case TESS_PRIMITIVE_QUADS:
+ return MESA_PRIM_TRIANGLES;
+ default:
+ return MESA_PRIM_COUNT;
+ }
+ }
+}
+
+static inline void
+zink_add_inline_uniform(nir_shader *shader, int offset)
+{
+ shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset;
+ ++shader->info.num_inlinable_uniforms;
+}
+
+static unsigned
+encode_lower_pv_mode(enum mesa_prim prim_type)
+{
+ switch (prim_type) {
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_QUAD_STRIP:
+ return ZINK_PVE_PRIMITIVE_TRISTRIP;
+ case MESA_PRIM_TRIANGLE_FAN:
+ return ZINK_PVE_PRIMITIVE_FAN;
+ default:
+ return ZINK_PVE_PRIMITIVE_SIMPLE;
+ }
+}
+
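+/* update shader keys and, if needed, generate/bind a passthrough GS to emulate features
+ * the driver lacks: line stipple/smooth, point smooth, provoking vertex, quads, edge flags
+ */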
+void
+zink_set_primitive_emulation_keys(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ bool lower_line_stipple = false, lower_line_smooth = false;
+ unsigned lower_pv_mode = 0;
+ if (!screen->optimal_keys) {
+ lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
+ screen->driver_workarounds.no_linestipple &&
+ ctx->rast_state->base.line_stipple_enable &&
+ !ctx->num_so_targets;
+
+ bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS &&
+ screen->driconf.emulate_point_smooth &&
+ ctx->rast_state->base.point_smooth;
+ if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
+ assert(zink_get_gs_key(ctx)->lower_line_stipple ==
+ zink_get_fs_key(ctx)->lower_line_stipple);
+ zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
+ zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
+ }
+
+ lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
+ screen->driver_workarounds.no_linesmooth &&
+ ctx->rast_state->base.line_smooth &&
+ !ctx->num_so_targets;
+
+ if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
+ assert(zink_get_gs_key(ctx)->lower_line_smooth ==
+ zink_get_fs_key(ctx)->lower_line_smooth);
+ zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
+ zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
+ }
+
+ if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
+ zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
+ }
+
+ lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last &&
+ !screen->info.have_EXT_provoking_vertex;
+ if (lower_pv_mode)
+ lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode);
+
+ if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode)
+ zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode;
+ }
+
+ bool lower_edge_flags = has_edge_flags(ctx);
+
+ bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS;
+
+ bool lower_filled_quad = lower_quad_prim &&
+ ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES;
+
+ if (lower_line_stipple || lower_line_smooth ||
+ lower_edge_flags || lower_quad_prim ||
+ lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) {
+ enum pipe_shader_type prev_vertex_stage =
+ ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
+ MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ enum zink_rast_prim zink_prim_type =
+ zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim);
+
+ //when using transform feedback, quads must be tessellated into filled triangles
+ lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings;
+
+ if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated &&
+ ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {
+
+ if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) {
+ nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]);
+ nir_shader *nir;
+ if (lower_filled_quad) {
+ nir = zink_create_quads_emulation_gs(
+ &screen->nir_options,
+ prev_stage);
+ } else {
+ enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode;
+ if (prev_vertex_stage == MESA_SHADER_TESS_EVAL)
+ prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
+ nir = nir_create_passthrough_gs(
+ &screen->nir_options,
+ prev_stage,
+ prim,
+ ctx->gfx_pipeline_state.rast_prim,
+ lower_edge_flags,
+ lower_line_stipple || lower_quad_prim);
+ }
+ zink_lower_system_values_to_inlined_uniforms(nir);
+
+ zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
+ zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1);
+ zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
+ ralloc_free(prev_stage);
+ struct zink_shader *shader = zink_shader_create(screen, nir);
+ shader->needs_inlining = true;
+ ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader;
+ shader->non_fs.is_generated = true;
+ shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage];
+ shader->can_inline = true;
+ memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride));
+ }
+
+ ctx->base.bind_gs_state(&ctx->base,
+ ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]);
+ ctx->is_generated_gs_bound = true;
+ }
+
+ ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3,
+ (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
+ ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32,
+ ctx->gfx_pipeline_state.dyn_state3.pv_last});
+ } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
+ ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
+ ctx->base.bind_gs_state(&ctx->base, NULL);
}
diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h
index f4c65f6c7e2..b4b2bd21bcd 100644
--- a/src/gallium/drivers/zink/zink_program.h
+++ b/src/gallium/drivers/zink/zink_program.h
@@ -24,102 +24,19 @@
#ifndef ZINK_PROGRAM_H
#define ZINK_PROGRAM_H
-#include <vulkan/vulkan.h>
-
-#include "compiler/shader_enums.h"
-#include "pipe/p_state.h"
-#include "util/u_inlines.h"
-
-#include "zink_context.h"
-#include "zink_compiler.h"
-#include "zink_shader_keys.h"
+#include "zink_types.h"
#ifdef __cplusplus
extern "C" {
#endif
+#include "util/u_prim.h"
-struct zink_screen;
-struct zink_shader;
-struct zink_gfx_pipeline_state;
-struct zink_descriptor_set;
-
-struct hash_table;
-struct set;
-struct util_dynarray;
-
-struct zink_program;
-
-struct zink_gfx_push_constant {
- unsigned draw_mode_is_indexed;
- unsigned draw_id;
- float default_inner_level[2];
- float default_outer_level[4];
-};
-
-struct zink_cs_push_constant {
- unsigned work_dim;
-};
-
-/* a shader module is used for directly reusing a shader module between programs,
- * e.g., in the case where we're swapping out only one shader,
- * allowing us to skip going through shader keys
- */
-struct zink_shader_module {
- struct list_head list;
- VkShaderModule shader;
- uint32_t hash;
- bool default_variant;
- uint8_t num_uniforms;
- uint8_t key_size;
- uint8_t key[0]; /* | key | uniforms | */
-};
-
-struct zink_program {
- struct pipe_reference reference;
- unsigned char sha1[20];
- struct util_queue_fence cache_fence;
- VkPipelineCache pipeline_cache;
- size_t pipeline_cache_size;
- struct zink_batch_usage *batch_uses;
- bool is_compute;
-
- struct zink_program_descriptor_data *dd;
-
- VkPipelineLayout layout;
- VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_TYPES + 1]; // one for each type + push
- unsigned num_dsl;
-
- bool removed;
+struct compute_pipeline_cache_entry {
+ struct zink_compute_pipeline_state state;
+ VkPipeline pipeline;
};
#define ZINK_MAX_INLINED_VARIANTS 5
-struct zink_gfx_program {
- struct zink_program base;
-
- uint32_t stages_present; //mask of stages present in this program
- struct nir_shader *nir[ZINK_SHADER_COUNT];
-
- struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here
-
- struct zink_shader *last_vertex_stage;
-
- struct list_head shader_cache[ZINK_SHADER_COUNT][2]; //normal, inline uniforms
- unsigned inlined_variant_count[ZINK_SHADER_COUNT];
-
- struct zink_shader *shaders[ZINK_SHADER_COUNT];
- struct hash_table pipelines[11]; // number of draw modes we support
- uint32_t default_variant_hash;
- uint32_t last_variant_hash;
-};
-
-struct zink_compute_program {
- struct zink_program base;
-
- struct zink_shader_module *module;
- struct zink_shader *shader;
- struct hash_table *pipelines;
-};
-
static inline enum zink_descriptor_type
zink_desc_type_from_vktype(VkDescriptorType type)
{
@@ -127,6 +44,8 @@ zink_desc_type_from_vktype(VkDescriptorType type)
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
return ZINK_DESCRIPTOR_TYPE_UBO;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
return ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
@@ -141,44 +60,47 @@ zink_desc_type_from_vktype(VkDescriptorType type)
}
static inline VkPrimitiveTopology
-zink_primitive_topology(enum pipe_prim_type mode)
+zink_primitive_topology(enum mesa_prim mode)
{
switch (mode) {
- case PIPE_PRIM_POINTS:
+ case MESA_PRIM_POINTS:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
- case PIPE_PRIM_LINES:
+ case MESA_PRIM_LINES:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
- case PIPE_PRIM_LINE_STRIP:
+ case MESA_PRIM_LINE_STRIP:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
- case PIPE_PRIM_TRIANGLES:
+ case MESA_PRIM_TRIANGLES:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
- case PIPE_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_STRIP:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
- case PIPE_PRIM_TRIANGLE_FAN:
+ case MESA_PRIM_TRIANGLE_FAN:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case MESA_PRIM_LINE_STRIP_ADJACENCY:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
- case PIPE_PRIM_LINES_ADJACENCY:
+ case MESA_PRIM_LINES_ADJACENCY:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case MESA_PRIM_TRIANGLES_ADJACENCY:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
- case PIPE_PRIM_PATCHES:
+ case MESA_PRIM_PATCHES:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
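+ /* quads are handled by an emulation GS; lines_adjacency delivers the four quad vertices per primitive */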
+ case MESA_PRIM_QUADS:
+ return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
+
default:
- unreachable("unexpected enum pipe_prim_type");
+ unreachable("unexpected enum mesa_prim");
}
}
@@ -188,38 +110,61 @@ void *
zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader);
unsigned
-zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type, bool is_compute);
+zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type);
unsigned
-zink_program_num_bindings(const struct zink_program *pg, bool is_compute);
+zink_program_num_bindings(const struct zink_program *pg);
bool
-zink_program_descriptor_is_buffer(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type, unsigned i);
+zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i);
+
+void
+zink_gfx_program_update(struct zink_context *ctx);
+void
+zink_gfx_program_update_optimal(struct zink_context *ctx);
+
+struct zink_gfx_library_key *
+zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state);
+uint32_t hash_gfx_output(const void *key);
+uint32_t hash_gfx_output_ds3(const void *key);
+uint32_t hash_gfx_input(const void *key);
+uint32_t hash_gfx_input_dynamic(const void *key);
+
+void
+zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry);
void
-zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog);
+zink_program_finish(struct zink_context *ctx, struct zink_program *pg);
+
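+/* index into the per-program pipeline tables: 0=points, 1=lines, 2=tris, 3=patches */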
+static inline unsigned
+get_primtype_idx(enum mesa_prim mode)
+{
+ if (mode == MESA_PRIM_PATCHES)
+ return 3;
+ switch (u_reduced_prim(mode)) {
+ case MESA_PRIM_POINTS:
+ return 0;
+ case MESA_PRIM_LINES:
+ return 1;
+ default:
+ return 2;
+ }
+}
struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
- struct zink_shader *stages[ZINK_SHADER_COUNT],
- unsigned vertices_per_patch);
+ struct zink_shader **stages,
+ unsigned vertices_per_patch,
+ uint32_t gfx_hash);
void
zink_destroy_gfx_program(struct zink_screen *screen,
struct zink_gfx_program *prog);
-
-VkPipeline
-zink_get_gfx_pipeline(struct zink_context *ctx,
- struct zink_gfx_program *prog,
- struct zink_gfx_pipeline_state *state,
- enum pipe_prim_type mode);
-
+void
+zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs);
void
zink_program_init(struct zink_context *ctx);
-uint32_t
-zink_program_get_descriptor_usage(struct zink_context *ctx, enum pipe_shader_type stage, enum zink_descriptor_type type);
-
void
debug_describe_zink_gfx_program(char* buf, const struct zink_gfx_program *ptr);
@@ -240,11 +185,9 @@ zink_gfx_program_reference(struct zink_screen *screen,
return ret;
}
-struct zink_compute_program *
-zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader);
void
zink_destroy_compute_program(struct zink_screen *screen,
- struct zink_compute_program *comp);
+ struct zink_compute_program *comp);
void
debug_describe_zink_compute_program(char* buf, const struct zink_compute_program *ptr);
@@ -266,12 +209,30 @@ zink_compute_program_reference(struct zink_screen *screen,
return ret;
}
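+/* type-dispatching unref helper: routes to the gfx or compute reference path */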
+static inline bool
+zink_program_reference(struct zink_screen *screen,
+ struct zink_program **dst,
+ struct zink_program *src)
+{
+ struct zink_program *pg = src ? src : dst ? *dst : NULL;
+ if (!pg)
+ return false;
+ if (pg->is_compute) {
+ struct zink_compute_program *comp = (struct zink_compute_program*)pg;
+ return zink_compute_program_reference(screen, &comp, NULL);
+ } else {
+ struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
+ return zink_gfx_program_reference(screen, &prog, NULL);
+ }
+}
+
VkPipelineLayout
-zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg);
+zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags);
void
-zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3]);
-
+zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info);
+void
+zink_update_compute_program(struct zink_context *ctx);
VkPipeline
zink_get_compute_pipeline(struct zink_screen *screen,
struct zink_compute_program *comp,
@@ -283,58 +244,225 @@ zink_program_has_descriptors(const struct zink_program *pg)
return pg->num_dsl > 0;
}
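+/* with optimal_keys the fs base key lives in the packed optimal-key union;
+ * otherwise it sits in the per-stage key array
+ */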
+static inline struct zink_fs_key_base *
+zink_set_fs_base_key(struct zink_context *ctx)
+{
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_FRAGMENT);
+ return zink_screen(ctx->base.screen)->optimal_keys ?
+ &ctx->gfx_pipeline_state.shader_keys_optimal.key.fs :
+ &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.base;
+}
+
+static inline const struct zink_fs_key_base *
+zink_get_fs_base_key(const struct zink_context *ctx)
+{
+ return zink_screen(ctx->base.screen)->optimal_keys ?
+ &ctx->gfx_pipeline_state.shader_keys_optimal.key.fs :
+ &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.base;
+}
+
static inline struct zink_fs_key *
zink_set_fs_key(struct zink_context *ctx)
{
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_FRAGMENT);
- return (struct zink_fs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_FRAGMENT];
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_FRAGMENT);
+ return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs;
}
static inline const struct zink_fs_key *
-zink_get_fs_key(struct zink_context *ctx)
+zink_get_fs_key(const struct zink_context *ctx)
+{
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs;
+}
+
+static inline struct zink_gs_key *
+zink_set_gs_key(struct zink_context *ctx)
+{
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_GEOMETRY);
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_GEOMETRY].key.gs;
+}
+
+static inline const struct zink_gs_key *
+zink_get_gs_key(const struct zink_context *ctx)
+{
+ return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_GEOMETRY].key.gs;
+}
+
+static inline bool
+zink_set_tcs_key_patches(struct zink_context *ctx, uint8_t patch_vertices)
{
- return (const struct zink_fs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_FRAGMENT];
+ struct zink_tcs_key *tcs = zink_screen(ctx->base.screen)->optimal_keys ?
+ &ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs :
+ &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs;
+ if (tcs->patch_vertices == patch_vertices)
+ return false;
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ tcs->patch_vertices = patch_vertices;
+ return true;
}
+static inline const struct zink_tcs_key *
+zink_get_tcs_key(const struct zink_context *ctx)
+{
+ return zink_screen(ctx->base.screen)->optimal_keys ?
+ &ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs :
+ &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs;
+}
+
+void
+zink_update_fs_key_samples(struct zink_context *ctx);
+
+void
+zink_update_gs_key_rectangular_line(struct zink_context *ctx);
+
static inline struct zink_vs_key *
zink_set_vs_key(struct zink_context *ctx)
{
- ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX);
- return (struct zink_vs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX];
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(MESA_SHADER_VERTEX);
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs;
}
static inline const struct zink_vs_key *
-zink_get_vs_key(struct zink_context *ctx)
+zink_get_vs_key(const struct zink_context *ctx)
{
- return (const struct zink_vs_key *)&ctx->gfx_pipeline_state.shader_keys.key[PIPE_SHADER_VERTEX];
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ return &ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs;
}
static inline struct zink_vs_key_base *
zink_set_last_vertex_key(struct zink_context *ctx)
{
ctx->last_vertex_stage_dirty = true;
- return (struct zink_vs_key_base *)&ctx->gfx_pipeline_state.shader_keys.last_vertex;
+ return zink_screen(ctx->base.screen)->optimal_keys ?
+ &ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base :
+ &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base;
}
static inline const struct zink_vs_key_base *
-zink_get_last_vertex_key(struct zink_context *ctx)
+zink_get_last_vertex_key(const struct zink_context *ctx)
{
- return (const struct zink_vs_key_base *)&ctx->gfx_pipeline_state.shader_keys.last_vertex;
+ return zink_screen(ctx->base.screen)->optimal_keys ?
+ &ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base :
+ &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base;
}
static inline void
zink_set_fs_point_coord_key(struct zink_context *ctx)
{
- const struct zink_fs_key *fs = zink_get_fs_key(ctx);
- bool disable = !ctx->gfx_pipeline_state.has_points || !ctx->rast_state->base.sprite_coord_enable;
+ const struct zink_fs_key_base *fs = zink_get_fs_base_key(ctx);
+ bool disable = ctx->gfx_pipeline_state.rast_prim != MESA_PRIM_POINTS;
uint8_t coord_replace_bits = disable ? 0 : ctx->rast_state->base.sprite_coord_enable;
- bool coord_replace_yinvert = disable ? false : !!ctx->rast_state->base.sprite_coord_mode;
- if (fs->coord_replace_bits != coord_replace_bits || fs->coord_replace_yinvert != coord_replace_yinvert) {
- zink_set_fs_key(ctx)->coord_replace_bits = coord_replace_bits;
- zink_set_fs_key(ctx)->coord_replace_yinvert = coord_replace_yinvert;
+ bool point_coord_yinvert = disable ? false : !!ctx->rast_state->base.sprite_coord_mode;
+ if (fs->coord_replace_bits != coord_replace_bits || fs->point_coord_yinvert != point_coord_yinvert) {
+ zink_set_fs_base_key(ctx)->coord_replace_bits = coord_replace_bits;
+ zink_set_fs_base_key(ctx)->point_coord_yinvert = point_coord_yinvert;
}
}
+void
+zink_set_primitive_emulation_keys(struct zink_context *ctx);
+
+void
+zink_create_primitive_emulation_gs(struct zink_context *ctx);
+
+static inline const struct zink_shader_key_base *
+zink_get_shader_key_base(const struct zink_context *ctx, gl_shader_stage pstage)
+{
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ return &ctx->gfx_pipeline_state.shader_keys.key[pstage].base;
+}
+
+static inline struct zink_shader_key_base *
+zink_set_shader_key_base(struct zink_context *ctx, gl_shader_stage pstage)
+{
+ ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
+ assert(!zink_screen(ctx->base.screen)->optimal_keys);
+ return &ctx->gfx_pipeline_state.shader_keys.key[pstage].base;
+}
+
+static inline void
+zink_set_zs_needs_shader_swizzle_key(struct zink_context *ctx, gl_shader_stage pstage, bool swizzle_update)
+{
+ if (!zink_screen(ctx->base.screen)->driver_workarounds.needs_zs_shader_swizzle) {
+ if (pstage != MESA_SHADER_FRAGMENT)
+ return;
+ const struct zink_fs_key_base *fs = zink_get_fs_base_key(ctx);
+ bool enable = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask & ctx->di.zs_swizzle[pstage].mask) > 0;
+ if (enable != fs->shadow_needs_shader_swizzle || (enable && swizzle_update))
+ zink_set_fs_base_key(ctx)->shadow_needs_shader_swizzle = enable;
+ return;
+ }
+ bool enable = !!ctx->di.zs_swizzle[pstage].mask;
+ const struct zink_shader_key_base *key = zink_get_shader_key_base(ctx, pstage);
+ if (enable != key->needs_zs_shader_swizzle || (enable && swizzle_update))
+ zink_set_shader_key_base(ctx, pstage)->needs_zs_shader_swizzle = enable;
+}
+
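+/* graphics pipeline library fast-linking is only usable when none of the state checked
+ * below forces a monolithic pipeline
+ */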
+ALWAYS_INLINE static bool
+zink_can_use_pipeline_libs(const struct zink_context *ctx)
+{
+ return
+ !ctx->gfx_pipeline_state.render_pass &&
+ /* this is just terrible */
+ !zink_get_fs_base_key(ctx)->shadow_needs_shader_swizzle &&
+ /* TODO: is sample shading even possible to handle with GPL? */
+ !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading &&
+ !zink_get_fs_base_key(ctx)->fbfetch_ms &&
+ !ctx->gfx_pipeline_state.force_persample_interp &&
+ !ctx->gfx_pipeline_state.min_samples &&
+ !ctx->is_generated_gs_bound;
+}
+
+/* stricter requirements */
+ALWAYS_INLINE static bool
+zink_can_use_shader_objects(const struct zink_context *ctx)
+{
+ return
+ !ctx->gfx_pipeline_state.render_pass &&
+ ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) &&
+ /* TODO: is sample shading even possible to handle with GPL? */
+ !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading &&
+ !ctx->gfx_pipeline_state.force_persample_interp &&
+ !ctx->gfx_pipeline_state.min_samples &&
+ !ctx->is_generated_gs_bound;
+}
+
+bool
+zink_set_rasterizer_discard(struct zink_context *ctx, bool disable);
+void
+zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
+ struct util_queue_fence *fence,
+ pipe_driver_thread_func execute,
+ pipe_driver_thread_func cleanup,
+ const size_t job_size);
+equals_gfx_pipeline_state_func
+zink_get_gfx_pipeline_eq_func(struct zink_screen *screen, struct zink_gfx_program *prog);
+
+/* determines whether the 'samples' shader key is valid */
+static inline bool
+zink_shader_uses_samples(const struct zink_shader *zs)
+{
+ assert(zs->info.stage == MESA_SHADER_FRAGMENT);
+ return zs->uses_sample || zs->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+}
+
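+/* clear optimal-key bits that the bound shaders can't consume: tcs bits unless a
+ * generated tcs will be used, 'samples' if the fs ignores sample rate, and
+ * dual-color-blend if the fs doesn't write a second color output
+ */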
+static inline uint32_t
+zink_sanitize_optimal_key(struct zink_shader **shaders, uint32_t val)
+{
+ union zink_shader_key_optimal k;
+ if (shaders[MESA_SHADER_TESS_EVAL] && !shaders[MESA_SHADER_TESS_CTRL])
+ k.val = val;
+ else
+ k.val = zink_shader_key_optimal_no_tcs(val);
+ if (!zink_shader_uses_samples(shaders[MESA_SHADER_FRAGMENT]))
+ k.fs.samples = false;
+ if (!(shaders[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)))
+ k.fs.force_dual_color_blend = false;
+ return k.val;
+}
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp
new file mode 100644
index 00000000000..2cabc678660
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_program_state.hpp
@@ -0,0 +1,423 @@
+/*
+ * Copyright © 2022 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+ */
+
+
+/**
+ * this file is used to optimize pipeline state management:
+ * pipeline state comparisons are the most significant cause of CPU overhead aside from descriptors,
+ * so more effort must be taken to reduce that overhead by any means
+ */
+#include "zink_types.h"
+#include "zink_pipeline.h"
+#include "zink_program.h"
+#include "zink_screen.h"
+
+/* runtime-optimized pipeline state hashing */
+template <zink_dynamic_state DYNAMIC_STATE>
+static uint32_t
+hash_gfx_pipeline_state(const void *key, struct zink_screen *screen)
+{
+ const struct zink_gfx_pipeline_state *state = (const struct zink_gfx_pipeline_state *)key;
+ uint32_t hash = _mesa_hash_data(key, screen->have_full_ds3 ?
+ offsetof(struct zink_gfx_pipeline_state, sample_mask) :
+ offsetof(struct zink_gfx_pipeline_state, hash));
+ if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE3)
+ hash = XXH32(&state->dyn_state3, sizeof(state->dyn_state3), hash);
+ if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2)
+ hash = XXH32(&state->dyn_state2, sizeof(state->dyn_state2), hash);
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
+ return hash;
+ return XXH32(&state->dyn_state1, sizeof(state->dyn_state1), hash);
+}
+
+template <bool HAS_DYNAMIC>
+static unsigned
+get_pipeline_idx(enum mesa_prim mode, VkPrimitiveTopology vkmode)
+{
+ /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY specifies that the topology state in
+ * VkPipelineInputAssemblyStateCreateInfo only specifies the topology class,
+ * and the specific topology order and adjacency must be set dynamically
+ * with vkCmdSetPrimitiveTopology before any drawing commands.
+ */
+ if (HAS_DYNAMIC) {
+ return get_primtype_idx(mode);
+ }
+ return vkmode;
+}
+
+/*
+ VUID-vkCmdBindVertexBuffers2-pStrides-06209
+ If pStrides is not NULL each element of pStrides must be either 0 or greater than or equal
+ to the maximum extent of all vertex input attributes fetched from the corresponding
+ binding, where the extent is calculated as the VkVertexInputAttributeDescription::offset
+ plus VkVertexInputAttributeDescription::format size
+
+ * thus, if the stride doesn't meet the minimum requirement for a binding,
+ * disable the dynamic state here and use a fully-baked pipeline
+ */
+static bool
+check_vertex_strides(struct zink_context *ctx)
+{
+ const struct zink_vertex_elements_state *ves = ctx->element_state;
+ for (unsigned i = 0; i < ves->hw_state.num_bindings; i++) {
+ const struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ves->hw_state.binding_map[i];
+ unsigned stride = vb->buffer.resource ? ves->hw_state.b.strides[i] : 0;
+ if (stride && stride < ves->min_stride[i])
+ return false;
+ }
+ return true;
+}
+
+/* runtime-optimized function to recalc pipeline state and find a usable pipeline:
+ * in theory, zink supports many feature levels,
+ * but it's important to provide a more optimized codepath for drivers that support all the best features
+ */
+template <zink_dynamic_state DYNAMIC_STATE, bool HAVE_LIB>
+VkPipeline
+zink_get_gfx_pipeline(struct zink_context *ctx,
+ struct zink_gfx_program *prog,
+ struct zink_gfx_pipeline_state *state,
+ enum mesa_prim mode)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ bool uses_dynamic_stride = state->uses_dynamic_stride;
+
+ VkPrimitiveTopology vkmode = zink_primitive_topology(mode);
+ const unsigned idx = screen->info.dynamic_state3_props.dynamicPrimitiveTopologyUnrestricted ?
+ 0 :
+ get_pipeline_idx<DYNAMIC_STATE >= ZINK_DYNAMIC_STATE>(mode, vkmode);
+ assert(idx <= ARRAY_SIZE(prog->pipelines[0]));
+ if (!state->dirty && !state->modules_changed &&
+ ((DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) && !ctx->vertex_state_changed) &&
+ idx == state->idx)
+ return state->pipeline;
+
+ struct hash_entry *entry = NULL;
+
+ /* recalc the base pipeline state hash */
+ if (state->dirty) {
+ if (state->pipeline) //avoid on first hash
+ state->final_hash ^= state->hash;
+ state->hash = hash_gfx_pipeline_state<DYNAMIC_STATE>(state, screen);
+ state->final_hash ^= state->hash;
+ state->dirty = false;
+ }
+ /* extra safety asserts for optimal path to catch refactoring bugs */
+ if (prog->optimal_keys) {
+ ASSERTED const union zink_shader_key_optimal *opt = (union zink_shader_key_optimal*)&prog->last_variant_hash;
+ ASSERTED uint32_t sanitized = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
+ assert(opt->val == sanitized);
+ assert(state->optimal_key == sanitized);
+ }
+ /* recalc vertex state if missing optimal extensions */
+ if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT && ctx->vertex_state_changed) {
+ if (state->pipeline)
+ state->final_hash ^= state->vertex_hash;
+ /* even if dynamic stride is available, it may not be usable with the current pipeline */
+ if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
+ uses_dynamic_stride = check_vertex_strides(ctx);
+ if (!uses_dynamic_stride) {
+ uint32_t hash = 0;
+ /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */
+ uint32_t vertex_buffers_enabled_mask = state->vertex_buffers_enabled_mask;
+ hash = XXH32(&vertex_buffers_enabled_mask, sizeof(uint32_t), hash);
+
+ for (unsigned i = 0; i < state->element_state->num_bindings; i++) {
+ const unsigned buffer_id = ctx->element_state->hw_state.binding_map[i];
+ struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id;
+ state->vertex_strides[buffer_id] = vb->buffer.resource ? state->element_state->b.strides[i] : 0;
+ hash = XXH32(&state->vertex_strides[buffer_id], sizeof(uint32_t), hash);
+ }
+ state->vertex_hash = hash ^ state->element_state->hash;
+ } else
+ state->vertex_hash = state->element_state->hash;
+ state->final_hash ^= state->vertex_hash;
+ }
+ state->modules_changed = false;
+ state->uses_dynamic_stride = uses_dynamic_stride;
+ state->idx = idx;
+ ctx->vertex_state_changed = false;
+
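+ /* pipelines are cached separately based on whether a render pass object is set */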
+ const int rp_idx = state->render_pass ? 1 : 0;
+ /* shortcut for reusing previous pipeline across program changes */
+ if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) {
+ if (prog->last_finalized_hash[rp_idx][idx] == state->final_hash &&
+ !prog->inline_variants && likely(prog->last_pipeline[rp_idx][idx]) &&
+ /* this data is too big to compare in the fast-path */
+ likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) {
+ state->pipeline = prog->last_pipeline[rp_idx][idx]->pipeline;
+ return state->pipeline;
+ }
+ }
+ entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, state);
+
+ if (!entry) {
+ /* always wait on async precompile/cache fence */
+ util_queue_fence_wait(&prog->base.cache_fence);
+ struct zink_gfx_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(zink_gfx_pipeline_cache_entry);
+ if (!pc_entry)
+ return VK_NULL_HANDLE;
+ /* cache entries must have all state needed to construct pipelines
+ * TODO: maybe optimize this since all these values aren't actually needed
+ */
+ memcpy(&pc_entry->state, state, sizeof(*state));
+ pc_entry->state.rendering_info.pColorAttachmentFormats = pc_entry->state.rendering_formats;
+ pc_entry->prog = prog;
+ /* init the optimized background compile fence */
+ util_queue_fence_init(&pc_entry->fence);
+ entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry);
+ if (prog->base.uses_shobj && !prog->is_separable) {
+ memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs));
+ zink_gfx_program_compile_queue(ctx, pc_entry);
+ } else if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
+ /* this is the graphics pipeline library path: find/construct all partial pipelines */
+ simple_mtx_lock(&prog->libs->lock);
+ struct set_entry *he = _mesa_set_search(&prog->libs->libs, &ctx->gfx_pipeline_state.optimal_key);
+ struct zink_gfx_library_key *gkey;
+ if (he) {
+ gkey = (struct zink_gfx_library_key *)he->key;
+ } else {
+ assert(!prog->is_separable);
+ gkey = zink_create_pipeline_lib(screen, prog, &ctx->gfx_pipeline_state);
+ }
+ simple_mtx_unlock(&prog->libs->lock);
+ struct zink_gfx_input_key *ikey = DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT ?
+ zink_find_or_create_input_dynamic(ctx, vkmode) :
+ zink_find_or_create_input(ctx, vkmode);
+ struct zink_gfx_output_key *okey = DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && screen->have_full_ds3 ?
+ zink_find_or_create_output_ds3(ctx) :
+ zink_find_or_create_output(ctx);
+ /* partial pipelines are stored to the cache entry for async optimized pipeline compiles */
+ pc_entry->gpl.ikey = ikey;
+ pc_entry->gpl.gkey = gkey;
+ pc_entry->gpl.okey = okey;
+ /* try to hit optimized compile cache first if possible */
+ if (!prog->is_separable)
+ pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, &gkey->pipeline, 1, okey->pipeline, true, true);
+ if (!pc_entry->pipeline) {
+ /* create the non-optimized pipeline first using fast-linking to avoid stuttering */
+ pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, &gkey->pipeline, 1, okey->pipeline, false, false);
+ if (!prog->is_separable)
+ /* trigger async optimized pipeline compile if this was the fast-linked unoptimized pipeline */
+ zink_gfx_program_compile_queue(ctx, pc_entry);
+ }
+ } else {
+ /* optimize by default only when expecting precompiles in order to reduce stuttering */
+ if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT)
+ pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB, NULL);
+ else
+ pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, NULL, vkmode, !HAVE_LIB, NULL);
+ if (HAVE_LIB && !prog->is_separable)
+ /* trigger async optimized pipeline compile if this was an unoptimized pipeline */
+ zink_gfx_program_compile_queue(ctx, pc_entry);
+ }
+ if (pc_entry->pipeline == VK_NULL_HANDLE)
+ return VK_NULL_HANDLE;
+
+ zink_screen_update_pipeline_cache(screen, &prog->base, false);
+ }
+
+ struct zink_gfx_pipeline_cache_entry *cache_entry = (struct zink_gfx_pipeline_cache_entry *)entry->data;
+ state->pipeline = cache_entry->pipeline;
+ /* update states for fastpath */
+ if (DYNAMIC_STATE >= ZINK_DYNAMIC_VERTEX_INPUT) {
+ prog->last_finalized_hash[rp_idx][idx] = state->final_hash;
+ prog->last_pipeline[rp_idx][idx] = cache_entry;
+ }
+ return state->pipeline;
+}
+
+/* runtime-optimized pipeline state comparisons */
+template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK>
+static bool
+equals_gfx_pipeline_state(const void *a, const void *b)
+{
+ const struct zink_gfx_pipeline_state *sa = (const struct zink_gfx_pipeline_state *)a;
+ const struct zink_gfx_pipeline_state *sb = (const struct zink_gfx_pipeline_state *)b;
+ if (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT) {
+ if (sa->uses_dynamic_stride != sb->uses_dynamic_stride)
+ return false;
+ }
+ if (DYNAMIC_STATE == ZINK_PIPELINE_NO_DYNAMIC_STATE ||
+ (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT && !sa->uses_dynamic_stride)) {
+ if (sa->vertex_buffers_enabled_mask != sb->vertex_buffers_enabled_mask)
+ return false;
+ /* if we don't have dynamic states, we have to compare the enabled vertex buffer bindings */
+ uint32_t mask_a = sa->vertex_buffers_enabled_mask;
+ uint32_t mask_b = sb->vertex_buffers_enabled_mask;
+ while (mask_a || mask_b) {
+ unsigned idx_a = u_bit_scan(&mask_a);
+ unsigned idx_b = u_bit_scan(&mask_b);
+ if (sa->vertex_strides[idx_a] != sb->vertex_strides[idx_b])
+ return false;
+ }
+ }
+
+ /* each dynamic state extension has its own struct on the pipeline state to compare
+ * if all extensions are supported, none of them are accessed
+ */
+ if (DYNAMIC_STATE == ZINK_PIPELINE_NO_DYNAMIC_STATE) {
+ if (memcmp(&sa->dyn_state1, &sb->dyn_state1, offsetof(struct zink_pipeline_dynamic_state1, depth_stencil_alpha_state)))
+ return false;
+ if (!!sa->dyn_state1.depth_stencil_alpha_state != !!sb->dyn_state1.depth_stencil_alpha_state ||
+ (sa->dyn_state1.depth_stencil_alpha_state &&
+ memcmp(sa->dyn_state1.depth_stencil_alpha_state, sb->dyn_state1.depth_stencil_alpha_state,
+ sizeof(struct zink_depth_stencil_alpha_hw_state))))
+ return false;
+ }
+ if (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_STATE3) {
+ if (DYNAMIC_STATE < ZINK_PIPELINE_DYNAMIC_STATE2) {
+ if (memcmp(&sa->dyn_state2, &sb->dyn_state2, sizeof(sa->dyn_state2)))
+ return false;
+ }
+ if (memcmp(&sa->dyn_state3, &sb->dyn_state3, sizeof(sa->dyn_state3)))
+ return false;
+ } else if (DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_STATE2_PCP &&
+ DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2_PCP &&
+ DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_STATE3_PCP &&
+ DYNAMIC_STATE != ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT_PCP &&
+ (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) &&
+ !(STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL))) {
+ if (sa->dyn_state2.vertices_per_patch != sb->dyn_state2.vertices_per_patch)
+ return false;
+ }
+ /* optimal keys are the fastest path: only a single uint32_t comparison for all shader module variants */
+ if (STAGE_MASK & STAGE_MASK_OPTIMAL) {
+ if (sa->optimal_key != sb->optimal_key)
+ return false;
+ if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) {
+ if (sa->shadow != sb->shadow)
+ return false;
+ }
+ } else {
+ if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
+ if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL])
+ return false;
+ }
+ if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) {
+ if (sa->modules[MESA_SHADER_TESS_EVAL] != sb->modules[MESA_SHADER_TESS_EVAL])
+ return false;
+ }
+ if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_GEOMETRY)) {
+ if (sa->modules[MESA_SHADER_GEOMETRY] != sb->modules[MESA_SHADER_GEOMETRY])
+ return false;
+ }
+ if (sa->modules[MESA_SHADER_VERTEX] != sb->modules[MESA_SHADER_VERTEX])
+ return false;
+ if (sa->modules[MESA_SHADER_FRAGMENT] != sb->modules[MESA_SHADER_FRAGMENT])
+ return false;
+ }
+ /* the base pipeline state is a 12 byte comparison */
+ return !memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash));
+}
+
+/* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */
+template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK>
+static equals_gfx_pipeline_state_func
+get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle)
+{
+ if (optimal_keys) {
+ if (shadow_needs_shader_swizzle)
+ return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL | STAGE_MASK_OPTIMAL_SHADOW>;
+ return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL>;
+ }
+ return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK>;
+}
+
+template <zink_pipeline_dynamic_state DYNAMIC_STATE>
+static equals_gfx_pipeline_state_func
+get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
+{
+ bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0;
+ unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT);
+ if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
+ if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
+ vertex_stages &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ }
+ if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
+ if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT))
+ /* all stages */
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle);
+ if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY))
+ /* tess only: includes generated tcs too */
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
+ if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
+ /* geom only */
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
+ }
+ if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
+ /* all stages but tcs */
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle);
+ if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
+ /* tess only: generated tcs */
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle);
+ if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
+ /* geom only */
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
+ return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
+ BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle);
+}
+
+equals_gfx_pipeline_state_func
+zink_get_gfx_pipeline_eq_func(struct zink_screen *screen, struct zink_gfx_program *prog)
+{
+ if (screen->info.have_EXT_extended_dynamic_state) {
+ if (screen->info.have_EXT_extended_dynamic_state2) {
+ if (screen->info.have_EXT_extended_dynamic_state3) {
+ if (screen->info.have_EXT_vertex_input_dynamic_state) {
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT_PCP>(prog, screen->optimal_keys);
+ else
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT>(prog, screen->optimal_keys);
+ } else {
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE3_PCP>(prog, screen->optimal_keys);
+ else
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE3>(prog, screen->optimal_keys);
+ }
+ }
+ if (screen->info.have_EXT_vertex_input_dynamic_state) {
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2_PCP>(prog, screen->optimal_keys);
+ else
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2>(prog, screen->optimal_keys);
+ } else {
+ if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints)
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE2_PCP>(prog, screen->optimal_keys);
+ else
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE2>(prog, screen->optimal_keys);
+ }
+ }
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_DYNAMIC_STATE>(prog, screen->optimal_keys);
+ }
+ return get_gfx_pipeline_stage_eq_func<ZINK_PIPELINE_NO_DYNAMIC_STATE>(prog, screen->optimal_keys);
+}
diff --git a/src/gallium/drivers/zink/zink_public.h b/src/gallium/drivers/zink/zink_public.h
index a5a4f6bca42..cb3bf6e7daf 100644
--- a/src/gallium/drivers/zink/zink_public.h
+++ b/src/gallium/drivers/zink/zink_public.h
@@ -29,7 +29,7 @@ struct sw_winsys;
struct pipe_screen_config;
struct pipe_screen *
-zink_create_screen(struct sw_winsys *winsys);
+zink_create_screen(struct sw_winsys *winsys, const struct pipe_screen_config *config);
struct pipe_screen *
zink_drm_create_screen(int fd, const struct pipe_screen_config *config);
diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c
index 8b8d1cc44c3..589d8288293 100644
--- a/src/gallium/drivers/zink/zink_query.c
+++ b/src/gallium/drivers/zink/zink_query.c
@@ -1,55 +1,82 @@
#include "zink_query.h"
#include "zink_context.h"
-#include "zink_fence.h"
+#include "zink_clear.h"
+#include "zink_program.h"
#include "zink_resource.h"
#include "zink_screen.h"
-#include "util/hash_table.h"
-#include "util/set.h"
#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
-#if defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_PPC_64) || defined(PIPE_ARCH_AARCH64) || defined(PIPE_ARCH_MIPS64)
-#define NUM_QUERIES 5000
-#else
#define NUM_QUERIES 500
-#endif
+
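+/* driver-specific query id; exposed to frontends as "render-passes" below */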
+#define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+
+struct zink_query_pool {
+ struct list_head list;
+ VkQueryType vk_query_type;
+ VkQueryPipelineStatisticFlags pipeline_stats;
+ VkQueryPool query_pool;
+ unsigned last_range;
+ unsigned refcount;
+};
struct zink_query_buffer {
struct list_head list;
unsigned num_results;
- struct pipe_resource *buffer;
- struct pipe_resource *xfb_buffers[PIPE_MAX_VERTEX_STREAMS - 1];
+ struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
+};
+
+struct zink_vk_query {
+ struct zink_query_pool *pool;
+ unsigned query_id;
+ bool needs_reset;
+ bool started;
+ uint32_t refcount;
+};
+
+struct zink_query_start {
+ union {
+ struct {
+ bool have_gs;
+ bool have_xfb;
+ bool was_line_loop;
+ };
+ uint32_t data;
+ };
+ struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};
struct zink_query {
struct threaded_query base;
enum pipe_query_type type;
- VkQueryPool query_pool;
- VkQueryPool xfb_query_pool[PIPE_MAX_VERTEX_STREAMS - 1]; //stream 0 is in the base pool
- unsigned curr_query, last_start;
+ /* Every time the gallium query needs
+ * another vulkan query, add a new start.
+ */
+ struct util_dynarray starts;
+ unsigned start_offset;
VkQueryType vkqtype;
unsigned index;
bool precise;
- bool xfb_running;
- bool xfb_overflow;
bool active; /* query is considered active by vk */
bool needs_reset; /* query is considered active by vk and cannot be destroyed */
bool dead; /* query should be destroyed when its fence finishes */
bool needs_update; /* query needs to update its qbos */
+ bool needs_rast_discard_workaround; /* query needs discard disabled */
+ bool suspended;
+ bool started_in_rp; //needs to be stopped in rp
struct list_head active_list;
struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
- bool have_gs[NUM_QUERIES]; /* geometry shaders use GEOMETRY_SHADER_PRIMITIVES_BIT */
- bool have_xfb[NUM_QUERIES]; /* xfb was active during this query */
+ bool has_draws; /* have_gs and have_xfb are valid for the most recent start */
- struct zink_batch_usage *batch_id; //batch that the query was started in
+ struct zink_batch_usage *batch_uses; //batch that the query was started in
struct list_head buffers;
union {
@@ -61,34 +88,20 @@ struct zink_query {
bool predicate_dirty;
};
-static void
-update_qbo(struct zink_context *ctx, struct zink_query *q);
-static void
-reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q);
+static const struct pipe_driver_query_info zink_specific_queries[] = {
+ {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }},
+};
-static inline unsigned
-get_num_results(enum pipe_query_type query_type)
+static inline int
+get_num_starts(struct zink_query *q)
{
- switch (query_type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
- case PIPE_QUERY_TIME_ELAPSED:
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
- return 1;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- return 2;
- default:
- debug_printf("unknown query: %s\n",
- util_str_query_type(query_type, true));
- unreachable("zink: unknown query type");
- }
+ return util_dynarray_num_elements(&q->starts, struct zink_query_start);
}
+static void
+update_query_id(struct zink_context *ctx, struct zink_query *q);
+
+
static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)
{
@@ -110,6 +123,164 @@ pipeline_statistic_convert(enum pipe_statistics_query_index idx)
}
static void
+begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
+ VkQueryControlFlags flags)
+{
+ struct zink_batch *batch = &ctx->batch;
+ if (!vkq->started) {
+ VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
+ vkq->pool->query_pool,
+ vkq->query_id,
+ flags,
+ index);
+ vkq->started = true;
+ }
+}
+
+static void
+end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
+{
+ struct zink_batch *batch = &ctx->batch;
+ if (vkq->started) {
+ VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
+ vkq->pool->query_pool,
+ vkq->query_id, index);
+ vkq->started = false;
+ }
+}
+
+static void
+reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
+{
+ struct zink_batch *batch = &ctx->batch;
+ if (vkq->needs_reset) {
+ VKCTX(CmdResetQueryPool)(batch->state->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
+ batch->state->has_barriers = true;
+ }
+ vkq->needs_reset = false;
+}
+
+void
+zink_context_destroy_query_pools(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
+ VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
+ list_del(&pool->list);
+ FREE(pool);
+ }
+}
+
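+/* return an existing pool matching this query's vk type (and pipeline-stat flags),
+ * or allocate a new NUM_QUERIES-sized pool
+ */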
+static struct zink_query_pool *
+find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
+{
+ VkQueryPipelineStatisticFlags pipeline_stats = 0;
+ if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
+ pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
+ else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
+ pipeline_stats = pipeline_statistic_convert(q->index);
+
+ VkQueryType vk_query_type = q->vkqtype;
+ /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
+ if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
+ vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
+ pipeline_stats = 0;
+ }
+
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
+ if (pool->vk_query_type == vk_query_type) {
+ if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+ if (pool->pipeline_stats == pipeline_stats)
+ return pool;
+ } else
+ return pool;
+ }
+ }
+
+ struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
+ if (!new_pool)
+ return NULL;
+
+ new_pool->vk_query_type = vk_query_type;
+ new_pool->pipeline_stats = pipeline_stats;
+
+ VkQueryPoolCreateInfo pool_create = {0};
+ pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
+ pool_create.queryType = vk_query_type;
+ pool_create.queryCount = NUM_QUERIES;
+ pool_create.pipelineStatistics = pipeline_stats;
+
+ VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
+ if (status != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
+ FREE(new_pool);
+ return NULL;
+ }
+
+ list_addtail(&new_pool->list, &ctx->query_pools);
+ return new_pool;
+}
+
+static void
+update_qbo(struct zink_context *ctx, struct zink_query *q);
+static void
+reset_qbos(struct zink_context *ctx, struct zink_query *q);
+
+
+static bool
+is_emulated_primgen(const struct zink_query *q)
+{
+ return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
+ q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
+}
+
+static inline unsigned
+get_num_query_pools(struct zink_query *q)
+{
+ if (is_emulated_primgen(q))
+ return 2;
+ return 1;
+}
+
+static inline unsigned
+get_num_queries(struct zink_query *q)
+{
+ if (is_emulated_primgen(q))
+ return 2;
+ if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+ return PIPE_MAX_VERTEX_STREAMS;
+ return 1;
+}
+
+static inline unsigned
+get_num_results(struct zink_query *q)
+{
+ if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
+ q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
+ return 1;
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ case PIPE_QUERY_TIME_ELAPSED:
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
+ return 1;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ return 2;
+ default:
+ debug_printf("unknown query: %s\n",
+ util_str_query_type(q->type, true));
+ unreachable("zink: unknown query type");
+ }
+}
+
+static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
/* The number of valid bits in a timestamp value is determined by
@@ -123,11 +294,11 @@ timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
* can be obtained from VkPhysicalDeviceLimits::timestampPeriod
* - 17.5. Timestamp Queries
*/
- *timestamp *= screen->info.props.limits.timestampPeriod;
+ *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}
static VkQueryType
-convert_query_type(unsigned query_type, bool *precise)
+convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
*precise = false;
switch (query_type) {
@@ -140,8 +311,11 @@ convert_query_type(unsigned query_type, bool *precise)
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
return VK_QUERY_TYPE_TIMESTAMP;
- case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
case PIPE_QUERY_PRIMITIVES_GENERATED:
+ return screen->info.have_EXT_primitives_generated_query ?
+ VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
+ VK_QUERY_TYPE_PIPELINE_STATISTICS;
+ case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
return VK_QUERY_TYPE_PIPELINE_STATISTICS;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
@@ -157,7 +331,7 @@ convert_query_type(unsigned query_type, bool *precise)
static bool
needs_stats_list(struct zink_query *query)
{
- return query->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ return is_emulated_primgen(query) ||
query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}
@@ -183,22 +357,6 @@ is_bool_query(struct zink_query *query)
query->type == PIPE_QUERY_GPU_FINISHED;
}
-static void
-qbo_sync_from_prev(struct zink_context *ctx, struct zink_query *query, unsigned id_offset, unsigned last_start)
-{
- assert(id_offset);
-
- struct zink_query_buffer *prev = list_last_entry(&query->buffers, struct zink_query_buffer, list);
- unsigned result_size = get_num_results(query->type) * sizeof(uint64_t);
- /* this is get_buffer_offset() but without the zink_query object */
- unsigned qbo_offset = last_start * get_num_results(query->type) * sizeof(uint64_t);
- query->curr_query = id_offset;
- query->curr_qbo->num_results = id_offset;
- zink_copy_buffer(ctx, zink_resource(query->curr_qbo->buffer), zink_resource(prev->buffer), 0,
- qbo_offset,
- id_offset * result_size);
-}
-
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
@@ -207,60 +365,69 @@ qbo_append(struct pipe_screen *screen, struct zink_query *query)
struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
if (!qbo)
return false;
- qbo->buffer = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
- PIPE_USAGE_STAGING,
- /* this is the maximum possible size of the results in a given buffer */
- NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
- if (!qbo->buffer)
- goto fail;
- if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
- /* need separate xfb buffer */
- qbo->xfb_buffers[0] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
- PIPE_USAGE_STAGING,
- /* this is the maximum possible size of the results in a given buffer */
- NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
- if (!qbo->xfb_buffers[0])
+ int num_buffers = get_num_queries(query);
+
+ for (unsigned i = 0; i < num_buffers; i++) {
+ qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
+ PIPE_USAGE_STAGING,
+ /* this is the maximum possible size of the results in a given buffer */
+ (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t));
+ if (!qbo->buffers[i])
goto fail;
- } else if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- /* need to monitor all xfb streams */
- for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) {
- /* need separate xfb buffer */
- qbo->xfb_buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
- PIPE_USAGE_STAGING,
- /* this is the maximum possible size of the results in a given buffer */
- NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
- if (!qbo->xfb_buffers[i])
- goto fail;
- }
}
list_addtail(&qbo->list, &query->buffers);
return true;
fail:
- pipe_resource_reference(&qbo->buffer, NULL);
- for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
- pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
+ for (unsigned i = 0; i < num_buffers; i++)
+ pipe_resource_reference(&qbo->buffers[i], NULL);
FREE(qbo);
return false;
}
static void
-destroy_query(struct zink_screen *screen, struct zink_query *query)
+unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
+{
+ if (!pool || --pool->refcount)
+ return;
+ util_dynarray_append(&ctx->batch.state->dead_querypools, VkQueryPool, pool->query_pool);
+ if (list_is_linked(&pool->list))
+ list_del(&pool->list);
+ FREE(pool);
+}
+
+static void
+unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
+{
+ if (!vkq)
+ return;
+ unref_vk_pool(ctx, vkq->pool);
+ vkq->refcount--;
+ if (vkq->refcount == 0)
+ FREE(vkq);
+}
+
+static void
+destroy_query(struct zink_context *ctx, struct zink_query *query)
{
- assert(zink_screen_usage_check_completion(screen, query->batch_id));
- if (query->query_pool)
- VKSCR(DestroyQueryPool)(screen->dev, query->query_pool, NULL);
+ ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
+ assert(zink_screen_usage_check_completion(screen, query->batch_uses));
struct zink_query_buffer *qbo, *next;
+
+ struct zink_query_start *starts = query->starts.data;
+ unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
+ for (unsigned j = 0; j < num_starts; j++) {
+ for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+ unref_vk_query(ctx, starts[j].vkq[i]);
+ }
+ }
+
+ util_dynarray_fini(&query->starts);
LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
- pipe_resource_reference(&qbo->buffer, NULL);
- for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
- pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
+ for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
+ pipe_resource_reference(&qbo->buffers[i], NULL);
FREE(qbo);
}
- for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
- if (query->xfb_query_pool[i])
- VKSCR(DestroyQueryPool)(screen->dev, query->xfb_query_pool[i], NULL);
- }
pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
FREE(query);
}
@@ -272,13 +439,66 @@ reset_qbo(struct zink_query *q)
q->curr_qbo->num_results = 0;
}
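+/* append a new zink_query_start (timestamps reuse the last one) and assign each backing
+ * vk query an id from a matching pool, preferring any currently-active xfb query
+ */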
+static void
+query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
+{
+ bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
+ struct zink_query_start *start;
+ int num_queries = get_num_queries(q);
+ if (!is_timestamp || get_num_starts(q) == 0) {
+ size_t size = q->starts.capacity;
+ start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
+ if (size != q->starts.capacity) {
+ /* when resizing, always zero the new data to avoid garbage */
+ uint8_t *data = q->starts.data;
+ memset(data + size, 0, q->starts.capacity - size);
+ }
+ } else {
+ start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
+ }
+ start->data = 0;
+
+ unsigned num_pools = get_num_query_pools(q);
+ for (unsigned i = 0; i < num_queries; i++) {
+ int pool_idx = num_pools > 1 ? i : 0;
+ /* try and find the active query for this */
+ struct zink_vk_query *vkq;
+ int xfb_idx = num_queries == 4 ? i : q->index;
+ if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
+ (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
+ vkq = ctx->curr_xfb_queries[xfb_idx];
+ vkq->refcount++;
+ vkq->pool->refcount++;
+ } else {
+ struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
+ if (pool->last_range == NUM_QUERIES) {
+ list_del(&pool->list);
+ pool = find_or_allocate_qp(ctx, q, pool_idx);
+ }
+ vkq = CALLOC_STRUCT(zink_vk_query);
+ if (!vkq) {
+ mesa_loge("ZINK: failed to allocate vkq!");
+ return;
+ }
+
+ pool->refcount++;
+ vkq->refcount = 1;
+ vkq->needs_reset = true;
+ vkq->pool = pool;
+ vkq->started = false;
+ vkq->query_id = pool->last_range++;
+ }
+ unref_vk_query(ctx, start->vkq[i]);
+ start->vkq[i] = vkq;
+ }
+}
+
static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
unsigned query_type, unsigned index)
{
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_query *query = CALLOC_STRUCT(zink_query);
- VkQueryPoolCreateInfo pool_create = {0};
if (!query)
return NULL;
@@ -286,50 +506,37 @@ zink_create_query(struct pipe_context *pctx,
query->index = index;
query->type = query_type;
- if (query->type == PIPE_QUERY_GPU_FINISHED)
+
+ if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
return (struct pipe_query *)query;
- query->vkqtype = convert_query_type(query_type, &query->precise);
+
+ if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
+ return (struct pipe_query *)query;
+ query->vkqtype = convert_query_type(screen, query_type, &query->precise);
if (query->vkqtype == -1)
return NULL;
+ util_dynarray_init(&query->starts, NULL);
+
assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);
- query->curr_query = 0;
+ /* use emulated path for drivers without full support */
+ if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
+ !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
+ query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;
- pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
- pool_create.queryType = query->vkqtype;
- pool_create.queryCount = NUM_QUERIES;
- if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
- pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
- VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
- else if (query_type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
- pool_create.pipelineStatistics = pipeline_statistic_convert(index);
-
- VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->query_pool);
- if (status != VK_SUCCESS)
- goto fail;
- if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
- /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
- pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
- pool_create.queryType = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
- pool_create.queryCount = NUM_QUERIES;
-
- status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[0]);
- if (status != VK_SUCCESS)
- goto fail;
- } else if (query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- /* need to monitor all xfb streams */
- for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
- status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[i]);
- if (status != VK_SUCCESS)
- goto fail;
- }
+ if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
+ query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
+ } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
+ query->needs_rast_discard_workaround = true;
}
+
if (!qbo_append(pctx->screen, query))
goto fail;
struct zink_batch *batch = &zink_context(pctx)->batch;
batch->has_work = true;
query->needs_reset = true;
+ query->predicate_dirty = true;
if (query->type == PIPE_QUERY_TIMESTAMP) {
query->active = true;
/* defer pool reset until end_query since we're guaranteed to be threadsafe then */
@@ -337,7 +544,7 @@ zink_create_query(struct pipe_context *pctx,
}
return (struct pipe_query *)query;
fail:
- destroy_query(screen, query);
+ destroy_query(zink_context(pctx), query);
return NULL;
}
@@ -345,37 +552,39 @@ static void
zink_destroy_query(struct pipe_context *pctx,
struct pipe_query *q)
{
- struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_query *query = (struct zink_query *)q;
/* only destroy if this query isn't active on any batches,
* otherwise just mark dead and wait
*/
- if (query->batch_id) {
- p_atomic_set(&query->dead, true);
+ if (query->batch_uses) {
+ query->dead = true;
return;
}
- destroy_query(screen, query);
+ destroy_query(zink_context(pctx), query);
}
void
-zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query)
+zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
{
- if (!zink_batch_usage_matches(query->batch_id, bs))
+ if (!zink_batch_usage_matches(query->batch_uses, bs))
return;
- query->batch_id = NULL;
- if (p_atomic_read(&query->dead))
- destroy_query(screen, query);
+ query->batch_uses = NULL;
+ if (query->dead)
+ destroy_query(bs->ctx, query);
}
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
- int num_results, uint64_t *results, uint64_t *xfb_results)
+ int num_starts, uint64_t *results, uint64_t *xfb_results)
{
uint64_t last_val = 0;
- int result_size = get_num_results(query->type);
- for (int i = 0; i < num_results * result_size; i += result_size) {
+ int result_size = get_num_results(query);
+ int idx = 0;
+ util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
+ unsigned i = idx * result_size;
+ idx++;
switch (query->type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
@@ -396,11 +605,13 @@ check_query_results(struct zink_query *query, union pipe_query_result *result,
result->u64 += results[i];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- if (query->have_xfb[query->last_start + i / 2] || query->index)
+ if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
+ result->u64 += results[i];
+ else if (start->have_xfb || query->index)
result->u64 += xfb_results[i + 1];
else
- /* if a given draw had a geometry shader, we need to use the second result */
- result->u64 += results[i + query->have_gs[query->last_start + i / 2]];
+ /* if a given draw had a geometry shader, we need to use the first result */
+ result->u64 += results[i + !start->have_gs];
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
/* A query pool created with this type will capture 2 integers -
@@ -417,11 +628,18 @@ check_query_results(struct zink_query *query, union pipe_query_result *result,
* for the specified vertex stream output from the last vertex processing stage.
* - from VK_EXT_transform_feedback spec
*/
- if (query->have_xfb[query->last_start + i / 2])
+ if (start->have_xfb)
result->b |= results[i] != results[i + 1];
break;
case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
- result->u64 += results[i];
+ switch (query->index) {
+ case PIPE_STAT_QUERY_IA_VERTICES:
+ result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
+ break;
+ default:
+ result->u64 += results[i];
+ break;
+ }
break;
default:
@@ -450,60 +668,54 @@ get_query_result(struct pipe_context *pctx,
util_query_clear_result(result, query->type);
- int num_results = query->curr_query - query->last_start;
- int result_size = get_num_results(query->type) * sizeof(uint64_t);
+ int num_starts = get_num_starts(query);
+ /* no results: return zero */
+ if (!num_starts)
+ return true;
+ int result_size = get_num_results(query) * sizeof(uint64_t);
+ int num_maps = get_num_queries(query);
struct zink_query_buffer *qbo;
- struct pipe_transfer *xfer;
+ struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
- uint64_t *xfb_results = NULL;
- uint64_t *results;
- bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
- results = pipe_buffer_map_range(pctx, qbo->buffer, 0,
- (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer);
- if (!results) {
- if (wait)
- debug_printf("zink: qbo read failed!");
- return false;
- }
- struct pipe_transfer *xfb_xfer = NULL;
- if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
- xfb_results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[0], 0,
- qbo->num_results * result_size, flags, &xfb_xfer);
- if (!xfb_results) {
+ uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
+ bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
+ if (!qbo->num_results)
+ continue;
+
+ for (unsigned i = 0; i < num_maps; i++) {
+ results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
+ (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
+ if (!results[i]) {
if (wait)
- debug_printf("zink: xfb qbo read failed!");
- pipe_buffer_unmap(pctx, xfer);
- return false;
+ debug_printf("zink: qbo read failed!");
+ goto fail;
}
}
- check_query_results(query, result, is_timestamp ? 1 : qbo->num_results, results, xfb_results);
- pipe_buffer_unmap(pctx, xfer);
- if (xfb_xfer)
- pipe_buffer_unmap(pctx, xfb_xfer);
if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers) && !result->b; i++) {
- uint64_t *results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[i],
- 0,
- qbo->num_results * result_size, flags, &xfer);
- if (!results) {
- if (wait)
- debug_printf("zink: qbo read failed!");
- return false;
- }
- check_query_results(query, result, num_results, results, xfb_results);
- pipe_buffer_unmap(pctx, xfer);
+ for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
+ check_query_results(query, result, num_starts, results[i], NULL);
}
- /* if overflow is detected we can stop */
- if (result->b)
- break;
- }
+ } else
+ check_query_results(query, result, num_starts, results[0], results[1]);
+
+ for (unsigned i = 0 ; i < num_maps; i++)
+ pipe_buffer_unmap(pctx, xfer[i]);
+
+ /* if overflow is detected we can stop */
+ if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
+ break;
}
if (is_time_query(query))
timestamp_to_nanoseconds(screen, &result->u64);
return true;
+fail:
+ for (unsigned i = 0 ; i < num_maps; i++)
+ if (xfer[i])
+ pipe_buffer_unmap(pctx, xfer[i]);
+ return false;
}
static void
@@ -512,7 +724,7 @@ force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_qu
struct pipe_context *pctx = &ctx->base;
unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
struct zink_query *query = (struct zink_query*)pquery;
- union pipe_query_result result;
+ union pipe_query_result result = {0};
if (query->needs_update)
update_qbo(ctx, query);
@@ -534,14 +746,14 @@ force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_qu
u32 = result.b;
else
u32 = MIN2(limit, result.u64);
- pipe_buffer_write(pctx, pres, offset, result_size, &u32);
+ tc_buffer_write(pctx, pres, offset, result_size, &u32);
} else {
uint64_t u64;
if (is_bool_query(query))
u64 = result.b;
else
u64 = result.u64;
- pipe_buffer_write(pctx, pres, offset, result_size, &u64);
+ tc_buffer_write(pctx, pres, offset, result_size, &u64);
}
}
@@ -552,49 +764,50 @@ copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query,
{
struct zink_batch *batch = &ctx->batch;
unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
- unsigned base_result_size = get_num_results(query->type) * type_size;
+ unsigned base_result_size = get_num_results(query) * type_size;
unsigned result_size = base_result_size * num_results;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
result_size += type_size;
+
+ bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);
+
zink_batch_no_rp(ctx);
/* if it's a single query that doesn't need special handling, we can copy it and be done */
zink_batch_reference_resource_rw(batch, res, true);
- zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0);
+ res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
assert(query_id < NUM_QUERIES);
+ res->obj->unordered_read = res->obj->unordered_write = false;
VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
- offset, 0, flags);
+ offset, base_result_size, flags);
+ zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker);
}
static void
copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
{
- copy_pool_results_to_buffer(ctx, query, query->query_pool, query->last_start, res, offset, num_results, flags);
+ struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
+ copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
}
+
static void
-reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
+reset_query_range(struct zink_context *ctx, struct zink_query *q)
+{
+ int num_queries = get_num_queries(q);
+ struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
+ for (unsigned i = 0; i < num_queries; i++) {
+ reset_vk_query_pool(ctx, start->vkq[i]);
+ }
+}
+
+static void
+reset_qbos(struct zink_context *ctx, struct zink_query *q)
{
- unsigned last_start = q->last_start;
- unsigned id_offset = q->curr_query - q->last_start;
- /* This command must only be called outside of a render pass instance
- *
- * - vkCmdResetQueryPool spec
- */
- zink_batch_no_rp(ctx);
if (q->needs_update)
update_qbo(ctx, q);
- VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->query_pool, 0, NUM_QUERIES);
- if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
- VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[0], 0, NUM_QUERIES);
- else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++)
- VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[i], 0, NUM_QUERIES);
- }
- memset(q->have_gs, 0, sizeof(q->have_gs));
- memset(q->have_xfb, 0, sizeof(q->have_xfb));
- q->last_start = q->curr_query = 0;
q->needs_reset = false;
/* create new qbo for non-timestamp queries:
* timestamp queries should never need more than 2 entries in the qbo
@@ -605,51 +818,63 @@ reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query
reset_qbo(q);
else
debug_printf("zink: qbo alloc failed on reset!");
- if (id_offset)
- qbo_sync_from_prev(ctx, q, id_offset, last_start);
}
static inline unsigned
-get_buffer_offset(struct zink_query *q, struct pipe_resource *pres, unsigned query_id)
+get_buffer_offset(struct zink_query *q)
{
- return (query_id - q->last_start) * get_num_results(q->type) * sizeof(uint64_t);
+ return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
}
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
struct zink_query_buffer *qbo = q->curr_qbo;
- unsigned offset = 0;
- uint32_t query_id = q->curr_query - 1;
- bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP || q->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
+ unsigned num_starts = get_num_starts(q);
+ struct zink_query_start *starts = q->starts.data;
+ bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
/* timestamp queries just write to offset 0 always */
- if (!is_timestamp)
- offset = get_buffer_offset(q, qbo->buffer, query_id);
- copy_pool_results_to_buffer(ctx, q, q->query_pool, query_id, zink_resource(qbo->buffer),
- offset,
- 1, VK_QUERY_RESULT_64_BIT);
-
- if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
- copy_pool_results_to_buffer(ctx, q,
- q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
- query_id,
- zink_resource(qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer),
- get_buffer_offset(q, qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer, query_id),
- 1, VK_QUERY_RESULT_64_BIT);
- }
-
- else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
- copy_pool_results_to_buffer(ctx, q, q->xfb_query_pool[i], query_id, zink_resource(qbo->xfb_buffers[i]),
- get_buffer_offset(q, qbo->xfb_buffers[i], query_id),
- 1, VK_QUERY_RESULT_64_BIT);
+ int num_queries = get_num_queries(q);
+ unsigned num_results = qbo->num_results;
+ for (unsigned i = 0; i < num_queries; i++) {
+ unsigned start_offset = q->start_offset;
+ while (start_offset < num_starts) {
+ unsigned num_merged_copies = 0;
+ VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
+ unsigned base_id = starts[start_offset].vkq[i]->query_id;
+ /* iterate over all the starts to see how many can be merged */
+ for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
+ if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies)
+ break;
+ }
+ assert(num_merged_copies);
+ unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
+ unsigned offset = is_timestamp ? 0 : cur_offset;
+ copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id,
+ zink_resource(qbo->buffers[i]),
+ offset,
+ num_merged_copies,
+ /*
+ there is an implicit execution dependency from
+ each such query command to all query commands previously submitted to the same queue. There
+ is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
+ include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
+ the results of vkCmdEndQuery are available.
+
+ * - Chapter 18. Queries
+ */
+ VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+ if (!is_timestamp)
+ q->curr_qbo->num_results += num_merged_copies;
+ start_offset += num_merged_copies;
}
}
+ q->start_offset += q->curr_qbo->num_results - num_results;
+
+
+ if (is_timestamp)
+ q->curr_qbo->num_results = 1;
- if (!is_timestamp)
- q->curr_qbo->num_results++;
q->needs_update = false;
}
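/* Illustrative sketch, not part of the diff: update_qbo() above scans the recorded
 * starts and merges runs that live in the same VkQueryPool with consecutive query ids,
 * so each run is flushed with a single copy command instead of one per start.  The
 * pool/id pair and the copy callback below are hypothetical simplifications of that
 * run-length merge.
 */
#include <stdint.h>

struct sketch_start {
   uint32_t pool_handle;
   uint32_t query_id;
};

typedef void (*sketch_copy_fn)(uint32_t pool_handle, uint32_t first_id,
                               unsigned count, unsigned dst_offset);

void
sketch_merge_copies(const struct sketch_start *starts, unsigned num_starts,
                    unsigned result_stride, sketch_copy_fn copy)
{
   unsigned i = 0;
   while (i < num_starts) {
      unsigned run = 1;
      /* extend the run while the pool matches and the ids stay consecutive */
      while (i + run < num_starts &&
             starts[i + run].pool_handle == starts[i].pool_handle &&
             starts[i + run].query_id == starts[i].query_id + run)
         run++;
      copy(starts[i].pool_handle, starts[i].query_id, run, i * result_stride);
      i += run;
   }
}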
@@ -658,53 +883,82 @@ begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_quer
{
VkQueryControlFlags flags = 0;
+ if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ return;
+
+ if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->batch.in_rp) {
+ /* refuse to start CS queries in renderpasses */
+ if (!list_is_linked(&q->active_list))
+ list_addtail(&q->active_list, &ctx->suspended_queries);
+ q->suspended = true;
+ return;
+ }
+
+ zink_flush_dgc_if_enabled(ctx);
+
+ update_query_id(ctx, q);
q->predicate_dirty = true;
if (q->needs_reset)
- reset_pool(ctx, batch, q);
- assert(q->curr_query < NUM_QUERIES);
+ reset_qbos(ctx, q);
+ reset_query_range(ctx, q);
q->active = true;
batch->has_work = true;
+
+ struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
if (q->type == PIPE_QUERY_TIME_ELAPSED) {
- VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, q->query_pool, q->curr_query);
- q->curr_query++;
- update_qbo(ctx, q);
- zink_batch_usage_set(&q->batch_id, batch->state);
- _mesa_set_add(batch->state->active_queries, q);
+ VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
+ if (!batch->in_rp)
+ update_qbo(ctx, q);
+ zink_batch_usage_set(&q->batch_uses, batch->state);
+ _mesa_set_add(&batch->state->active_queries, q);
}
/* ignore the rest of begin_query for timestamps */
if (is_time_query(q))
return;
+
+ /* A query must either begin and end inside the same subpass of a render pass
+ instance, or must both begin and end outside of a render pass instance
+ (i.e. contain entire render pass instances).
+ - 18.2. Query Operation
+ */
+ q->started_in_rp = ctx->batch.in_rp;
+
if (q->precise)
flags |= VK_QUERY_CONTROL_PRECISE_BIT;
+
if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ is_emulated_primgen(q) ||
q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
- VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
- q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
- q->curr_query,
- flags,
- q->index);
- q->xfb_running = true;
+ struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
+ assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
+ ctx->curr_xfb_queries[q->index] = vkq;
+
+ begin_vk_query_indexed(ctx, vkq, q->index, flags);
} else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
- q->query_pool,
- q->curr_query,
- flags,
- 0);
- for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++)
- VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
- q->xfb_query_pool[i],
- q->curr_query,
- flags,
- i + 1);
- q->xfb_running = true;
+ for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+ assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
+ ctx->curr_xfb_queries[i] = start->vkq[i];
+
+ begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
+ }
+ } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
+ begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
+ }
+ if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
+ VKCTX(CmdBeginQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
+ if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES) {
+ assert(!ctx->vertices_query);
+ ctx->vertices_query = q;
}
- if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
- VKCTX(CmdBeginQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query, flags);
if (needs_stats_list(q))
list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
- zink_batch_usage_set(&q->batch_id, batch->state);
- _mesa_set_add(batch->state->active_queries, q);
+ zink_batch_usage_set(&q->batch_uses, batch->state);
+ _mesa_set_add(&batch->state->active_queries, q);
+ if (q->needs_rast_discard_workaround) {
+ ctx->primitives_generated_active = true;
+ if (zink_set_rasterizer_discard(ctx, true))
+ zink_set_null_fs(ctx);
+ }
}
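/* Illustrative sketch with hypothetical helpers, not part of the diff: when the
 * primitives-generated query has to be emulated with a pipeline-statistics counter,
 * rasterizer discard would keep that counter from ticking, so begin/end toggle a
 * workaround -- keep discard off and use a null fragment shader while the query is
 * active, then restore the application state afterwards.
 */
#include <stdbool.h>

struct sketch_ctx {
   bool user_rast_discard;      /* what the application asked for */
   bool effective_rast_discard; /* what is actually programmed */
   bool null_fs_bound;
};

void
sketch_primgen_workaround(struct sketch_ctx *ctx, bool query_active)
{
   /* discard stays disabled for the lifetime of the emulated query */
   ctx->effective_rast_discard = query_active ? false : ctx->user_rast_discard;
   ctx->null_fs_bound = query_active;
}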
static bool
@@ -715,11 +969,28 @@ zink_begin_query(struct pipe_context *pctx,
struct zink_context *ctx = zink_context(pctx);
struct zink_batch *batch = &ctx->batch;
- query->last_start = query->curr_query;
/* drop all past results */
reset_qbo(query);
- begin_query(ctx, batch, query);
+ if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
+ ctx->occlusion_query_active = true;
+ if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
+ ctx->fs_query_active = true;
+
+ query->predicate_dirty = true;
+
+ util_dynarray_clear(&query->starts);
+ query->start_offset = 0;
+
+ if (batch->in_rp) {
+ begin_query(ctx, batch, query);
+ } else {
+ /* never directly start queries out of renderpass, always defer */
+ list_addtail(&query->active_list, &ctx->suspended_queries);
+ query->suspended = true;
+ if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+ ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
+ }
return true;
}
@@ -727,46 +998,59 @@ zink_begin_query(struct pipe_context *pctx,
static void
update_query_id(struct zink_context *ctx, struct zink_query *q)
{
- if (++q->curr_query == NUM_QUERIES) {
- /* always reset on start; this ensures we can actually submit the batch that the current query is on */
- q->needs_reset = true;
- }
+ query_pool_get_range(ctx, q);
ctx->batch.has_work = true;
-
- if (ctx->batch.in_rp)
- q->needs_update = true;
- else
- update_qbo(ctx, q);
+ q->has_draws = false;
}
static void
end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
+ if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ return;
+
+ zink_flush_dgc_if_enabled(ctx);
+
ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
assert(qbo);
assert(!is_time_query(q));
q->active = false;
+ assert(q->started_in_rp == batch->in_rp);
+ struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
+
if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
- VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
- q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
- q->curr_query, q->index);
- }
+ is_emulated_primgen(q) ||
+ q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
+ struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
+ end_vk_query_indexed(ctx, vkq, q->index);
+ ctx->curr_xfb_queries[q->index] = NULL;
+ }
else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
- VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->query_pool, q->curr_query, 0);
- for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
- VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->xfb_query_pool[i], q->curr_query, i + 1);
+ for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+ end_vk_query_indexed(ctx, start->vkq[i], i);
+ ctx->curr_xfb_queries[i] = NULL;
}
+ } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
+ end_vk_query_indexed(ctx, start->vkq[0], q->index);
}
- if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && !is_time_query(q))
- VKCTX(CmdEndQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query);
+ if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
+ q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
+ VKCTX(CmdEndQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
+
+ if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
+ q->index == PIPE_STAT_QUERY_IA_VERTICES)
+ ctx->vertices_query = NULL;
if (needs_stats_list(q))
list_delinit(&q->stats_list);
- update_query_id(ctx, q);
+ q->needs_update = true;
+ if (q->needs_rast_discard_workaround) {
+ ctx->primitives_generated_active = false;
+ if (zink_set_rasterizer_discard(ctx, false))
+ zink_set_null_fs(ctx);
+ }
}
static bool
@@ -777,6 +1061,9 @@ zink_end_query(struct pipe_context *pctx,
struct zink_query *query = (struct zink_query *)q;
struct zink_batch *batch = &ctx->batch;
+ if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ return true;
+
if (query->type == PIPE_QUERY_GPU_FINISHED) {
pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
return true;
@@ -785,18 +1072,41 @@ zink_end_query(struct pipe_context *pctx,
/* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
threaded_context_unwrap_sync(pctx);
- if (needs_stats_list(query))
+ if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
+ ctx->occlusion_query_active = true;
+ if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
+ ctx->fs_query_active = true;
+
+ bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
+ if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+ ctx->primitives_generated_suspended = false;
+
+ if (list_is_linked(&query->stats_list))
list_delinit(&query->stats_list);
+ if (query->suspended) {
+ list_delinit(&query->active_list);
+ query->suspended = false;
+ }
if (is_time_query(query)) {
+ update_query_id(ctx, query);
if (query->needs_reset)
- reset_pool(ctx, batch, query);
+ reset_qbos(ctx, query);
+ reset_query_range(ctx, query);
+ struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- query->query_pool, query->curr_query);
- zink_batch_usage_set(&query->batch_id, batch->state);
- _mesa_set_add(batch->state->active_queries, query);
- update_query_id(ctx, query);
- } else if (query->active)
+ start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
+ zink_batch_usage_set(&query->batch_uses, batch->state);
+ _mesa_set_add(&batch->state->active_queries, query);
+ query->needs_update = true;
+ } else if (query->active) {
+ /* this should be a tc-optimized query end that doesn't split a renderpass */
+ if (!query->started_in_rp)
+ zink_batch_no_rp(ctx);
end_query(ctx, batch, query);
+ }
+
+ if (unset_null_fs)
+ zink_set_null_fs(ctx);
return true;
}
@@ -810,69 +1120,154 @@ zink_get_query_result(struct pipe_context *pctx,
struct zink_query *query = (void*)q;
struct zink_context *ctx = zink_context(pctx);
+ if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
+ result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
+ result->timestamp_disjoint.disjoint = false;
+ return true;
+ }
+
if (query->type == PIPE_QUERY_GPU_FINISHED) {
struct pipe_screen *screen = pctx->screen;
result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
- query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0);
+ query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
return result->b;
}
- if (query->needs_update)
+ if (query->type == ZINK_QUERY_RENDER_PASSES) {
+ result->u64 = ctx->hud.render_passes;
+ ctx->hud.render_passes = 0;
+ return true;
+ }
+
+ if (query->needs_update) {
+ assert(!ctx->tc || !threaded_query(q)->flushed);
update_qbo(ctx, query);
+ }
- if (zink_batch_usage_is_unflushed(query->batch_id)) {
+ if (zink_batch_usage_is_unflushed(query->batch_uses)) {
if (!threaded_query(q)->flushed)
pctx->flush(pctx, NULL, 0);
if (!wait)
return false;
- } else if (!threaded_query(q)->flushed &&
- /* timeline drivers can wait during buffer map */
- !zink_screen(pctx->screen)->info.have_KHR_timeline_semaphore)
- zink_batch_usage_check_completion(ctx, query->batch_id);
+ }
return get_query_result(pctx, q, wait, result);
}
-void
-zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
+static void
+suspend_query(struct zink_context *ctx, struct zink_query *query)
{
- set_foreach(batch->state->active_queries, entry) {
+ /* if a query isn't active here then we don't need to reactivate it on the next batch */
+ if (query->active && !is_time_query(query))
+ end_query(ctx, &ctx->batch, query);
+ if (query->needs_update && !ctx->batch.in_rp)
+ update_qbo(ctx, query);
+}
+
+static void
+suspend_queries(struct zink_context *ctx, bool rp_only)
+{
+ set_foreach(&ctx->batch.state->active_queries, entry) {
struct zink_query *query = (void*)entry->key;
- /* if a query isn't active here then we don't need to reactivate it on the next batch */
+ if (query->suspended || (rp_only && !query->started_in_rp))
+ continue;
if (query->active && !is_time_query(query)) {
- end_query(ctx, batch, query);
/* the fence is going to steal the set off the batch, so we have to copy
* the active queries onto a list
*/
list_addtail(&query->active_list, &ctx->suspended_queries);
+ query->suspended = true;
+ if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+ ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
}
- if (query->needs_update)
- update_qbo(ctx, query);
- if (query->last_start && query->curr_query > NUM_QUERIES / 2)
- reset_pool(ctx, batch, query);
+ suspend_query(ctx, query);
}
}
void
+zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
+{
+ suspend_queries(ctx, false);
+}
+
+void
zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
{
struct zink_query *query, *next;
LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
- begin_query(ctx, batch, query);
list_delinit(&query->active_list);
+ query->suspended = false;
+ if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+ ctx->primitives_generated_suspended = false;
+ if (query->needs_update && !ctx->batch.in_rp)
+ update_qbo(ctx, query);
+ begin_query(ctx, batch, query);
+ }
+}
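/* Illustrative sketch, not part of the diff: the suspend/resume pair above parks every
 * query that is active at a batch or renderpass boundary, then re-begins it on the next
 * batch, flushing any deferred qbo update first.  The array-based types below are
 * simplified stand-ins for the real list-based bookkeeping.
 */
#include <stdbool.h>
#include <stddef.h>

struct sketch_query {
   bool active;
   bool suspended;
   bool needs_update;
};

void sketch_end_on_gpu(struct sketch_query *q)   { q->active = false; }
void sketch_begin_on_gpu(struct sketch_query *q) { q->active = true; }
void sketch_flush_qbo(struct sketch_query *q)    { q->needs_update = false; }

/* suspend every active query at a batch/renderpass boundary */
void
sketch_suspend_all(struct sketch_query *queries, size_t n)
{
   for (size_t i = 0; i < n; i++) {
      if (!queries[i].active || queries[i].suspended)
         continue;
      sketch_end_on_gpu(&queries[i]);
      queries[i].suspended = true;
   }
}

/* resume them on the next batch, flushing any pending result copy first */
void
sketch_resume_all(struct sketch_query *queries, size_t n)
{
   for (size_t i = 0; i < n; i++) {
      if (!queries[i].suspended)
         continue;
      if (queries[i].needs_update)
         sketch_flush_qbo(&queries[i]);
      queries[i].suspended = false;
      sketch_begin_on_gpu(&queries[i]);
   }
}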
+
+void
+zink_resume_cs_query(struct zink_context *ctx)
+{
+ struct zink_query *query, *next;
+ LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
+ if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
+ list_delinit(&query->active_list);
+ query->suspended = false;
+ begin_query(ctx, &ctx->batch, query);
+ }
}
}
void
+zink_query_renderpass_suspend(struct zink_context *ctx)
+{
+ suspend_queries(ctx, true);
+}
+
+void
zink_query_update_gs_states(struct zink_context *ctx)
{
struct zink_query *query;
+ bool suspendall = false;
+ bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
+ bool have_xfb = !!ctx->num_so_targets;
+
LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
- assert(query->curr_query < ARRAY_SIZE(query->have_gs));
+ struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
assert(query->active);
- query->have_gs[query->curr_query] = !!ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
- query->have_xfb[query->curr_query] = !!ctx->num_so_targets;
+ if (query->has_draws) {
+ if (last_start->have_gs != have_gs ||
+ last_start->have_xfb != have_xfb) {
+ suspendall = true;
+ }
+ }
+ }
+
+ if (ctx->vertices_query) {
+ query = ctx->vertices_query;
+ struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
+ assert(query->active);
+ if (last_start->was_line_loop != ctx->was_line_loop) {
+ suspendall = true;
+ }
+ }
+ if (suspendall) {
+ zink_suspend_queries(ctx, &ctx->batch);
+ zink_resume_queries(ctx, &ctx->batch);
+ }
+
+ LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
+ struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
+ last_start->have_gs = have_gs;
+ last_start->have_xfb = have_xfb;
+ query->has_draws = true;
+ }
+ if (ctx->vertices_query) {
+ query = ctx->vertices_query;
+ struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
+ last_start->was_line_loop = ctx->was_line_loop;
+ query->has_draws = true;
}
}
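/* Illustrative sketch with hypothetical fields, not part of the diff: each query start
 * remembers whether a geometry shader or XFB was bound while it accumulated draws.  If
 * the current pipeline config differs from what the last start saw and that start
 * already has draws, every affected query is suspended and resumed so a fresh start
 * records the new config, which is the decision sketched below.
 */
#include <stdbool.h>

struct sketch_qstart {
   bool have_gs;
   bool have_xfb;
};

bool
sketch_needs_split(const struct sketch_qstart *last, bool has_draws,
                   bool have_gs, bool have_xfb)
{
   return has_draws && (last->have_gs != have_gs || last->have_xfb != have_xfb);
}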
@@ -880,19 +1275,29 @@ static void
zink_set_active_query_state(struct pipe_context *pctx, bool enable)
{
struct zink_context *ctx = zink_context(pctx);
+ /* unordered blits already disable queries */
+ if (ctx->unordered_blitting)
+ return;
ctx->queries_disabled = !enable;
struct zink_batch *batch = &ctx->batch;
if (ctx->queries_disabled)
zink_suspend_queries(ctx, batch);
- else
+ else if (ctx->batch.in_rp)
zink_resume_queries(ctx, batch);
}
void
+zink_query_sync(struct zink_context *ctx, struct zink_query *query)
+{
+ if (query->needs_update)
+ update_qbo(ctx, query);
+}
+
+void
zink_start_conditional_render(struct zink_context *ctx)
{
- if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering))
+ if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
return;
struct zink_batch *batch = &ctx->batch;
VkConditionalRenderingFlagsEXT begin_flags = 0;
@@ -902,32 +1307,22 @@ zink_start_conditional_render(struct zink_context *ctx)
begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
begin_info.flags = begin_flags;
+ ctx->render_condition.query->predicate->obj->unordered_read = false;
VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info);
zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
+ ctx->render_condition.active = true;
}
void
zink_stop_conditional_render(struct zink_context *ctx)
{
+ zink_flush_dgc_if_enabled(ctx);
struct zink_batch *batch = &ctx->batch;
zink_clear_apply_conditionals(ctx);
- if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering))
+ if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
return;
VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf);
-}
-
-bool
-zink_check_conditional_render(struct zink_context *ctx)
-{
- if (!ctx->render_condition_active)
- return true;
- assert(ctx->render_condition.query);
-
- union pipe_query_result result;
- zink_get_query_result(&ctx->base, (struct pipe_query*)ctx->render_condition.query, true, &result);
- return is_bool_query(ctx->render_condition.query) ?
- ctx->render_condition.inverted != result.b :
- ctx->render_condition.inverted != !!result.u64;
+ ctx->render_condition.active = false;
}
static void
@@ -941,12 +1336,12 @@ zink_render_condition(struct pipe_context *pctx,
zink_batch_no_rp(ctx);
VkQueryResultFlagBits flags = 0;
+ zink_flush_dgc_if_enabled(ctx);
if (query == NULL) {
/* force conditional clears if they exist */
if (ctx->clears_enabled && !ctx->batch.in_rp)
zink_batch_rp(ctx);
- if (ctx->batch.in_rp)
- zink_stop_conditional_render(ctx);
+ zink_stop_conditional_render(ctx);
ctx->render_condition_active = false;
ctx->render_condition.query = NULL;
return;
@@ -969,14 +1364,21 @@ zink_render_condition(struct pipe_context *pctx,
flags |= VK_QUERY_RESULT_WAIT_BIT;
flags |= VK_QUERY_RESULT_64_BIT;
- int num_results = query->curr_query - query->last_start;
- if (query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
- !is_so_overflow_query(query)) {
- copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
+ int num_results = get_num_starts(query);
+ if (num_results) {
+ if (!is_emulated_primgen(query) &&
+ !is_so_overflow_query(query) &&
+ num_results == 1) {
+ copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
+ } else {
+ /* these need special handling */
+ force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
+ }
} else {
- /* these need special handling */
- force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
+ uint64_t zero = 0;
+ tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
}
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
query->predicate_dirty = false;
}
ctx->render_condition.inverted = condition;
@@ -989,7 +1391,7 @@ zink_render_condition(struct pipe_context *pctx,
static void
zink_get_query_result_resource(struct pipe_context *pctx,
struct pipe_query *pquery,
- bool wait,
+ enum pipe_query_flags flags,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *pres,
@@ -1001,8 +1403,15 @@ zink_get_query_result_resource(struct pipe_context *pctx,
struct zink_resource *res = zink_resource(pres);
unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
- unsigned num_queries = query->curr_query - query->last_start;
- unsigned query_id = query->last_start;
+ unsigned num_queries = get_num_starts(query);
+
+ /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
+ uint64_t u64[4] = {0};
+ unsigned src_offset = result_size * get_num_results(query);
+ if (!num_queries) {
+ tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
+ return;
+ }
if (index == -1) {
/* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
@@ -1014,31 +1423,45 @@ zink_get_query_result_resource(struct pipe_context *pctx,
*/
VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
- if (zink_batch_usage_check_completion(ctx, query->batch_id)) {
- uint64_t u64[2] = {0};
- if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, 2 * result_size, u64,
- 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag) == VK_SUCCESS) {
- pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + result_size);
+ if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
+ struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
+ unsigned query_id = start->vkq[0]->query_id;
+ VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
+ sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
+ if (result == VK_SUCCESS) {
+ tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
return;
+ } else {
+ mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
}
}
- struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, result_size * 2);
+ struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
- zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size, result_size);
+ zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
pipe_resource_reference(&staging, NULL);
return;
}
+ /*
+ there is an implicit execution dependency from
+ each such query command to all query commands previously submitted to the same queue. There
+ is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
+ include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
+ the results of vkCmdEndQuery are available.
+
+ * - Chapter 18. Queries
+ */
+ size_flags |= VK_QUERY_RESULT_WAIT_BIT;
if (!is_time_query(query) && !is_bool_query(query)) {
- if (num_queries == 1 && query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
+ if (num_queries == 1 && !is_emulated_primgen(query) &&
query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
!is_bool_query(query)) {
if (size_flags == VK_QUERY_RESULT_64_BIT) {
if (query->needs_update)
update_qbo(ctx, query);
/* internal qbo always writes 64bit value so we can just direct copy */
- zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffer), offset,
- get_buffer_offset(query, query->curr_qbo->buffer, query->last_start),
+ zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
+ get_buffer_offset(query),
result_size);
} else
/* have to do a new copy for 32bit */
@@ -1055,16 +1478,33 @@ zink_get_query_result_resource(struct pipe_context *pctx,
force_cpu_read(ctx, pquery, result_type, pres, offset);
}
-static uint64_t
-zink_get_timestamp(struct pipe_context *pctx)
+uint64_t
+zink_get_timestamp(struct pipe_screen *pscreen)
{
- struct zink_screen *screen = zink_screen(pctx->screen);
+ struct zink_screen *screen = zink_screen(pscreen);
uint64_t timestamp, deviation;
- assert(screen->info.have_EXT_calibrated_timestamps);
- VkCalibratedTimestampInfoEXT cti = {0};
- cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
- cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
- VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
+ if (screen->info.have_EXT_calibrated_timestamps) {
+ VkCalibratedTimestampInfoEXT cti = {0};
+ cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
+ cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
+ VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
+ }
+ } else {
+ zink_screen_lock_context(screen);
+ struct pipe_context *pctx = &screen->copy_context->base;
+ struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
+ if (!pquery)
+ return 0;
+ union pipe_query_result result = {0};
+ pctx->begin_query(pctx, pquery);
+ pctx->end_query(pctx, pquery);
+ pctx->get_query_result(pctx, pquery, true, &result);
+ pctx->destroy_query(pctx, pquery);
+ zink_screen_unlock_context(screen);
+ timestamp = result.u64;
+ }
timestamp_to_nanoseconds(screen, &timestamp);
return timestamp;
}
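/* Illustrative sketch, not part of the diff: without VK_EXT_calibrated_timestamps the
 * fallback above runs a throwaway PIPE_QUERY_TIMESTAMP on the screen's copy context.
 * Either way the raw device tick count is scaled by timestampPeriod (nanoseconds per
 * tick), which is what timestamp_to_nanoseconds() is assumed to do; the helper below
 * mirrors that conversion.
 */
#include <stdint.h>

uint64_t
sketch_ticks_to_ns(uint64_t ticks, float timestamp_period_ns_per_tick)
{
   return (uint64_t)(ticks * (double)timestamp_period_ns_per_tick);
}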
@@ -1084,5 +1524,32 @@ zink_context_query_init(struct pipe_context *pctx)
pctx->get_query_result_resource = zink_get_query_result_resource;
pctx->set_active_query_state = zink_set_active_query_state;
pctx->render_condition = zink_render_condition;
- pctx->get_timestamp = zink_get_timestamp;
+}
+
+int
+zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_group_info *info)
+{
+ if (!info)
+ return 1;
+
+ assert(index == 0);
+ info->name = "Zink counters";
+ info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
+ info->num_queries = ARRAY_SIZE(zink_specific_queries);
+
+ return 1;
+}
+
+int
+zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ if (!info)
+ return ARRAY_SIZE(zink_specific_queries);
+
+ assert(index < ARRAY_SIZE(zink_specific_queries));
+ *info = zink_specific_queries[index];
+
+ return 1;
}
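/* Illustrative sketch with hypothetical types, not part of the diff: the two hooks above
 * enumerate a fixed table of driver-specific counters (zink_specific_queries); the
 * enumeration contract they follow -- NULL info returns the count, otherwise one entry
 * is copied out -- looks like this in simplified form.
 */
#include <stddef.h>

struct sketch_query_info {
   const char *name;
   unsigned query_type;   /* hypothetical driver-specific id */
};

static const struct sketch_query_info sketch_queries[] = {
   { "zink-render-passes", 0 },
};

int
sketch_get_query_info(unsigned index, struct sketch_query_info *info)
{
   if (!info)
      return (int)(sizeof(sketch_queries) / sizeof(sketch_queries[0]));
   *info = sketch_queries[index];
   return 1;
}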
diff --git a/src/gallium/drivers/zink/zink_query.h b/src/gallium/drivers/zink/zink_query.h
index 73fd31eeda7..2b96a72c700 100644
--- a/src/gallium/drivers/zink/zink_query.h
+++ b/src/gallium/drivers/zink/zink_query.h
@@ -25,13 +25,9 @@
#define ZINK_QUERY_H
#include <stdbool.h>
+#include <inttypes.h>
+#include "zink_types.h"
-struct zink_batch;
-struct zink_batch_state;
-struct zink_context;
-struct zink_fence;
-struct zink_query;
-struct zink_screen;
#ifdef __cplusplus
extern "C" {
#endif
@@ -43,7 +39,15 @@ void
zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch);
void
-zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query);
+zink_query_renderpass_suspend(struct zink_context *ctx);
+
+void
+zink_resume_cs_query(struct zink_context *ctx);
+
+void
+zink_prune_query(struct zink_batch_state *bs, struct zink_query *query);
+void
+zink_query_sync(struct zink_context *ctx, struct zink_query *query);
void
zink_query_update_gs_states(struct zink_context *ctx);
@@ -54,8 +58,19 @@ zink_start_conditional_render(struct zink_context *ctx);
void
zink_stop_conditional_render(struct zink_context *ctx);
-bool
-zink_check_conditional_render(struct zink_context *ctx);
+void
+zink_context_destroy_query_pools(struct zink_context *ctx);
+uint64_t
+zink_get_timestamp(struct pipe_screen *pscreen);
+
+int
+zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_group_info *info);
+
+int
+zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/drivers/zink/zink_render_pass.c b/src/gallium/drivers/zink/zink_render_pass.c
index 132dcd0d278..d2b907ba78b 100644
--- a/src/gallium/drivers/zink/zink_render_pass.c
+++ b/src/gallium/drivers/zink/zink_render_pass.c
@@ -21,63 +21,125 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "zink_context.h"
+#include "zink_clear.h"
+#include "zink_framebuffer.h"
+#include "zink_kopper.h"
+#include "zink_query.h"
#include "zink_render_pass.h"
-
+#include "zink_resource.h"
#include "zink_screen.h"
+#include "zink_surface.h"
#include "util/u_memory.h"
#include "util/u_string.h"
+#include "util/u_blitter.h"
+
+static VkAttachmentLoadOp
+get_rt_loadop(const struct zink_rt_attrib *rt, bool clear)
+{
+ return clear ? VK_ATTACHMENT_LOAD_OP_CLEAR :
+ /* TODO: need replicate EXT */
+ //rt->resolve || rt->invalid ?
+ rt->invalid ?
+ VK_ATTACHMENT_LOAD_OP_DONT_CARE :
+ VK_ATTACHMENT_LOAD_OP_LOAD;
+}
+
+static VkImageLayout
+get_color_rt_layout(const struct zink_rt_attrib *rt)
+{
+ return rt->feedback_loop ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+}
+
+static VkImageLayout
+get_zs_rt_layout(const struct zink_rt_attrib *rt)
+{
+ bool has_clear = rt->clear_color || rt->clear_stencil;
+ if (rt->feedback_loop)
+ return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
+ return rt->needs_write || has_clear ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+}
static VkRenderPass
-create_render_pass(struct zink_screen *screen, struct zink_render_pass_state *state, struct zink_render_pass_pipeline_state *pstate)
+create_render_pass2(struct zink_screen *screen, struct zink_render_pass_state *state, struct zink_render_pass_pipeline_state *pstate)
{
- VkAttachmentReference color_refs[PIPE_MAX_COLOR_BUFS], zs_ref;
- VkAttachmentReference input_attachments[PIPE_MAX_COLOR_BUFS];
- VkAttachmentDescription attachments[PIPE_MAX_COLOR_BUFS + 1];
+ VkAttachmentReference2 color_refs[PIPE_MAX_COLOR_BUFS], color_resolves[PIPE_MAX_COLOR_BUFS], zs_ref, zs_resolve;
+ VkAttachmentReference2 input_attachments[PIPE_MAX_COLOR_BUFS];
+ VkAttachmentDescription2 attachments[2 * (PIPE_MAX_COLOR_BUFS + 1)];
VkPipelineStageFlags dep_pipeline = 0;
VkAccessFlags dep_access = 0;
unsigned input_count = 0;
+ const unsigned cresolve_offset = state->num_cbufs + state->have_zsbuf;
+ const unsigned zsresolve_offset = cresolve_offset + state->num_cresolves;
pstate->num_attachments = state->num_cbufs;
+ pstate->num_cresolves = state->num_cresolves;
+ pstate->num_zsresolves = state->num_zsresolves;
+ pstate->fbfetch = 0;
+ pstate->msaa_samples = state->msaa_samples;
for (int i = 0; i < state->num_cbufs; i++) {
struct zink_rt_attrib *rt = state->rts + i;
+ attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachments[i].pNext = NULL;
attachments[i].flags = 0;
pstate->attachments[i].format = attachments[i].format = rt->format;
pstate->attachments[i].samples = attachments[i].samples = rt->samples;
- attachments[i].loadOp = rt->clear_color ? VK_ATTACHMENT_LOAD_OP_CLEAR :
- state->swapchain_init && rt->swapchain ?
- VK_ATTACHMENT_LOAD_OP_DONT_CARE :
- VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachments[i].loadOp = get_rt_loadop(rt, rt->clear_color);
+
+ /* TODO: need replicate EXT */
+ //attachments[i].storeOp = rt->resolve ? VK_ATTACHMENT_STORE_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE;
attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
/* if layout changes are ever handled here, need VkAttachmentSampleLocationsEXT */
- VkImageLayout layout = rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ VkImageLayout layout = get_color_rt_layout(rt);
attachments[i].initialLayout = layout;
attachments[i].finalLayout = layout;
+ color_refs[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
+ color_refs[i].pNext = NULL;
color_refs[i].attachment = i;
color_refs[i].layout = layout;
+ color_refs[i].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
dep_pipeline |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- if (rt->fbfetch)
- memcpy(&input_attachments[input_count++], &color_refs[i], sizeof(VkAttachmentReference));
+ if (rt->fbfetch) {
+ memcpy(&input_attachments[input_count++], &color_refs[i], sizeof(VkAttachmentReference2));
+ dep_pipeline |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ dep_access |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ pstate->fbfetch = 1;
+ }
dep_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
if (attachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD)
dep_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+
+ if (rt->resolve) {
+ memcpy(&attachments[cresolve_offset + i], &attachments[i], sizeof(VkAttachmentDescription2));
+ attachments[cresolve_offset + i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+ attachments[cresolve_offset + i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachments[cresolve_offset + i].samples = 1;
+ memcpy(&color_resolves[i], &color_refs[i], sizeof(VkAttachmentReference2));
+ color_resolves[i].attachment = cresolve_offset + i;
+ if (attachments[cresolve_offset + i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD)
+ dep_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+ }
}
int num_attachments = state->num_cbufs;
if (state->have_zsbuf) {
struct zink_rt_attrib *rt = state->rts + state->num_cbufs;
- bool has_clear = rt->clear_color || rt->clear_stencil;
- VkImageLayout write_layout = rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- VkImageLayout layout = rt->needs_write || has_clear ? write_layout : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+ VkImageLayout layout = get_zs_rt_layout(rt);
+ attachments[num_attachments].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachments[num_attachments].pNext = NULL;
attachments[num_attachments].flags = 0;
pstate->attachments[num_attachments].format = attachments[num_attachments].format = rt->format;
pstate->attachments[num_attachments].samples = attachments[num_attachments].samples = rt->samples;
- attachments[num_attachments].loadOp = rt->clear_color ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachments[num_attachments].loadOp = get_rt_loadop(rt, rt->clear_color);
+ attachments[num_attachments].stencilLoadOp = get_rt_loadop(rt, rt->clear_stencil);
+ /* TODO: need replicate EXT */
+ //attachments[num_attachments].storeOp = rt->resolve ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE;
+ //attachments[num_attachments].stencilStoreOp = rt->resolve ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE;
attachments[num_attachments].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachments[num_attachments].stencilLoadOp = rt->clear_stencil ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
attachments[num_attachments].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
/* if layout changes are ever handled here, need VkAttachmentSampleLocationsEXT */
attachments[num_attachments].initialLayout = layout;
@@ -90,36 +152,93 @@ create_render_pass(struct zink_screen *screen, struct zink_render_pass_state *st
attachments[num_attachments].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD)
dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
- zs_ref.attachment = num_attachments++;
+ zs_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
+ zs_ref.pNext = NULL;
+ zs_ref.attachment = num_attachments;
zs_ref.layout = layout;
+ if (rt->resolve) {
+ memcpy(&attachments[zsresolve_offset], &attachments[num_attachments], sizeof(VkAttachmentDescription2));
+ attachments[zsresolve_offset].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+ attachments[zsresolve_offset].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+ attachments[zsresolve_offset].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachments[zsresolve_offset].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachments[zsresolve_offset].samples = 1;
+ memcpy(&zs_resolve, &zs_ref, sizeof(VkAttachmentReference2));
+ zs_resolve.attachment = zsresolve_offset;
+ if (attachments[zsresolve_offset].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
+ attachments[zsresolve_offset].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD)
+ dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ }
+ num_attachments++;
pstate->num_attachments++;
}
+ pstate->color_read = (dep_access & VK_ACCESS_COLOR_ATTACHMENT_READ_BIT) > 0;
+ pstate->depth_read = (dep_access & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT) > 0;
+ pstate->depth_write = (dep_access & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT) > 0;
- VkSubpassDependency deps[] = {
- [0] = {VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, VK_DEPENDENCY_BY_REGION_BIT},
- [1] = {0, VK_SUBPASS_EXTERNAL, dep_pipeline, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, VK_DEPENDENCY_BY_REGION_BIT}
+ if (!screen->info.have_KHR_synchronization2)
+ dep_pipeline = MAX2(dep_pipeline, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
+
+ VkDependencyFlags flag = screen->info.have_KHR_synchronization2 ? VK_DEPENDENCY_BY_REGION_BIT : 0;
+ VkSubpassDependency2 deps[] = {
+ {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, flag, 0},
+ {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, VK_SUBPASS_EXTERNAL, dep_pipeline, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, flag, 0}
+ };
+ VkPipelineStageFlags input_dep = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ //if (zs_fbfetch) input_dep |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ VkAccessFlags input_access = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ //if (zs_fbfetch) input_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ VkSubpassDependency2 fbfetch_deps[] = {
+ {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, flag, 0},
+ {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, 0, dep_pipeline, input_dep, dep_access, input_access, flag, 0},
+ {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, VK_SUBPASS_EXTERNAL, dep_pipeline, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, flag, 0}
};
- VkSubpassDescription subpass = {0};
+ VkSubpassDescription2 subpass = {0};
+ if (pstate->fbfetch && screen->info.have_EXT_rasterization_order_attachment_access)
+ subpass.flags |= VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT;
+ VkSubpassDescriptionDepthStencilResolve zsresolve;
+ subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2;
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = state->num_cbufs;
subpass.pColorAttachments = color_refs;
subpass.pDepthStencilAttachment = state->have_zsbuf ? &zs_ref : NULL;
subpass.inputAttachmentCount = input_count;
subpass.pInputAttachments = input_attachments;
+ if (state->num_cresolves)
+ subpass.pResolveAttachments = color_resolves;
+ if (state->num_zsresolves) {
+ subpass.pNext = &zsresolve;
+ zsresolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE;
+ zsresolve.pNext = NULL;
+ zsresolve.depthResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
+ zsresolve.stencilResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
+ zsresolve.pDepthStencilResolveAttachment = &zs_resolve;
+ } else
+ subpass.pNext = NULL;
+
+ VkMultisampledRenderToSingleSampledInfoEXT msrtss = {
+ VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT,
+ &subpass.pNext,
+ VK_TRUE,
+ state->msaa_samples,
+ };
+ if (state->msaa_samples)
+ subpass.pNext = &msrtss;
- VkRenderPassCreateInfo rpci = {0};
- rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
- rpci.attachmentCount = num_attachments;
+ VkRenderPassCreateInfo2 rpci = {0};
+ rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2;
+ rpci.attachmentCount = num_attachments + state->num_cresolves + state->num_zsresolves;
rpci.pAttachments = attachments;
rpci.subpassCount = 1;
rpci.pSubpasses = &subpass;
- rpci.dependencyCount = 2;
- rpci.pDependencies = deps;
+ rpci.dependencyCount = input_count ? 3 : 2;
+ rpci.pDependencies = input_count ? fbfetch_deps : deps;
VkRenderPass render_pass;
- if (VKSCR(CreateRenderPass)(screen->dev, &rpci, NULL, &render_pass) != VK_SUCCESS) {
- debug_printf("vkCreateRenderPass failed\n");
+ VkResult result = VKSCR(CreateRenderPass2)(screen->dev, &rpci, NULL, &render_pass);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateRenderPass2 failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
@@ -135,7 +254,7 @@ zink_create_render_pass(struct zink_screen *screen,
if (!rp)
goto fail;
- rp->render_pass = create_render_pass(screen, state, pstate);
+ rp->render_pass = create_render_pass2(screen, state, pstate);
if (!rp->render_pass)
goto fail;
memcpy(&rp->state, state, sizeof(struct zink_render_pass_state));
@@ -156,26 +275,622 @@ zink_destroy_render_pass(struct zink_screen *screen,
}
VkImageLayout
-zink_render_pass_attachment_get_barrier_info(const struct zink_render_pass *rp, unsigned idx,
+zink_render_pass_attachment_get_barrier_info(const struct zink_rt_attrib *rt, bool color,
VkPipelineStageFlags *pipeline, VkAccessFlags *access)
{
*access = 0;
- assert(idx < rp->state.num_rts);
- const struct zink_rt_attrib *rt = &rp->state.rts[idx];
- if (idx < rp->state.num_cbufs) {
+ if (color) {
*pipeline = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
*access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- if (!rt->clear_color && (!rp->state.swapchain_init || !rt->swapchain))
+ if (!rt->clear_color && !rt->invalid)
*access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
- return rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ return get_color_rt_layout(rt);
}
- assert(rp->state.have_zsbuf);
*pipeline = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
- if (!rp->state.rts[idx].clear_color && !rp->state.rts[idx].clear_stencil)
+ if (!rt->clear_color && !rt->clear_stencil)
*access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
- if (!rp->state.rts[idx].clear_color && !rp->state.rts[idx].clear_stencil && !rp->state.rts[idx].needs_write)
- return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
- *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- return rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+ if (rt->clear_color || rt->clear_stencil || rt->needs_write)
+ *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ return get_zs_rt_layout(rt);
+}
+
+VkImageLayout
+zink_tc_renderpass_info_parse(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned idx, VkPipelineStageFlags *pipeline, VkAccessFlags *access)
+{
+ if (idx < PIPE_MAX_COLOR_BUFS) {
+ *pipeline = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ *access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ if (info->cbuf_load & BITFIELD_BIT(idx))
+ *access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+ return (ctx->feedback_loops & BITFIELD_BIT(idx)) ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT :
+ (info->cbuf_fbfetch & BITFIELD_BIT(idx)) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ } else {
+ *access = 0;
+ if (info->zsbuf_load || info->zsbuf_read_dsa)
+ *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ if (info->zsbuf_clear | info->zsbuf_clear_partial | info->zsbuf_write_fs | info->zsbuf_write_dsa)
+ *access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ assert(*access);
+ *pipeline = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+ if (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS))
+ return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
+ return (info->zsbuf_clear | info->zsbuf_clear_partial | info->zsbuf_write_fs | info->zsbuf_write_dsa) ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+ }
+}
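/* Illustrative sketch with a simplified enum, not part of the diff: the color-attachment
 * layout choice above is a three-way priority -- a feedback loop forces the feedback-loop
 * layout, fbfetch forces GENERAL, and everything else uses the plain color-attachment
 * layout.
 */
#include <stdbool.h>

enum sketch_layout {
   SKETCH_COLOR_ATTACHMENT,
   SKETCH_GENERAL,
   SKETCH_FEEDBACK_LOOP
};

enum sketch_layout
sketch_pick_color_layout(bool feedback_loop, bool fbfetch)
{
   if (feedback_loop)
      return SKETCH_FEEDBACK_LOOP;
   return fbfetch ? SKETCH_GENERAL : SKETCH_COLOR_ATTACHMENT;
}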
+
+static size_t
+rp_state_size(const struct zink_render_pass_pipeline_state *pstate)
+{
+ return offsetof(struct zink_render_pass_pipeline_state, attachments) +
+ sizeof(pstate->attachments[0]) * pstate->num_attachments;
+}
+
+static uint32_t
+hash_rp_state(const void *key)
+{
+ const struct zink_render_pass_pipeline_state *s = key;
+ return _mesa_hash_data(key, rp_state_size(s));
+}
+
+static bool
+equals_rp_state(const void *a, const void *b)
+{
+ return !memcmp(a, b, rp_state_size(a));
+}
+
+static uint32_t
+hash_render_pass_state(const void *key)
+{
+ struct zink_render_pass_state* s = (struct zink_render_pass_state*)key;
+ return _mesa_hash_data(key, offsetof(struct zink_render_pass_state, rts) + sizeof(s->rts[0]) * s->num_rts);
+}
+
+static bool
+equals_render_pass_state(const void *a, const void *b)
+{
+ const struct zink_render_pass_state *s_a = a, *s_b = b;
+ if (s_a->num_rts != s_b->num_rts)
+ return false;
+ return memcmp(a, b, offsetof(struct zink_render_pass_state, rts) + sizeof(s_a->rts[0]) * s_a->num_rts) == 0;
+}
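/* Illustrative sketch, not part of the diff: both the hash and the equality callback
 * above only cover the bytes that are meaningful for the key -- the fixed header plus
 * num_rts live entries -- so two states that differ only in unused trailing rts[] slots
 * still compare equal and land in the same cache bucket.  The struct below is a
 * simplified stand-in for zink_render_pass_state.
 */
#include <stddef.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>

struct sketch_rp_state {
   uint32_t flags;
   uint32_t num_rts;
   uint64_t rts[8];
};

static size_t
sketch_key_size(const struct sketch_rp_state *s)
{
   return offsetof(struct sketch_rp_state, rts) + sizeof(s->rts[0]) * s->num_rts;
}

bool
sketch_rp_state_equal(const struct sketch_rp_state *a, const struct sketch_rp_state *b)
{
   return a->num_rts == b->num_rts && !memcmp(a, b, sketch_key_size(a));
}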
+
+void
+zink_init_zs_attachment(struct zink_context *ctx, struct zink_rt_attrib *rt)
+{
+ const struct pipe_framebuffer_state *fb = &ctx->fb_state;
+ struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture);
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
+ struct zink_surface *transient = zink_transient_surface(fb->zsbuf);
+ rt->format = zsbuf->format;
+ rt->samples = MAX3(transient ? transient->base.nr_samples : 0, fb->zsbuf->texture->nr_samples, 1);
+ rt->clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) &&
+ !zink_fb_clear_first_needs_explicit(fb_clear) &&
+ (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH);
+ rt->clear_stencil = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) &&
+ !zink_fb_clear_first_needs_explicit(fb_clear) &&
+ (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL);
+ const uint64_t outputs_written = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ?
+ ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written : 0;
+ bool needs_write_z = (ctx->dsa_state && ctx->dsa_state->hw_state.depth_write) ||
+ outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+ needs_write_z |= transient || rt->clear_color ||
+ (zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH));
+
+ bool needs_write_s = (ctx->dsa_state && (util_writes_stencil(&ctx->dsa_state->base.stencil[0]) || util_writes_stencil(&ctx->dsa_state->base.stencil[1]))) ||
+ rt->clear_stencil || (outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) ||
+ (zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL));
+ rt->needs_write = needs_write_z | needs_write_s;
+ rt->invalid = !zsbuf->valid;
+ rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) > 0;
+}
+
+void
+zink_tc_init_zs_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, struct zink_rt_attrib *rt)
+{
+ const struct pipe_framebuffer_state *fb = &ctx->fb_state;
+ struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture);
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
+ struct zink_surface *transient = zink_transient_surface(fb->zsbuf);
+ rt->format = zsbuf->format;
+ rt->samples = MAX3(transient ? transient->base.nr_samples : 0, fb->zsbuf->texture->nr_samples, 1);
+ rt->clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) &&
+ !zink_fb_clear_first_needs_explicit(fb_clear) &&
+ (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH);
+ rt->clear_stencil = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) &&
+ !zink_fb_clear_first_needs_explicit(fb_clear) &&
+ (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL);
+ rt->needs_write = info->zsbuf_clear | info->zsbuf_clear_partial | info->zsbuf_write_fs | info->zsbuf_write_dsa;
+ rt->invalid = !zsbuf->valid;
+ rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) > 0;
+}
+
+void
+zink_init_color_attachment(struct zink_context *ctx, unsigned i, struct zink_rt_attrib *rt)
+{
+ const struct pipe_framebuffer_state *fb = &ctx->fb_state;
+ struct pipe_surface *psurf = fb->cbufs[i];
+ if (psurf) {
+ struct zink_surface *surf = zink_csurface(psurf);
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ rt->format = surf->info.format[0];
+ rt->samples = MAX3(transient ? transient->base.nr_samples : 0, psurf->texture->nr_samples, 1);
+ rt->clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]);
+ rt->invalid = !zink_resource(psurf->texture)->valid;
+ rt->fbfetch = (ctx->fbfetch_outputs & BITFIELD_BIT(i)) > 0;
+ rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(i)) > 0;
+ } else {
+ memset(rt, 0, sizeof(struct zink_rt_attrib));
+ rt->format = VK_FORMAT_R8G8B8A8_UNORM;
+ rt->samples = fb->samples;
+ }
+}
+
+void
+zink_tc_init_color_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned i, struct zink_rt_attrib *rt)
+{
+ const struct pipe_framebuffer_state *fb = &ctx->fb_state;
+ struct pipe_surface *psurf = fb->cbufs[i];
+ if (psurf) {
+ struct zink_surface *surf = zink_csurface(psurf);
+ struct zink_surface *transient = zink_transient_surface(psurf);
+ rt->format = surf->info.format[0];
+ rt->samples = MAX3(transient ? transient->base.nr_samples : 0, psurf->texture->nr_samples, 1);
+ rt->clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]);
+ rt->invalid = !zink_resource(psurf->texture)->valid;
+ rt->fbfetch = (info->cbuf_fbfetch & BITFIELD_BIT(i)) > 0;
+ rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(i)) > 0;
+ } else {
+ memset(rt, 0, sizeof(struct zink_rt_attrib));
+ rt->format = VK_FORMAT_R8G8B8A8_UNORM;
+ rt->samples = fb->samples;
+ }
+}
+
+static struct zink_render_pass *
+get_render_pass(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ const struct pipe_framebuffer_state *fb = &ctx->fb_state;
+ struct zink_render_pass_state state = {0};
+ uint32_t clears = 0;
+ bool have_zsbuf = fb->zsbuf && zink_is_zsbuf_used(ctx);
+ bool use_tc_info = !ctx->blitting && ctx->track_renderpasses;
+ state.samples = fb->samples > 0;
+
+ for (int i = 0; i < fb->nr_cbufs; i++) {
+ if (use_tc_info)
+ zink_tc_init_color_attachment(ctx, &ctx->dynamic_fb.tc_info, i, &state.rts[i]);
+ else
+ zink_init_color_attachment(ctx, i, &state.rts[i]);
+ struct pipe_surface *surf = fb->cbufs[i];
+ if (surf) {
+ clears |= !!state.rts[i].clear_color ? PIPE_CLEAR_COLOR0 << i : 0;
+ struct zink_surface *transient = zink_transient_surface(surf);
+ if (transient) {
+ state.num_cresolves++;
+ state.rts[i].resolve = true;
+ if (!state.rts[i].clear_color)
+ state.msaa_expand_mask |= BITFIELD_BIT(i);
+ } else {
+ state.rts[i].resolve = false;
+ }
+ }
+ state.num_rts++;
+ }
+ state.msaa_samples = screen->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ?
+ ctx->gfx_pipeline_state.rast_samples + 1 : 0;
+ state.num_cbufs = fb->nr_cbufs;
+ assert(!state.num_cresolves || state.num_cbufs == state.num_cresolves);
+
+ if (have_zsbuf) {
+ if (use_tc_info)
+ zink_tc_init_zs_attachment(ctx, &ctx->dynamic_fb.tc_info, &state.rts[fb->nr_cbufs]);
+ else
+ zink_init_zs_attachment(ctx, &state.rts[fb->nr_cbufs]);
+ struct zink_surface *transient = zink_transient_surface(fb->zsbuf);
+ if (transient) {
+ state.num_zsresolves = 1;
+ state.rts[fb->nr_cbufs].resolve = true;
+ }
+ if (state.rts[fb->nr_cbufs].clear_color)
+ clears |= PIPE_CLEAR_DEPTH;
+ if (state.rts[fb->nr_cbufs].clear_stencil)
+ clears |= PIPE_CLEAR_STENCIL;
+ state.num_rts++;
+ }
+ state.have_zsbuf = have_zsbuf;
+ assert(clears == ctx->rp_clears_enabled);
+ state.clears = clears;
+ uint32_t hash = hash_render_pass_state(&state);
+ struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->render_pass_cache, hash,
+ &state);
+ struct zink_render_pass *rp;
+ if (entry) {
+ rp = entry->data;
+ assert(rp->state.clears == clears);
+ } else {
+ struct zink_render_pass_pipeline_state pstate;
+ pstate.samples = state.samples;
+ rp = zink_create_render_pass(screen, &state, &pstate);
+ if (!_mesa_hash_table_insert_pre_hashed(ctx->render_pass_cache, hash, &rp->state, rp))
+ return NULL;
+ bool found = false;
+ struct set_entry *cache_entry = _mesa_set_search_or_add(&ctx->render_pass_state_cache, &pstate, &found);
+ struct zink_render_pass_pipeline_state *ppstate;
+ if (!found) {
+ cache_entry->key = ralloc(ctx, struct zink_render_pass_pipeline_state);
+ ppstate = (void*)cache_entry->key;
+ memcpy(ppstate, &pstate, rp_state_size(&pstate));
+ ppstate->id = ctx->render_pass_state_cache.entries;
+ }
+ ppstate = (void*)cache_entry->key;
+ rp->pipeline_state = ppstate->id;
+ }
+ return rp;
+}
+
+/* check whether the active rp needs to be split to replace it with rp2 */
+static bool
+rp_must_change(const struct zink_render_pass *rp, const struct zink_render_pass *rp2, bool in_rp)
+{
+ if (rp == rp2)
+ return false;
+ unsigned num_cbufs = rp->state.num_cbufs;
+ if (rp->pipeline_state != rp2->pipeline_state) {
+ /* if any core attrib bits are different, must split */
+ if (rp->state.val != rp2->state.val)
+ return true;
+ for (unsigned i = 0; i < num_cbufs; i++) {
+ const struct zink_rt_attrib *rt = &rp->state.rts[i];
+ const struct zink_rt_attrib *rt2 = &rp2->state.rts[i];
+ /* if layout changed, must split */
+ if (get_color_rt_layout(rt) != get_color_rt_layout(rt2))
+ return true;
+ }
+ }
+ if (rp->state.have_zsbuf) {
+ const struct zink_rt_attrib *rt = &rp->state.rts[num_cbufs];
+ const struct zink_rt_attrib *rt2 = &rp2->state.rts[num_cbufs];
+ /* if zs layout has gone from read-only to read-write, split renderpass */
+ if (get_zs_rt_layout(rt) == VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
+ get_zs_rt_layout(rt2) == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
+ return true;
+ }
+ /* any other change doesn't require splitting a renderpass */
+ return !in_rp;
+}
+
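+/* recompute the renderpass for the current fb state, decide whether the active renderpass must be split, and rebuild the framebuffer if anything changed */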
+static void
+setup_framebuffer(struct zink_context *ctx)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass;
+
+ zink_update_vk_sample_locations(ctx);
+
+ if (ctx->rp_changed || ctx->rp_layout_changed || (!ctx->batch.in_rp && ctx->rp_loadop_changed)) {
+ /* 0. ensure no stale pointers are set */
+ ctx->gfx_pipeline_state.next_render_pass = NULL;
+ /* 1. calc new rp */
+ rp = get_render_pass(ctx);
+ /* 2. evaluate whether to use new rp */
+ if (ctx->gfx_pipeline_state.render_pass) {
+ /* 2a. if previous rp exists, check whether new rp MUST be used */
+ bool must_change = rp_must_change(ctx->gfx_pipeline_state.render_pass, rp, ctx->batch.in_rp);
+ ctx->fb_changed |= must_change;
+ if (!must_change)
+ /* 2b. if non-essential attribs have changed, store for later use and continue on */
+ ctx->gfx_pipeline_state.next_render_pass = rp;
+ } else {
+ /* 2c. no previous rp in use, use this one */
+ ctx->fb_changed = true;
+ }
+ } else if (ctx->gfx_pipeline_state.next_render_pass) {
+ /* previous rp was calculated but deferred: use it */
+ assert(!ctx->batch.in_rp);
+ rp = ctx->gfx_pipeline_state.next_render_pass;
+ ctx->gfx_pipeline_state.next_render_pass = NULL;
+ ctx->fb_changed = true;
+ }
+ if (rp->pipeline_state != ctx->gfx_pipeline_state.rp_state) {
+ ctx->gfx_pipeline_state.rp_state = rp->pipeline_state;
+ ctx->gfx_pipeline_state.dirty = true;
+ }
+
+ ctx->rp_loadop_changed = false;
+ ctx->rp_layout_changed = false;
+ ctx->rp_changed = false;
+
+ if (zink_render_update_swapchain(ctx))
+ zink_render_fixup_swapchain(ctx);
+
+ if (!ctx->fb_changed)
+ return;
+
+ zink_update_framebuffer_state(ctx);
+ zink_init_framebuffer(screen, ctx->framebuffer, rp);
+ ctx->fb_changed = false;
+ ctx->gfx_pipeline_state.render_pass = rp;
+ zink_batch_no_rp(ctx);
+}
+
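+/* collect VkImageViews for the imageless framebuffer: color attachments, then zs, then any msaa resolve attachments; returns false on a dead swapchain */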
+static bool
+prep_fb_attachments(struct zink_context *ctx, VkImageView *att)
+{
+ bool have_zsbuf = ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx);
+ const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!have_zsbuf;
+ unsigned num_resolves = 0;
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]);
+ struct zink_surface *transient = zink_transient_surface(ctx->fb_state.cbufs[i]);
+ if (transient) {
+ att[i] = zink_prep_fb_attachment(ctx, transient, i);
+ att[i + cresolve_offset] = zink_prep_fb_attachment(ctx, surf, i);
+ num_resolves++;
+ } else {
+ att[i] = zink_prep_fb_attachment(ctx, surf, i);
+ if (!att[i])
+ /* dead swapchain */
+ return false;
+ }
+ }
+ if (have_zsbuf) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf);
+ struct zink_surface *transient = zink_transient_surface(ctx->fb_state.zsbuf);
+ if (transient) {
+ att[ctx->fb_state.nr_cbufs] = zink_prep_fb_attachment(ctx, transient, ctx->fb_state.nr_cbufs);
+ att[cresolve_offset + num_resolves] = zink_prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs);
+ } else {
+ att[ctx->fb_state.nr_cbufs] = zink_prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs);
+ }
+ }
+ return true;
+}
+
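+/* emit vkCmdBeginRenderPass with any loadop clears filled in; returns the mask of buffers that still need explicit clears inside the renderpass */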
+static unsigned
+begin_render_pass(struct zink_context *ctx)
+{
+ struct zink_batch *batch = &ctx->batch;
+ struct pipe_framebuffer_state *fb_state = &ctx->fb_state;
+
+ VkRenderPassBeginInfo rpbi = {0};
+ rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+ rpbi.renderPass = ctx->gfx_pipeline_state.render_pass->render_pass;
+ rpbi.renderArea.offset.x = 0;
+ rpbi.renderArea.offset.y = 0;
+ rpbi.renderArea.extent.width = fb_state->width;
+ rpbi.renderArea.extent.height = fb_state->height;
+
+ if (ctx->fb_state.cbufs[0]) {
+ struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture);
+ if (zink_is_swapchain(res)) {
+ if (res->use_damage)
+ rpbi.renderArea = res->damage;
+ }
+ }
+
+ VkClearValue clears[PIPE_MAX_COLOR_BUFS + 1] = {0};
+ unsigned clear_buffers = 0;
+ uint32_t clear_validate = 0;
+ for (int i = 0; i < fb_state->nr_cbufs; i++) {
+ /* these are no-ops */
+ if (!fb_state->cbufs[i] || !zink_fb_clear_enabled(ctx, i))
+ continue;
+ /* these need actual clear calls inside the rp */
+ struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0);
+ if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) {
+ clear_buffers |= (PIPE_CLEAR_COLOR0 << i);
+ if (zink_fb_clear_count(&ctx->fb_clears[i]) < 2 ||
+ zink_fb_clear_element_needs_explicit(clear))
+ continue;
+ }
+ /* we now know there's one clear that can be done here */
+ memcpy(&clears[i].color, &clear->color, sizeof(float) * 4);
+ rpbi.clearValueCount = i + 1;
+ clear_validate |= PIPE_CLEAR_COLOR0 << i;
+ assert(ctx->framebuffer->rp->state.clears);
+ }
+ if (fb_state->zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) {
+ struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
+ struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
+ if (!zink_fb_clear_element_needs_explicit(clear)) {
+ clears[fb_state->nr_cbufs].depthStencil.depth = clear->zs.depth;
+ clears[fb_state->nr_cbufs].depthStencil.stencil = clear->zs.stencil;
+ rpbi.clearValueCount = fb_state->nr_cbufs + 1;
+ clear_validate |= clear->zs.bits;
+ assert(ctx->framebuffer->rp->state.clears);
+ }
+ if (zink_fb_clear_needs_explicit(fb_clear)) {
+ for (int j = !zink_fb_clear_element_needs_explicit(clear);
+ (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear);
+ j++)
+ clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits;
+ }
+ }
+ assert(clear_validate == ctx->framebuffer->rp->state.clears);
+ rpbi.pClearValues = &clears[0];
+ rpbi.framebuffer = ctx->framebuffer->fb;
+
+ assert(ctx->gfx_pipeline_state.render_pass && ctx->framebuffer);
+
+ VkRenderPassAttachmentBeginInfo infos;
+ VkImageView att[2 * (PIPE_MAX_COLOR_BUFS + 1)];
+ infos.sType = VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO;
+ infos.pNext = NULL;
+ infos.attachmentCount = ctx->framebuffer->state.num_attachments;
+ infos.pAttachments = att;
+ if (!prep_fb_attachments(ctx, att))
+ return 0;
+ ctx->zsbuf_unused = !zink_is_zsbuf_used(ctx);
+ /* this can be set if fbfetch is activated */
+ ctx->rp_changed = false;
+#ifndef NDEBUG
+ bool zsbuf_used = ctx->fb_state.zsbuf && zink_is_zsbuf_used(ctx);
+ const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!zsbuf_used;
+ unsigned num_cresolves = 0;
+ for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ if (ctx->fb_state.cbufs[i]) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]);
+ struct zink_surface *transient = zink_transient_surface(ctx->fb_state.cbufs[i]);
+ if (surf->base.format == ctx->fb_state.cbufs[i]->format) {
+ if (transient) {
+ num_cresolves++;
+ assert(zink_resource(transient->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage);
+ assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[cresolve_offset].usage);
+ } else {
+ assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage);
+ }
+ }
+ }
+ }
+ if (ctx->gfx_pipeline_state.render_pass->state.have_zsbuf) {
+ struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf);
+ struct zink_surface *transient = zink_transient_surface(ctx->fb_state.zsbuf);
+ if (transient) {
+ assert(zink_resource(transient->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage);
+ assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[cresolve_offset + num_cresolves].usage);
+ } else {
+ assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage);
+ }
+ }
+#endif
+ rpbi.pNext = &infos;
+
+ VKCTX(CmdBeginRenderPass)(batch->state->cmdbuf, &rpbi, VK_SUBPASS_CONTENTS_INLINE);
+ batch->in_rp = true;
+ return clear_buffers;
+}
+
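+/* sets up the framebuffer, replicating single-sampled data into transient msaa attachments where needed, then begins the renderpass */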
+unsigned
+zink_begin_render_pass(struct zink_context *ctx)
+{
+ setup_framebuffer(ctx);
+ if (ctx->batch.in_rp)
+ return 0;
+
+ if (ctx->framebuffer->rp->state.msaa_expand_mask) {
+ uint32_t rp_state = ctx->gfx_pipeline_state.rp_state;
+ struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass;
+ struct zink_framebuffer *fb = ctx->framebuffer;
+ bool blitting = ctx->blitting;
+
+ u_foreach_bit(i, ctx->framebuffer->rp->state.msaa_expand_mask) {
+ struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i];
+ /* skip replicate blit if the image will be full-cleared */
+ if ((i == PIPE_MAX_COLOR_BUFS && (ctx->rp_clears_enabled & PIPE_CLEAR_DEPTHSTENCIL)) ||
+ (ctx->rp_clears_enabled >> 2) & BITFIELD_BIT(i)) {
+ csurf->transient_init |= zink_fb_clear_full_exists(ctx, i);
+ }
+ if (csurf->transient_init)
+ continue;
+ struct pipe_surface *dst_view = (struct pipe_surface*)csurf->transient;
+ assert(dst_view);
+ struct pipe_sampler_view src_templ, *src_view;
+ struct pipe_resource *src = ctx->fb_state.cbufs[i]->texture;
+ struct pipe_box dstbox;
+
+ u_box_3d(0, 0, 0, ctx->fb_state.width, ctx->fb_state.height,
+ 1 + dst_view->u.tex.last_layer - dst_view->u.tex.first_layer, &dstbox);
+
+ util_blitter_default_src_texture(ctx->blitter, &src_templ, src, ctx->fb_state.cbufs[i]->u.tex.level);
+ src_view = ctx->base.create_sampler_view(&ctx->base, src, &src_templ);
+
+ zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES);
+ ctx->blitting = false;
+ zink_blit_barriers(ctx, zink_resource(src), zink_resource(dst_view->texture), true);
+ ctx->blitting = true;
+ unsigned clear_mask = i == PIPE_MAX_COLOR_BUFS ?
+ (BITFIELD_MASK(PIPE_MAX_COLOR_BUFS) << 2) :
+ (PIPE_CLEAR_DEPTHSTENCIL | ((BITFIELD_MASK(PIPE_MAX_COLOR_BUFS) & ~BITFIELD_BIT(i)) << 2));
+ unsigned clears_enabled = ctx->clears_enabled & clear_mask;
+ unsigned rp_clears_enabled = ctx->rp_clears_enabled & clear_mask;
+ ctx->clears_enabled &= ~clear_mask;
+ ctx->rp_clears_enabled &= ~clear_mask;
+ util_blitter_blit_generic(ctx->blitter, dst_view, &dstbox,
+ src_view, &dstbox, ctx->fb_state.width, ctx->fb_state.height,
+ PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+ false, false, 0);
+ ctx->clears_enabled = clears_enabled;
+ ctx->rp_clears_enabled = rp_clears_enabled;
+ ctx->blitting = false;
+ if (blitting) {
+ zink_blit_barriers(ctx, NULL, zink_resource(dst_view->texture), true);
+ zink_blit_barriers(ctx, NULL, zink_resource(src), true);
+ }
+ ctx->blitting = blitting;
+ pipe_sampler_view_reference(&src_view, NULL);
+ csurf->transient_init = true;
+ }
+ ctx->rp_layout_changed = ctx->rp_loadop_changed = false;
+ ctx->fb_changed = ctx->rp_changed = false;
+ ctx->gfx_pipeline_state.rp_state = rp_state;
+ ctx->gfx_pipeline_state.render_pass = rp;
+ /* manually re-set fb: depth buffer may have been eliminated */
+ ctx->framebuffer = fb;
+ ctx->framebuffer->rp = rp;
+ }
+ assert(ctx->gfx_pipeline_state.render_pass);
+ return begin_render_pass(ctx);
+}
+
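+/* ends the active renderpass; color surfaces are marked transient_init so msaa expansion can be skipped next time */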
+void
+zink_end_render_pass(struct zink_context *ctx)
+{
+ if (ctx->batch.in_rp) {
+ VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf);
+
+ for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i];
+ if (csurf)
+ csurf->transient_init = true;
+ }
+ }
+ ctx->batch.in_rp = false;
+}
+
+bool
+zink_init_render_pass(struct zink_context *ctx)
+{
+ _mesa_set_init(&ctx->render_pass_state_cache, ctx, hash_rp_state, equals_rp_state);
+ ctx->render_pass_cache = _mesa_hash_table_create(NULL,
+ hash_render_pass_state,
+ equals_render_pass_state);
+ return !!ctx->render_pass_cache;
+}
+
+void
+zink_render_fixup_swapchain(struct zink_context *ctx)
+{
+ if ((ctx->swapchain_size.width || ctx->swapchain_size.height)) {
+ unsigned old_w = ctx->fb_state.width;
+ unsigned old_h = ctx->fb_state.height;
+ ctx->fb_state.width = ctx->swapchain_size.width;
+ ctx->fb_state.height = ctx->swapchain_size.height;
+ ctx->dynamic_fb.info.renderArea.extent.width = MIN2(ctx->dynamic_fb.info.renderArea.extent.width, ctx->fb_state.width);
+ ctx->dynamic_fb.info.renderArea.extent.height = MIN2(ctx->dynamic_fb.info.renderArea.extent.height, ctx->fb_state.height);
+ zink_kopper_fixup_depth_buffer(ctx);
+ if (ctx->fb_state.width != old_w || ctx->fb_state.height != old_h)
+ ctx->scissor_changed = true;
+ if (ctx->framebuffer)
+ zink_update_framebuffer_state(ctx);
+ ctx->swapchain_size.width = ctx->swapchain_size.height = 0;
+ }
+}
+
+bool
+zink_render_update_swapchain(struct zink_context *ctx)
+{
+ bool has_swapchain = false;
+ for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+ if (!ctx->fb_state.cbufs[i])
+ continue;
+ struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[i]->texture);
+ if (zink_is_swapchain(res)) {
+ has_swapchain = true;
+ if (zink_kopper_acquire(ctx, res, UINT64_MAX))
+ zink_surface_swapchain_update(ctx, zink_csurface(ctx->fb_state.cbufs[i]));
+ }
+ }
+ return has_swapchain;
}
diff --git a/src/gallium/drivers/zink/zink_render_pass.h b/src/gallium/drivers/zink/zink_render_pass.h
index 38efbc6a5b7..3d5bd417ab1 100644
--- a/src/gallium/drivers/zink/zink_render_pass.h
+++ b/src/gallium/drivers/zink/zink_render_pass.h
@@ -24,52 +24,7 @@
#ifndef ZINK_RENDERPASS_H
#define ZINK_RENDERPASS_H
-#include <vulkan/vulkan.h>
-
-#include "pipe/p_state.h"
-#include "util/u_inlines.h"
-
-struct zink_screen;
-
-struct zink_rt_attrib {
- VkFormat format;
- VkSampleCountFlagBits samples;
- bool clear_color;
- bool clear_stencil;
- bool fbfetch;
- union {
- bool swapchain;
- bool needs_write;
- };
-};
-
-struct zink_render_pass_state {
- uint8_t num_cbufs : 4; /* PIPE_MAX_COLOR_BUFS = 8 */
- uint8_t have_zsbuf : 1;
- bool samples; //for fs samplemask
- bool swapchain_init;
- struct zink_rt_attrib rts[PIPE_MAX_COLOR_BUFS + 1];
- unsigned num_rts;
- uint32_t clears; //for extra verification and update flagging
-};
-
-struct zink_pipeline_rt {
- VkFormat format;
- VkSampleCountFlagBits samples;
-};
-
-struct zink_render_pass_pipeline_state {
- uint32_t num_attachments:31;
- bool samples:1; //for fs samplemask
- struct zink_pipeline_rt attachments[PIPE_MAX_COLOR_BUFS + 1];
- unsigned id;
-};
-
-struct zink_render_pass {
- VkRenderPass render_pass;
- struct zink_render_pass_state state;
- unsigned pipeline_state;
-};
+#include "zink_types.h"
struct zink_render_pass *
zink_create_render_pass(struct zink_screen *screen,
@@ -80,6 +35,28 @@ void
zink_destroy_render_pass(struct zink_screen *screen,
struct zink_render_pass *rp);
+
+unsigned
+zink_begin_render_pass(struct zink_context *ctx);
+void
+zink_end_render_pass(struct zink_context *ctx);
+
VkImageLayout
-zink_render_pass_attachment_get_barrier_info(const struct zink_render_pass *rp, unsigned idx, VkPipelineStageFlags *pipeline, VkAccessFlags *access);
+zink_render_pass_attachment_get_barrier_info(const struct zink_rt_attrib *rt, bool color, VkPipelineStageFlags *pipeline, VkAccessFlags *access);
+VkImageLayout
+zink_tc_renderpass_info_parse(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned idx, VkPipelineStageFlags *pipeline, VkAccessFlags *access);
+bool
+zink_init_render_pass(struct zink_context *ctx);
+bool
+zink_render_update_swapchain(struct zink_context *ctx);
+void
+zink_render_fixup_swapchain(struct zink_context *ctx);
+void
+zink_init_zs_attachment(struct zink_context *ctx, struct zink_rt_attrib *rt);
+void
+zink_init_color_attachment(struct zink_context *ctx, unsigned i, struct zink_rt_attrib *rt);
+void
+zink_tc_init_zs_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, struct zink_rt_attrib *rt);
+void
+zink_tc_init_color_attachment(struct zink_context *ctx, const struct tc_renderpass_info *info, unsigned i, struct zink_rt_attrib *rt);
#endif
diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c
index 81cd735a8d6..16b3b0413c6 100644
--- a/src/gallium/drivers/zink/zink_resource.c
+++ b/src/gallium/drivers/zink/zink_resource.c
@@ -24,17 +24,19 @@
#include "zink_resource.h"
#include "zink_batch.h"
+#include "zink_clear.h"
#include "zink_context.h"
#include "zink_fence.h"
+#include "zink_format.h"
#include "zink_program.h"
#include "zink_screen.h"
+#include "zink_kopper.h"
#ifdef VK_USE_PLATFORM_METAL_EXT
#include "QuartzCore/CAMetalLayer.h"
#endif
-#include "vulkan/wsi/wsi_common.h"
-#include "util/slab.h"
+#include "vk_format.h"
#include "util/u_blitter.h"
#include "util/u_debug.h"
#include "util/format/u_format.h"
@@ -43,14 +45,13 @@
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/os_file.h"
-#include "frontend/sw_winsys.h"
+#include "frontend/winsys_handle.h"
-#ifndef _WIN32
+#if !defined(__APPLE__)
#define ZINK_USE_DMABUF
#endif
-#ifdef ZINK_USE_DMABUF
-#include <xf86drm.h>
+#if defined(ZINK_USE_DMABUF) && !defined(_WIN32)
#include "drm-uapi/drm_fourcc.h"
#else
/* these won't actually be used */
@@ -58,17 +59,126 @@
#define DRM_FORMAT_MOD_LINEAR 0
#endif
+#ifdef __APPLE__
+#include "MoltenVK/mvk_vulkan.h"
+// Source of MVK_VERSION
+#include "MoltenVK/mvk_config.h"
+#endif /* __APPLE__ */
+
+#define ZINK_EXTERNAL_MEMORY_HANDLE 999
+
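+/* per-name allocation counters used by the ZINK_DEBUG_MEM memory accounting */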
+struct zink_debug_mem_entry {
+ uint32_t count;
+ uint64_t size;
+ const char *name;
+};
+
+static const char *
+zink_debug_mem_add(struct zink_screen *screen, uint64_t size, const char *name)
+{
+ assert(name);
+
+ simple_mtx_lock(&screen->debug_mem_lock);
+ struct hash_entry *entry = _mesa_hash_table_search(screen->debug_mem_sizes, name);
+ struct zink_debug_mem_entry *debug_bos;
+
+ if (!entry) {
+ debug_bos = calloc(1, sizeof(struct zink_debug_mem_entry));
+ debug_bos->name = strdup(name);
+ _mesa_hash_table_insert(screen->debug_mem_sizes, debug_bos->name, debug_bos);
+ } else {
+ debug_bos = (struct zink_debug_mem_entry *) entry->data;
+ }
+
+ debug_bos->count++;
+ debug_bos->size += align(size, 4096);
+ simple_mtx_unlock(&screen->debug_mem_lock);
+
+ return debug_bos->name;
+}
+
+static void
+zink_debug_mem_del(struct zink_screen *screen, struct zink_bo *bo)
+{
+ simple_mtx_lock(&screen->debug_mem_lock);
+ struct hash_entry *entry = _mesa_hash_table_search(screen->debug_mem_sizes, bo->name);
+ /* If we're finishing the BO, it should have been added already */
+ assert(entry);
+
+ struct zink_debug_mem_entry *debug_bos = entry->data;
+ debug_bos->count--;
+ debug_bos->size -= align(zink_bo_get_size(bo), 4096);
+ if (!debug_bos->count) {
+ _mesa_hash_table_remove(screen->debug_mem_sizes, entry);
+ free((void*)debug_bos->name);
+ free(debug_bos);
+ }
+ simple_mtx_unlock(&screen->debug_mem_lock);
+}
+
+static int
+debug_bos_count_compare(const void *in_a, const void *in_b)
+{
+ struct zink_debug_mem_entry *a = *(struct zink_debug_mem_entry **)in_a;
+ struct zink_debug_mem_entry *b = *(struct zink_debug_mem_entry **)in_b;
+ return a->count - b->count;
+}
+
+void
+zink_debug_mem_print_stats(struct zink_screen *screen)
+{
+ simple_mtx_lock(&screen->debug_mem_lock);
+
+ /* Put the HT's sizes data in an array so we can sort by number of allocations. */
+ struct util_dynarray dyn;
+ util_dynarray_init(&dyn, NULL);
+
+ uint32_t size = 0;
+ uint32_t count = 0;
+ hash_table_foreach(screen->debug_mem_sizes, entry)
+ {
+ struct zink_debug_mem_entry *debug_bos = entry->data;
+ util_dynarray_append(&dyn, struct zink_debug_mem_entry *, debug_bos);
+ size += debug_bos->size / 1024;
+ count += debug_bos->count;
+ }
+
+ qsort(dyn.data,
+ util_dynarray_num_elements(&dyn, struct zink_debug_mem_entry *),
+ sizeof(struct zink_debug_mem_entry *), debug_bos_count_compare);
+
+ util_dynarray_foreach(&dyn, struct zink_debug_mem_entry *, entryp)
+ {
+ struct zink_debug_mem_entry *debug_bos = *entryp;
+ mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
+ (long long) (debug_bos->size / 1024));
+ }
+
+ mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));
+
+ util_dynarray_fini(&dyn);
+
+ simple_mtx_unlock(&screen->debug_mem_lock);
+}
static bool
equals_ivci(const void *a, const void *b)
{
- return memcmp(a, b, sizeof(VkImageViewCreateInfo)) == 0;
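+ /* skip sType/pNext; compare only from the flags member onward (equals_bvci below does the same) */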
+ const uint8_t *pa = a;
+ const uint8_t *pb = b;
+ size_t offset = offsetof(VkImageViewCreateInfo, flags);
+ return memcmp(pa + offset, pb + offset, sizeof(VkImageViewCreateInfo) - offset) == 0;
}
static bool
equals_bvci(const void *a, const void *b)
{
- return memcmp(a, b, sizeof(VkBufferViewCreateInfo)) == 0;
+ const uint8_t *pa = a;
+ const uint8_t *pb = b;
+ size_t offset = offsetof(VkBufferViewCreateInfo, flags);
+ return memcmp(pa + offset, pb + offset, sizeof(VkBufferViewCreateInfo) - offset) == 0;
}
static void
@@ -86,16 +196,35 @@ void
zink_destroy_resource_object(struct zink_screen *screen, struct zink_resource_object *obj)
{
if (obj->is_buffer) {
- util_dynarray_foreach(&obj->tmp, VkBuffer, buffer)
- VKSCR(DestroyBuffer)(screen->dev, *buffer, NULL);
- VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL);
+ while (util_dynarray_contains(&obj->views, VkBufferView))
+ VKSCR(DestroyBufferView)(screen->dev, util_dynarray_pop(&obj->views, VkBufferView), NULL);
} else {
+ while (util_dynarray_contains(&obj->views, VkImageView))
+ VKSCR(DestroyImageView)(screen->dev, util_dynarray_pop(&obj->views, VkImageView), NULL);
+ }
+ if (!obj->dt && zink_debug & ZINK_DEBUG_MEM)
+ zink_debug_mem_del(screen, obj->bo);
+ util_dynarray_fini(&obj->views);
+ for (unsigned i = 0; i < ARRAY_SIZE(obj->copies); i++)
+ util_dynarray_fini(&obj->copies[i]);
+ if (obj->is_buffer) {
+ VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL);
+ VKSCR(DestroyBuffer)(screen->dev, obj->storage_buffer, NULL);
+ } else if (obj->dt) {
+ zink_kopper_displaytarget_destroy(screen, obj->dt);
+ } else if (!obj->is_aux) {
VKSCR(DestroyImage)(screen->dev, obj->image, NULL);
+ } else {
+#if defined(ZINK_USE_DMABUF) && !defined(_WIN32)
+ close(obj->handle);
+#endif
}
- util_dynarray_fini(&obj->tmp);
- zink_descriptor_set_refs_clear(&obj->desc_set_refs, obj);
- zink_bo_unref(screen, obj->bo);
+ simple_mtx_destroy(&obj->view_lock);
+ if (obj->dt) {
+ FREE(obj->bo); //this is a dummy struct
+ } else
+ zink_bo_unref(screen, obj->bo);
FREE(obj);
}
@@ -108,15 +237,19 @@ zink_resource_destroy(struct pipe_screen *pscreen,
if (pres->target == PIPE_BUFFER) {
util_range_destroy(&res->valid_buffer_range);
util_idalloc_mt_free(&screen->buffer_ids, res->base.buffer_id_unique);
+ assert(!_mesa_hash_table_num_entries(&res->bufferview_cache));
simple_mtx_destroy(&res->bufferview_mtx);
- } else
+ ralloc_free(res->bufferview_cache.table);
+ } else {
+ assert(!_mesa_hash_table_num_entries(&res->surface_cache));
simple_mtx_destroy(&res->surface_mtx);
+ ralloc_free(res->surface_cache.table);
+ }
/* no need to do anything for the caches, these objects own the resource lifetimes */
zink_resource_object_reference(screen, &res->obj, NULL);
- zink_resource_object_reference(screen, &res->scanout_obj, NULL);
threaded_resource_deinit(pres);
- FREE(res);
+ FREE_CL(res);
}
static VkImageAspectFlags
@@ -147,39 +280,71 @@ create_bci(struct zink_screen *screen, const struct pipe_resource *templ, unsign
bci.flags = 0;
assert(bci.size > 0);
- bci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
- VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-
- bci.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
- VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
+ if (bind & ZINK_BIND_DESCRIPTOR) {
+ /* gallium sizes are all uint32_t, while the total size of this buffer may exceed that limit */
+ bci.usage = 0;
+ bci.usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
+ VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT;
+ } else {
+ bci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+
+ bci.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
+ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
+ }
+ if (screen->info.have_KHR_buffer_device_address)
+ bci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
if (bind & PIPE_BIND_SHADER_IMAGE)
bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+ if (bind & PIPE_BIND_QUERY_BUFFER)
+ bci.usage |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
+
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
- bci.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
+ bci.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT | VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
return bci;
}
-static bool
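+/* result of checking an image create-info: hard failure vs. merely suboptimal host-image-copy usage */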
+typedef enum {
+ USAGE_FAIL_NONE,
+ USAGE_FAIL_ERROR,
+ USAGE_FAIL_SUBOPTIMAL,
+} usage_fail;
+
+static usage_fail
check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, uint64_t modifier)
{
VkImageFormatProperties image_props;
VkResult ret;
+ bool optimalDeviceAccess = true;
assert(modifier == DRM_FORMAT_MOD_INVALID ||
(VKSCR(GetPhysicalDeviceImageFormatProperties2) && screen->info.have_EXT_image_drm_format_modifier));
if (VKSCR(GetPhysicalDeviceImageFormatProperties2)) {
VkImageFormatProperties2 props2;
props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2;
props2.pNext = NULL;
+ VkSamplerYcbcrConversionImageFormatProperties ycbcr_props;
+ ycbcr_props.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES;
+ ycbcr_props.pNext = NULL;
+ if (screen->info.have_KHR_sampler_ycbcr_conversion)
+ props2.pNext = &ycbcr_props;
+ VkHostImageCopyDevicePerformanceQueryEXT hic = {
+ VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT,
+ props2.pNext,
+ };
+ if (screen->info.have_EXT_host_image_copy && ici->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)
+ props2.pNext = &hic;
VkPhysicalDeviceImageFormatInfo2 info;
info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2;
+ /* possibly VkImageFormatListCreateInfo */
+ info.pNext = ici->pNext;
info.format = ici->format;
info.type = ici->imageType;
info.tiling = ici->tiling;
@@ -189,47 +354,84 @@ check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, uint64_t modifier)
VkPhysicalDeviceImageDrmFormatModifierInfoEXT mod_info;
if (modifier != DRM_FORMAT_MOD_INVALID) {
mod_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT;
- mod_info.pNext = NULL;
+ mod_info.pNext = info.pNext;
mod_info.drmFormatModifier = modifier;
mod_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
mod_info.queueFamilyIndexCount = 0;
+ mod_info.pQueueFamilyIndices = NULL;
info.pNext = &mod_info;
- } else
- info.pNext = NULL;
+ }
ret = VKSCR(GetPhysicalDeviceImageFormatProperties2)(screen->pdev, &info, &props2);
+ /* this is using VK_IMAGE_CREATE_EXTENDED_USAGE_BIT and can't be validated */
+ if (vk_format_aspects(ici->format) & VK_IMAGE_ASPECT_PLANE_1_BIT)
+ ret = VK_SUCCESS;
image_props = props2.imageFormatProperties;
+ if (screen->info.have_EXT_host_image_copy && ici->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)
+ optimalDeviceAccess = hic.optimalDeviceAccess;
} else
ret = VKSCR(GetPhysicalDeviceImageFormatProperties)(screen->pdev, ici->format, ici->imageType,
ici->tiling, ici->usage, ici->flags, &image_props);
- return ret == VK_SUCCESS;
+ if (ret != VK_SUCCESS)
+ return USAGE_FAIL_ERROR;
+ if (ici->extent.depth > image_props.maxExtent.depth ||
+ ici->extent.height > image_props.maxExtent.height ||
+ ici->extent.width > image_props.maxExtent.width)
+ return USAGE_FAIL_ERROR;
+ if (ici->mipLevels > image_props.maxMipLevels)
+ return USAGE_FAIL_ERROR;
+ if (ici->arrayLayers > image_props.maxArrayLayers)
+ return USAGE_FAIL_ERROR;
+ if (!(ici->samples & image_props.sampleCounts))
+ return USAGE_FAIL_ERROR;
+ if (!optimalDeviceAccess)
+ return USAGE_FAIL_SUBOPTIMAL;
+ return USAGE_FAIL_NONE;
}
static VkImageUsageFlags
-get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats, const struct pipe_resource *templ, unsigned bind)
+get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags2 feats, const struct pipe_resource *templ, unsigned bind, bool *need_extended)
{
VkImageUsageFlags usage = 0;
- /* sadly, gallium doesn't let us know if it'll ever need this, so we have to assume */
- if (feats & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)
- usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
- if (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)
- usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT && (bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED))
- usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
-
- if ((templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample) &&
- (bind & PIPE_BIND_SHADER_IMAGE)) {
- if (feats & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)
+ bool is_planar = util_format_get_num_planes(templ->format) > 1;
+ *need_extended = false;
+
+ if (bind & ZINK_BIND_TRANSIENT)
+ usage |= VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
+ else {
+ /* sadly, gallium doesn't let us know if it'll ever need this, so we have to assume */
+ if (is_planar || (feats & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT))
+ usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ if (is_planar || (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT))
+ usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+ if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)
+ usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+
+ if ((is_planar || (feats & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) && (bind & PIPE_BIND_SHADER_IMAGE)) {
+ assert(templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample);
usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ }
}
if (bind & PIPE_BIND_RENDER_TARGET) {
if (feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) {
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- if ((bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED))
+ if (!(bind & ZINK_BIND_TRANSIENT) && (bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED))
usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
- } else
+ if (!(bind & ZINK_BIND_TRANSIENT) && screen->info.have_EXT_attachment_feedback_loop_layout)
+ usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+ } else {
+ /* trust that gallium isn't going to give us anything wild */
+ *need_extended = true;
+ return 0;
+ }
+ } else if ((bind & PIPE_BIND_SAMPLER_VIEW) && !util_format_is_depth_or_stencil(templ->format)) {
+ if (!(feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
+ /* ensure we can u_blitter this later */
+ *need_extended = true;
return 0;
+ }
+ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
if (bind & PIPE_BIND_DEPTH_STENCIL) {
@@ -237,6 +439,8 @@ get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats
usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
else
return 0;
+ if (screen->info.have_EXT_attachment_feedback_loop_layout && !(bind & ZINK_BIND_TRANSIENT))
+ usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
/* this is unlikely to occur and has been included for completeness */
} else if (bind & PIPE_BIND_SAMPLER_VIEW && !(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
if (feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)
@@ -245,11 +449,12 @@ get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats
return 0;
}
- if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
- usage |= VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
-
if (bind & PIPE_BIND_STREAM_OUTPUT)
usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
+
+ if (screen->info.have_EXT_host_image_copy && feats & VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT)
+ usage |= VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
+
return usage;
}
@@ -265,50 +470,142 @@ find_modifier_feats(const struct zink_modifier_prop *prop, uint64_t modifier, ui
return 0;
}
+/* validate the ici, dropping host image copy (HIC) usage if the driver reports it as suboptimal */
+static bool
+suboptimal_check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, uint64_t *mod)
+{
+ usage_fail fail = check_ici(screen, ici, *mod);
+ if (!fail)
+ return true;
+ if (fail == USAGE_FAIL_SUBOPTIMAL) {
+ ici->usage &= ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
+ fail = check_ici(screen, ici, *mod);
+ if (!fail)
+ return true;
+ }
+ return false;
+}
+
+/* If the driver can't do mutable with this ICI, then try again after removing mutable (and
+ * thus also the list of formats we might mutate to)
+ */
+static bool
+double_check_ici(struct zink_screen *screen, VkImageCreateInfo *ici, VkImageUsageFlags usage, uint64_t *mod)
+{
+ if (!usage)
+ return false;
+
+ ici->usage = usage;
+
+ if (suboptimal_check_ici(screen, ici, mod))
+ return true;
+ usage_fail fail = check_ici(screen, ici, *mod);
+ if (!fail)
+ return true;
+ if (fail == USAGE_FAIL_SUBOPTIMAL) {
+ ici->usage &= ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
+ fail = check_ici(screen, ici, *mod);
+ if (!fail)
+ return true;
+ }
+ const void *pNext = ici->pNext;
+ if (pNext) {
+ VkBaseOutStructure *prev = NULL;
+ VkBaseOutStructure *fmt_list = NULL;
+ vk_foreach_struct(strct, (void*)ici->pNext) {
+ if (strct->sType == VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO) {
+ fmt_list = strct;
+ if (prev) {
+ prev->pNext = strct->pNext;
+ } else {
+ ici->pNext = strct->pNext;
+ }
+ fmt_list->pNext = NULL;
+ break;
+ }
+ prev = strct;
+ }
+ ici->flags &= ~VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ if (suboptimal_check_ici(screen, ici, mod))
+ return true;
+ fmt_list->pNext = (void*)ici->pNext;
+ ici->pNext = fmt_list;
+ ici->flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ }
+ return false;
+}
+
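+/* choose VkImageUsageFlags for the template; with drm modifiers, each non-linear modifier is tried first and linear is used only as a last resort */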
static VkImageUsageFlags
-get_image_usage(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count, const uint64_t *modifiers, uint64_t *mod)
+get_image_usage(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count, uint64_t *modifiers, uint64_t *mod)
{
VkImageTiling tiling = ici->tiling;
+ bool need_extended = false;
*mod = DRM_FORMAT_MOD_INVALID;
if (modifiers_count) {
bool have_linear = false;
const struct zink_modifier_prop *prop = &screen->modifier_props[templ->format];
assert(tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT);
+ bool found = false;
+ uint64_t good_mod = 0;
+ VkImageUsageFlags good_usage = 0;
for (unsigned i = 0; i < modifiers_count; i++) {
if (modifiers[i] == DRM_FORMAT_MOD_LINEAR) {
have_linear = true;
+ if (!screen->info.have_EXT_image_drm_format_modifier)
+ break;
continue;
}
VkFormatFeatureFlags feats = find_modifier_feats(prop, modifiers[i], mod);
if (feats) {
- VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind);
- if (usage) {
- ici->usage = usage;
- if (check_ici(screen, ici, *mod))
- return usage;
+ VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended);
+ assert(!need_extended);
+ if (double_check_ici(screen, ici, usage, mod)) {
+ if (!found) {
+ found = true;
+ good_mod = modifiers[i];
+ good_usage = usage;
+ }
+ } else {
+ modifiers[i] = DRM_FORMAT_MOD_LINEAR;
}
}
}
+ if (found) {
+ *mod = good_mod;
+ return good_usage;
+ }
/* only try linear if no other options available */
if (have_linear) {
VkFormatFeatureFlags feats = find_modifier_feats(prop, DRM_FORMAT_MOD_LINEAR, mod);
if (feats) {
- VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind);
- if (usage) {
- ici->usage = usage;
- if (check_ici(screen, ici, *mod))
- return usage;
- }
+ VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended);
+ assert(!need_extended);
+ if (double_check_ici(screen, ici, usage, mod))
+ return usage;
}
}
- } else
- {
- VkFormatProperties props = screen->format_props[templ->format];
- VkFormatFeatureFlags feats = tiling == VK_IMAGE_TILING_LINEAR ? props.linearTilingFeatures : props.optimalTilingFeatures;
- VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind);
- if (usage) {
- ici->usage = usage;
- if (check_ici(screen, ici, *mod))
+ } else {
+ struct zink_format_props props = screen->format_props[templ->format];
+ VkFormatFeatureFlags2 feats = tiling == VK_IMAGE_TILING_LINEAR ? props.linearTilingFeatures : props.optimalTilingFeatures;
+ if (ici->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)
+ feats = UINT32_MAX;
+ VkImageUsageFlags usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended);
+ if (need_extended) {
+ ici->flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ feats = UINT32_MAX;
+ usage = get_image_usage_for_feats(screen, feats, templ, bind, &need_extended);
+ }
+ if (double_check_ici(screen, ici, usage, mod))
+ return usage;
+ if (util_format_is_depth_or_stencil(templ->format)) {
+ if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) {
+ usage &= ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ if (double_check_ici(screen, ici, usage, mod))
+ return usage;
+ }
+ } else if (!(templ->bind & PIPE_BIND_RENDER_TARGET)) {
+ usage &= ~VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ if (double_check_ici(screen, ici, usage, mod))
return usage;
}
}
@@ -317,24 +614,114 @@ get_image_usage(struct zink_screen *screen, VkImageCreateInfo *ici, const struct
}
static uint64_t
-create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, bool dmabuf, unsigned bind, unsigned modifiers_count, const uint64_t *modifiers, bool *success)
+eval_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count, uint64_t *modifiers, bool *success)
+{
+ /* sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the following conditions is true:
+ * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT
+ *
+ * 44.1.1. Supported Sample Counts
+ */
+ bool want_cube = ici->samples == 1 &&
+ (templ->target == PIPE_TEXTURE_CUBE ||
+ templ->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ (templ->target == PIPE_TEXTURE_2D_ARRAY && ici->extent.width == ici->extent.height && ici->arrayLayers >= 6));
+
+ if (ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ modifiers_count = 0;
+
+ bool first = true;
+ bool tried[2] = {0};
+ uint64_t mod = DRM_FORMAT_MOD_INVALID;
+retry:
+ while (!ici->usage) {
+ if (!first) {
+ switch (ici->tiling) {
+ case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
+ ici->tiling = VK_IMAGE_TILING_OPTIMAL;
+ modifiers_count = 0;
+ break;
+ case VK_IMAGE_TILING_OPTIMAL:
+ ici->tiling = VK_IMAGE_TILING_LINEAR;
+ break;
+ case VK_IMAGE_TILING_LINEAR:
+ if (bind & PIPE_BIND_LINEAR) {
+ *success = false;
+ return DRM_FORMAT_MOD_INVALID;
+ }
+ ici->tiling = VK_IMAGE_TILING_OPTIMAL;
+ break;
+ default:
+ unreachable("unhandled tiling mode");
+ }
+ if (tried[ici->tiling]) {
+ if (ici->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) {
+ *success = false;
+ return DRM_FORMAT_MOD_INVALID;
+ }
+ ici->flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ tried[0] = false;
+ tried[1] = false;
+ first = true;
+ goto retry;
+ }
+ }
+ ici->usage = get_image_usage(screen, ici, templ, bind, modifiers_count, modifiers, &mod);
+ first = false;
+ if (ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ tried[ici->tiling] = true;
+ }
+ if (want_cube) {
+ ici->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
+ if ((get_image_usage(screen, ici, templ, bind, modifiers_count, modifiers, &mod) & ici->usage) != ici->usage)
+ ici->flags &= ~VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
+ }
+
+ *success = true;
+ return mod;
+}
+
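+/* populate the VkImageCreateInfo from the gallium template; usage is left unset here (see eval_ici above) */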
+static void
+init_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_resource *templ, unsigned bind, unsigned modifiers_count)
{
ici->sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
- ici->pNext = NULL;
- ici->flags = modifiers_count || dmabuf || bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DEPTH_STENCIL) ? 0 : VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ /* pNext may already be set */
+ if (util_format_get_num_planes(templ->format) > 1)
+ ici->flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
+ else if (bind & ZINK_BIND_MUTABLE)
+ ici->flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ else
+ ici->flags = 0;
+ if (ici->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)
+ /* unset VkImageFormatListCreateInfo if mutable */
+ ici->pNext = NULL;
+ else if (ici->pNext)
+ /* add mutable if VkImageFormatListCreateInfo */
+ ici->flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
ici->usage = 0;
ici->queueFamilyIndexCount = 0;
+ ici->pQueueFamilyIndices = NULL;
+
+ /* assume we're going to be doing some CompressedTexSubImage */
+ if (util_format_is_compressed(templ->format) && (ici->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
+ !vk_find_struct_const(ici->pNext, IMAGE_FORMAT_LIST_CREATE_INFO))
+ ici->flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
+
+ if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+ ici->flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT;
+ bool need_2D = false;
switch (templ->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
- ici->imageType = VK_IMAGE_TYPE_1D;
+ if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+ need_2D |= screen->need_2D_sparse;
+ if (util_format_is_depth_or_stencil(templ->format))
+ need_2D |= screen->need_2D_zs;
+ ici->imageType = need_2D ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
- ici->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
- FALLTHROUGH;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
@@ -343,8 +730,10 @@ create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe
case PIPE_TEXTURE_3D:
ici->imageType = VK_IMAGE_TYPE_3D;
- if (bind & PIPE_BIND_RENDER_TARGET)
+ if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE))
ici->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
+ if (screen->info.have_EXT_image_2d_view_of_3d)
+ ici->flags |= VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT;
break;
case PIPE_BUFFER:
@@ -366,265 +755,208 @@ create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe
ici->mipLevels = templ->last_level + 1;
ici->arrayLayers = MAX2(templ->array_size, 1);
ici->samples = templ->nr_samples ? templ->nr_samples : VK_SAMPLE_COUNT_1_BIT;
- ici->tiling = modifiers_count ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : bind & PIPE_BIND_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ ici->tiling = screen->info.have_EXT_image_drm_format_modifier && modifiers_count ?
+ VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
+ bind & (PIPE_BIND_LINEAR | ZINK_BIND_DMABUF) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ /* XXX: does this have perf implications anywhere? hopefully not */
+ if (ici->samples == VK_SAMPLE_COUNT_1_BIT &&
+ screen->info.have_EXT_multisampled_render_to_single_sampled &&
+ ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ ici->flags |= VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT;
ici->sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ici->initialLayout = dmabuf ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED;
-
- if (templ->target == PIPE_TEXTURE_CUBE ||
- templ->target == PIPE_TEXTURE_CUBE_ARRAY ||
- (templ->target == PIPE_TEXTURE_2D_ARRAY &&
- ici->extent.width == ici->extent.height &&
- ici->arrayLayers >= 6)) {
- VkImageFormatProperties props;
- if (vkGetPhysicalDeviceImageFormatProperties(screen->pdev, ici->format,
- ici->imageType, ici->tiling,
- ici->usage, ici->flags |
- VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT,
- &props) == VK_SUCCESS) {
- if (props.sampleCounts & ici->samples)
- ici->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
- }
- }
+ ici->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
if (templ->target == PIPE_TEXTURE_CUBE)
ici->arrayLayers *= 6;
-
- if (templ->usage == PIPE_USAGE_STAGING &&
- templ->format != PIPE_FORMAT_B4G4R4A4_UNORM &&
- templ->format != PIPE_FORMAT_B4G4R4A4_UINT)
- ici->tiling = VK_IMAGE_TILING_LINEAR;
-
- bool first = true;
- bool tried[2] = {0};
- uint64_t mod = DRM_FORMAT_MOD_INVALID;
- while (!ici->usage) {
- if (!first) {
- switch (ici->tiling) {
- case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
- ici->tiling = VK_IMAGE_TILING_OPTIMAL;
- modifiers_count = 0;
- break;
- case VK_IMAGE_TILING_OPTIMAL:
- ici->tiling = VK_IMAGE_TILING_LINEAR;
- break;
- case VK_IMAGE_TILING_LINEAR:
- if (bind & PIPE_BIND_LINEAR) {
- *success = false;
- return DRM_FORMAT_MOD_INVALID;
- }
- ici->tiling = VK_IMAGE_TILING_OPTIMAL;
- break;
- default:
- unreachable("unhandled tiling mode");
- }
- if (tried[ici->tiling]) {
- *success = false;
- return DRM_FORMAT_MOD_INVALID;
- }
- }
- ici->usage = get_image_usage(screen, ici, templ, bind, modifiers_count, modifiers, &mod);
- first = false;
- if (ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- tried[ici->tiling] = true;
- }
-
- *success = true;
- return mod;
}
-static struct zink_resource_object *
-resource_object_create(struct zink_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle, bool *optimal_tiling,
- const uint64_t *modifiers, int modifiers_count)
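+/* create the VkSamplerYcbcrConversion used to sample multi-planar images (2-plane 4:2:0, BT.709, full range) */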
+static inline bool
+create_sampler_conversion(VkImageCreateInfo ici, struct zink_screen *screen,
+ struct zink_resource_object *obj)
{
- struct zink_resource_object *obj = CALLOC_STRUCT(zink_resource_object);
- if (!obj)
- return NULL;
-
- VkMemoryRequirements reqs;
- VkMemoryPropertyFlags flags;
- bool need_dedicated = false;
- VkExternalMemoryHandleTypeFlags export_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
-
- VkExternalMemoryHandleTypeFlags external = 0;
- if (whandle) {
- if (whandle->type == WINSYS_HANDLE_TYPE_FD)
- external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
- else
- unreachable("unknown handle type");
+ if (obj->vkfeats & VK_FORMAT_FEATURE_DISJOINT_BIT)
+ ici.flags |= VK_IMAGE_CREATE_DISJOINT_BIT;
+ VkSamplerYcbcrConversionCreateInfo sycci = {0};
+ sycci.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO;
+ sycci.pNext = NULL;
+ sycci.format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ sycci.ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
+ sycci.ycbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
+ sycci.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+ sycci.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+ sycci.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+ sycci.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+ if (!obj->vkfeats || (obj->vkfeats & VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) {
+ sycci.xChromaOffset = VK_CHROMA_LOCATION_COSITED_EVEN;
+ sycci.yChromaOffset = VK_CHROMA_LOCATION_COSITED_EVEN;
+ } else {
+ assert(obj->vkfeats & VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT);
+ sycci.xChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+ sycci.yChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
}
+ sycci.chromaFilter = VK_FILTER_LINEAR;
+ sycci.forceExplicitReconstruction = VK_FALSE;
+ VkResult res = VKSCR(CreateSamplerYcbcrConversion)(screen->dev, &sycci, NULL, &obj->sampler_conversion);
+ if (res != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateSamplerYcbcrConversion failed");
+ return false;
+ }
+ return true;
+}
- /* TODO: remove linear for wsi */
- bool scanout = templ->bind & PIPE_BIND_SCANOUT;
- bool shared = templ->bind & PIPE_BIND_SHARED;
- if (shared && screen->info.have_EXT_external_memory_dma_buf)
- export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
-
- pipe_reference_init(&obj->reference, 1);
- util_dynarray_init(&obj->tmp, NULL);
- util_dynarray_init(&obj->desc_set_refs.refs, NULL);
- if (templ->target == PIPE_BUFFER) {
- VkBufferCreateInfo bci = create_bci(screen, templ, templ->bind);
+static const VkImageAspectFlags plane_aspects[] = {
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+};
- if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &obj->buffer) != VK_SUCCESS) {
- debug_printf("vkCreateBuffer failed\n");
- goto fail1;
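+/* accumulate memory requirements (per plane for multi-planar images) and report whether a dedicated allocation is preferred or required */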
+static inline bool
+get_image_memory_requirement(struct zink_screen *screen, struct zink_resource_object *obj,
+ unsigned num_planes, VkMemoryRequirements *reqs)
+{
+ bool need_dedicated = false;
+ if (VKSCR(GetImageMemoryRequirements2)) {
+ VkMemoryRequirements2 req2;
+ req2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
+ VkImageMemoryRequirementsInfo2 info2;
+ info2.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
+ info2.pNext = NULL;
+ info2.image = obj->image;
+ VkMemoryDedicatedRequirements ded;
+ ded.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;
+ ded.pNext = NULL;
+ req2.pNext = &ded;
+ VkImagePlaneMemoryRequirementsInfo plane;
+ plane.sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO;
+ plane.pNext = NULL;
+ if (num_planes > 1)
+ info2.pNext = &plane;
+ unsigned offset = 0;
+ for (unsigned i = 0; i < num_planes; i++) {
+ assert(i < ARRAY_SIZE(plane_aspects));
+ plane.planeAspect = plane_aspects[i];
+ VKSCR(GetImageMemoryRequirements2)(screen->dev, &info2, &req2);
+ if (!i)
+ reqs->alignment = req2.memoryRequirements.alignment;
+ obj->plane_offsets[i] = offset;
+ offset += req2.memoryRequirements.size;
+ reqs->size += req2.memoryRequirements.size;
+ reqs->memoryTypeBits |= req2.memoryRequirements.memoryTypeBits;
+ need_dedicated |= ded.prefersDedicatedAllocation || ded.requiresDedicatedAllocation;
}
-
- VKSCR(GetBufferMemoryRequirements)(screen->dev, obj->buffer, &reqs);
- if (templ->usage == PIPE_USAGE_STAGING)
- flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
- else if (templ->usage == PIPE_USAGE_STREAM)
- flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
- else if (templ->usage == PIPE_USAGE_IMMUTABLE)
- flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- else
- flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- obj->is_buffer = true;
- obj->transfer_dst = true;
} else {
- bool winsys_modifier = shared && whandle && whandle->modifier != DRM_FORMAT_MOD_INVALID;
- const uint64_t *ici_modifiers = winsys_modifier ? &whandle->modifier : modifiers;
- unsigned ici_modifier_count = winsys_modifier ? 1 : modifiers_count;
- bool success = false;
- VkImageCreateInfo ici;
- uint64_t mod = create_ici(screen, &ici, templ, !!external, templ->bind, ici_modifier_count, ici_modifiers, &success);
- VkExternalMemoryImageCreateInfo emici;
- VkImageDrmFormatModifierExplicitCreateInfoEXT idfmeci;
- VkImageDrmFormatModifierListCreateInfoEXT idfmlci;
- if (!success)
- goto fail1;
-
- if (shared || external) {
- emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
- emici.pNext = NULL;
- emici.handleTypes = export_types;
- ici.pNext = &emici;
-
- assert(ici.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT || mod != DRM_FORMAT_MOD_INVALID);
- if (winsys_modifier && ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- assert(mod == whandle->modifier);
- idfmeci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT;
- idfmeci.pNext = ici.pNext;
- idfmeci.drmFormatModifier = mod;
-
- /* TODO: store these values from other planes in their
- * respective zink_resource, and walk the next-pointers to
- * build up the planar array here instead.
- */
- assert(util_format_get_num_planes(templ->format) == 1);
- idfmeci.drmFormatModifierPlaneCount = 1;
- VkSubresourceLayout plane_layout = {
- .offset = whandle->offset,
- .size = 0,
- .rowPitch = whandle->stride,
- .arrayPitch = 0,
- .depthPitch = 0,
- };
- idfmeci.pPlaneLayouts = &plane_layout;
-
- ici.pNext = &idfmeci;
- } else if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- idfmlci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
- idfmlci.pNext = ici.pNext;
- idfmlci.drmFormatModifierCount = modifiers_count;
- idfmlci.pDrmFormatModifiers = modifiers;
- ici.pNext = &idfmlci;
- } else if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) {
- // TODO: remove for wsi
- if (!external)
- ici.pNext = NULL;
- scanout = false;
- shared = false;
- }
- }
-
- if (optimal_tiling)
- *optimal_tiling = ici.tiling == VK_IMAGE_TILING_OPTIMAL;
-
- if (ici.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
- obj->transfer_dst = true;
+ VKSCR(GetImageMemoryRequirements)(screen->dev, obj->image, reqs);
+ }
+ return need_dedicated;
+}
- if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- obj->modifier_aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
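+/* format features for the image's tiling; for drm-format-modifier tiling this is the intersection across all supported modifiers (see the spec quote below) */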
+static inline VkFormatFeatureFlags
+get_format_feature_flags(VkImageCreateInfo ici, struct zink_screen *screen, const struct pipe_resource *templ)
+{
+ VkFormatFeatureFlags feats = 0;
+ switch (ici.tiling) {
+ case VK_IMAGE_TILING_LINEAR:
+ feats = screen->format_props[templ->format].linearTilingFeatures;
+ break;
+ case VK_IMAGE_TILING_OPTIMAL:
+ feats = screen->format_props[templ->format].optimalTilingFeatures;
+ break;
+ case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
+ feats = VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM;
+ /*
+ If tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, then the value of
+ imageCreateFormatFeatures is found by calling vkGetPhysicalDeviceFormatProperties2
+ with VkImageFormatProperties::format equal to VkImageCreateInfo::format and with
+ VkDrmFormatModifierPropertiesListEXT chained into VkImageFormatProperties2; by
+ collecting all members of the returned array
+ VkDrmFormatModifierPropertiesListEXT::pDrmFormatModifierProperties
+ whose drmFormatModifier belongs to imageCreateDrmFormatModifiers; and by taking the bitwise
+ intersection, over the collected array members, of drmFormatModifierTilingFeatures.
+ (The resultant imageCreateFormatFeatures may be empty).
+ * -Chapter 12. Resource Creation
+ */
+ for (unsigned i = 0; i < screen->modifier_props[templ->format].drmFormatModifierCount; i++)
+ feats &= screen->modifier_props[templ->format].pDrmFormatModifierProperties[i].drmFormatModifierTilingFeatures;
+ break;
+ default:
+ unreachable("unknown tiling");
+ }
+ return feats;
+}
- struct wsi_image_create_info image_wsi_info = {
- VK_STRUCTURE_TYPE_WSI_IMAGE_CREATE_INFO_MESA,
- NULL,
- .scanout = true,
- };
+#if !defined(_WIN32)
+ #define ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT
+#else
+ #define ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+#endif
- if ((screen->needs_mesa_wsi || screen->needs_mesa_flush_wsi) && scanout) {
- image_wsi_info.pNext = ici.pNext;
- ici.pNext = &image_wsi_info;
- }
- VkResult result = VKSCR(CreateImage)(screen->dev, &ici, NULL, &obj->image);
- if (result != VK_SUCCESS) {
- debug_printf("vkCreateImage failed\n");
- goto fail1;
- }
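+/* allocation parameters collected up front and threaded through the
+ * resource-object creation helpers below
+ */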
+struct mem_alloc_info {
+ struct winsys_handle *whandle;
+ VkMemoryPropertyFlags flags;
+ enum zink_alloc_flag aflags;
+ bool need_dedicated;
+ bool shared;
+ const void *user_mem;
+ VkExternalMemoryHandleTypeFlags external;
+ VkExternalMemoryHandleTypeFlags export_types;
+};
- if (VKSCR(GetImageMemoryRequirements2)) {
- VkMemoryRequirements2 req2;
- req2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
- VkImageMemoryRequirementsInfo2 info2;
- info2.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
- info2.pNext = NULL;
- info2.image = obj->image;
- VkMemoryDedicatedRequirements ded;
- ded.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;
- ded.pNext = NULL;
- req2.pNext = &ded;
- VKSCR(GetImageMemoryRequirements2)(screen->dev, &info2, &req2);
- memcpy(&reqs, &req2.memoryRequirements, sizeof(VkMemoryRequirements));
- need_dedicated = ded.prefersDedicatedAllocation || ded.requiresDedicatedAllocation;
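+/* derive the external-memory handle types to import/export from the bind
+ * flags, an imported winsys handle, or user memory; returns false if an
+ * export is required but no suitable handle type is available
+ */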
+static inline bool
+get_export_flags(struct zink_screen *screen, const struct pipe_resource *templ, struct mem_alloc_info *alloc_info)
+{
+ bool needs_export = (templ->bind & (ZINK_BIND_VIDEO | ZINK_BIND_DMABUF)) != 0;
+ if (alloc_info->whandle) {
+ if (alloc_info->whandle->type == WINSYS_HANDLE_TYPE_FD ||
+ alloc_info->whandle->type == ZINK_EXTERNAL_MEMORY_HANDLE)
+ needs_export |= true;
+ else
+ unreachable("unknown handle type");
+ }
+ if (needs_export) {
+ if (alloc_info->whandle && alloc_info->whandle->type == ZINK_EXTERNAL_MEMORY_HANDLE) {
+ alloc_info->external = ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT;
+ } else if (screen->info.have_EXT_external_memory_dma_buf) {
+ alloc_info->external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ alloc_info->export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
} else {
- VKSCR(GetImageMemoryRequirements)(screen->dev, obj->image, &reqs);
+ return false;
}
- if (templ->usage == PIPE_USAGE_STAGING && ici.tiling == VK_IMAGE_TILING_LINEAR)
- flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
- else
- flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-
- obj->vkflags = ici.flags;
- obj->vkusage = ici.usage;
}
- obj->alignment = reqs.alignment;
+ if (alloc_info->user_mem) {
+ assert(!alloc_info->whandle);
+ alloc_info->external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ alloc_info->export_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ }
+   /* we may export a WINSYS_HANDLE_TYPE_FD handle, which is a dma-buf */
+ if (templ->bind & PIPE_BIND_SHARED && screen->info.have_EXT_external_memory_dma_buf)
+ alloc_info->export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ return true;
+}
- if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT || templ->usage == PIPE_USAGE_DYNAMIC)
- flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
- else if (!(flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
- templ->usage == PIPE_USAGE_STAGING)
- flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
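+/* result codes describing how much cleanup the caller must perform on failure */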
+enum resource_object_create_result {
+ roc_success,
+ roc_success_early_return,
+ roc_fail_and_free_object,
+ roc_fail_and_cleanup_object,
+ roc_fail_and_cleanup_all
+};
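+/* build the VkMemoryAllocateInfo chain (dedicated/export/import info) and
+ * allocate a BO from a heap compatible with the memory requirements,
+ * demoting BAR allocations to other heaps on failure
+ */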
+static inline enum resource_object_create_result
+allocate_bo(struct zink_screen *screen, const struct pipe_resource *templ,
+ VkMemoryRequirements *reqs, struct zink_resource_object *obj,
+ struct mem_alloc_info *alloc_info)
+{
VkMemoryAllocateInfo mai;
- enum zink_alloc_flag aflags = templ->flags & PIPE_RESOURCE_FLAG_SPARSE ? ZINK_ALLOC_SPARSE : 0;
mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
mai.pNext = NULL;
- mai.allocationSize = reqs.size;
- enum zink_heap heap = zink_heap_from_domain_flags(flags, aflags);
- mai.memoryTypeIndex = screen->heap_map[heap];
- if (unlikely(!(reqs.memoryTypeBits & BITFIELD_BIT(mai.memoryTypeIndex)))) {
- /* not valid based on reqs; demote to more compatible type */
- switch (heap) {
- case ZINK_HEAP_DEVICE_LOCAL_VISIBLE:
- heap = ZINK_HEAP_DEVICE_LOCAL;
- break;
- case ZINK_HEAP_HOST_VISIBLE_CACHED:
- heap = ZINK_HEAP_HOST_VISIBLE_COHERENT;
- break;
- default:
- break;
- }
- mai.memoryTypeIndex = screen->heap_map[heap];
- assert(reqs.memoryTypeBits & BITFIELD_BIT(mai.memoryTypeIndex));
+ mai.allocationSize = reqs->size;
+ enum zink_heap heap = zink_heap_from_domain_flags(alloc_info->flags, alloc_info->aflags);
+ if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) {
+ if (!(vk_domain_from_heap(heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+ heap = zink_heap_from_domain_flags(alloc_info->flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, alloc_info->aflags);
}
- VkMemoryType mem_type = screen->info.mem_props.memoryTypes[mai.memoryTypeIndex];
- obj->coherent = mem_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
- if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE))
- obj->host_visible = mem_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
-
VkMemoryDedicatedAllocateInfo ded_alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
.pNext = mai.pNext,
@@ -632,85 +964,595 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
.buffer = VK_NULL_HANDLE,
};
- if (screen->info.have_KHR_dedicated_allocation && need_dedicated) {
+ if (screen->info.have_KHR_dedicated_allocation && alloc_info->need_dedicated) {
ded_alloc_info.pNext = mai.pNext;
mai.pNext = &ded_alloc_info;
}
VkExportMemoryAllocateInfo emai;
- if (templ->bind & PIPE_BIND_SHARED && shared) {
+ if ((templ->bind & ZINK_BIND_VIDEO) || ((templ->bind & PIPE_BIND_SHARED) && alloc_info->shared) || (templ->bind & ZINK_BIND_DMABUF)) {
emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
- emai.handleTypes = export_types;
+ emai.handleTypes = alloc_info->export_types;
emai.pNext = mai.pNext;
mai.pNext = &emai;
+ obj->exportable = true;
}
+#ifdef ZINK_USE_DMABUF
+
+#if !defined(_WIN32)
VkImportMemoryFdInfoKHR imfi = {
VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
NULL,
};
- if (whandle) {
+ if (alloc_info->whandle) {
imfi.pNext = NULL;
- imfi.handleType = external;
- imfi.fd = os_dupfd_cloexec(whandle->handle);
+ imfi.handleType = alloc_info->external;
+ imfi.fd = os_dupfd_cloexec(alloc_info->whandle->handle);
if (imfi.fd < 0) {
mesa_loge("ZINK: failed to dup dmabuf fd: %s\n", strerror(errno));
- goto fail1;
+ return roc_fail_and_cleanup_object;
}
imfi.pNext = mai.pNext;
mai.pNext = &imfi;
}
-
- struct wsi_memory_allocate_info memory_wsi_info = {
- VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA,
+#else
+ VkImportMemoryWin32HandleInfoKHR imfi = {
+ VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR,
NULL,
};
- if (screen->needs_mesa_wsi && scanout) {
- memory_wsi_info.implicit_sync = true;
+ if (alloc_info->whandle) {
+ HANDLE source_target = GetCurrentProcess();
+ HANDLE out_handle;
+
+ bool result = DuplicateHandle(source_target, alloc_info->whandle->handle, source_target, &out_handle, 0, false, DUPLICATE_SAME_ACCESS);
- memory_wsi_info.pNext = mai.pNext;
- mai.pNext = &memory_wsi_info;
+ if (!result || !out_handle) {
+ mesa_loge("ZINK: failed to DuplicateHandle with winerr: %08x\n", (int)GetLastError());
+ return roc_fail_and_cleanup_object;
+ }
+
+ imfi.pNext = NULL;
+ imfi.handleType = alloc_info->external;
+ imfi.handle = out_handle;
+
+ imfi.pNext = mai.pNext;
+ mai.pNext = &imfi;
}
+#endif
- unsigned alignment = MAX2(reqs.alignment, 256);
+#endif
+
+ VkImportMemoryHostPointerInfoEXT imhpi = {
+ VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+ NULL,
+ };
+ if (alloc_info->user_mem) {
+ imhpi.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ imhpi.pHostPointer = (void*)alloc_info->user_mem;
+ imhpi.pNext = mai.pNext;
+ mai.pNext = &imhpi;
+ }
+
+ unsigned alignment = MAX2(reqs->alignment, 256);
if (templ->usage == PIPE_USAGE_STAGING && obj->is_buffer)
alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
obj->alignment = alignment;
- obj->bo = zink_bo(zink_bo_create(screen, reqs.size, alignment, heap, mai.pNext ? ZINK_ALLOC_NO_SUBALLOC : 0, mai.pNext));
- if (!obj->bo)
- goto fail2;
- if (aflags == ZINK_ALLOC_SPARSE) {
+
+ if (zink_mem_type_idx_from_types(screen, heap, reqs->memoryTypeBits) == UINT32_MAX) {
+ /* not valid based on reqs; demote to more compatible type */
+ switch (heap) {
+ case ZINK_HEAP_DEVICE_LOCAL_VISIBLE:
+ heap = ZINK_HEAP_DEVICE_LOCAL;
+ break;
+ case ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED:
+ heap = ZINK_HEAP_HOST_VISIBLE_COHERENT;
+ break;
+ default:
+ break;
+ }
+ assert(zink_mem_type_idx_from_types(screen, heap, reqs->memoryTypeBits) != UINT32_MAX);
+ }
+
+ while (1) {
+ /* iterate over all available memory types to reduce chance of oom */
+ for (unsigned i = 0; !obj->bo && i < screen->heap_count[heap]; i++) {
+ if (!(reqs->memoryTypeBits & BITFIELD_BIT(screen->heap_map[heap][i])))
+ continue;
+
+ mai.memoryTypeIndex = screen->heap_map[heap][i];
+ obj->bo = zink_bo(zink_bo_create(screen, reqs->size, alignment, heap, mai.pNext ? ZINK_ALLOC_NO_SUBALLOC : 0, mai.memoryTypeIndex, mai.pNext));
+ }
+
+ if (obj->bo || heap != ZINK_HEAP_DEVICE_LOCAL_VISIBLE)
+ break;
+
+ /* demote BAR allocations to a different heap on failure to avoid oom */
+ if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT || templ->usage == PIPE_USAGE_DYNAMIC)
+ heap = ZINK_HEAP_HOST_VISIBLE_COHERENT;
+ else
+ heap = ZINK_HEAP_DEVICE_LOCAL;
+   }
+
+ return obj->bo ? roc_success : roc_fail_and_cleanup_object;
+}
+
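+/* translate pipe usage/bind flags into memory property flags and, for user
+ * memory imports, restrict memoryTypeBits to the host pointer's compatible types
+ */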
+static inline bool
+update_alloc_info_flags(struct zink_screen *screen, const struct pipe_resource *templ,
+ VkMemoryRequirements *reqs, struct mem_alloc_info *alloc_info)
+{
+ if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT || templ->usage == PIPE_USAGE_DYNAMIC)
+ alloc_info->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+ else if (!(alloc_info->flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
+ templ->usage == PIPE_USAGE_STAGING)
+ alloc_info->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+ if (templ->bind & ZINK_BIND_TRANSIENT)
+ alloc_info->flags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
+
+ if (alloc_info->user_mem) {
+ VkExternalMemoryHandleTypeFlagBits handle_type = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ VkMemoryHostPointerPropertiesEXT memory_host_pointer_properties = {0};
+ memory_host_pointer_properties.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT;
+ memory_host_pointer_properties.pNext = NULL;
+ VkResult res = VKSCR(GetMemoryHostPointerPropertiesEXT)(screen->dev, handle_type, alloc_info->user_mem, &memory_host_pointer_properties);
+ if (res != VK_SUCCESS) {
+ mesa_loge("ZINK: vkGetMemoryHostPointerPropertiesEXT failed");
+ return false;
+ }
+ reqs->memoryTypeBits &= memory_host_pointer_properties.memoryTypeBits;
+ alloc_info->flags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+
+ alloc_info->aflags = templ->flags & PIPE_RESOURCE_FLAG_SPARSE ? ZINK_ALLOC_SPARSE : 0;
+ return true;
+}
+
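+/* record size/offset and coherent/host-visible properties of the new BO on the object */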
+static inline void
+update_obj_info(struct zink_screen *screen, struct zink_resource_object *obj,
+ const struct pipe_resource *templ, struct mem_alloc_info *alloc_info)
+{
+ if (alloc_info->aflags == ZINK_ALLOC_SPARSE) {
obj->size = templ->width0;
} else {
obj->offset = zink_bo_get_offset(obj->bo);
obj->size = zink_bo_get_size(obj->bo);
}
- if (templ->target == PIPE_BUFFER) {
- if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE))
- if (VKSCR(BindBufferMemory)(screen->dev, obj->buffer, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS)
- goto fail3;
+ obj->coherent = screen->info.mem_props.memoryTypes[obj->bo->base.base.placement].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+ if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) {
+ obj->host_visible = screen->info.mem_props.memoryTypes[obj->bo->base.base.placement].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ }
+}
+
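+/* label the BO in the memory debug report with a short description of the resource */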
+static inline void
+debug_resource_mem(struct zink_resource_object *obj, const struct pipe_resource *templ, struct zink_screen *screen)
+{
+ char buf[4096];
+ unsigned idx = 0;
+ if (obj->is_buffer) {
+ size_t size = (size_t)DIV_ROUND_UP(obj->size, 1024);
+ if (templ->bind == PIPE_BIND_QUERY_BUFFER && templ->usage == PIPE_USAGE_STAGING) //internal qbo
+ idx += snprintf(buf, sizeof(buf), "QBO(%zu)", size);
+ else
+ idx += snprintf(buf, sizeof(buf), "BUF(%zu)", size);
+ } else {
+ idx += snprintf(buf, sizeof(buf), "IMG(%s:%ux%ux%u)", util_format_short_name(templ->format), templ->width0, templ->height0, templ->depth0);
+ }
+ /*
+ zink_vkflags_func flag_func = obj->is_buffer ? (zink_vkflags_func)vk_BufferCreateFlagBits_to_str : (zink_vkflags_func)vk_ImageCreateFlagBits_to_str;
+ zink_vkflags_func usage_func = obj->is_buffer ? (zink_vkflags_func)vk_BufferUsageFlagBits_to_str : (zink_vkflags_func)vk_ImageUsageFlagBits_to_str;
+ if (obj->vkflags) {
+ buf[idx++] = '[';
+ idx += zink_string_vkflags_unroll(&buf[idx], sizeof(buf) - idx, obj->vkflags, flag_func);
+ buf[idx++] = ']';
+ }
+ if (obj->vkusage) {
+ buf[idx++] = '[';
+ idx += zink_string_vkflags_unroll(&buf[idx], sizeof(buf) - idx, obj->vkusage, usage_func);
+ buf[idx++] = ']';
+ }
+ */
+ buf[idx] = 0;
+ obj->bo->name = zink_debug_mem_add(screen, obj->size, buf);
+}
+
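+/* common tail for buffer and image creation: finalize allocation flags,
+ * allocate the BO, and update the object bookkeeping
+ */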
+static inline enum resource_object_create_result
+allocate_bo_and_update_obj(struct zink_screen *screen, const struct pipe_resource *templ,
+ VkMemoryRequirements *reqs, struct zink_resource_object *obj,
+ struct mem_alloc_info *alloc_info)
+{
+ if (!update_alloc_info_flags(screen, templ, reqs, alloc_info))
+ return roc_fail_and_free_object;
+
+ enum resource_object_create_result retval = allocate_bo(screen, templ, reqs, obj, alloc_info);
+ assert(retval != roc_success_early_return);
+ if (retval != roc_success)
+ return retval;
+
+ update_obj_info(screen, obj, templ, alloc_info);
+
+ if (zink_debug & ZINK_DEBUG_MEM)
+ debug_resource_mem(obj, templ, screen);
+ return roc_success;
+}
+
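+/* create the VkBuffer (plus a storage-texel-buffer alias when permitted),
+ * allocate backing memory, and bind it
+ */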
+static inline enum resource_object_create_result
+create_buffer(struct zink_screen *screen, struct zink_resource_object *obj,
+ const struct pipe_resource *templ, uint64_t *modifiers,
+ int modifiers_count, struct mem_alloc_info *alloc_info)
+{
+ VkBufferCreateInfo bci = create_bci(screen, templ, templ->bind);
+ VkExternalMemoryBufferCreateInfo embci;
+ VkMemoryRequirements reqs = {0};
+
+ embci.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
+ if (alloc_info->external) {
+ embci.pNext = bci.pNext;
+ embci.handleTypes = alloc_info->export_types;
+ bci.pNext = &embci;
+ }
+
+ if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &obj->buffer) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateBuffer failed");
+ return roc_fail_and_free_object;
+ }
+
+ if (!(templ->bind & (PIPE_BIND_SHADER_IMAGE | ZINK_BIND_DESCRIPTOR))) {
+ bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+ if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &obj->storage_buffer) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateBuffer failed");
+ VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL);
+ return roc_fail_and_free_object;
+ }
+ }
+
+ if (modifiers_count) {
+ assert(modifiers_count == 3);
+      /* this is the DGC path because there's no other way to pass mem bits and I don't want to copy/paste everything around */
+ reqs.size = modifiers[0];
+ reqs.alignment = modifiers[1];
+ reqs.memoryTypeBits = modifiers[2];
} else {
- if (VKSCR(BindImageMemory)(screen->dev, obj->image, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS)
- goto fail3;
+ VKSCR(GetBufferMemoryRequirements)(screen->dev, obj->buffer, &reqs);
}
- return obj;
-fail3:
- zink_bo_unref(screen, obj->bo);
+ if (templ->usage == PIPE_USAGE_STAGING)
+ alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ else if (templ->usage == PIPE_USAGE_STREAM)
+ alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ else if (templ->usage == PIPE_USAGE_IMMUTABLE)
+ alloc_info->flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ else
+ alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+ obj->is_buffer = true;
+ obj->transfer_dst = true;
+ obj->vkflags = bci.flags;
+ obj->vkusage = bci.usage;
+
+ enum resource_object_create_result retval = allocate_bo_and_update_obj(screen, templ, &reqs, obj, alloc_info);
+ assert(retval != roc_success_early_return);
+ if (retval != roc_success)
+ return retval;
+
+ if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE)) {
+ if (VKSCR(BindBufferMemory)(screen->dev, obj->buffer, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkBindBufferMemory failed");
+         return roc_fail_and_cleanup_all;
+ }
+ if (obj->storage_buffer && VKSCR(BindBufferMemory)(screen->dev, obj->storage_buffer, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkBindBufferMemory failed");
+ return roc_fail_and_cleanup_all;
+ }
+ }
+ return roc_success;
+}
-fail2:
- if (templ->target == PIPE_BUFFER)
- VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL);
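+/* create the VkImage with the appropriate format-list / modifier / external-memory
+ * chains, allocate backing memory, and bind it (per-plane for planar formats)
+ */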
+static inline enum resource_object_create_result
+create_image(struct zink_screen *screen, struct zink_resource_object *obj,
+ const struct pipe_resource *templ, bool *linear,
+ uint64_t *modifiers, int modifiers_count,
+ struct mem_alloc_info *alloc_info)
+{
+ VkMemoryRequirements reqs = {0};
+ bool winsys_modifier = (alloc_info->export_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) &&
+ alloc_info->whandle &&
+ alloc_info->whandle->modifier != DRM_FORMAT_MOD_INVALID;
+ uint64_t *ici_modifiers = winsys_modifier ? &alloc_info->whandle->modifier : modifiers;
+ unsigned ici_modifier_count = winsys_modifier ? 1 : modifiers_count;
+ VkImageCreateInfo ici;
+ enum pipe_format srgb = PIPE_FORMAT_NONE;
+ /* we often need to be able to mutate between srgb and linear, but we don't need general
+ * image view/shader image format compatibility (that path means losing fast clears or compression on some hardware).
+ */
+ if (!(templ->bind & ZINK_BIND_MUTABLE)) {
+ srgb = util_format_is_srgb(templ->format) ? util_format_linear(templ->format) : util_format_srgb(templ->format);
+ /* why do these helpers have different default return values? */
+ if (srgb == templ->format)
+ srgb = PIPE_FORMAT_NONE;
+ }
+ VkFormat formats[2];
+ VkImageFormatListCreateInfo format_list;
+ if (srgb) {
+ formats[0] = zink_get_format(screen, templ->format);
+ formats[1] = zink_get_format(screen, srgb);
+ /* only use format list if both formats have supported vk equivalents */
+ if (formats[0] && formats[1]) {
+ format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO;
+ format_list.pNext = NULL;
+ format_list.viewFormatCount = 2;
+ format_list.pViewFormats = formats;
+ ici.pNext = &format_list;
+ } else {
+ ici.pNext = NULL;
+ }
+ } else {
+ ici.pNext = NULL;
+ }
+ init_ici(screen, &ici, templ, templ->bind, ici_modifier_count);
+
+ bool success = false;
+ uint64_t mod = eval_ici(screen, &ici, templ, templ->bind, ici_modifier_count, ici_modifiers, &success);
+ if (ici.format == VK_FORMAT_A8_UNORM_KHR && !success) {
+ ici.format = zink_get_format(screen, zink_format_get_emulated_alpha(templ->format));
+ mod = eval_ici(screen, &ici, templ, templ->bind, ici_modifier_count, ici_modifiers, &success);
+ }
+ if (!success)
+ return roc_fail_and_free_object;
+
+ if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && srgb &&
+ util_format_get_nr_components(srgb) == 4 &&
+ !(ici.flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) {
+ mesa_loge("zink: refusing to create possibly-srgb dmabuf due to missing driver support: %s not supported!", util_format_name(srgb));
+ return roc_fail_and_free_object;
+ }
+ VkExternalMemoryImageCreateInfo emici;
+ VkImageDrmFormatModifierExplicitCreateInfoEXT idfmeci;
+ VkImageDrmFormatModifierListCreateInfoEXT idfmlci;
+ VkSubresourceLayout plane_layouts[4];
+ VkSubresourceLayout plane_layout = {
+ .offset = alloc_info->whandle ? alloc_info->whandle->offset : 0,
+ .size = 0,
+ .rowPitch = alloc_info->whandle ? alloc_info->whandle->stride : 0,
+ .arrayPitch = 0,
+ .depthPitch = 0,
+ };
+
+ obj->render_target = (ici.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) != 0;
+
+ if (alloc_info->shared || alloc_info->external) {
+ emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
+ emici.pNext = ici.pNext;
+ emici.handleTypes = alloc_info->export_types;
+ ici.pNext = &emici;
+
+ assert(ici.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT || mod != DRM_FORMAT_MOD_INVALID);
+ if (alloc_info->whandle && ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ assert(mod == alloc_info->whandle->modifier || !winsys_modifier);
+ idfmeci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT;
+ idfmeci.pNext = ici.pNext;
+ idfmeci.drmFormatModifier = mod;
+ idfmeci.drmFormatModifierPlaneCount = obj->plane_count;
+
+ plane_layouts[0] = plane_layout;
+ struct pipe_resource *pnext = templ->next;
+ for (unsigned i = 1; i < obj->plane_count; i++, pnext = pnext->next) {
+ struct zink_resource *next = zink_resource(pnext);
+ obj->plane_offsets[i] = plane_layouts[i].offset = next->obj->plane_offsets[i];
+ obj->plane_strides[i] = plane_layouts[i].rowPitch = next->obj->plane_strides[i];
+ plane_layouts[i].size = 0;
+ plane_layouts[i].arrayPitch = 0;
+ plane_layouts[i].depthPitch = 0;
+ }
+ idfmeci.pPlaneLayouts = plane_layouts;
+
+ ici.pNext = &idfmeci;
+ } else if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ idfmlci.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
+ idfmlci.pNext = ici.pNext;
+ idfmlci.drmFormatModifierCount = modifiers_count;
+ idfmlci.pDrmFormatModifiers = modifiers;
+ ici.pNext = &idfmlci;
+ } else if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) {
+ alloc_info->shared = false;
+ }
+ } else if (alloc_info->user_mem) {
+ emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
+ emici.pNext = ici.pNext;
+ emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ ici.pNext = &emici;
+ }
+
+ if (linear)
+ *linear = ici.tiling == VK_IMAGE_TILING_LINEAR;
+
+ if (ici.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
+ obj->transfer_dst = true;
+
+#if defined(ZINK_USE_DMABUF) && !defined(_WIN32)
+ if (obj->is_aux) {
+ obj->modifier = mod;
+ obj->modifier_aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT << alloc_info->whandle->plane;
+ obj->plane_offsets[alloc_info->whandle->plane] = alloc_info->whandle->offset;
+ obj->plane_strides[alloc_info->whandle->plane] = alloc_info->whandle->stride;
+ obj->handle = os_dupfd_cloexec(alloc_info->whandle->handle);
+ if (obj->handle < 0) {
+ mesa_loge("ZINK: failed to dup dmabuf fd: %s\n", strerror(errno));
+ return roc_fail_and_free_object;
+ }
+ return roc_success_early_return;
+ }
+#endif
+
+   obj->vkfeats = get_format_feature_flags(ici, screen, templ);
+ if (util_format_is_yuv(templ->format)) {
+ if (!create_sampler_conversion(ici, screen, obj))
+ return roc_fail_and_free_object;
+ } else if (alloc_info->whandle) {
+ obj->plane_strides[alloc_info->whandle->plane] = alloc_info->whandle->stride;
+ }
+
+ VkResult result = VKSCR(CreateImage)(screen->dev, &ici, NULL, &obj->image);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateImage failed (%s)", vk_Result_to_str(result));
+ return roc_fail_and_free_object;
+ }
+
+ if (ici.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ VkImageDrmFormatModifierPropertiesEXT modprops = {0};
+ modprops.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT;
+ result = VKSCR(GetImageDrmFormatModifierPropertiesEXT)(screen->dev, obj->image, &modprops);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkGetImageDrmFormatModifierPropertiesEXT failed");
+ return roc_fail_and_free_object;
+ }
+ obj->modifier = modprops.drmFormatModifier;
+ unsigned num_dmabuf_planes = screen->base.get_dmabuf_modifier_planes(&screen->base, obj->modifier, templ->format);
+ obj->modifier_aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
+ if (num_dmabuf_planes > 1)
+ obj->modifier_aspect |= VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT;
+ if (num_dmabuf_planes > 2)
+ obj->modifier_aspect |= VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
+ if (num_dmabuf_planes > 3)
+ obj->modifier_aspect |= VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT;
+ assert(num_dmabuf_planes <= 4);
+ }
+
+ unsigned num_planes = util_format_get_num_planes(templ->format);
+ alloc_info->need_dedicated = get_image_memory_requirement(screen, obj, num_planes, &reqs);
+ if (templ->usage == PIPE_USAGE_STAGING && ici.tiling == VK_IMAGE_TILING_LINEAR)
+ alloc_info->flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
else
- VKSCR(DestroyImage)(screen->dev, obj->image, NULL);
-fail1:
- FREE(obj);
- return NULL;
+ alloc_info->flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+ obj->vkflags = ici.flags;
+ obj->vkusage = ici.usage;
+
+ enum resource_object_create_result retval = allocate_bo_and_update_obj(screen, templ, &reqs, obj, alloc_info);
+ assert(retval != roc_success_early_return);
+ if (retval != roc_success)
+ return retval;
+
+ if (num_planes > 1) {
+ VkBindImageMemoryInfo infos[3];
+ VkBindImagePlaneMemoryInfo planes[3];
+ for (unsigned i = 0; i < num_planes; i++) {
+ infos[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ infos[i].image = obj->image;
+ infos[i].memory = zink_bo_get_mem(obj->bo);
+ infos[i].memoryOffset = obj->plane_offsets[i];
+ if (templ->bind & ZINK_BIND_VIDEO) {
+ infos[i].pNext = &planes[i];
+ planes[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
+ planes[i].pNext = NULL;
+ planes[i].planeAspect = plane_aspects[i];
+ }
+ }
+ if (VKSCR(BindImageMemory2)(screen->dev, num_planes, infos) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkBindImageMemory2 failed");
+ return roc_fail_and_cleanup_all;
+ }
+ } else {
+ if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE))
+ if (VKSCR(BindImageMemory)(screen->dev, obj->image, zink_bo_get_mem(obj->bo), obj->offset) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkBindImageMemory failed");
+ return roc_fail_and_cleanup_all;
+ }
+ }
+
+ return roc_success;
+}
+
+static struct zink_resource_object *
+resource_object_create(struct zink_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle, bool *linear,
+ uint64_t *modifiers, int modifiers_count, const void *loader_private, const void *user_mem)
+{
+ struct zink_resource_object *obj = CALLOC_STRUCT(zink_resource_object);
+ unsigned max_level = 0;
+ if (!obj)
+ return NULL;
+ simple_mtx_init(&obj->view_lock, mtx_plain);
+ util_dynarray_init(&obj->views, NULL);
+ u_rwlock_init(&obj->copy_lock);
+ obj->unordered_read = true;
+ obj->unordered_write = true;
+ obj->unsync_access = true;
+ obj->last_dt_idx = obj->dt_idx = UINT32_MAX; //TODO: unionize
+
+ struct mem_alloc_info alloc_info = {
+ .whandle = whandle,
+ .need_dedicated = false,
+ .export_types = ZINK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_BIT,
+ .shared = templ->bind & PIPE_BIND_SHARED,
+ .user_mem = user_mem
+ };
+
+ /* figure out aux plane count */
+ if (whandle && whandle->plane >= util_format_get_num_planes(whandle->format))
+ obj->is_aux = true;
+ struct pipe_resource *pnext = templ->next;
+ for (obj->plane_count = 1; pnext; obj->plane_count++, pnext = pnext->next) {
+ struct zink_resource *next = zink_resource(pnext);
+ if (!next->obj->is_aux)
+ break;
+ }
+
+ if (!get_export_flags(screen, templ, &alloc_info)) {
+ /* can't export anything, fail early */
+ return NULL;
+ }
+
+ pipe_reference_init(&obj->reference, 1);
+ if (loader_private) {
+ obj->bo = CALLOC_STRUCT(zink_bo);
+ if (!obj->bo) {
+ mesa_loge("ZINK: failed to allocate obj->bo!");
+ return NULL;
+ }
+
+ obj->transfer_dst = true;
+ return obj;
+ }
+
+ enum resource_object_create_result create_result;
+ if (templ->target == PIPE_BUFFER) {
+ max_level = 1;
+ create_result = create_buffer(screen, obj, templ, modifiers, modifiers_count, &alloc_info);
+ } else {
+ max_level = templ->last_level + 1;
+ create_result = create_image(screen, obj, templ, linear, modifiers, modifiers_count,
+ &alloc_info);
+ }
+
+ switch (create_result) {
+ case roc_success:
+ for (unsigned i = 0; i < max_level; i++)
+ util_dynarray_init(&obj->copies[i], NULL);
+ FALLTHROUGH;
+ case roc_success_early_return:
+ return obj;
+
+ case roc_fail_and_cleanup_all:
+ zink_bo_unref(screen, obj->bo);
+ FALLTHROUGH;
+ case roc_fail_and_cleanup_object:
+ if (templ->target == PIPE_BUFFER) {
+ VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL);
+ VKSCR(DestroyBuffer)(screen->dev, obj->storage_buffer, NULL);
+ } else
+ VKSCR(DestroyImage)(screen->dev, obj->image, NULL);
+ FALLTHROUGH;
+ case roc_fail_and_free_object:
+ FREE(obj);
+ return NULL;
+ default:
+ unreachable("Invalid create object result code");
+ }
}
static struct pipe_resource *
@@ -718,53 +1560,52 @@ resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *templ,
struct winsys_handle *whandle,
unsigned external_usage,
- const uint64_t *modifiers, int modifiers_count)
+ const uint64_t *modifiers, int modifiers_count,
+ const void *loader_private, const void *user_mem)
{
struct zink_screen *screen = zink_screen(pscreen);
- struct zink_resource *res = CALLOC_STRUCT(zink_resource);
+ struct zink_resource *res = CALLOC_STRUCT_CL(zink_resource);
+
+ if (!res) {
+ mesa_loge("ZINK: failed to allocate res!");
+ return NULL;
+ }
- if (modifiers_count > 0) {
+ if (modifiers_count > 0 && screen->info.have_EXT_image_drm_format_modifier) {
/* for rebinds */
res->modifiers_count = modifiers_count;
res->modifiers = mem_dup(modifiers, modifiers_count * sizeof(uint64_t));
if (!res->modifiers) {
- FREE(res);
+ FREE_CL(res);
return NULL;
}
- /* TODO: remove this when multi-plane modifiers are supported */
- const struct zink_modifier_prop *prop = &screen->modifier_props[templ->format];
- for (unsigned i = 0; i < modifiers_count; i++) {
- for (unsigned j = 0; j < prop->drmFormatModifierCount; j++) {
- if (prop->pDrmFormatModifierProperties[j].drmFormatModifier == modifiers[i]) {
- if (prop->pDrmFormatModifierProperties[j].drmFormatModifierPlaneCount != 1)
- res->modifiers[i] = DRM_FORMAT_MOD_INVALID;
- break;
- }
- }
- }
}
res->base.b = *templ;
- threaded_resource_init(&res->base.b);
+ bool allow_cpu_storage = (templ->target == PIPE_BUFFER) &&
+ (templ->width0 < 0x1000);
+ threaded_resource_init(&res->base.b, allow_cpu_storage);
pipe_reference_init(&res->base.b.reference, 1);
res->base.b.screen = pscreen;
- bool optimal_tiling = false;
+ bool linear = false;
struct pipe_resource templ2 = *templ;
- unsigned scanout_flags = templ->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
- if (!(templ->bind & PIPE_BIND_LINEAR))
- templ2.bind &= ~scanout_flags;
- res->obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling, NULL, 0);
+ if (templ2.flags & PIPE_RESOURCE_FLAG_SPARSE &&
+ (util_res_sample_count(templ) == 1 || screen->info.feats.features.shaderStorageImageMultisample))
+ templ2.bind |= PIPE_BIND_SHADER_IMAGE;
+ res->obj = resource_object_create(screen, &templ2, whandle, &linear, res->modifiers, res->modifiers_count, loader_private, user_mem);
if (!res->obj) {
free(res->modifiers);
- FREE(res);
+ FREE_CL(res);
return NULL;
}
+ res->queue = VK_QUEUE_FAMILY_IGNORED;
res->internal_format = templ->format;
if (templ->target == PIPE_BUFFER) {
util_range_init(&res->valid_buffer_range);
+ res->base.b.bind |= PIPE_BIND_SHADER_IMAGE;
if (!screen->resizable_bar && templ->width0 >= 8196) {
/* We don't want to evict buffers from VRAM by mapping them for CPU access,
* because they might never be moved back again. If a buffer is large enough,
@@ -775,39 +1616,90 @@ resource_create(struct pipe_screen *pscreen,
*/
res->base.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY;
}
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB || zink_debug & ZINK_DEBUG_DGC)
+ zink_resource_get_address(screen, res);
} else {
+ if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+ res->base.b.bind |= PIPE_BIND_SHADER_IMAGE;
+ if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) {
+ uint32_t count = 1;
+ VKSCR(GetImageSparseMemoryRequirements)(screen->dev, res->obj->image, &count, &res->sparse);
+ res->base.b.nr_sparse_levels = res->sparse.imageMipTailFirstLod;
+ }
res->format = zink_get_format(screen, templ->format);
- res->dmabuf_acquire = whandle && whandle->type == WINSYS_HANDLE_TYPE_FD;
- res->layout = res->dmabuf_acquire ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED;
- res->optimal_tiling = optimal_tiling;
+ if (templ->target == PIPE_TEXTURE_1D || templ->target == PIPE_TEXTURE_1D_ARRAY) {
+ res->need_2D = (screen->need_2D_zs && util_format_is_depth_or_stencil(templ->format)) ||
+ (screen->need_2D_sparse && (templ->flags & PIPE_RESOURCE_FLAG_SPARSE));
+ }
+ res->dmabuf = whandle && whandle->type == WINSYS_HANDLE_TYPE_FD;
+ if (res->dmabuf)
+ res->queue = VK_QUEUE_FAMILY_FOREIGN_EXT;
+ res->layout = res->dmabuf ? VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED;
+ res->linear = linear;
res->aspect = aspect_from_format(templ->format);
- if (scanout_flags && optimal_tiling) {
- // TODO: remove for wsi
- templ2 = res->base.b;
- templ2.bind = scanout_flags | PIPE_BIND_LINEAR;
- res->scanout_obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling, res->modifiers, res->modifiers_count);
- assert(!optimal_tiling);
+ }
+
+ if (loader_private) {
+ if (templ->bind & PIPE_BIND_DISPLAY_TARGET) {
+ /* backbuffer */
+ res->obj->dt = zink_kopper_displaytarget_create(screen,
+ res->base.b.bind,
+ res->base.b.format,
+ templ->width0,
+ templ->height0,
+ 64, loader_private,
+ &res->dt_stride);
+ if (!res->obj->dt) {
+ mesa_loge("zink: could not create swapchain");
+ FREE(res->obj);
+ free(res->modifiers);
+ FREE_CL(res);
+ return NULL;
+ }
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (cdt->swapchain->num_acquires) {
+ /* this should be a reused swapchain after a MakeCurrent dance that deleted the original resource */
+ for (unsigned i = 0; i < cdt->swapchain->num_images; i++) {
+ if (!cdt->swapchain->images[i].acquired)
+ continue;
+ res->obj->dt_idx = i;
+ res->obj->image = cdt->swapchain->images[i].image;
+ res->layout = cdt->swapchain->images[i].layout;
+ }
+ }
+ } else {
+ /* frontbuffer */
+ struct zink_resource *back = (void*)loader_private;
+ struct kopper_displaytarget *cdt = back->obj->dt;
+ cdt->refcount++;
+ assert(back->obj->dt);
+ res->obj->dt = back->obj->dt;
}
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (zink_kopper_has_srgb(cdt))
+ res->obj->vkflags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ if (cdt->swapchain->scci.flags == VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR)
+ res->obj->vkflags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
+ res->obj->vkusage = cdt->swapchain->scci.imageUsage;
+ res->base.b.bind |= PIPE_BIND_DISPLAY_TARGET;
+ res->linear = false;
+ res->swapchain = true;
}
- if (screen->winsys && (templ->bind & PIPE_BIND_DISPLAY_TARGET)) {
- struct sw_winsys *winsys = screen->winsys;
- res->dt = winsys->displaytarget_create(screen->winsys,
- res->base.b.bind,
- res->base.b.format,
- templ->width0,
- templ->height0,
- 64, NULL,
- &res->dt_stride);
+ if (!res->obj->host_visible) {
+ res->base.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY;
+ res->base.allow_cpu_storage = false;
}
if (res->obj->is_buffer) {
res->base.buffer_id_unique = util_idalloc_mt_alloc(&screen->buffer_ids);
- _mesa_hash_table_init(&res->bufferview_cache, screen, NULL, equals_bvci);
+ _mesa_hash_table_init(&res->bufferview_cache, NULL, NULL, equals_bvci);
simple_mtx_init(&res->bufferview_mtx, mtx_plain);
} else {
- _mesa_hash_table_init(&res->surface_cache, screen, NULL, equals_ivci);
+ _mesa_hash_table_init(&res->surface_cache, NULL, NULL, equals_ivci);
simple_mtx_init(&res->surface_mtx, mtx_plain);
}
+ if (res->obj->exportable)
+ res->base.b.bind |= ZINK_BIND_DMABUF;
return &res->base.b;
}
@@ -815,14 +1707,68 @@ static struct pipe_resource *
zink_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
{
- return resource_create(pscreen, templ, NULL, 0, NULL, 0);
+ return resource_create(pscreen, templ, NULL, 0, NULL, 0, NULL, NULL);
}
static struct pipe_resource *
zink_resource_create_with_modifiers(struct pipe_screen *pscreen, const struct pipe_resource *templ,
const uint64_t *modifiers, int modifiers_count)
{
- return resource_create(pscreen, templ, NULL, 0, modifiers, modifiers_count);
+ return resource_create(pscreen, templ, NULL, 0, modifiers, modifiers_count, NULL, NULL);
+}
+
+static struct pipe_resource *
+zink_resource_create_drawable(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ const void *loader_private)
+{
+ return resource_create(pscreen, templ, NULL, 0, NULL, 0, loader_private, NULL);
+}
+
+static bool
+add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned bind)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ assert((res->base.b.bind & bind) == 0);
+ res->base.b.bind |= bind;
+ struct zink_resource_object *old_obj = res->obj;
+ if (bind & ZINK_BIND_DMABUF && !res->modifiers_count && screen->info.have_EXT_image_drm_format_modifier) {
+ res->modifiers_count = 1;
+ res->modifiers = malloc(res->modifiers_count * sizeof(uint64_t));
+ if (!res->modifiers) {
+ mesa_loge("ZINK: failed to allocate res->modifiers!");
+ return false;
+ }
+
+ res->modifiers[0] = DRM_FORMAT_MOD_LINEAR;
+ }
+ struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->linear, res->modifiers, res->modifiers_count, NULL, NULL);
+ if (!new_obj) {
+ debug_printf("new backing resource alloc failed!\n");
+ res->base.b.bind &= ~bind;
+ return false;
+ }
+ struct zink_resource staging = *res;
+ staging.obj = old_obj;
+ staging.all_binds = 0;
+ res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ res->obj = new_obj;
+ res->queue = VK_QUEUE_FAMILY_IGNORED;
+ for (unsigned i = 0; i <= res->base.b.last_level; i++) {
+ struct pipe_box box;
+ u_box_3d(0, 0, 0,
+ u_minify(res->base.b.width0, i),
+ u_minify(res->base.b.height0, i), res->base.b.array_size, &box);
+ box.depth = util_num_layers(&res->base.b, i);
+ ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box);
+ }
+ if (old_obj->exportable) {
+ simple_mtx_lock(&ctx->batch.state->exportable_lock);
+ _mesa_set_remove_key(&ctx->batch.state->dmabuf_exports, &staging);
+ simple_mtx_unlock(&ctx->batch.state->exportable_lock);
+ }
+ zink_resource_object_reference(screen, &old_obj, NULL);
+ return true;
}
static bool
@@ -837,14 +1783,37 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
{
struct zink_screen *screen = zink_screen(pscreen);
struct zink_resource *res = zink_resource(pres);
- //TODO: remove for wsi
- struct zink_resource_object *obj = res->scanout_obj ? res->scanout_obj : res->obj;
- VkImageAspectFlags aspect = obj->modifier_aspect ? obj->modifier_aspect : res->aspect;
+ struct zink_resource_object *obj = res->obj;
struct winsys_handle whandle;
+ VkImageAspectFlags aspect;
+ if (obj->modifier_aspect) {
+ switch (plane) {
+ case 0:
+ aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
+ break;
+ case 1:
+ aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT;
+ break;
+ case 2:
+ aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
+ break;
+ case 3:
+ aspect = VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT;
+ break;
+ default:
+ unreachable("how many planes you got in this thing?");
+ }
+ } else if (res->obj->sampler_conversion) {
+ aspect = VK_IMAGE_ASPECT_PLANE_0_BIT;
+ } else {
+ aspect = res->aspect;
+ }
switch (param) {
case PIPE_RESOURCE_PARAM_NPLANES:
- /* not yet implemented */
- *value = 1;
+ if (screen->info.have_EXT_image_drm_format_modifier)
+ *value = screen->base.get_dmabuf_modifier_planes(&screen->base, obj->modifier, res->internal_format);
+ else
+ *value = 1;
break;
case PIPE_RESOURCE_PARAM_STRIDE: {
@@ -872,16 +1841,7 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
}
case PIPE_RESOURCE_PARAM_MODIFIER: {
- *value = DRM_FORMAT_MOD_INVALID;
- if (!screen->info.have_EXT_image_drm_format_modifier)
- return false;
- if (!res->modifiers)
- return false;
- VkImageDrmFormatModifierPropertiesEXT prop;
- prop.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT;
- prop.pNext = NULL;
- if (VKSCR(GetImageDrmFormatModifierPropertiesEXT)(screen->dev, obj->image, &prop) == VK_SUCCESS)
- *value = prop.drmFormatModifier;
+ *value = obj->modifier;
break;
}
@@ -900,13 +1860,15 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
break;
}
- case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
+ return false;
case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS:
+ case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD: {
+#ifdef ZINK_USE_DMABUF
memset(&whandle, 0, sizeof(whandle));
if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED)
whandle.type = WINSYS_HANDLE_TYPE_SHARED;
- else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
+ if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
whandle.type = WINSYS_HANDLE_TYPE_KMS;
else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD)
whandle.type = WINSYS_HANDLE_TYPE_FD;
@@ -914,8 +1876,16 @@ zink_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
if (!pscreen->resource_get_handle(pscreen, pctx, pres, &whandle, handle_usage))
return false;
+#ifdef _WIN32
+ *value = (uintptr_t)whandle.handle;
+#else
*value = whandle.handle;
+#endif
break;
+#else
+ (void)whandle;
+ return false;
+#endif
}
}
return true;
@@ -928,34 +1898,76 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
unsigned usage)
{
+ if (tex->target == PIPE_BUFFER)
+ tc_buffer_disable_cpu_storage(tex);
if (whandle->type == WINSYS_HANDLE_TYPE_FD || whandle->type == WINSYS_HANDLE_TYPE_KMS) {
#ifdef ZINK_USE_DMABUF
struct zink_resource *res = zink_resource(tex);
struct zink_screen *screen = zink_screen(pscreen);
- //TODO: remove for wsi
- struct zink_resource_object *obj = res->scanout_obj ? res->scanout_obj : res->obj;
+ struct zink_resource_object *obj = res->obj;
+
+#if !defined(_WIN32)
+ if (whandle->type == WINSYS_HANDLE_TYPE_KMS && screen->drm_fd == -1) {
+ whandle->handle = -1;
+ } else {
+ if (!res->obj->exportable) {
+ assert(!zink_resource_usage_is_unflushed(res));
+ if (!screen->info.have_EXT_image_drm_format_modifier) {
+ static bool warned = false;
+ warn_missing_feature(warned, "EXT_image_drm_format_modifier");
+ return false;
+ }
+ unsigned bind = ZINK_BIND_DMABUF;
+ if (!(res->base.b.bind & PIPE_BIND_SHARED))
+ bind |= PIPE_BIND_SHARED;
+ zink_screen_lock_context(screen);
+ if (!add_resource_bind(screen->copy_context, res, bind)) {
+ zink_screen_unlock_context(screen);
+ return false;
+ }
+ if (res->all_binds)
+ p_atomic_inc(&screen->image_rebind_counter);
+ screen->copy_context->base.flush(&screen->copy_context->base, NULL, 0);
+ zink_screen_unlock_context(screen);
+ obj = res->obj;
+ }
- VkMemoryGetFdInfoKHR fd_info = {0};
- int fd;
- fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
+ VkMemoryGetFdInfoKHR fd_info = {0};
+ int fd;
+ fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
+ fd_info.memory = zink_bo_get_mem(obj->bo);
+ if (whandle->type == WINSYS_HANDLE_TYPE_FD)
+ fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ else
+ fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
+ VkResult result = VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkGetMemoryFdKHR failed");
+ return false;
+ }
+ if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
+ uint32_t h;
+ bool ret = zink_bo_get_kms_handle(screen, obj->bo, fd, &h);
+ close(fd);
+ if (!ret)
+ return false;
+ fd = h;
+ }
+
+ whandle->handle = fd;
+ }
+#else
+ VkMemoryGetWin32HandleInfoKHR handle_info = {0};
+ HANDLE handle;
+ handle_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
//TODO: remove for wsi
- fd_info.memory = zink_bo_get_mem(obj->bo);
- if (whandle->type == WINSYS_HANDLE_TYPE_FD)
- fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
- else
- fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
- VkResult result = VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd);
+ handle_info.memory = zink_bo_get_mem(obj->bo);
+ handle_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
+ VkResult result = VKSCR(GetMemoryWin32HandleKHR)(screen->dev, &handle_info, &handle);
if (result != VK_SUCCESS)
return false;
- if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
- uint32_t h;
- bool success = drmPrimeFDToHandle(screen->drm_fd, fd, &h) == 0;
- close(fd);
- if (!success)
- return false;
- fd = h;
- }
- whandle->handle = fd;
+ whandle->handle = handle;
+#endif
uint64_t value;
zink_resource_get_param(pscreen, context, tex, 0, 0, 0, PIPE_RESOURCE_PARAM_MODIFIER, 0, &value);
whandle->modifier = value;
@@ -981,23 +1993,118 @@ zink_resource_from_handle(struct pipe_screen *pscreen,
!zink_screen(pscreen)->info.have_EXT_image_drm_format_modifier)
return NULL;
- /* ignore any AUX planes, as well as planar formats */
- if (templ->format == PIPE_FORMAT_NONE ||
- util_format_get_num_planes(templ->format) != 1)
- return NULL;
+ struct pipe_resource templ2 = *templ;
+ if (templ->format == PIPE_FORMAT_NONE)
+ templ2.format = whandle->format;
- uint64_t modifier = DRM_FORMAT_MOD_INVALID;
- int modifier_count = 0;
- if (whandle->modifier != DRM_FORMAT_MOD_INVALID) {
+ uint64_t modifier = DRM_FORMAT_MOD_LINEAR;
+ int modifier_count = 1;
+ if (whandle->modifier != DRM_FORMAT_MOD_INVALID)
modifier = whandle->modifier;
- modifier_count = 1;
+ else {
+ if (!zink_screen(pscreen)->driver_workarounds.can_do_invalid_linear_modifier) {
+ mesa_loge("zink: display server doesn't support DRI3 modifiers and driver can't handle INVALID<->LINEAR!");
+ return NULL;
+ }
+ whandle->modifier = modifier;
+ }
+ templ2.bind |= ZINK_BIND_DMABUF;
+ struct pipe_resource *pres = resource_create(pscreen, &templ2, whandle, usage, &modifier, modifier_count, NULL, NULL);
+ if (pres) {
+ struct zink_resource *res = zink_resource(pres);
+ if (pres->target != PIPE_BUFFER)
+ res->valid = true;
+ else
+ tc_buffer_disable_cpu_storage(pres);
+ res->internal_format = whandle->format;
}
- return resource_create(pscreen, templ, whandle, usage, &modifier, modifier_count);
+ return pres;
#else
return NULL;
#endif
}
+static struct pipe_resource *
+zink_resource_from_user_memory(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ void *user_memory)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ VkDeviceSize alignMask = screen->info.ext_host_mem_props.minImportedHostPointerAlignment - 1;
+
+ /* Validate the user_memory pointer and fail early.
+ * minImportedHostPointerAlignment is required to be POT */
+ if (((uintptr_t)user_memory) & alignMask)
+ return NULL;
+
+ return resource_create(pscreen, templ, NULL, 0, NULL, 0, NULL, user_memory);
+}
+
+struct zink_memory_object {
+ struct pipe_memory_object b;
+ struct winsys_handle whandle;
+};
+
+static struct pipe_memory_object *
+zink_memobj_create_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle, bool dedicated)
+{
+ struct zink_memory_object *memobj = CALLOC_STRUCT(zink_memory_object);
+ if (!memobj)
+ return NULL;
+ memcpy(&memobj->whandle, whandle, sizeof(struct winsys_handle));
+ memobj->whandle.type = ZINK_EXTERNAL_MEMORY_HANDLE;
+
+#ifdef ZINK_USE_DMABUF
+
+#if !defined(_WIN32)
+ memobj->whandle.handle = os_dupfd_cloexec(whandle->handle);
+#else
+ HANDLE source_target = GetCurrentProcess();
+ HANDLE out_handle;
+
+ DuplicateHandle(source_target, whandle->handle, source_target, &out_handle, 0, false, DUPLICATE_SAME_ACCESS);
+ memobj->whandle.handle = out_handle;
+
+#endif /* _WIN32 */
+#endif /* ZINK_USE_DMABUF */
+
+ return (struct pipe_memory_object *)memobj;
+}
+
+static void
+zink_memobj_destroy(struct pipe_screen *pscreen, struct pipe_memory_object *pmemobj)
+{
+#ifdef ZINK_USE_DMABUF
+ struct zink_memory_object *memobj = (struct zink_memory_object *)pmemobj;
+
+#if !defined(_WIN32)
+ close(memobj->whandle.handle);
+#else
+ CloseHandle(memobj->whandle.handle);
+#endif /* _WIN32 */
+#endif /* ZINK_USE_DMABUF */
+
+ FREE(pmemobj);
+}
+
+static struct pipe_resource *
+zink_resource_from_memobj(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *pmemobj,
+ uint64_t offset)
+{
+ struct zink_memory_object *memobj = (struct zink_memory_object *)pmemobj;
+
+ struct pipe_resource *pres = resource_create(pscreen, templ, &memobj->whandle, 0, NULL, 0, NULL, NULL);
+ if (pres) {
+ if (pres->target != PIPE_BUFFER)
+ zink_resource(pres)->valid = true;
+ else
+ tc_buffer_disable_cpu_storage(pres);
+ }
+ return pres;
+}
+
static bool
invalidate_buffer(struct zink_context *ctx, struct zink_resource *res)
{
@@ -1008,7 +2115,10 @@ invalidate_buffer(struct zink_context *ctx, struct zink_resource *res)
if (res->base.b.flags & PIPE_RESOURCE_FLAG_SPARSE)
return false;
- if (res->valid_buffer_range.start > res->valid_buffer_range.end)
+ struct pipe_box box;
+ u_box_3d(0, 0, 0, res->base.b.width0, 0, 0, &box);
+ if (res->valid_buffer_range.start > res->valid_buffer_range.end &&
+ !zink_resource_copy_box_intersects(res, 0, &box))
return false;
if (res->so_valid)
@@ -1020,17 +2130,19 @@ invalidate_buffer(struct zink_context *ctx, struct zink_resource *res)
if (!zink_resource_has_usage(res))
return false;
- struct zink_resource_object *old_obj = res->obj;
- struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL, NULL, 0);
+ struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL, NULL, 0, NULL, 0);
if (!new_obj) {
- debug_printf("new backing resource alloc failed!");
+ debug_printf("new backing resource alloc failed!\n");
return false;
}
+ bool needs_bda = !!res->obj->bda;
/* this ref must be transferred before rebind or else BOOM */
zink_batch_reference_resource_move(&ctx->batch, res);
res->obj = new_obj;
+ res->queue = VK_QUEUE_FAMILY_IGNORED;
+ if (needs_bda)
+ zink_resource_get_address(screen, res);
zink_resource_rebind(ctx, res);
- zink_descriptor_set_refs_clear(&old_obj->desc_set_refs, old_obj);
return true;
}
@@ -1040,6 +2152,12 @@ zink_resource_invalidate(struct pipe_context *pctx, struct pipe_resource *pres)
{
if (pres->target == PIPE_BUFFER)
invalidate_buffer(zink_context(pctx), zink_resource(pres));
+ else {
+ struct zink_resource *res = zink_resource(pres);
+ if (res->valid && res->fb_bind_count)
+ zink_context(pctx)->rp_loadop_changed = true;
+ res->valid = false;
+ }
}
static void
@@ -1058,13 +2176,9 @@ zink_transfer_copy_bufimage(struct zink_context *ctx,
if (buf2img)
box.x = trans->offset;
- if (dst->obj->transfer_dst)
- zink_copy_image_buffer(ctx, dst, src, trans->base.b.level, buf2img ? x : 0,
- box.y, box.z, trans->base.b.level, &box, trans->base.b.usage);
- else
- util_blitter_copy_texture(ctx->blitter, &dst->base.b, trans->base.b.level,
- x, box.y, box.z, &src->base.b,
- 0, &box);
+ assert(dst->obj->transfer_dst);
+ zink_copy_image_buffer(ctx, dst, src, trans->base.b.level, buf2img ? x : 0,
+ box.y, box.z, trans->base.b.level, &box, trans->base.b.usage);
}
ALWAYS_INLINE static void
@@ -1117,15 +2231,14 @@ create_transfer(struct zink_context *ctx, struct pipe_resource *pres, unsigned u
struct zink_transfer *trans;
if (usage & PIPE_MAP_THREAD_SAFE)
- trans = malloc(sizeof(*trans));
+ trans = calloc(1, sizeof(*trans));
else if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
- trans = slab_alloc(&ctx->transfer_pool_unsync);
+ trans = slab_zalloc(&ctx->transfer_pool_unsync);
else
- trans = slab_alloc(&ctx->transfer_pool);
+ trans = slab_zalloc(&ctx->transfer_pool);
if (!trans)
return NULL;
- memset(trans, 0, sizeof(*trans));
pipe_resource_reference(&trans->base.b.resource, pres);
trans->base.b.usage = usage;
@@ -1170,7 +2283,8 @@ zink_buffer_map(struct pipe_context *pctx,
* in which case it can be mapped unsynchronized. */
if (!(usage & (PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
usage & PIPE_MAP_WRITE && !res->base.is_shared &&
- !util_ranges_intersect(&res->valid_buffer_range, box->x, box->x + box->width)) {
+ !util_ranges_intersect(&res->valid_buffer_range, box->x, box->x + box->width) &&
+ !zink_resource_copy_box_intersects(res, 0, box)) {
usage |= PIPE_MAP_UNSYNCHRONIZED;
}
@@ -1204,6 +2318,7 @@ zink_buffer_map(struct pipe_context *pctx,
}
}
+ unsigned map_offset = box->x;
if (usage & PIPE_MAP_DISCARD_RANGE &&
(!res->obj->host_visible ||
!(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT)))) {
@@ -1225,13 +2340,13 @@ zink_buffer_map(struct pipe_context *pctx,
mgr = ctx->tc->base.stream_uploader;
else
mgr = ctx->base.stream_uploader;
- u_upload_alloc(mgr, 0, box->width + box->x,
+ u_upload_alloc(mgr, 0, box->width,
screen->info.props.limits.minMemoryMapAlignment, &offset,
(struct pipe_resource **)&trans->staging_res, (void **)&ptr);
res = zink_resource(trans->staging_res);
- trans->offset = offset + box->x;
+ trans->offset = offset;
usage |= PIPE_MAP_UNSYNCHRONIZED;
- ptr = ((uint8_t *)ptr) + box->x;
+ ptr = ((uint8_t *)ptr);
} else {
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_MAP_UNSYNCHRONIZED;
@@ -1243,30 +2358,45 @@ zink_buffer_map(struct pipe_context *pctx,
if (!zink_resource_usage_check_completion(screen, res, ZINK_RESOURCE_ACCESS_WRITE))
goto success;
usage |= PIPE_MAP_UNSYNCHRONIZED;
- } else if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
- (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) && res->base.b.usage != PIPE_USAGE_STAGING) || !res->obj->host_visible)) {
- assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE)));
- if (!res->obj->host_visible || !(usage & PIPE_MAP_ONCE)) {
- trans->offset = box->x % screen->info.props.limits.minMemoryMapAlignment;
+ } else if (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) &&
+ ((screen->info.mem_props.memoryTypes[res->obj->bo->base.base.placement].propertyFlags & VK_STAGING_RAM) != VK_STAGING_RAM)) ||
+ !res->obj->host_visible) {
+ /* any read, non-HV write, or unmappable that reaches this point needs staging */
+ if ((usage & PIPE_MAP_READ) || !res->obj->host_visible || res->base.b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY) {
+overwrite:
+ trans->offset = box->x % MAX2(screen->info.props.limits.minMemoryMapAlignment, 1 << MIN_SLAB_ORDER);
trans->staging_res = pipe_buffer_create(&screen->base, PIPE_BIND_LINEAR, PIPE_USAGE_STAGING, box->width + trans->offset);
if (!trans->staging_res)
goto fail;
struct zink_resource *staging_res = zink_resource(trans->staging_res);
- zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width);
+ if (usage & (PIPE_MAP_THREAD_SAFE | PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_THREADED_UNSYNC)) {
+ assert(ctx != screen->copy_context);
+ /* this map can't access the passed context: use the copy context */
+ zink_screen_lock_context(screen);
+ ctx = screen->copy_context;
+ }
+ if (usage & PIPE_MAP_READ)
+ zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width);
res = staging_res;
usage &= ~PIPE_MAP_UNSYNCHRONIZED;
- ptr = map_resource(screen, res);
- ptr = ((uint8_t *)ptr) + trans->offset;
+ map_offset = trans->offset;
}
}
if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
- if (usage & PIPE_MAP_WRITE)
+ if (usage & PIPE_MAP_WRITE) {
+ if (!(usage & PIPE_MAP_READ)) {
+ zink_resource_usage_try_wait(ctx, res, ZINK_RESOURCE_ACCESS_RW);
+ if (zink_resource_has_unflushed_usage(res))
+ goto overwrite;
+ }
zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_RW);
- else
+ } else
zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE);
res->obj->access = 0;
res->obj->access_stage = 0;
+ res->obj->last_write = 0;
+ zink_resource_copies_reset(res);
}
if (!ptr) {
@@ -1279,7 +2409,7 @@ zink_buffer_map(struct pipe_context *pctx,
ptr = map_resource(screen, res);
if (!ptr)
goto fail;
- ptr = ((uint8_t *)ptr) + box->x;
+ ptr = ((uint8_t *)ptr) + map_offset;
}
if (!res->obj->coherent
@@ -1296,6 +2426,7 @@ zink_buffer_map(struct pipe_context *pctx,
VkDeviceSize offset = res->obj->offset + trans->offset;
VkMappedMemoryRange range = zink_resource_init_mem_range(screen, res->obj, offset, size);
if (VKSCR(InvalidateMappedMemoryRanges)(screen->dev, 1, &range) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkInvalidateMappedMemoryRanges failed");
zink_bo_unmap(screen, res->obj->bo);
goto fail;
}
@@ -1303,14 +2434,17 @@ zink_buffer_map(struct pipe_context *pctx,
trans->base.b.usage = usage;
if (usage & PIPE_MAP_WRITE)
util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width);
- if ((usage & PIPE_MAP_PERSISTENT) && !(usage & PIPE_MAP_COHERENT))
- res->obj->persistent_maps++;
success:
+ /* ensure the copy context gets unlocked */
+ if (ctx == screen->copy_context)
+ zink_screen_unlock_context(screen);
*transfer = &trans->base.b;
return ptr;
fail:
+ if (ctx == screen->copy_context)
+ zink_screen_unlock_context(screen);
destroy_transfer(ctx, trans);
return NULL;
}
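
For orientation, the hunks above rework zink_buffer_map() so that a mapping takes one of three paths. The sketch below is an illustrative condensation only: plain booleans stand in for the usage flags and resource state, and the enum and helper names are hypothetical, not driver API.

#include <stdbool.h>

/* Illustrative summary of the zink_buffer_map() control flow above,
 * reduced to plain booleans so it stands alone. */
enum map_path { MAP_DIRECT, MAP_STREAM_UPLOAD, MAP_STAGING_COPY };

static enum map_path
choose_map_path(bool discard_range, bool host_visible, bool unsynchronized,
                bool persistent, bool reading, bool staging_ram_placement)
{
   /* DISCARD_RANGE on an unmappable or still-synchronized buffer:
    * allocate fresh memory from the stream uploader (u_upload_alloc) */
   if (discard_range && (!host_visible || !(unsynchronized || persistent)))
      return MAP_STREAM_UPLOAD;

   /* non-persistent reads from memory that is not "staging RAM", or any
    * buffer that cannot be mapped at all: bounce through a
    * PIPE_USAGE_STAGING buffer filled by zink_copy_buffer() */
   if ((reading && !persistent && !staging_ram_placement) || !host_visible)
      return MAP_STAGING_COPY;

   /* otherwise the BO is mapped directly (possibly after waiting on GPU use) */
   return MAP_DIRECT;
}
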
@@ -1331,15 +2465,20 @@ zink_image_map(struct pipe_context *pctx,
return NULL;
trans->base.b.level = level;
+ if (zink_is_swapchain(res))
+ /* this is probably a multi-chain which has already been acquired */
+ zink_kopper_acquire(ctx, res, 0);
void *ptr;
- if (usage & PIPE_MAP_WRITE && !(usage & PIPE_MAP_READ))
- /* this is like a blit, so we can potentially dump some clears or maybe we have to */
- zink_fb_clears_apply_or_discard(ctx, pres, zink_rect_from_box(box), false);
- else if (usage & PIPE_MAP_READ)
- /* if the map region intersects with any clears then we have to apply them */
- zink_fb_clears_apply_region(ctx, pres, zink_rect_from_box(box));
- if (res->optimal_tiling || !res->obj->host_visible) {
+ if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
+ if (usage & PIPE_MAP_WRITE && !(usage & PIPE_MAP_READ))
+ /* this is like a blit, so we can potentially dump some clears or maybe we have to */
+ zink_fb_clears_apply_or_discard(ctx, pres, zink_rect_from_box(box), false);
+ else if (usage & PIPE_MAP_READ)
+ /* if the map region intersects with any clears then we have to apply them */
+ zink_fb_clears_apply_region(ctx, pres, zink_rect_from_box(box));
+ }
+ if (!res->linear || !res->obj->host_visible) {
enum pipe_format format = pres->format;
if (usage & PIPE_MAP_DEPTH_ONLY)
format = util_format_get_depth_only(pres->format);
@@ -1351,6 +2490,7 @@ zink_image_map(struct pipe_context *pctx,
box->height);
struct pipe_resource templ = *pres;
+ templ.next = NULL;
templ.format = format;
templ.usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
templ.target = PIPE_BUFFER;
@@ -1368,6 +2508,7 @@ zink_image_map(struct pipe_context *pctx,
struct zink_resource *staging_res = zink_resource(trans->staging_res);
if (usage & PIPE_MAP_READ) {
+ assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
/* force multi-context sync */
if (zink_resource_usage_is_unflushed_write(res))
zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE);
@@ -1378,18 +2519,19 @@ zink_image_map(struct pipe_context *pctx,
ptr = map_resource(screen, staging_res);
} else {
- assert(!res->optimal_tiling);
+ assert(res->linear);
ptr = map_resource(screen, res);
if (!ptr)
goto fail;
if (zink_resource_has_usage(res)) {
+ assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));
if (usage & PIPE_MAP_WRITE)
zink_fence_wait(pctx);
else
zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE);
}
VkImageSubresource isr = {
- res->obj->modifier_aspect ? res->obj->modifier_aspect : res->aspect,
+ res->modifiers ? res->obj->modifier_aspect : res->aspect,
level,
0
};
@@ -1410,17 +2552,24 @@ zink_image_map(struct pipe_context *pctx,
if (!res->obj->coherent) {
VkDeviceSize size = (VkDeviceSize)box->width * box->height * desc->block.bits / 8;
VkMappedMemoryRange range = zink_resource_init_mem_range(screen, res->obj, res->obj->offset + offset, size);
- VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range);
+ if (VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkFlushMappedMemoryRanges failed");
+ }
}
ptr = ((uint8_t *)ptr) + offset;
}
if (!ptr)
goto fail;
+ if (usage & PIPE_MAP_WRITE) {
+ if (!res->valid && res->fb_bind_count) {
+ assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));
+ ctx->rp_loadop_changed = true;
+ }
+ res->valid = true;
+ }
if (sizeof(void*) == 4)
trans->base.b.usage |= ZINK_MAP_TEMPORARY;
- if ((usage & PIPE_MAP_PERSISTENT) && !(usage & PIPE_MAP_COHERENT))
- res->obj->persistent_maps++;
*transfer = &trans->base.b;
return ptr;
@@ -1431,6 +2580,110 @@ fail:
}
static void
+zink_image_subdata(struct pipe_context *pctx,
+ struct pipe_resource *pres,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ uintptr_t layer_stride)
+{
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ struct zink_context *ctx = zink_context(pctx);
+ struct zink_resource *res = zink_resource(pres);
+
+ /* flush clears to avoid subdata conflict */
+ if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC) &&
+ (res->obj->vkusage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT))
+ zink_fb_clears_apply_or_discard(ctx, pres, zink_rect_from_box(box), false);
+ /* only use HIC if supported on image and no pending usage */
+ while (res->obj->vkusage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT &&
+ zink_resource_usage_check_completion(screen, res, ZINK_RESOURCE_ACCESS_RW)) {
+ /* uninit images are always supported */
+ bool change_layout = res->layout == VK_IMAGE_LAYOUT_UNDEFINED || res->layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
+ if (!change_layout) {
+ /* image in some other layout: test for support */
+ bool can_copy_layout = false;
+ for (unsigned i = 0; i < screen->info.hic_props.copyDstLayoutCount; i++) {
+ if (screen->info.hic_props.pCopyDstLayouts[i] == res->layout) {
+ can_copy_layout = true;
+ break;
+ }
+ }
+ /* some layouts don't permit HIC copies */
+ if (!can_copy_layout)
+ break;
+ }
+ bool is_arrayed = false;
+ switch (pres->target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ is_arrayed = true;
+ break;
+ default: break;
+ }
+ /* recalc strides into texel strides because HIC spec is insane */
+ unsigned vk_stride = util_format_get_stride(pres->format, 1);
+ stride /= vk_stride;
+ unsigned vk_layer_stride = util_format_get_2d_size(pres->format, stride, 1) * vk_stride;
+ layer_stride /= vk_layer_stride;
+
+ VkHostImageLayoutTransitionInfoEXT t = {
+ VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT,
+ NULL,
+ res->obj->image,
+ res->layout,
+ /* GENERAL support is guaranteed */
+ VK_IMAGE_LAYOUT_GENERAL,
+ {res->aspect, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS}
+ };
+ /* only pre-transition uninit images to avoid thrashing */
+ if (change_layout) {
+ VKSCR(TransitionImageLayoutEXT)(screen->dev, 1, &t);
+ res->layout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+ VkMemoryToImageCopyEXT region = {
+ VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
+ NULL,
+ data,
+ stride,
+ layer_stride,
+ {res->aspect, level, is_arrayed ? box->z : 0, is_arrayed ? box->depth : 1},
+ {box->x, box->y, is_arrayed ? 0 : box->z},
+ {box->width, box->height, is_arrayed ? 1 : box->depth}
+ };
+ VkCopyMemoryToImageInfoEXT copy = {
+ VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT,
+ NULL,
+ 0,
+ res->obj->image,
+ res->layout,
+ 1,
+ &region
+ };
+ VKSCR(CopyMemoryToImageEXT)(screen->dev, &copy);
+ if (change_layout && screen->can_hic_shader_read && !pres->last_level && !box->x && !box->y && !box->z &&
+ box->width == pres->width0 && box->height == pres->height0 &&
+ ((is_arrayed && box->depth == pres->array_size) || (!is_arrayed && box->depth == pres->depth0))) {
+ /* assume full copy single-mip images use shader read access */
+ t.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
+ t.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ VKSCR(TransitionImageLayoutEXT)(screen->dev, 1, &t);
+ res->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ /* assume multi-mip where further subdata calls may happen */
+ }
+ /* make sure image is marked as having data */
+ res->valid = true;
+ return;
+ }
+ /* fallback case for per-resource unsupported or device-level unsupported */
+ u_default_texture_subdata(pctx, pres, level, usage, box, data, stride, layer_stride);
+}
+
+static void
zink_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
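
The stride recalculation in zink_image_subdata() above converts gallium's byte strides into the texel-unit memoryRowLength/memoryImageHeight values that VkMemoryToImageCopyEXT expects. A small worked example, assuming a linear, non-block-compressed 4-byte-per-texel format (e.g. B8G8R8A8_UNORM), where util_format_get_stride() and util_format_get_2d_size() reduce to simple multiplies:

#include <assert.h>

static void
hic_stride_example(void)
{
   unsigned stride = 1024;             /* byte stride handed in by the frontend */
   unsigned layer_stride = 1024 * 512; /* byte stride between array layers */

   unsigned vk_stride = 4;             /* util_format_get_stride(fmt, 1): bytes per texel */
   stride /= vk_stride;                /* 256: row length in texels */

   unsigned vk_layer_stride = stride * vk_stride; /* 1024: bytes per row again */
   layer_stride /= vk_layer_stride;    /* 512: image height in rows */

   assert(stride == 256);
   assert(layer_stride == 512);
}
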
@@ -1443,47 +2696,282 @@ zink_transfer_flush_region(struct pipe_context *pctx,
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_resource *m = trans->staging_res ? zink_resource(trans->staging_res) :
res;
- ASSERTED VkDeviceSize size, offset;
+ ASSERTED VkDeviceSize size, src_offset, dst_offset = 0;
if (m->obj->is_buffer) {
size = box->width;
- offset = trans->offset;
+ src_offset = box->x + (trans->staging_res ? trans->offset : ptrans->box.x);
+ dst_offset = box->x + ptrans->box.x;
} else {
size = (VkDeviceSize)box->width * box->height * util_format_get_blocksize(m->base.b.format);
- offset = trans->offset +
+ src_offset = trans->offset +
box->z * trans->depthPitch +
util_format_get_2d_size(m->base.b.format, trans->base.b.stride, box->y) +
util_format_get_stride(m->base.b.format, box->x);
- assert(offset + size <= res->obj->size);
+ assert(src_offset + size <= res->obj->size);
}
if (!m->obj->coherent) {
VkMappedMemoryRange range = zink_resource_init_mem_range(screen, m->obj, m->obj->offset, m->obj->size);
- VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range);
+ if (VKSCR(FlushMappedMemoryRanges)(screen->dev, 1, &range) != VK_SUCCESS) {
+ mesa_loge("ZINK: vkFlushMappedMemoryRanges failed");
+ }
}
if (trans->staging_res) {
struct zink_resource *staging_res = zink_resource(trans->staging_res);
if (ptrans->resource->target == PIPE_BUFFER)
- zink_copy_buffer(ctx, res, staging_res, box->x, offset, box->width);
+ zink_copy_buffer(ctx, res, staging_res, dst_offset, src_offset, size);
else
zink_transfer_copy_bufimage(ctx, res, staging_res, trans);
}
}
}
+/* used to determine whether to emit a TRANSFER_DST barrier on copies */
+bool
+zink_resource_copy_box_intersects(struct zink_resource *res, unsigned level, const struct pipe_box *box)
+{
+ /* if there are no valid copy rects tracked, this needs a barrier */
+ if (!res->obj->copies_valid)
+ return true;
+ /* untracked huge miplevel */
+ if (level >= ARRAY_SIZE(res->obj->copies))
+ return true;
+ u_rwlock_rdlock(&res->obj->copy_lock);
+ struct pipe_box *b = res->obj->copies[level].data;
+ unsigned num_boxes = util_dynarray_num_elements(&res->obj->copies[level], struct pipe_box);
+ bool (*intersect)(const struct pipe_box *, const struct pipe_box *);
+ /* determine intersection function based on dimensionality */
+ switch (res->base.b.target) {
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ intersect = u_box_test_intersection_1d;
+ break;
+
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D:
+ intersect = u_box_test_intersection_2d;
+ break;
+
+ default:
+ intersect = u_box_test_intersection_3d;
+ break;
+ }
+ /* if any of the tracked boxes intersect with this one, a barrier is needed */
+ bool ret = false;
+ for (unsigned i = 0; i < num_boxes; i++) {
+ if (intersect(box, b + i)) {
+ ret = true;
+ break;
+ }
+ }
+ u_rwlock_rdunlock(&res->obj->copy_lock);
+ /* no intersection = no barrier */
+ return ret;
+}
+
+/* track a new region for TRANSFER_DST barrier emission */
+void
+zink_resource_copy_box_add(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box)
+{
+ u_rwlock_wrlock(&res->obj->copy_lock);
+ if (res->obj->copies_valid) {
+ struct pipe_box *b = res->obj->copies[level].data;
+ unsigned num_boxes = util_dynarray_num_elements(&res->obj->copies[level], struct pipe_box);
+ for (unsigned i = 0; i < num_boxes; i++) {
+ switch (res->base.b.target) {
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ /* no-op included region */
+ if (b[i].x <= box->x && b[i].x + b[i].width >= box->x + box->width)
+ goto out;
+
+ /* try to merge adjacent regions */
+ if (b[i].x == box->x + box->width) {
+ b[i].x -= box->width;
+ b[i].width += box->width;
+ goto out;
+ }
+ if (b[i].x + b[i].width == box->x) {
+ b[i].width += box->width;
+ goto out;
+ }
+
+ /* try to merge into region */
+ if (box->x <= b[i].x && box->x + box->width >= b[i].x + b[i].width) {
+ *b = *box;
+ goto out;
+ }
+ break;
+
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D:
+ /* no-op included region */
+ if (b[i].x <= box->x && b[i].x + b[i].width >= box->x + box->width &&
+ b[i].y <= box->y && b[i].y + b[i].height >= box->y + box->height)
+ goto out;
+
+ /* try to merge adjacent regions */
+ if (b[i].y == box->y && b[i].height == box->height) {
+ if (b[i].x == box->x + box->width) {
+ b[i].x -= box->width;
+ b[i].width += box->width;
+ goto out;
+ }
+ if (b[i].x + b[i].width == box->x) {
+ b[i].width += box->width;
+ goto out;
+ }
+ } else if (b[i].x == box->x && b[i].width == box->width) {
+ if (b[i].y == box->y + box->height) {
+ b[i].y -= box->height;
+ b[i].height += box->height;
+ goto out;
+ }
+ if (b[i].y + b[i].height == box->y) {
+ b[i].height += box->height;
+ goto out;
+ }
+ }
+
+ /* try to merge into region */
+ if (box->x <= b[i].x && box->x + box->width >= b[i].x + b[i].width &&
+ box->y <= b[i].y && box->y + box->height >= b[i].y + b[i].height) {
+ *b = *box;
+ goto out;
+ }
+ break;
+
+ default:
+ /* no-op included region */
+ if (b[i].x <= box->x && b[i].x + b[i].width >= box->x + box->width &&
+ b[i].y <= box->y && b[i].y + b[i].height >= box->y + box->height &&
+ b[i].z <= box->z && b[i].z + b[i].depth >= box->z + box->depth)
+ goto out;
+
+ /* try to merge adjacent regions */
+ if (b[i].z == box->z && b[i].depth == box->depth) {
+ if (b[i].y == box->y && b[i].height == box->height) {
+ if (b[i].x == box->x + box->width) {
+ b[i].x -= box->width;
+ b[i].width += box->width;
+ goto out;
+ }
+ if (b[i].x + b[i].width == box->x) {
+ b[i].width += box->width;
+ goto out;
+ }
+ } else if (b[i].x == box->x && b[i].width == box->width) {
+ if (b[i].y == box->y + box->height) {
+ b[i].y -= box->height;
+ b[i].height += box->height;
+ goto out;
+ }
+ if (b[i].y + b[i].height == box->y) {
+ b[i].height += box->height;
+ goto out;
+ }
+ }
+ } else if (b[i].x == box->x && b[i].width == box->width) {
+ if (b[i].y == box->y && b[i].height == box->height) {
+ if (b[i].z == box->z + box->depth) {
+ b[i].z -= box->depth;
+ b[i].depth += box->depth;
+ goto out;
+ }
+ if (b[i].z + b[i].depth == box->z) {
+ b[i].depth += box->depth;
+ goto out;
+ }
+ } else if (b[i].z == box->z && b[i].depth == box->depth) {
+ if (b[i].y == box->y + box->height) {
+ b[i].y -= box->height;
+ b[i].height += box->height;
+ goto out;
+ }
+ if (b[i].y + b[i].height == box->y) {
+ b[i].height += box->height;
+ goto out;
+ }
+ }
+ } else if (b[i].y == box->y && b[i].height == box->height) {
+ if (b[i].z == box->z && b[i].depth == box->depth) {
+ if (b[i].x == box->x + box->width) {
+ b[i].x -= box->width;
+ b[i].width += box->width;
+ goto out;
+ }
+ if (b[i].x + b[i].width == box->x) {
+ b[i].width += box->width;
+ goto out;
+ }
+ } else if (b[i].x == box->x && b[i].width == box->width) {
+ if (b[i].z == box->z + box->depth) {
+ b[i].z -= box->depth;
+ b[i].depth += box->depth;
+ goto out;
+ }
+ if (b[i].z + b[i].depth == box->z) {
+ b[i].depth += box->depth;
+ goto out;
+ }
+ }
+ }
+
+ /* try to merge into region */
+ if (box->x <= b[i].x && box->x + box->width >= b[i].x + b[i].width &&
+ box->y <= b[i].y && box->y + box->height >= b[i].y + b[i].height &&
+ box->z <= b[i].z && box->z + box->depth >= b[i].z + b[i].depth)
+ goto out;
+
+ break;
+ }
+ }
+ }
+ util_dynarray_append(&res->obj->copies[level], struct pipe_box, *box);
+ if (!res->copies_warned && util_dynarray_num_elements(&res->obj->copies[level], struct pipe_box) > 100) {
+ perf_debug(ctx, "zink: PERF WARNING! > 100 copy boxes detected for %p\n", res);
+ mesa_logw("zink: PERF WARNING! > 100 copy boxes detected for %p\n", res);
+ res->copies_warned = true;
+ }
+ res->obj->copies_valid = true;
+out:
+ u_rwlock_wrunlock(&res->obj->copy_lock);
+}
+
+void
+zink_resource_copies_reset(struct zink_resource *res)
+{
+ if (!res->obj->copies_valid)
+ return;
+ u_rwlock_wrlock(&res->obj->copy_lock);
+ unsigned max_level = res->base.b.target == PIPE_BUFFER ? 1 : (res->base.b.last_level + 1);
+ if (res->base.b.target == PIPE_BUFFER) {
+ /* flush transfer regions back to valid range on reset */
+ struct pipe_box *b = res->obj->copies[0].data;
+ unsigned num_boxes = util_dynarray_num_elements(&res->obj->copies[0], struct pipe_box);
+ for (unsigned i = 0; i < num_boxes; i++)
+ util_range_add(&res->base.b, &res->valid_buffer_range, b[i].x, b[i].x + b[i].width);
+ }
+ for (unsigned i = 0; i < max_level; i++)
+ util_dynarray_clear(&res->obj->copies[i]);
+ res->obj->copies_valid = false;
+ res->obj->copies_need_reset = false;
+ u_rwlock_wrunlock(&res->obj->copy_lock);
+}
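
The three helpers above are intended to bracket transfer-destination writes: a caller tests whether a new copy region overlaps a previously recorded one (and only then emits a barrier), records the region afterwards, and the tracking is dropped again once zink_buffer_map() waits on the resource. A minimal caller sketch, assuming the zink headers from this tree; record_copy() itself is hypothetical and not one of the driver's actual call sites:

#include "zink_context.h"
#include "zink_resource.h"
#include "zink_screen.h"

/* Hypothetical caller, for illustration only. */
static void
record_copy(struct zink_context *ctx, struct zink_resource *dst,
            unsigned level, const struct pipe_box *box)
{
   /* overlapping an earlier copy means ordering matters: emit a
    * TRANSFER_DST barrier (signature as used elsewhere in this diff) */
   if (zink_resource_copy_box_intersects(dst, level, box))
      zink_screen(ctx->base.screen)->image_barrier(ctx, dst,
                                                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                   VK_ACCESS_TRANSFER_WRITE_BIT,
                                                   VK_PIPELINE_STAGE_TRANSFER_BIT);

   /* ... record the actual vkCmdCopy* here ... */

   /* remember the region so later copies can test against it */
   zink_resource_copy_box_add(ctx, dst, level, box);
}
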
+
static void
transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans)
{
struct zink_context *ctx = zink_context(pctx);
- struct zink_resource *res = zink_resource(ptrans->resource);
struct zink_transfer *trans = (struct zink_transfer *)ptrans;
if (!(trans->base.b.usage & (PIPE_MAP_FLUSH_EXPLICIT | PIPE_MAP_COHERENT))) {
- zink_transfer_flush_region(pctx, ptrans, &ptrans->box);
+ /* flush_region is relative to the mapped region: use only the extents */
+ struct pipe_box box = ptrans->box;
+ box.x = box.y = box.z = 0;
+ zink_transfer_flush_region(pctx, ptrans, &box);
}
- if ((trans->base.b.usage & PIPE_MAP_PERSISTENT) && !(trans->base.b.usage & PIPE_MAP_COHERENT))
- res->obj->persistent_maps--;
-
if (trans->staging_res)
pipe_resource_reference(&trans->staging_res, NULL);
pipe_resource_reference(&trans->base.b.resource, NULL);
@@ -1500,6 +2988,16 @@ do_transfer_unmap(struct zink_screen *screen, struct zink_transfer *trans)
unmap_resource(screen, res);
}
+void
+zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ struct zink_transfer *trans = (struct zink_transfer *)ptrans;
+ if (trans->base.b.usage & PIPE_MAP_ONCE && !trans->staging_res)
+ do_transfer_unmap(screen, trans);
+ transfer_unmap(NULL, ptrans);
+}
+
static void
zink_buffer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans)
{
@@ -1555,79 +3053,50 @@ zink_resource_get_separate_stencil(struct pipe_resource *pres)
}
-VkBuffer
-zink_resource_tmp_buffer(struct zink_screen *screen, struct zink_resource *res, unsigned offset_add, unsigned add_binds, unsigned *offset_out)
+static bool
+resource_object_add_bind(struct zink_context *ctx, struct zink_resource *res, unsigned bind)
{
- VkBufferCreateInfo bci = create_bci(screen, &res->base.b, res->base.b.bind | add_binds);
- VkDeviceSize size = bci.size - offset_add;
- VkDeviceSize offset = offset_add;
- if (offset_add) {
- assert(bci.size > offset_add);
-
- align_offset_size(res->obj->alignment, &offset, &size, bci.size);
+ /* base resource already has the cap */
+ if (res->base.b.bind & bind)
+ return true;
+ if (res->obj->is_buffer) {
+ unreachable("zink: all buffers should have this bit");
+ return true;
}
- bci.size = size;
+ assert(!res->obj->dt);
+ zink_fb_clears_apply_region(ctx, &res->base.b, (struct u_rect){0, res->base.b.width0, 0, res->base.b.height0});
+ bool ret = add_resource_bind(ctx, res, bind);
+ if (ret)
+ zink_resource_rebind(ctx, res);
- VkBuffer buffer;
- if (VKSCR(CreateBuffer)(screen->dev, &bci, NULL, &buffer) != VK_SUCCESS)
- return VK_NULL_HANDLE;
- VKSCR(BindBufferMemory)(screen->dev, buffer, zink_bo_get_mem(res->obj->bo), res->obj->offset + offset);
- if (offset_out)
- *offset_out = offset_add - offset;
- return buffer;
+ return ret;
}
bool
zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource *res)
{
- struct zink_screen *screen = zink_screen(ctx->base.screen);
- /* base resource already has the cap */
- if (res->base.b.bind & PIPE_BIND_SHADER_IMAGE)
- return true;
- if (res->obj->is_buffer) {
- if (res->base.b.bind & PIPE_BIND_SHADER_IMAGE)
- return true;
-
- VkBuffer buffer = zink_resource_tmp_buffer(screen, res, 0, PIPE_BIND_SHADER_IMAGE, NULL);
- if (!buffer)
- return false;
- util_dynarray_append(&res->obj->tmp, VkBuffer, res->obj->buffer);
- res->obj->buffer = buffer;
- res->base.b.bind |= PIPE_BIND_SHADER_IMAGE;
- } else {
- zink_fb_clears_apply_region(ctx, &res->base.b, (struct u_rect){0, res->base.b.width0, 0, res->base.b.height0});
- zink_resource_image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0);
- res->base.b.bind |= PIPE_BIND_SHADER_IMAGE;
- struct zink_resource_object *old_obj = res->obj;
- struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->optimal_tiling, res->modifiers, res->modifiers_count);
- if (!new_obj) {
- debug_printf("new backing resource alloc failed!");
- res->base.b.bind &= ~PIPE_BIND_SHADER_IMAGE;
- return false;
- }
- struct zink_resource staging = *res;
- staging.obj = old_obj;
- bool needs_unref = true;
- if (zink_resource_has_usage(res)) {
- zink_batch_reference_resource_move(&ctx->batch, res);
- needs_unref = false;
- }
- res->obj = new_obj;
- zink_descriptor_set_refs_clear(&old_obj->desc_set_refs, old_obj);
- for (unsigned i = 0; i <= res->base.b.last_level; i++) {
- struct pipe_box box = {0, 0, 0,
- u_minify(res->base.b.width0, i),
- u_minify(res->base.b.height0, i), res->base.b.array_size};
- box.depth = util_num_layers(&res->base.b, i);
- ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box);
- }
- if (needs_unref)
- zink_resource_object_reference(screen, &old_obj, NULL);
- }
+ return resource_object_add_bind(ctx, res, PIPE_BIND_SHADER_IMAGE);
+}
- zink_resource_rebind(ctx, res);
+bool
+zink_resource_object_init_mutable(struct zink_context *ctx, struct zink_resource *res)
+{
+ return resource_object_add_bind(ctx, res, ZINK_BIND_MUTABLE);
+}
- return true;
+VkDeviceAddress
+zink_resource_get_address(struct zink_screen *screen, struct zink_resource *res)
+{
+ assert(res->obj->is_buffer);
+ if (!res->obj->bda) {
+ VkBufferDeviceAddressInfo info = {
+ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+ NULL,
+ res->obj->buffer
+ };
+ res->obj->bda = VKSCR(GetBufferDeviceAddress)(screen->dev, &info);
+ }
+ return res->obj->bda;
}
void
@@ -1650,17 +3119,17 @@ zink_resource_setup_transfer_layouts(struct zink_context *ctx, struct zink_resou
* VK_IMAGE_LAYOUT_GENERAL. And since this isn't a present-related
* operation, VK_IMAGE_LAYOUT_GENERAL seems most appropriate.
*/
- zink_resource_image_barrier(ctx, src,
+ zink_screen(ctx->base.screen)->image_barrier(ctx, src,
VK_IMAGE_LAYOUT_GENERAL,
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);
} else {
- zink_resource_image_barrier(ctx, src,
+ zink_screen(ctx->base.screen)->image_barrier(ctx, src,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
VK_ACCESS_TRANSFER_READ_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);
- zink_resource_image_barrier(ctx, dst,
+ zink_screen(ctx->base.screen)->image_barrier(ctx, dst,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);
@@ -1717,15 +3186,28 @@ bool
zink_screen_resource_init(struct pipe_screen *pscreen)
{
struct zink_screen *screen = zink_screen(pscreen);
- pscreen->resource_create = zink_resource_create;
+ pscreen->resource_create = u_transfer_helper_resource_create;
pscreen->resource_create_with_modifiers = zink_resource_create_with_modifiers;
- pscreen->resource_destroy = zink_resource_destroy;
- pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, true, true, false, false);
-
- if (screen->info.have_KHR_external_memory_fd) {
+ pscreen->resource_create_drawable = zink_resource_create_drawable;
+ pscreen->resource_destroy = u_transfer_helper_resource_destroy;
+ pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
+ U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
+ U_TRANSFER_HELPER_INTERLEAVE_IN_PLACE |
+ U_TRANSFER_HELPER_MSAA_MAP |
+ (!screen->have_D24_UNORM_S8_UINT ? U_TRANSFER_HELPER_Z24_IN_Z32F : 0));
+
+ if (screen->info.have_KHR_external_memory_fd || screen->info.have_KHR_external_memory_win32) {
pscreen->resource_get_handle = zink_resource_get_handle;
pscreen->resource_from_handle = zink_resource_from_handle;
}
+ if (screen->info.have_EXT_external_memory_host) {
+ pscreen->resource_from_user_memory = zink_resource_from_user_memory;
+ }
+ if (screen->instance_info.have_KHR_external_memory_capabilities) {
+ pscreen->memobj_create_from_handle = zink_memobj_create_from_handle;
+ pscreen->memobj_destroy = zink_memobj_destroy;
+ pscreen->resource_from_memobj = zink_resource_from_memobj;
+ }
pscreen->resource_get_param = zink_resource_get_param;
return true;
}
@@ -1735,11 +3217,11 @@ zink_context_resource_init(struct pipe_context *pctx)
{
pctx->buffer_map = zink_buffer_map;
pctx->buffer_unmap = zink_buffer_unmap;
- pctx->texture_map = u_transfer_helper_deinterleave_transfer_map;
- pctx->texture_unmap = u_transfer_helper_deinterleave_transfer_unmap;
+ pctx->texture_map = u_transfer_helper_transfer_map;
+ pctx->texture_unmap = u_transfer_helper_transfer_unmap;
pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
pctx->buffer_subdata = zink_buffer_subdata;
- pctx->texture_subdata = u_default_texture_subdata;
+ pctx->texture_subdata = zink_image_subdata;
pctx->invalidate_resource = zink_resource_invalidate;
}
diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h
index 98520404fb0..c7185d32125 100644
--- a/src/gallium/drivers/zink/zink_resource.h
+++ b/src/gallium/drivers/zink/zink_resource.h
@@ -24,143 +24,26 @@
#ifndef ZINK_RESOURCE_H
#define ZINK_RESOURCE_H
-struct pipe_screen;
-struct sw_displaytarget;
-struct zink_batch;
-struct zink_context;
-struct zink_bo;
-
-#include "util/hash_table.h"
-#include "util/simple_mtx.h"
-#include "util/u_transfer.h"
-#include "util/u_range.h"
-#include "util/u_dynarray.h"
-#include "util/u_threaded_context.h"
-
-#include "zink_batch.h"
-#include "zink_descriptors.h"
-
-#include <vulkan/vulkan.h>
+#include "zink_types.h"
#define ZINK_MAP_TEMPORARY (PIPE_MAP_DRV_PRV << 0)
-
-struct mem_key {
- unsigned seen_count;
- struct {
- unsigned heap_index;
- VkMemoryRequirements reqs;
- } key;
-};
-
-struct zink_resource_object {
- struct pipe_reference reference;
-
- VkPipelineStageFlagBits access_stage;
- VkAccessFlags access;
- bool unordered_barrier;
-
- unsigned persistent_maps; //if nonzero, requires vkFlushMappedMemoryRanges during batch use
- struct zink_descriptor_refs desc_set_refs;
-
- struct zink_batch_usage *reads;
- struct zink_batch_usage *writes;
-
- struct util_dynarray tmp;
-
- union {
- VkBuffer buffer;
- VkImage image;
- };
-
- VkSampleLocationsInfoEXT zs_evaluate;
- bool needs_zs_evaluate;
-
- bool storage_init; //layout was set for image
- bool transfer_dst;
- bool is_buffer;
- VkImageAspectFlags modifier_aspect;
-
- struct zink_bo *bo;
- VkDeviceSize offset, size, alignment;
- VkImageCreateFlags vkflags;
- VkImageUsageFlags vkusage;
-
- bool host_visible;
- bool coherent;
-};
-
-struct zink_resource {
- struct threaded_resource base;
-
- enum pipe_format internal_format:16;
-
- struct zink_resource_object *obj;
- struct zink_resource_object *scanout_obj; //TODO: remove for wsi
- bool scanout_obj_init;
- union {
- struct {
- struct util_range valid_buffer_range;
- uint32_t vbo_bind_mask : PIPE_MAX_ATTRIBS;
- uint8_t ubo_bind_count[2];
- uint8_t so_bind_count;
- bool so_valid;
- uint32_t ubo_bind_mask[PIPE_SHADER_TYPES];
- uint32_t ssbo_bind_mask[PIPE_SHADER_TYPES];
- };
- struct {
- VkFormat format;
- VkImageLayout layout;
- VkImageAspectFlags aspect;
- bool optimal_tiling;
- uint8_t fb_binds;
- };
- };
- uint32_t sampler_binds[PIPE_SHADER_TYPES];
- uint16_t image_bind_count[2]; //gfx, compute
- uint16_t write_bind_count[2]; //gfx, compute
- union {
- uint16_t bind_count[2]; //gfx, compute
- uint32_t all_binds;
- };
-
- union {
- struct {
- struct hash_table bufferview_cache;
- simple_mtx_t bufferview_mtx;
- };
- struct {
- struct hash_table surface_cache;
- simple_mtx_t surface_mtx;
- };
- };
-
- bool dmabuf_acquire;
- struct sw_displaytarget *dt;
- unsigned dt_stride;
-
- uint8_t modifiers_count;
- uint64_t *modifiers;
-};
-
-struct zink_transfer {
- struct threaded_transfer base;
- struct pipe_resource *staging_res;
- unsigned offset;
- unsigned depthPitch;
-};
-
-static inline struct zink_resource *
-zink_resource(struct pipe_resource *r)
-{
- return (struct zink_resource *)r;
-}
+#define ZINK_BIND_DESCRIPTOR (1u << 27)
+#define ZINK_BIND_MUTABLE (1u << 28)
+#define ZINK_BIND_DMABUF (1u << 29)
+#define ZINK_BIND_TRANSIENT (1u << 30) //transient fb attachment
+#define ZINK_BIND_VIDEO (1u << 31)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
bool
zink_screen_resource_init(struct pipe_screen *pscreen);
void
zink_context_resource_init(struct pipe_context *pctx);
-
+void
+zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans);
void
zink_get_depth_stencil_resources(struct pipe_resource *res,
struct zink_resource **out_z,
@@ -189,20 +72,36 @@ zink_resource_object_reference(struct zink_screen *screen,
if (dst) *dst = src;
}
-VkBuffer
-zink_resource_tmp_buffer(struct zink_screen *screen, struct zink_resource *res, unsigned offset_add, unsigned add_binds, unsigned *offset);
-
bool
zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource *res);
+bool
+zink_resource_object_init_mutable(struct zink_context *ctx, struct zink_resource *res);
-static inline bool
+VkDeviceAddress
+zink_resource_get_address(struct zink_screen *screen, struct zink_resource *res);
+
+static ALWAYS_INLINE bool
zink_resource_has_binds(const struct zink_resource *res)
{
return res->all_binds > 0;
}
-#ifndef __cplusplus
+static ALWAYS_INLINE bool
+zink_is_swapchain(const struct zink_resource *res)
+{
+ return res->swapchain;
+}
+
+bool
+zink_resource_copy_box_intersects(struct zink_resource *res, unsigned level, const struct pipe_box *box);
+void
+zink_resource_copy_box_add(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box);
+void
+zink_resource_copies_reset(struct zink_resource *res);
+
+#include "zink_batch.h"
#include "zink_bo.h"
+#include "zink_kopper.h"
static inline bool
zink_resource_usage_is_unflushed(const struct zink_resource *res)
@@ -213,7 +112,7 @@ zink_resource_usage_is_unflushed(const struct zink_resource *res)
static inline bool
zink_resource_usage_is_unflushed_write(const struct zink_resource *res)
{
- return zink_batch_usage_is_unflushed(res->obj->bo->writes);
+ return zink_batch_usage_is_unflushed(res->obj->bo->writes.u);
}
@@ -241,6 +140,18 @@ zink_resource_usage_check_completion(struct zink_screen *screen, struct zink_res
return zink_bo_usage_check_completion(screen, res->obj->bo, access);
}
+static inline bool
+zink_resource_usage_check_completion_fast(struct zink_screen *screen, struct zink_resource *res, enum zink_resource_access access)
+{
+ return zink_bo_usage_check_completion_fast(screen, res->obj->bo, access);
+}
+
+static inline void
+zink_resource_usage_try_wait(struct zink_context *ctx, struct zink_resource *res, enum zink_resource_access access)
+{
+ zink_bo_usage_try_wait(ctx, res->obj->bo, access);
+}
+
static inline void
zink_resource_usage_wait(struct zink_context *ctx, struct zink_resource *res, enum zink_resource_access access)
{
@@ -251,6 +162,7 @@ static inline void
zink_resource_usage_set(struct zink_resource *res, struct zink_batch_state *bs, bool write)
{
zink_bo_usage_set(res->obj->bo, bs, write);
+ res->obj->unsync_access = false;
}
static inline bool
@@ -259,5 +171,31 @@ zink_resource_object_usage_unset(struct zink_resource_object *obj, struct zink_b
return zink_bo_usage_unset(obj->bo, bs);
}
+static inline void
+zink_batch_resource_usage_set(struct zink_batch *batch, struct zink_resource *res, bool write, bool is_buffer)
+{
+ if (!is_buffer) {
+ if (res->obj->dt) {
+ VkSemaphore acquire = zink_kopper_acquire_submit(zink_screen(batch->state->ctx->base.screen), res);
+ if (acquire)
+ util_dynarray_append(&batch->state->acquires, VkSemaphore, acquire);
+ }
+ if (write) {
+ if (!res->valid && res->fb_bind_count)
+ batch->state->ctx->rp_loadop_changed = true;
+ res->valid = true;
+ }
+ }
+ zink_resource_usage_set(res, batch->state, write);
+
+ batch->has_work = true;
+}
+
+void
+zink_debug_mem_print_stats(struct zink_screen *screen);
+
+#ifdef __cplusplus
+}
#endif
+
#endif
diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c
index b40f1e39387..ae33ac21f11 100644
--- a/src/gallium/drivers/zink/zink_screen.c
+++ b/src/gallium/drivers/zink/zink_screen.c
@@ -23,44 +23,75 @@
#include "zink_screen.h"
+#include "zink_kopper.h"
#include "zink_compiler.h"
#include "zink_context.h"
-#include "zink_device_info.h"
#include "zink_descriptors.h"
#include "zink_fence.h"
+#include "vk_format.h"
#include "zink_format.h"
#include "zink_framebuffer.h"
-#include "zink_instance.h"
#include "zink_program.h"
#include "zink_public.h"
+#include "zink_query.h"
#include "zink_resource.h"
+#include "zink_state.h"
#include "nir_to_spirv/nir_to_spirv.h" // for SPIRV_VERSION
-#include "os/os_process.h"
#include "util/u_debug.h"
-#include "util/format/u_format.h"
-#include "util/hash_table.h"
+#include "util/u_dl.h"
#include "util/os_file.h"
-#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_string.h"
+#include "util/perf/u_trace.h"
#include "util/u_transfer_helper.h"
+#include "util/hex.h"
#include "util/xmlconfig.h"
#include "util/u_cpu_detect.h"
-#include "frontend/sw_winsys.h"
+#ifdef HAVE_LIBDRM
+#include <xf86drm.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
+#endif
+
+static int num_screens = 0;
+bool zink_tracing = false;
#if DETECT_OS_WINDOWS
#include <io.h>
+#define VK_LIBNAME "vulkan-1.dll"
#else
#include <unistd.h>
+#if DETECT_OS_APPLE
+#define VK_LIBNAME "libvulkan.1.dylib"
+#elif DETECT_OS_ANDROID
+#define VK_LIBNAME "libvulkan.so"
+#else
+#define VK_LIBNAME "libvulkan.so.1"
+#endif
#endif
-#if defined(__APPLE__)
+#ifdef __APPLE__
+#include "MoltenVK/mvk_vulkan.h"
// Source of MVK_VERSION
-#include "MoltenVK/vk_mvk_moltenvk.h"
+#include "MoltenVK/mvk_config.h"
+#define VK_NO_PROTOTYPES
+#include "MoltenVK/mvk_deprecated_api.h"
+#include "MoltenVK/mvk_private_api.h"
+#endif /* __APPLE__ */
+
+#ifdef HAVE_LIBDRM
+#include "drm-uapi/dma-buf.h"
+#include <xf86drm.h>
#endif
static const struct debug_named_value
@@ -69,6 +100,25 @@ zink_debug_options[] = {
{ "spirv", ZINK_DEBUG_SPIRV, "Dump SPIR-V during program compile" },
{ "tgsi", ZINK_DEBUG_TGSI, "Dump TGSI during program compile" },
{ "validation", ZINK_DEBUG_VALIDATION, "Dump Validation layer output" },
+ { "vvl", ZINK_DEBUG_VALIDATION, "Dump Validation layer output" },
+ { "sync", ZINK_DEBUG_SYNC, "Force synchronization before draws/dispatches" },
+ { "compact", ZINK_DEBUG_COMPACT, "Use only 4 descriptor sets" },
+ { "noreorder", ZINK_DEBUG_NOREORDER, "Do not reorder command streams" },
+ { "gpl", ZINK_DEBUG_GPL, "Force using Graphics Pipeline Library for all shaders" },
+ { "shaderdb", ZINK_DEBUG_SHADERDB, "Do stuff to make shader-db work" },
+ { "rp", ZINK_DEBUG_RP, "Enable renderpass tracking/optimizations" },
+ { "norp", ZINK_DEBUG_NORP, "Disable renderpass tracking/optimizations" },
+ { "map", ZINK_DEBUG_MAP, "Track amount of mapped VRAM" },
+ { "flushsync", ZINK_DEBUG_FLUSHSYNC, "Force synchronous flushes/presents" },
+ { "noshobj", ZINK_DEBUG_NOSHOBJ, "Disable EXT_shader_object" },
+ { "optimal_keys", ZINK_DEBUG_OPTIMAL_KEYS, "Debug/use optimal_keys" },
+ { "noopt", ZINK_DEBUG_NOOPT, "Disable async optimized pipeline compiles" },
+ { "nobgc", ZINK_DEBUG_NOBGC, "Disable all async pipeline compiles" },
+ { "dgc", ZINK_DEBUG_DGC, "Use DGC (driver testing only)" },
+ { "mem", ZINK_DEBUG_MEM, "Debug memory allocations" },
+ { "quiet", ZINK_DEBUG_QUIET, "Suppress warnings" },
+ { "ioopt", ZINK_DEBUG_IOOPT, "Optimize IO" },
+ { "nopc", ZINK_DEBUG_NOPC, "No precompilation" },
DEBUG_NAMED_VALUE_END
};
@@ -82,17 +132,18 @@ static const struct debug_named_value
zink_descriptor_options[] = {
{ "auto", ZINK_DESCRIPTOR_MODE_AUTO, "Automatically detect best mode" },
{ "lazy", ZINK_DESCRIPTOR_MODE_LAZY, "Don't cache, do least amount of updates" },
- { "nofallback", ZINK_DESCRIPTOR_MODE_NOFALLBACK, "Cache, never use lazy fallback" },
- { "notemplates", ZINK_DESCRIPTOR_MODE_NOTEMPLATES, "Cache, but disable templated updates" },
+ { "db", ZINK_DESCRIPTOR_MODE_DB, "Use descriptor buffers" },
DEBUG_NAMED_VALUE_END
};
DEBUG_GET_ONCE_FLAGS_OPTION(zink_descriptor_mode, "ZINK_DESCRIPTORS", zink_descriptor_options, ZINK_DESCRIPTOR_MODE_AUTO)
+enum zink_descriptor_mode zink_descriptor_mode;
+
static const char *
zink_get_vendor(struct pipe_screen *pscreen)
{
- return "Collabora Ltd";
+ return "Mesa";
}
static const char *
@@ -108,23 +159,85 @@ static const char *
zink_get_name(struct pipe_screen *pscreen)
{
struct zink_screen *screen = zink_screen(pscreen);
+ const char *driver_name = vk_DriverId_to_str(screen->info.driver_props.driverID) + strlen("VK_DRIVER_ID_");
static char buf[1000];
- snprintf(buf, sizeof(buf), "zink (%s)", screen->info.props.deviceName);
+ snprintf(buf, sizeof(buf), "zink Vulkan %d.%d(%s (%s))",
+ VK_VERSION_MAJOR(screen->info.device_version),
+ VK_VERSION_MINOR(screen->info.device_version),
+ screen->info.props.deviceName,
+ strstr(vk_DriverId_to_str(screen->info.driver_props.driverID), "VK_DRIVER_ID_") ? driver_name : "Driver Unknown"
+ );
return buf;
}
+static void
+zink_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ if (screen->vk_version >= VK_MAKE_VERSION(1,2,0)) {
+ memcpy(uuid, screen->info.props11.driverUUID, VK_UUID_SIZE);
+ } else {
+ memcpy(uuid, screen->info.deviceid_props.driverUUID, VK_UUID_SIZE);
+ }
+}
+
+static void
+zink_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ if (screen->vk_version >= VK_MAKE_VERSION(1,2,0)) {
+ memcpy(uuid, screen->info.props11.deviceUUID, VK_UUID_SIZE);
+ } else {
+ memcpy(uuid, screen->info.deviceid_props.deviceUUID, VK_UUID_SIZE);
+ }
+}
+
+static void
+zink_get_device_luid(struct pipe_screen *pscreen, char *luid)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ if (screen->info.have_vulkan12) {
+ memcpy(luid, screen->info.props11.deviceLUID, VK_LUID_SIZE);
+ } else {
+ memcpy(luid, screen->info.deviceid_props.deviceLUID, VK_LUID_SIZE);
+ }
+}
+
static uint32_t
-hash_framebuffer_state(const void *key)
+zink_get_device_node_mask(struct pipe_screen *pscreen)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ if (screen->info.have_vulkan12) {
+ return screen->info.props11.deviceNodeMask;
+ } else {
+ return screen->info.deviceid_props.deviceNodeMask;
+ }
+}
+
+static void
+zink_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned max_threads)
{
- struct zink_framebuffer_state* s = (struct zink_framebuffer_state*)key;
- return _mesa_hash_data(key, offsetof(struct zink_framebuffer_state, attachments) + sizeof(s->attachments[0]) * s->num_attachments);
+ struct zink_screen *screen = zink_screen(pscreen);
+ util_queue_adjust_num_threads(&screen->cache_get_thread, max_threads, false);
}
static bool
-equals_framebuffer_state(const void *a, const void *b)
+zink_is_parallel_shader_compilation_finished(struct pipe_screen *screen, void *shader, enum pipe_shader_type shader_type)
{
- struct zink_framebuffer_state *s = (struct zink_framebuffer_state*)a;
- return memcmp(a, b, offsetof(struct zink_framebuffer_state, attachments) + sizeof(s->attachments[0]) * s->num_attachments) == 0;
+ if (shader_type == MESA_SHADER_COMPUTE) {
+ struct zink_program *pg = shader;
+ return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence);
+ }
+
+ struct zink_shader *zs = shader;
+ if (!util_queue_fence_is_signalled(&zs->precompile.fence))
+ return false;
+ bool finished = true;
+ set_foreach(zs->programs, entry) {
+ struct zink_gfx_program *prog = (void*)entry->key;
+ finished &= util_queue_fence_is_signalled(&prog->base.cache_fence);
+ }
+ return finished;
}
static VkDeviceSize
@@ -139,19 +252,87 @@ get_video_mem(struct zink_screen *screen)
return size;
}
-static void
+/**
+ * Creates the disk cache used by mesa/st frontend for caching the GLSL -> NIR
+ * path.
+ *
+ * The output that gets stored in the frontend's cache is the result of
+ * zink_shader_finalize(). So, our sha1 cache key here needs to include
+ * everything that would change the NIR we generate from a given set of GLSL
+ * source, including our driver build, the Vulkan device and driver (which could
+ * affect the pipe caps we show the frontend), and any debug flags that change
+ * codegen.
+ *
+ * This disk cache also gets used by zink itself for storing its output from NIR
+ * -> SPIRV translation.
+ */
+static bool
disk_cache_init(struct zink_screen *screen)
{
+ if (zink_debug & ZINK_DEBUG_SHADERDB)
+ return true;
+
#ifdef ENABLE_SHADER_CACHE
- static char buf[1000];
- snprintf(buf, sizeof(buf), "zink_%x04x", screen->info.props.vendorID);
+ struct mesa_sha1 ctx;
+ _mesa_sha1_init(&ctx);
+
+#ifdef HAVE_DL_ITERATE_PHDR
+ /* Hash in the zink driver build. */
+ const struct build_id_note *note =
+ build_id_find_nhdr_for_addr(disk_cache_init);
+ unsigned build_id_len = build_id_length(note);
+ assert(note && build_id_len == 20); /* sha1 */
+ _mesa_sha1_update(&ctx, build_id_data(note), build_id_len);
+#endif
+
+ /* Hash in the Vulkan pipeline cache UUID to identify the combination of
+ * vulkan device and driver (or any inserted layer that would invalidate our
+ * cached pipelines).
+ *
+ * "Although they have identical descriptions, VkPhysicalDeviceIDProperties
+ * ::deviceUUID may differ from
+ * VkPhysicalDeviceProperties2::pipelineCacheUUID. The former is intended to
+ * identify and correlate devices across API and driver boundaries, while the
+ * latter is used to identify a compatible device and driver combination to
+ * use when serializing and de-serializing pipeline state."
+ */
+ _mesa_sha1_update(&ctx, screen->info.props.pipelineCacheUUID, VK_UUID_SIZE);
+
+ /* Hash in our debug flags that affect NIR generation as of finalize_nir */
+ unsigned shader_debug_flags = zink_debug & ZINK_DEBUG_COMPACT;
+ _mesa_sha1_update(&ctx, &shader_debug_flags, sizeof(shader_debug_flags));
+
+ /* Some of the driconf options change shaders. Let's just hash the whole
+ * thing to not forget any (especially as options get added).
+ */
+ _mesa_sha1_update(&ctx, &screen->driconf, sizeof(screen->driconf));
- screen->disk_cache = disk_cache_create(buf, screen->info.props.deviceName, 0);
- if (screen->disk_cache) {
- util_queue_init(&screen->cache_put_thread, "zcq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen);
- util_queue_init(&screen->cache_get_thread, "zcfq", 8, 4, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen);
+ /* EXT_shader_object causes different descriptor layouts for separate shaders */
+ _mesa_sha1_update(&ctx, &screen->info.have_EXT_shader_object, sizeof(screen->info.have_EXT_shader_object));
+
+ /* Finish the sha1 and format it as text. */
+ unsigned char sha1[20];
+ _mesa_sha1_final(&ctx, sha1);
+
+ char cache_id[20 * 2 + 1];
+ mesa_bytes_to_hex(cache_id, sha1, 20);
+
+ screen->disk_cache = disk_cache_create("zink", cache_id, 0);
+
+ if (!screen->disk_cache)
+ return true;
+
+ if (!util_queue_init(&screen->cache_put_thread, "zcq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen)) {
+ mesa_loge("zink: Failed to create disk cache queue\n");
+
+ disk_cache_destroy(screen->disk_cache);
+ screen->disk_cache = NULL;
+
+ return false;
}
#endif
+
+ return true;
}
@@ -161,30 +342,45 @@ cache_put_job(void *data, void *gdata, int thread_index)
struct zink_program *pg = data;
struct zink_screen *screen = gdata;
size_t size = 0;
- if (VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, NULL) != VK_SUCCESS)
+ u_rwlock_rdlock(&pg->pipeline_cache_lock);
+ VkResult result = VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, NULL);
+ if (result != VK_SUCCESS) {
+ u_rwlock_rdunlock(&pg->pipeline_cache_lock);
+ mesa_loge("ZINK: vkGetPipelineCacheData failed (%s)", vk_Result_to_str(result));
return;
- if (pg->pipeline_cache_size == size)
+ }
+ if (pg->pipeline_cache_size == size) {
+ u_rwlock_rdunlock(&pg->pipeline_cache_lock);
return;
+ }
void *pipeline_data = malloc(size);
- if (!pipeline_data)
+ if (!pipeline_data) {
+ u_rwlock_rdunlock(&pg->pipeline_cache_lock);
return;
- if (VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, pipeline_data) == VK_SUCCESS) {
+ }
+ result = VKSCR(GetPipelineCacheData)(screen->dev, pg->pipeline_cache, &size, pipeline_data);
+ u_rwlock_rdunlock(&pg->pipeline_cache_lock);
+ if (result == VK_SUCCESS) {
pg->pipeline_cache_size = size;
cache_key key;
disk_cache_compute_key(screen->disk_cache, pg->sha1, sizeof(pg->sha1), key);
disk_cache_put_nocopy(screen->disk_cache, key, pipeline_data, size, NULL);
+ } else {
+ mesa_loge("ZINK: vkGetPipelineCacheData failed (%s)", vk_Result_to_str(result));
}
}
void
-zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg)
+zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread)
{
- util_queue_fence_init(&pg->cache_fence);
- if (!screen->disk_cache)
+ if (!screen->disk_cache || !pg->pipeline_cache)
return;
- util_queue_add_job(&screen->cache_put_thread, pg, NULL, cache_put_job, NULL, 0);
+ if (in_thread)
+ cache_put_job(pg, screen, 0);
+ else if (util_queue_fence_is_signalled(&pg->cache_fence))
+ util_queue_add_job(&screen->cache_put_thread, pg, &pg->cache_fence, cache_put_job, NULL, 0);
}
static void
@@ -196,7 +392,7 @@ cache_get_job(void *data, void *gdata, int thread_index)
VkPipelineCacheCreateInfo pcci;
pcci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
pcci.pNext = NULL;
- pcci.flags = screen->info.have_EXT_pipeline_creation_cache_control ? VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT : 0;
+ pcci.flags = screen->info.have_EXT_pipeline_creation_cache_control ? VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT : 0;
pcci.initialDataSize = 0;
pcci.pInitialData = NULL;
@@ -204,18 +400,24 @@ cache_get_job(void *data, void *gdata, int thread_index)
disk_cache_compute_key(screen->disk_cache, pg->sha1, sizeof(pg->sha1), key);
pcci.pInitialData = disk_cache_get(screen->disk_cache, key, &pg->pipeline_cache_size);
pcci.initialDataSize = pg->pipeline_cache_size;
- VKSCR(CreatePipelineCache)(screen->dev, &pcci, NULL, &pg->pipeline_cache);
+
+ VkResult res = VKSCR(CreatePipelineCache)(screen->dev, &pcci, NULL, &pg->pipeline_cache);
+ if (res != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreatePipelineCache failed (%s)", vk_Result_to_str(res));
+ }
free((void*)pcci.pInitialData);
}
void
-zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg)
+zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread)
{
- util_queue_fence_init(&pg->cache_fence);
if (!screen->disk_cache)
return;
- util_queue_add_job(&screen->cache_get_thread, pg, &pg->cache_fence, cache_get_job, NULL, 0);
+ if (in_thread)
+ cache_get_job(pg, screen, 0);
+ else
+ util_queue_add_job(&screen->cache_get_thread, pg, &pg->cache_fence, cache_get_job, NULL, 0);
}
static int
@@ -231,7 +433,7 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
switch (param) {
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
- RET((uint32_t []){ 32 });
+ RET((uint32_t []){ 64 });
case PIPE_COMPUTE_CAP_IR_TARGET:
if (ret)
@@ -262,13 +464,21 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
- case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { screen->info.props11.subgroupSize });
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
- case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
- case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint64_t []) { screen->clamp_video_mem });
+
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+ RET((uint64_t []) { screen->total_video_mem });
+
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ // no way in vulkan to retrieve this information.
+ RET((uint32_t []) { 1 });
+
+ case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
// XXX: I think these are for Clover...
@@ -279,63 +489,131 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
}
}
+static uint32_t
+get_smallest_buffer_heap(struct zink_screen *screen)
+{
+ enum zink_heap heaps[] = {
+ ZINK_HEAP_DEVICE_LOCAL,
+ ZINK_HEAP_DEVICE_LOCAL_VISIBLE,
+ ZINK_HEAP_HOST_VISIBLE_COHERENT,
+ ZINK_HEAP_HOST_VISIBLE_COHERENT
+ };
+ unsigned size = UINT32_MAX;
+ for (unsigned i = 0; i < ARRAY_SIZE(heaps); i++) {
+ for (unsigned j = 0; j < screen->heap_count[i]; j++) {
+ unsigned heap_idx = screen->info.mem_props.memoryTypes[screen->heap_map[i][j]].heapIndex;
+ size = MIN2(screen->info.mem_props.memoryHeaps[heap_idx].size, size);
+ }
+ }
+ return size;
+}
+
+static inline bool
+have_fp32_filter_linear(struct zink_screen *screen)
+{
+ const VkFormat fp32_formats[] = {
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32B32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_D32_SFLOAT,
+ };
+ for (int i = 0; i < ARRAY_SIZE(fp32_formats); ++i) {
+ VkFormatProperties props;
+ VKSCR(GetPhysicalDeviceFormatProperties)(screen->pdev,
+ fp32_formats[i],
+ &props);
+ if (((props.linearTilingFeatures | props.optimalTilingFeatures) &
+ (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) ==
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) {
+ return false;
+ }
+ }
+ return true;
+}
+
static int
zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
struct zink_screen *screen = zink_screen(pscreen);
switch (param) {
+ case PIPE_CAP_NULL_TEXTURES:
+ return screen->info.rb_image_feats.robustImageAccess;
+ case PIPE_CAP_TEXRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARTIAL_STRIDE:
+ return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return screen->info.feats.features.samplerAnisotropy;
case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART:
return 1;
case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: {
- uint32_t modes = BITFIELD_BIT(PIPE_PRIM_LINE_STRIP) |
- BITFIELD_BIT(PIPE_PRIM_TRIANGLE_STRIP) |
- BITFIELD_BIT(PIPE_PRIM_LINE_STRIP_ADJACENCY) |
- BITFIELD_BIT(PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
+ uint32_t modes = BITFIELD_BIT(MESA_PRIM_LINE_STRIP) |
+ BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) |
+ BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) |
+ BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY);
if (screen->have_triangle_fans)
- modes |= BITFIELD_BIT(PIPE_PRIM_TRIANGLE_FAN);
+ modes |= BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN);
if (screen->info.have_EXT_primitive_topology_list_restart) {
- modes |= BITFIELD_BIT(PIPE_PRIM_POINTS) |
- BITFIELD_BIT(PIPE_PRIM_LINES) |
- BITFIELD_BIT(PIPE_PRIM_TRIANGLES) |
- BITFIELD_BIT(PIPE_PRIM_TRIANGLES_ADJACENCY);
+ modes |= BITFIELD_BIT(MESA_PRIM_POINTS) |
+ BITFIELD_BIT(MESA_PRIM_LINES) |
+ BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) |
+ BITFIELD_BIT(MESA_PRIM_TRIANGLES) |
+ BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY);
if (screen->info.list_restart_feats.primitiveTopologyPatchListRestart)
- modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES);
+ modes |= BITFIELD_BIT(MESA_PRIM_PATCHES);
}
return modes;
}
case PIPE_CAP_SUPPORTED_PRIM_MODES: {
- uint32_t modes = BITFIELD_MASK(PIPE_PRIM_MAX);
- modes &= ~BITFIELD_BIT(PIPE_PRIM_QUADS);
- modes &= ~BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP);
- modes &= ~BITFIELD_BIT(PIPE_PRIM_POLYGON);
- modes &= ~BITFIELD_BIT(PIPE_PRIM_LINE_LOOP);
+ uint32_t modes = BITFIELD_MASK(MESA_PRIM_COUNT);
if (!screen->have_triangle_fans)
- modes &= ~BITFIELD_BIT(PIPE_PRIM_TRIANGLE_FAN);
+ modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS);
+ modes &= ~BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
+ modes &= ~BITFIELD_BIT(MESA_PRIM_POLYGON);
+ modes &= ~BITFIELD_BIT(MESA_PRIM_LINE_LOOP);
+ if (!screen->have_triangle_fans)
+ modes &= ~BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN);
return modes;
}
case PIPE_CAP_FBFETCH:
return 1;
-
+ case PIPE_CAP_FBFETCH_COHERENT:
+ return screen->info.have_EXT_rasterization_order_attachment_access;
+
+ case PIPE_CAP_MEMOBJ:
+ return screen->instance_info.have_KHR_external_memory_capabilities && (screen->info.have_KHR_external_memory_fd || screen->info.have_KHR_external_memory_win32);
+ case PIPE_CAP_FENCE_SIGNAL:
+ return screen->info.have_KHR_external_semaphore_fd || screen->info.have_KHR_external_semaphore_win32;
+ case PIPE_CAP_NATIVE_FENCE_FD:
+ return screen->instance_info.have_KHR_external_semaphore_capabilities && screen->info.have_KHR_external_semaphore_fd;
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ return screen->info.have_EXT_external_memory_host;
+
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ return screen->info.have_vulkan11 || screen->info.have_KHR_maintenance2;
+
+ case PIPE_CAP_VALIDATE_ALL_DIRTY_STATES:
+ case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+ case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_QUERY_MEMORY_INFO:
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
- case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
- case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
- case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+ case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_TEXTURE_QUERY_SAMPLES:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_GL_SPIRV:
@@ -343,20 +621,36 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0:
case PIPE_CAP_PACKED_UNIFORMS:
- case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
+ case PIPE_CAP_CULL_DISTANCE_NOCOMBINE:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_ALLOW_GLTHREAD_BUFFER_SUBDATA_OPT:
return 1;
- case PIPE_CAP_DRAW_PARAMETERS:
- return screen->info.feats11.shaderDrawParameters || screen->info.have_KHR_shader_draw_parameters;
+ case PIPE_CAP_DRAW_VERTEX_STATE:
+ return screen->info.have_EXT_vertex_input_dynamic_state;
- case PIPE_CAP_TGSI_VOTE:
- return screen->spirv_version >= SPIRV_VERSION(1, 3);
+ case PIPE_CAP_SURFACE_SAMPLE_COUNT:
+ return screen->vk_version >= VK_MAKE_VERSION(1,2,0);
+ case PIPE_CAP_SHADER_GROUP_VOTE:
+ if (screen->info.have_vulkan11 &&
+ (screen->info.subgroup.supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) &&
+ (screen->info.subgroup.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT))
+ return true;
+ if (screen->info.have_EXT_shader_subgroup_vote)
+ return true;
+ return false;
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
- return screen->info.have_EXT_provoking_vertex;
+ return 1;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
- return screen->info.have_KHR_sampler_mirror_clamp_to_edge;
+ return screen->info.have_KHR_sampler_mirror_clamp_to_edge || (screen->info.have_vulkan12 && screen->info.feats12.samplerMirrorClampToEdge);
+
+ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+ return 1;
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
return screen->info.feats.features.depthBiasClamp;
@@ -365,15 +659,26 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return screen->info.feats.features.pipelineStatisticsQuery;
case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
- return screen->info.feats.features.robustBufferAccess;
+ return screen->info.feats.features.robustBufferAccess &&
+ (screen->info.rb2_feats.robustImageAccess2 || screen->driver_workarounds.lower_robustImageAccess2);
case PIPE_CAP_MULTI_DRAW_INDIRECT:
return screen->info.feats.features.multiDrawIndirect;
+ case PIPE_CAP_IMAGE_ATOMIC_FLOAT_ADD:
+ return (screen->info.have_EXT_shader_atomic_float &&
+ screen->info.atomic_float_feats.shaderSharedFloat32AtomicAdd &&
+ screen->info.atomic_float_feats.shaderBufferFloat32AtomicAdd);
+ case PIPE_CAP_SHADER_ATOMIC_INT64:
+ return (screen->info.have_KHR_shader_atomic_int64 &&
+ screen->info.atomic_int_feats.shaderSharedInt64Atomics &&
+ screen->info.atomic_int_feats.shaderBufferInt64Atomics);
+
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
return screen->info.have_KHR_draw_indirect_count;
case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_DRAW_PARAMETERS:
return (screen->info.have_vulkan12 && screen->info.feats11.shaderDrawParameters) ||
screen->info.have_KHR_shader_draw_parameters;
@@ -383,8 +688,10 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_VERTEX_STREAMS:
return screen->info.tf_props.maxTransformFeedbackStreams;
+ case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
+ return screen->info.have_NV_compute_shader_derivatives;
+
case PIPE_CAP_INT64:
- case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_DOUBLES:
return 1;
@@ -411,14 +718,22 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
return screen->info.have_EXT_fragment_shader_interlock;
- case PIPE_CAP_TGSI_CLOCK:
+ case PIPE_CAP_SHADER_CLOCK:
return screen->info.have_KHR_shader_clock;
- case PIPE_CAP_POINT_SPRITE:
- return 1;
+ case PIPE_CAP_SHADER_BALLOT:
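+ /* subgroups wider than 64 invocations can't be exposed; otherwise use core subgroup ballot or the EXT fallback */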
+ if (screen->info.props11.subgroupSize > 64)
+ return false;
+ if (screen->info.have_vulkan11 &&
+ screen->info.subgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT)
+ return true;
+ if (screen->info.have_EXT_shader_subgroup_ballot)
+ return true;
+ return false;
- case PIPE_CAP_TGSI_BALLOT:
- return screen->vk_version >= VK_MAKE_VERSION(1,2,0) && screen->info.props11.subgroupSize <= 64;
+ case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
+ return screen->spirv_version >= SPIRV_VERSION(1, 6) ||
+ screen->info.have_EXT_shader_demote_to_helper_invocation;
case PIPE_CAP_SAMPLE_SHADING:
return screen->info.feats.features.sampleRateShading;
@@ -426,20 +741,33 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
+ case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
+ return 1;
+
case PIPE_CAP_GL_CLAMP:
return 0;
- case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
- /* This is also broken on the other AMD drivers for old HW, but
- * there's no obvious way to test for that.
+ case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
+ return 0; /* Assume that the vk driver is capable of moving imm arrays to some sort of constant storage on its own. */
+
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: {
+ enum pipe_quirk_texture_border_color_swizzle quirk = PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_ALPHA_NOT_W;
+ if (!screen->info.border_color_feats.customBorderColorWithoutFormat)
+ return quirk | PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
+ /* assume that if drivers don't implement this extension they either:
+ * - don't support custom border colors
+ * - handle things correctly
+ * - hate border color accuracy
*/
- if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
- screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
- return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
- return 0;
+ if (screen->info.have_EXT_border_color_swizzle &&
+ !screen->info.border_swizzle_feats.borderColorSwizzleFromImage)
+ return quirk | PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
+ return quirk;
+ }
case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
- return screen->info.props.limits.maxImageDimension2D;
+ return MIN2(screen->info.props.limits.maxImageDimension1D,
+ screen->info.props.limits.maxImageDimension2D);
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 1 + util_logbase2(screen->info.props.limits.maxImageDimension3D);
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
@@ -447,7 +775,6 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
- case PIPE_CAP_VERTEX_SHADER_SATURATE:
return 1;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -455,6 +782,9 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
return screen->info.feats.features.independentBlend;
+ case PIPE_CAP_DITHERING:
+ return 0;
+
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return screen->info.have_EXT_transform_feedback ? screen->info.tf_props.maxTransformFeedbackBuffers : 0;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
@@ -465,13 +795,12 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return screen->info.props.limits.maxImageArrayLayers;
case PIPE_CAP_DEPTH_CLIP_DISABLE:
- return screen->info.feats.features.depthClamp;
+ return screen->info.have_EXT_depth_clip_enable;
case PIPE_CAP_SHADER_STENCIL_EXPORT:
return screen->info.have_EXT_shader_stencil_export;
- case PIPE_CAP_TGSI_INSTANCEID:
- case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_VS_INSTANCEID:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1;
@@ -497,11 +826,16 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return screen->info.props.limits.minUniformBufferOffsetAlignment;
case PIPE_CAP_QUERY_TIMESTAMP:
- return screen->info.have_EXT_calibrated_timestamps &&
- screen->timestamp_valid_bits > 0;
+ return screen->timestamp_valid_bits > 0;
+
+ case PIPE_CAP_QUERY_TIMESTAMP_BITS:
+ return screen->timestamp_valid_bits;
+
+ case PIPE_CAP_TIMER_RESOLUTION:
+ return ceil(screen->info.props.limits.timestampPeriod);
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return screen->info.props.limits.minMemoryMapAlignment;
+ return 1 << MIN_SLAB_ORDER;
case PIPE_CAP_CUBE_MAP_ARRAY:
return screen->info.feats.features.imageCubeArray;
@@ -510,14 +844,30 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PRIMITIVE_RESTART:
return 1;
+ case PIPE_CAP_BINDLESS_TEXTURE:
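+ /* descriptor-buffer mode needs at least two resource and two sampler descriptor buffer bindings */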
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
+ (screen->info.db_props.maxDescriptorBufferBindings < 2 || screen->info.db_props.maxSamplerDescriptorBufferBindings < 2))
+ return 0;
+ return screen->info.have_EXT_descriptor_indexing;
+
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return screen->info.props.limits.minTexelBufferOffsetAlignment;
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- return 1;
+ case PIPE_CAP_TEXTURE_TRANSFER_MODES: {
+ enum pipe_texture_transfer_mode mode = PIPE_TEXTURE_TRANSFER_BLIT;
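+ /* blit transfers are always available; compute-based transfers additionally require 8bit/16bit storage and float16/int8 support */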
+ if (!screen->is_cpu &&
+ /* this needs substantial perf tuning */
+ screen->info.driver_props.driverID != VK_DRIVER_ID_MESA_TURNIP &&
+ screen->info.have_KHR_8bit_storage &&
+ screen->info.have_KHR_16bit_storage &&
+ screen->info.have_KHR_shader_float16_int8)
+ mode |= PIPE_TEXTURE_TRANSFER_COMPUTE;
+ return mode;
+ }
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- return screen->info.props.limits.maxTexelBufferElements;
+ case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
+ return MIN2(get_smallest_buffer_heap(screen),
+ screen->info.props.limits.maxTexelBufferElements);
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_NATIVE; /* unsure */
@@ -528,6 +878,9 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_IMAGE_LOAD_FORMATTED:
return screen->info.feats.features.shaderStorageImageReadWithoutFormat;
+ case PIPE_CAP_IMAGE_STORE_FORMATTED:
+ return screen->info.feats.features.shaderStorageImageWriteWithoutFormat;
+
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
return 1;
@@ -545,9 +898,13 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return screen->info.props.limits.maxTexelGatherOffset;
case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB:
- return screen->vk_version >= VK_MAKE_VERSION(1,2,0) || screen->info.have_EXT_sampler_filter_minmax;
+ return screen->info.feats12.samplerFilterMinmax || screen->info.have_EXT_sampler_filter_minmax;
- case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS:
+ case PIPE_CAP_INTEGER_MULTIPLY_32X16:
+ return screen->info.have_INTEL_shader_integer_functions2;
+
+ case PIPE_CAP_FS_FINE_DERIVATIVE:
return 1;
case PIPE_CAP_VENDOR_ID:
@@ -556,7 +913,7 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return screen->info.props.deviceID;
case PIPE_CAP_ACCELERATED:
- return 1;
+ return !screen->is_cpu;
case PIPE_CAP_VIDEO_MEMORY:
return get_video_mem(screen) >> 20;
case PIPE_CAP_UMA:
@@ -568,14 +925,16 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SAMPLER_VIEW_TARGET:
return 1;
- case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
- case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+ case PIPE_CAP_VS_LAYER_VIEWPORT:
+ case PIPE_CAP_TES_LAYER_VIEWPORT:
return screen->info.have_EXT_shader_viewport_index_layer ||
(screen->spirv_version >= SPIRV_VERSION(1, 5) &&
screen->info.feats12.shaderOutputLayer &&
screen->info.feats12.shaderOutputViewportIndex);
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ return have_fp32_filter_linear(screen);
+
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
@@ -592,15 +951,31 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return screen->info.feats.features.shaderCullDistance;
case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
-
- return screen->info.feats.features.sparseBinding ? ZINK_SPARSE_BUFFER_PAGE_SIZE : 0;
+ return screen->info.feats.features.sparseResidencyBuffer ? ZINK_SPARSE_BUFFER_PAGE_SIZE : 0;
+
+ /* Sparse texture */
+ case PIPE_CAP_MAX_SPARSE_TEXTURE_SIZE:
+ return screen->info.feats.features.sparseResidencyImage2D ?
+ zink_get_param(pscreen, PIPE_CAP_MAX_TEXTURE_2D_SIZE) : 0;
+ case PIPE_CAP_MAX_SPARSE_3D_TEXTURE_SIZE:
+ return screen->info.feats.features.sparseResidencyImage3D ?
+ (1 << (zink_get_param(pscreen, PIPE_CAP_MAX_TEXTURE_3D_LEVELS) - 1)) : 0;
+ case PIPE_CAP_MAX_SPARSE_ARRAY_TEXTURE_LAYERS:
+ return screen->info.feats.features.sparseResidencyImage2D ?
+ zink_get_param(pscreen, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS) : 0;
+ case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS:
+ return screen->info.feats.features.sparseResidencyImage2D ? 1 : 0;
+ case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY:
+ return screen->info.feats.features.sparseResidency2Samples &&
+ screen->info.feats.features.shaderResourceResidency ? 1 : 0;
+ case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD:
+ return screen->info.feats.features.shaderResourceMinLod &&
+ screen->info.feats.features.sparseResidency2Samples &&
+ screen->info.feats.features.shaderResourceResidency ? 1 : 0;
case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
return screen->info.props.limits.viewportSubPixelBits;
- case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
- return 0; /* not sure */
-
case PIPE_CAP_MAX_GS_INVOCATIONS:
return screen->info.props.limits.maxGeometryShaderInvocations;
@@ -608,44 +983,51 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* gallium handles this automatically */
return 0;
- case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
+ case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT:
/* 1<<27 is required by VK spec */
assert(screen->info.props.limits.maxStorageBufferRange >= 1 << 27);
- /* but Gallium can't handle values that are too big, so clamp to VK spec minimum */
- return 1 << 27;
+ /* but clamp to the smallest buffer heap size as well */
+ return MIN2(get_smallest_buffer_heap(screen), screen->info.props.limits.maxStorageBufferRange);
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
- case PIPE_CAP_NIR_COMPACT_ARRAYS:
- return 1;
-
- case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_FS_POINT_IS_SYSVAL:
return 1;
case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:
return 1;
+ case PIPE_CAP_POINT_SIZE_FIXED:
+ return screen->info.have_KHR_maintenance5 ? PIPE_POINT_SIZE_LOWER_USER_ONLY : PIPE_POINT_SIZE_LOWER_ALWAYS;
case PIPE_CAP_FLATSHADE:
case PIPE_CAP_ALPHA_TEST:
case PIPE_CAP_CLIP_PLANES:
- case PIPE_CAP_POINT_SIZE_FIXED:
case PIPE_CAP_TWO_SIDED_COLOR:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- return screen->info.props.limits.maxTessellationControlPerVertexOutputComponents / 4;
+ return screen->info.props.limits.maxTessellationControlPerPatchOutputComponents / 4;
case PIPE_CAP_MAX_VARYINGS:
/* need to reserve up to 60 of our varying components and 16 slots for streamout */
return MIN2(screen->info.props.limits.maxVertexOutputComponents / 4 / 2, 16);
case PIPE_CAP_DMABUF:
- return screen->info.have_KHR_external_memory_fd && screen->info.have_EXT_external_memory_dma_buf && screen->info.have_EXT_queue_family_foreign;
+#if defined(HAVE_LIBDRM) && (DETECT_OS_LINUX || DETECT_OS_BSD)
+ return screen->info.have_KHR_external_memory_fd &&
+ screen->info.have_EXT_external_memory_dma_buf &&
+ screen->info.have_EXT_queue_family_foreign
+ ? DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT
+ : 0;
+#else
+ return 0;
+#endif
case PIPE_CAP_DEPTH_BOUNDS_TEST:
return screen->info.feats.features.depthBounds;
@@ -667,14 +1049,38 @@ zink_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
struct zink_screen *screen = zink_screen(pscreen);
switch (param) {
+ case PIPE_CAPF_MIN_LINE_WIDTH:
+ case PIPE_CAPF_MIN_LINE_WIDTH_AA:
+ if (!screen->info.feats.features.wideLines)
+ return 1.0f;
+ return MAX2(screen->info.props.limits.lineWidthRange[0], 0.01);
+
+ case PIPE_CAPF_MIN_POINT_SIZE:
+ case PIPE_CAPF_MIN_POINT_SIZE_AA:
+ if (!screen->info.feats.features.largePoints)
+ return 1.0f;
+ return MAX2(screen->info.props.limits.pointSizeRange[0], 0.01);
+
+ case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
+ if (!screen->info.feats.features.wideLines)
+ return 0.1f;
+ return screen->info.props.limits.lineWidthGranularity;
+
+ case PIPE_CAPF_POINT_SIZE_GRANULARITY:
+ if (!screen->info.feats.features.largePoints)
+ return 0.1f;
+ return screen->info.props.limits.pointSizeGranularity;
+
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
if (!screen->info.feats.features.wideLines)
return 1.0f;
return screen->info.props.limits.lineWidthRange[1];
- case PIPE_CAPF_MAX_POINT_WIDTH:
- case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ case PIPE_CAPF_MAX_POINT_SIZE:
+ case PIPE_CAPF_MAX_POINT_SIZE_AA:
if (!screen->info.feats.features.largePoints)
return 1.0f;
return screen->info.props.limits.pointSizeRange[1];
@@ -699,7 +1105,7 @@ zink_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
static int
zink_get_shader_param(struct pipe_screen *pscreen,
- enum pipe_shader_type shader,
+ gl_shader_stage shader,
enum pipe_shader_cap param)
{
struct zink_screen *screen = zink_screen(pscreen);
@@ -707,22 +1113,22 @@ zink_get_shader_param(struct pipe_screen *pscreen,
switch (param) {
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
switch (shader) {
- case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_VERTEX:
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_VERTEX:
return INT_MAX;
- case PIPE_SHADER_TESS_CTRL:
- case PIPE_SHADER_TESS_EVAL:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
if (screen->info.feats.features.tessellationShader &&
screen->info.have_KHR_maintenance2)
return INT_MAX;
break;
- case PIPE_SHADER_GEOMETRY:
+ case MESA_SHADER_GEOMETRY:
if (screen->info.feats.features.geometryShader)
return INT_MAX;
break;
- case PIPE_SHADER_COMPUTE:
+ case MESA_SHADER_COMPUTE:
return INT_MAX;
default:
break;
@@ -737,49 +1143,59 @@ zink_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_INPUTS: {
uint32_t max = 0;
switch (shader) {
- case PIPE_SHADER_VERTEX:
+ case MESA_SHADER_VERTEX:
max = MIN2(screen->info.props.limits.maxVertexInputAttributes, PIPE_MAX_ATTRIBS);
break;
- case PIPE_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_CTRL:
max = screen->info.props.limits.maxTessellationControlPerVertexInputComponents / 4;
break;
- case PIPE_SHADER_TESS_EVAL:
+ case MESA_SHADER_TESS_EVAL:
max = screen->info.props.limits.maxTessellationEvaluationInputComponents / 4;
break;
- case PIPE_SHADER_GEOMETRY:
- max = screen->info.props.limits.maxGeometryInputComponents;
+ case MESA_SHADER_GEOMETRY:
+ max = screen->info.props.limits.maxGeometryInputComponents / 4;
break;
- case PIPE_SHADER_FRAGMENT:
+ case MESA_SHADER_FRAGMENT:
/* intel drivers report fewer components, but it's a value that's compatible
* with what we need for GL, so we can still force a conformant value here
*/
- if (screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR ||
- screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR)
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
+ (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_VENUS
+ && screen->info.props.vendorID == 0x8086))
return 32;
max = screen->info.props.limits.maxFragmentInputComponents / 4;
break;
default:
return 0; /* unsupported stage */
}
+ switch (shader) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ /* last vertex stage must support streamout, and this is capped in glsl compiler */
+ return MIN2(max, MAX_VARYING);
+ default: break;
+ }
return MIN2(max, 64); // prevent overflowing struct shader_info::inputs_read
}
case PIPE_SHADER_CAP_MAX_OUTPUTS: {
uint32_t max = 0;
switch (shader) {
- case PIPE_SHADER_VERTEX:
+ case MESA_SHADER_VERTEX:
max = screen->info.props.limits.maxVertexOutputComponents / 4;
break;
- case PIPE_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_CTRL:
max = screen->info.props.limits.maxTessellationControlPerVertexOutputComponents / 4;
break;
- case PIPE_SHADER_TESS_EVAL:
+ case MESA_SHADER_TESS_EVAL:
max = screen->info.props.limits.maxTessellationEvaluationOutputComponents / 4;
break;
- case PIPE_SHADER_GEOMETRY:
+ case MESA_SHADER_GEOMETRY:
max = screen->info.props.limits.maxGeometryOutputComponents / 4;
break;
- case PIPE_SHADER_FRAGMENT:
+ case MESA_SHADER_FRAGMENT:
max = screen->info.props.limits.maxColorAttachments;
break;
default:
@@ -788,11 +1204,12 @@ zink_get_shader_param(struct pipe_screen *pscreen,
return MIN2(max, 64); // prevent overflowing struct shader_info::outputs_read/written
}
- case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
/* At least 16384 is guaranteed by VK spec */
assert(screen->info.props.limits.maxUniformBufferRange >= 16384);
/* but Gallium can't handle values that are too big */
- return MIN2(screen->info.props.limits.maxUniformBufferRange, 1 << 31);
+ return MIN3(get_smallest_buffer_heap(screen),
+ screen->info.props.limits.maxUniformBufferRange, BITFIELD_BIT(31));
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return MIN2(screen->info.props.limits.maxPerStageDescriptorUniformBuffers,
@@ -805,22 +1222,23 @@ zink_get_shader_param(struct pipe_screen *pscreen,
return 1;
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
- return 1;
-
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ return 1;
+
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
return 0; /* not implemented */
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
- return screen->info.feats11.uniformAndStorageBuffer16BitAccess ||
- (screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess);
+ //enabling this breaks GTF-GL46.gtf21.GL2Tests.glGetUniform.glGetUniform
+ //return screen->info.feats11.uniformAndStorageBuffer16BitAccess ||
+ //(screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.uniformAndStorageBuffer16BitAccess);
+ return 0;
case PIPE_SHADER_CAP_FP16_DERIVATIVES:
- return screen->info.feats11.storageInputOutput16 ||
- (screen->info.have_KHR_16bit_storage && screen->info.storage_16bit_feats.storageInputOutput16);
+ return 0; //spirv requires 32bit derivative srcs and dests
case PIPE_SHADER_CAP_FP16:
return screen->info.feats12.shaderFloat16 ||
(screen->info.have_KHR_shader_float16_int8 &&
@@ -829,9 +1247,6 @@ zink_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_INT16:
return screen->info.feats.features.shaderInt16;
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_NIR;
-
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 0; /* not implemented */
@@ -841,28 +1256,20 @@ zink_get_shader_param(struct pipe_screen *pscreen,
screen->info.props.limits.maxPerStageDescriptorSampledImages),
PIPE_MAX_SAMPLERS);
- case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
- return 0; /* not implemented */
-
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0; /* no idea */
- case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
- return 0;
-
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
switch (shader) {
- case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_TESS_CTRL:
- case PIPE_SHADER_TESS_EVAL:
- case PIPE_SHADER_GEOMETRY:
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
if (!screen->info.feats.features.vertexPipelineStoresAndAtomics)
return 0;
break;
- case PIPE_SHADER_FRAGMENT:
+ case MESA_SHADER_FRAGMENT:
if (!screen->info.feats.features.fragmentStoresAndAtomics)
return 0;
break;
@@ -881,18 +1288,14 @@ zink_get_shader_param(struct pipe_screen *pscreen,
if (screen->info.feats.features.shaderStorageImageExtendedFormats &&
screen->info.feats.features.shaderStorageImageWriteWithoutFormat)
return MIN2(screen->info.props.limits.maxPerStageDescriptorStorageImages,
- PIPE_MAX_SHADER_IMAGES);
+ ZINK_MAX_SHADER_IMAGES);
return 0;
- case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
- case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
- return 0; /* unsure */
-
- case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
- case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 0; /* not implemented */
+ case PIPE_SHADER_CAP_CONT_SUPPORTED:
+ return 1;
}
/* should only get here on unhandled cases */
@@ -916,6 +1319,23 @@ vk_sample_count_flags(uint32_t sample_count)
}
static bool
+zink_is_compute_copy_faster(struct pipe_screen *pscreen,
+ enum pipe_format src_format,
+ enum pipe_format dst_format,
+ unsigned width,
+ unsigned height,
+ unsigned depth,
+ bool cpu)
+{
+ if (cpu)
+ /* very basic heuristic for now; probably worse in some cases,
+ * but it fixes lots of others
+ */
+ return width * height * depth > 64 * 64;
+ return false;
+}
+
+static bool
zink_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
@@ -925,6 +1345,9 @@ zink_is_format_supported(struct pipe_screen *pscreen,
{
struct zink_screen *screen = zink_screen(pscreen);
+ if (storage_sample_count && !screen->info.feats.features.shaderStorageImageMultisample && bind & PIPE_BIND_SHADER_IMAGE)
+ return false;
+
if (format == PIPE_FORMAT_NONE)
return screen->info.props.limits.framebufferNoAttachmentsSampleCounts &
vk_sample_count_flags(sample_count);
@@ -939,7 +1362,8 @@ zink_is_format_supported(struct pipe_screen *pscreen,
return false;
}
- VkFormat vkformat = zink_get_format(screen, format);
+ /* always use superset to determine feature support */
+ VkFormat vkformat = zink_get_format(screen, PIPE_FORMAT_A8_UNORM ? zink_format_get_emulated_alpha(format) : format);
if (vkformat == VK_FORMAT_UNDEFINED)
return false;
@@ -984,9 +1408,80 @@ zink_is_format_supported(struct pipe_screen *pscreen,
if (!(screen->info.props.limits.storageImageSampleCounts & sample_mask))
return false;
}
+ VkResult ret;
+ VkImageFormatProperties image_props;
+ VkImageFormatProperties2 props2;
+ props2.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2;
+ props2.pNext = NULL;
+ VkPhysicalDeviceImageFormatInfo2 info;
+ info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2;
+ info.pNext = NULL;
+ info.format = vkformat;
+ info.flags = 0;
+ info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ info.tiling = VK_IMAGE_TILING_OPTIMAL;
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY: {
+ bool need_2D = false;
+ if (util_format_is_depth_or_stencil(format))
+ need_2D |= screen->need_2D_zs;
+ info.type = need_2D ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
+ break;
+ }
+
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
+ FALLTHROUGH;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_RECT:
+ info.type = VK_IMAGE_TYPE_2D;
+ break;
+
+ case PIPE_TEXTURE_3D:
+ info.type = VK_IMAGE_TYPE_3D;
+ if (bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL))
+ info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
+ if (screen->info.have_EXT_image_2d_view_of_3d)
+ info.flags |= VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT;
+ break;
+
+ default:
+ unreachable("unknown texture target");
+ }
+ u_foreach_bit(b, bind) {
+ switch (1<<b) {
+ case PIPE_BIND_RENDER_TARGET:
+ info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ break;
+ case PIPE_BIND_DEPTH_STENCIL:
+ info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ break;
+ case PIPE_BIND_SAMPLER_VIEW:
+ info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ break;
+ }
+ }
+
+ if (VKSCR(GetPhysicalDeviceImageFormatProperties2)) {
+ ret = VKSCR(GetPhysicalDeviceImageFormatProperties2)(screen->pdev, &info, &props2);
+ /* this is using VK_IMAGE_CREATE_EXTENDED_USAGE_BIT and can't be validated */
+ if (vk_format_aspects(vkformat) & VK_IMAGE_ASPECT_PLANE_1_BIT)
+ ret = VK_SUCCESS;
+ image_props = props2.imageFormatProperties;
+ } else {
+ ret = VKSCR(GetPhysicalDeviceImageFormatProperties)(screen->pdev, vkformat, info.type,
+ info.tiling, info.usage, info.flags, &image_props);
+ }
+ if (ret != VK_SUCCESS)
+ return false;
+ if (!(sample_count & image_props.sampleCounts))
+ return false;
}
- VkFormatProperties props = screen->format_props[format];
+ struct zink_format_props props = screen->format_props[format];
if (target == PIPE_BUFFER) {
if (bind & PIPE_BIND_VERTEX_BUFFER) {
@@ -1041,14 +1536,23 @@ zink_is_format_supported(struct pipe_screen *pscreen,
return false;
}
- if (util_format_is_compressed(format)) {
- const struct util_format_description *desc = util_format_description(format);
- if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC &&
- !screen->info.feats.features.textureCompressionBC)
- return false;
+ return true;
+}
+
+static void
+zink_set_damage_region(struct pipe_screen *pscreen, struct pipe_resource *pres, unsigned int nrects, const struct pipe_box *rects)
+{
+ struct zink_resource *res = zink_resource(pres);
+
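+ /* flip rects from a y-up origin to y-down and accumulate a single bounding damage region */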
+ for (unsigned i = 0; i < nrects; i++) {
+ int y = pres->height0 - rects[i].y - rects[i].height;
+ res->damage.extent.width = MAX2(res->damage.extent.width, rects[i].x + rects[i].width);
+ res->damage.extent.height = MAX2(res->damage.extent.height, y + rects[i].height);
+ res->damage.offset.x = MIN2(res->damage.offset.x, rects[i].x);
+ res->damage.offset.y = MIN2(res->damage.offset.y, y);
}
- return true;
+ res->use_damage = nrects > 0;
}
static void
@@ -1056,95 +1560,207 @@ zink_destroy_screen(struct pipe_screen *pscreen)
{
struct zink_screen *screen = zink_screen(pscreen);
+#ifdef HAVE_RENDERDOC_APP_H
+ if (screen->renderdoc_capture_all && p_atomic_dec_zero(&num_screens))
+ screen->renderdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL);
+#endif
+
+ hash_table_foreach(&screen->dts, entry)
+ zink_kopper_deinit_displaytarget(screen, entry->data);
+
+ if (screen->copy_context)
+ screen->copy_context->base.destroy(&screen->copy_context->base);
+
+ struct zink_batch_state *bs = screen->free_batch_states;
+ while (bs) {
+ struct zink_batch_state *bs_next = bs->next;
+ zink_batch_state_destroy(screen, bs);
+ bs = bs_next;
+ }
+
if (VK_NULL_HANDLE != screen->debugUtilsCallbackHandle) {
VKSCR(DestroyDebugUtilsMessengerEXT)(screen->instance, screen->debugUtilsCallbackHandle, NULL);
}
- if (!screen->info.have_KHR_imageless_framebuffer) {
- hash_table_foreach(&screen->framebuffer_cache, entry) {
- struct zink_framebuffer* fb = (struct zink_framebuffer*)entry->data;
- zink_destroy_framebuffer(screen, fb);
- }
- simple_mtx_destroy(&screen->framebuffer_mtx);
- }
+ util_vertex_state_cache_deinit(&screen->vertex_state_cache);
+
+ if (screen->gfx_push_constant_layout)
+ VKSCR(DestroyPipelineLayout)(screen->dev, screen->gfx_push_constant_layout, NULL);
u_transfer_helper_destroy(pscreen->transfer_helper);
+ if (util_queue_is_initialized(&screen->cache_get_thread)) {
+ util_queue_finish(&screen->cache_get_thread);
+ util_queue_destroy(&screen->cache_get_thread);
+ }
#ifdef ENABLE_SHADER_CACHE
- if (screen->disk_cache) {
+ if (screen->disk_cache && util_queue_is_initialized(&screen->cache_put_thread)) {
util_queue_finish(&screen->cache_put_thread);
- util_queue_finish(&screen->cache_get_thread);
disk_cache_wait_for_idle(screen->disk_cache);
util_queue_destroy(&screen->cache_put_thread);
- util_queue_destroy(&screen->cache_get_thread);
}
#endif
disk_cache_destroy(screen->disk_cache);
+
+ /* we don't have an API to check if a set is already initialized */
+ for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs); i++)
+ if (screen->pipeline_libs[i].table)
+ _mesa_set_clear(&screen->pipeline_libs[i], NULL);
+
zink_bo_deinit(screen);
util_live_shader_cache_deinit(&screen->shaders);
+ zink_descriptor_layouts_deinit(screen);
+
if (screen->sem)
VKSCR(DestroySemaphore)(screen->dev, screen->sem, NULL);
- if (screen->prev_sem)
- VKSCR(DestroySemaphore)(screen->dev, screen->prev_sem, NULL);
- if (screen->threaded)
+ if (screen->fence)
+ VKSCR(DestroyFence)(screen->dev, screen->fence, NULL);
+
+ if (util_queue_is_initialized(&screen->flush_queue))
util_queue_destroy(&screen->flush_queue);
- VKSCR(DestroyDevice)(screen->dev, NULL);
- vkDestroyInstance(screen->instance, NULL);
+ while (util_dynarray_contains(&screen->semaphores, VkSemaphore))
+ VKSCR(DestroySemaphore)(screen->dev, util_dynarray_pop(&screen->semaphores, VkSemaphore), NULL);
+ while (util_dynarray_contains(&screen->fd_semaphores, VkSemaphore))
+ VKSCR(DestroySemaphore)(screen->dev, util_dynarray_pop(&screen->fd_semaphores, VkSemaphore), NULL);
+ if (screen->bindless_layout)
+ VKSCR(DestroyDescriptorSetLayout)(screen->dev, screen->bindless_layout, NULL);
+
+ if (screen->dev)
+ VKSCR(DestroyDevice)(screen->dev, NULL);
+
+ if (screen->instance)
+ VKSCR(DestroyInstance)(screen->instance, NULL);
+
util_idalloc_mt_fini(&screen->buffer_ids);
+ if (screen->loader_lib)
+ util_dl_close(screen->loader_lib);
+
if (screen->drm_fd != -1)
close(screen->drm_fd);
slab_destroy_parent(&screen->transfer_pool);
+ slab_destroy(&screen->present_mempool);
ralloc_free(screen);
+ glsl_type_singleton_decref();
}
-static void
-choose_pdev(struct zink_screen *screen)
+static int
+zink_get_display_device(const struct zink_screen *screen, uint32_t pdev_count,
+ const VkPhysicalDevice *pdevs, int64_t dev_major,
+ int64_t dev_minor)
{
- uint32_t i, pdev_count;
- VkPhysicalDevice *pdevs;
- VkResult result = vkEnumeratePhysicalDevices(screen->instance, &pdev_count, NULL);
- if (result != VK_SUCCESS)
- return;
+ VkPhysicalDeviceDrmPropertiesEXT drm_props = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT,
+ };
+ VkPhysicalDeviceProperties2 props = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+ .pNext = &drm_props,
+ };
+
+ for (uint32_t i = 0; i < pdev_count; ++i) {
+ VKSCR(GetPhysicalDeviceProperties2)(pdevs[i], &props);
+ if (drm_props.renderMajor == dev_major &&
+ drm_props.renderMinor == dev_minor)
+ return i;
+ }
+
+ return -1;
+}
- assert(pdev_count > 0);
+static int
+zink_get_cpu_device_type(const struct zink_screen *screen, uint32_t pdev_count,
+ const VkPhysicalDevice *pdevs)
+{
+ VkPhysicalDeviceProperties props;
- pdevs = malloc(sizeof(*pdevs) * pdev_count);
- result = vkEnumeratePhysicalDevices(screen->instance, &pdev_count, pdevs);
- assert(result == VK_SUCCESS);
- assert(pdev_count > 0);
+ for (uint32_t i = 0; i < pdev_count; ++i) {
+ VKSCR(GetPhysicalDeviceProperties)(pdevs[i], &props);
- VkPhysicalDeviceProperties *props = &screen->info.props;
- for (i = 0; i < pdev_count; ++i) {
- vkGetPhysicalDeviceProperties(pdevs[i], props);
+ /* if user wants cpu, only give them cpu */
+ if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU)
+ return i;
+ }
-#ifdef ZINK_WITH_SWRAST_VK
- char *use_lavapipe = getenv("ZINK_USE_LAVAPIPE");
- if (use_lavapipe) {
- if (props->deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) {
- screen->pdev = pdevs[i];
- screen->info.device_version = props->apiVersion;
- break;
- }
- continue;
+ mesa_loge("ZINK: CPU device requested but none found!");
+
+ return -1;
+}
+
+static void
+choose_pdev(struct zink_screen *screen, int64_t dev_major, int64_t dev_minor)
+{
+ bool cpu = debug_get_bool_option("LIBGL_ALWAYS_SOFTWARE", false) ||
+ debug_get_bool_option("D3D_ALWAYS_SOFTWARE", false);
+
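+ /* enumerate every device when software rendering or a specific render node (major:minor) is requested; otherwise take the first device reported */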
+ if (cpu || (dev_major > 0 && dev_major < 255)) {
+ uint32_t pdev_count;
+ int idx;
+ VkPhysicalDevice *pdevs;
+ VkResult result = VKSCR(EnumeratePhysicalDevices)(screen->instance, &pdev_count, NULL);
+ if (result != VK_SUCCESS) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumeratePhysicalDevices failed (%s)", vk_Result_to_str(result));
+ return;
}
-#endif
- if (props->deviceType != VK_PHYSICAL_DEVICE_TYPE_CPU) {
- screen->pdev = pdevs[i];
- screen->info.device_version = props->apiVersion;
- break;
+
+ assert(pdev_count > 0);
+
+ pdevs = malloc(sizeof(*pdevs) * pdev_count);
+ if (!pdevs) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to allocate pdevs!");
+ return;
+ }
+ result = VKSCR(EnumeratePhysicalDevices)(screen->instance, &pdev_count, pdevs);
+ assert(result == VK_SUCCESS);
+ assert(pdev_count > 0);
+
+ if (cpu)
+ idx = zink_get_cpu_device_type(screen, pdev_count, pdevs);
+ else
+ idx = zink_get_display_device(screen, pdev_count, pdevs, dev_major,
+ dev_minor);
+
+ if (idx != -1)
+ /* valid device found */
+ screen->pdev = pdevs[idx];
+
+ free(pdevs);
+
+ if (idx == -1)
+ return;
+
+ } else {
+ VkPhysicalDevice pdev;
+ unsigned pdev_count = 1;
+ VkResult result = VKSCR(EnumeratePhysicalDevices)(screen->instance, &pdev_count, &pdev);
+ if (result != VK_SUCCESS && result != VK_INCOMPLETE) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: vkEnumeratePhysicalDevices failed (%s)", vk_Result_to_str(result));
+ return;
}
+ screen->pdev = pdev;
+ }
+ VKSCR(GetPhysicalDeviceProperties)(screen->pdev, &screen->info.props);
+
+ /* allow software rendering only if forced by the user */
+ if (!cpu && screen->info.props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) {
+ screen->pdev = VK_NULL_HANDLE;
+ return;
}
- free(pdevs);
+
+ screen->info.device_version = screen->info.props.apiVersion;
/* runtime version is the lesser of the instance version and device version */
screen->vk_version = MIN2(screen->info.device_version, screen->instance_info.loader_version);
/* calculate SPIR-V version based on VK version */
- if (screen->vk_version >= VK_MAKE_VERSION(1, 2, 0))
+ if (screen->vk_version >= VK_MAKE_VERSION(1, 3, 0))
+ screen->spirv_version = SPIRV_VERSION(1, 6);
+ else if (screen->vk_version >= VK_MAKE_VERSION(1, 2, 0))
screen->spirv_version = SPIRV_VERSION(1, 5);
else if (screen->vk_version >= VK_MAKE_VERSION(1, 1, 0))
screen->spirv_version = SPIRV_VERSION(1, 3);
@@ -1156,65 +1772,90 @@ static void
update_queue_props(struct zink_screen *screen)
{
uint32_t num_queues;
- vkGetPhysicalDeviceQueueFamilyProperties(screen->pdev, &num_queues, NULL);
+ VKSCR(GetPhysicalDeviceQueueFamilyProperties)(screen->pdev, &num_queues, NULL);
assert(num_queues > 0);
VkQueueFamilyProperties *props = malloc(sizeof(*props) * num_queues);
- vkGetPhysicalDeviceQueueFamilyProperties(screen->pdev, &num_queues, props);
+ if (!props) {
+ mesa_loge("ZINK: failed to allocate props!");
+ return;
+ }
+
+ VKSCR(GetPhysicalDeviceQueueFamilyProperties)(screen->pdev, &num_queues, props);
+ bool found_gfx = false;
+ uint32_t sparse_only = UINT32_MAX;
+ screen->sparse_queue = UINT32_MAX;
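+ /* default both gfx and sparse binding to the first graphics queue, but prefer a dedicated sparse-only queue if one exists */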
for (uint32_t i = 0; i < num_queues; i++) {
if (props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
- screen->gfx_queue = i;
+ if (found_gfx)
+ continue;
+ screen->sparse_queue = screen->gfx_queue = i;
screen->max_queues = props[i].queueCount;
screen->timestamp_valid_bits = props[i].timestampValidBits;
- break;
- }
+ found_gfx = true;
+ } else if (props[i].queueFlags & VK_QUEUE_SPARSE_BINDING_BIT)
+ sparse_only = i;
}
+ if (sparse_only != UINT32_MAX)
+ screen->sparse_queue = sparse_only;
free(props);
}
static void
init_queue(struct zink_screen *screen)
{
- vkGetDeviceQueue(screen->dev, screen->gfx_queue, 0, &screen->queue);
- if (screen->threaded && screen->max_queues > 1)
- vkGetDeviceQueue(screen->dev, screen->gfx_queue, 1, &screen->thread_queue);
+ simple_mtx_init(&screen->queue_lock, mtx_plain);
+ VKSCR(GetDeviceQueue)(screen->dev, screen->gfx_queue, 0, &screen->queue);
+ if (screen->sparse_queue != screen->gfx_queue)
+ VKSCR(GetDeviceQueue)(screen->dev, screen->sparse_queue, 0, &screen->queue_sparse);
else
- screen->thread_queue = screen->queue;
+ screen->queue_sparse = screen->queue;
}
static void
zink_flush_frontbuffer(struct pipe_screen *pscreen,
- struct pipe_context *pcontext,
+ struct pipe_context *pctx,
struct pipe_resource *pres,
unsigned level, unsigned layer,
void *winsys_drawable_handle,
+ unsigned nboxes,
struct pipe_box *sub_box)
{
struct zink_screen *screen = zink_screen(pscreen);
- struct sw_winsys *winsys = screen->winsys;
struct zink_resource *res = zink_resource(pres);
+ struct zink_context *ctx = zink_context(pctx);
+
+ /* if the surface is no longer a swapchain, this is a no-op */
+ if (!zink_is_swapchain(res))
+ return;
+
+ ctx = zink_tc_context_unwrap(pctx, screen->threaded);
+
+ if (!zink_kopper_acquired(res->obj->dt, res->obj->dt_idx)) {
+ /* swapbuffers to an undefined surface: acquire and present garbage */
+ zink_kopper_acquire(ctx, res, UINT64_MAX);
+ ctx->needs_present = res;
+ /* set batch usage to submit acquire semaphore */
+ zink_batch_resource_usage_set(&ctx->batch, res, true, false);
+ /* ensure the resource is set up to present garbage */
+ ctx->base.flush_resource(&ctx->base, pres);
+ }
- if (!winsys)
- return;
- void *map = winsys->displaytarget_map(winsys, res->dt, 0);
-
- if (map) {
- struct pipe_transfer *transfer = NULL;
- void *res_map = pipe_texture_map(pcontext, pres, level, layer, PIPE_MAP_READ, 0, 0,
- u_minify(pres->width0, level),
- u_minify(pres->height0, level),
- &transfer);
- if (res_map) {
- util_copy_rect((ubyte*)map, pres->format, res->dt_stride, 0, 0,
- transfer->box.width, transfer->box.height,
- (const ubyte*)res_map, transfer->stride, 0, 0);
- pipe_texture_unmap(pcontext, transfer);
+ /* handle any outstanding acquire submits (not just from above) */
+ if (ctx->batch.swapchain || ctx->needs_present) {
+ ctx->batch.has_work = true;
+ pctx->flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+ if (ctx->last_batch_state && screen->threaded_submit) {
+ struct zink_batch_state *bs = ctx->last_batch_state;
+ util_queue_fence_wait(&bs->flush_completed);
}
- winsys->displaytarget_unmap(winsys, res->dt);
}
+ res->use_damage = false;
- winsys->displaytarget_display(winsys, res->dt, winsys_drawable_handle, sub_box);
+ /* always verify that this was acquired */
+ assert(zink_kopper_acquired(res->obj->dt, res->obj->dt_idx));
+ zink_kopper_present_queue(screen, res, nboxes, sub_box);
}
bool
@@ -1229,13 +1870,15 @@ zink_is_depth_format_supported(struct zink_screen *screen, VkFormat format)
static enum pipe_format
emulate_x8(enum pipe_format format)
{
- /* convert missing X8 variants to A8 */
+ /* convert missing Xn variants to An */
switch (format) {
case PIPE_FORMAT_B8G8R8X8_UNORM:
return PIPE_FORMAT_B8G8R8A8_UNORM;
case PIPE_FORMAT_B8G8R8X8_SRGB:
return PIPE_FORMAT_B8G8R8A8_SRGB;
+ case PIPE_FORMAT_R8G8B8X8_SRGB:
+ return PIPE_FORMAT_R8G8B8A8_SRGB;
case PIPE_FORMAT_R8G8B8X8_SINT:
return PIPE_FORMAT_R8G8B8A8_SINT;
@@ -1244,6 +1887,20 @@ emulate_x8(enum pipe_format format)
case PIPE_FORMAT_R8G8B8X8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;
+ case PIPE_FORMAT_R16G16B16X16_FLOAT:
+ return PIPE_FORMAT_R16G16B16A16_FLOAT;
+ case PIPE_FORMAT_R16G16B16X16_SINT:
+ return PIPE_FORMAT_R16G16B16A16_SINT;
+ case PIPE_FORMAT_R16G16B16X16_SNORM:
+ return PIPE_FORMAT_R16G16B16A16_SNORM;
+ case PIPE_FORMAT_R16G16B16X16_UNORM:
+ return PIPE_FORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R32G32B32X32_FLOAT:
+ return PIPE_FORMAT_R32G32B32A32_FLOAT;
+ case PIPE_FORMAT_R32G32B32X32_SINT:
+ return PIPE_FORMAT_R32G32B32A32_SINT;
+
default:
return format;
}
@@ -1252,9 +1909,15 @@ emulate_x8(enum pipe_format format)
VkFormat
zink_get_format(struct zink_screen *screen, enum pipe_format format)
{
- VkFormat ret = zink_pipe_format_to_vk_format(emulate_x8(format));
+ if (format == PIPE_FORMAT_A8_UNORM && !screen->driver_workarounds.missing_a8_unorm)
+ return VK_FORMAT_A8_UNORM_KHR;
+ else if (!screen->driver_workarounds.broken_l4a4 || format != PIPE_FORMAT_L4A4_UNORM)
+ format = zink_format_get_emulated_alpha(format);
- if (format == PIPE_FORMAT_X32_S8X24_UINT)
+ VkFormat ret = vk_format_from_pipe_format(emulate_x8(format));
+
+ if (format == PIPE_FORMAT_X32_S8X24_UINT &&
+ screen->have_D32_SFLOAT_S8_UINT)
return VK_FORMAT_D32_SFLOAT_S8_UINT;
if (format == PIPE_FORMAT_X24S8_UINT)
@@ -1270,61 +1933,76 @@ zink_get_format(struct zink_screen *screen, enum pipe_format format)
if (ret == VK_FORMAT_D24_UNORM_S8_UINT &&
!screen->have_D24_UNORM_S8_UINT) {
- assert(zink_is_depth_format_supported(screen,
- VK_FORMAT_D32_SFLOAT_S8_UINT));
+ assert(screen->have_D32_SFLOAT_S8_UINT);
return VK_FORMAT_D32_SFLOAT_S8_UINT;
}
- if ((ret == VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT &&
+ if ((ret == VK_FORMAT_A4B4G4R4_UNORM_PACK16 &&
!screen->info.format_4444_feats.formatA4B4G4R4) ||
- (ret == VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT &&
+ (ret == VK_FORMAT_A4R4G4B4_UNORM_PACK16 &&
!screen->info.format_4444_feats.formatA4R4G4B4))
return VK_FORMAT_UNDEFINED;
+ if (format == PIPE_FORMAT_R4A4_UNORM)
+ return VK_FORMAT_R4G4_UNORM_PACK8;
+
return ret;
}
void
-zink_screen_init_descriptor_funcs(struct zink_screen *screen, bool fallback)
-{
- if (screen->info.have_KHR_descriptor_update_template &&
- !fallback &&
- screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY) {
-#define LAZY(FUNC) screen->FUNC = zink_##FUNC##_lazy
- LAZY(descriptor_program_init);
- LAZY(descriptor_program_deinit);
- LAZY(context_invalidate_descriptor_state);
- LAZY(batch_descriptor_init);
- LAZY(batch_descriptor_reset);
- LAZY(batch_descriptor_deinit);
- LAZY(descriptors_init);
- LAZY(descriptors_deinit);
- LAZY(descriptors_update);
-#undef LAZY
- } else {
-#define DEFAULT(FUNC) screen->FUNC = zink_##FUNC
- DEFAULT(descriptor_program_init);
- DEFAULT(descriptor_program_deinit);
- DEFAULT(context_invalidate_descriptor_state);
- DEFAULT(batch_descriptor_init);
- DEFAULT(batch_descriptor_reset);
- DEFAULT(batch_descriptor_deinit);
- DEFAULT(descriptors_init);
- DEFAULT(descriptors_deinit);
- DEFAULT(descriptors_update);
-#undef DEFAULT
+zink_convert_color(const struct zink_screen *screen, enum pipe_format format,
+ union pipe_color_union *dst,
+ const union pipe_color_union *src)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ union pipe_color_union tmp = *src;
+
+ for (unsigned i = 0; i < 4; i++)
+ zink_format_clamp_channel_color(desc, &tmp, src, i);
+
+ if (zink_format_is_emulated_alpha(format) &&
+ /* Don't swizzle colors if the driver supports real A8_UNORM */
+ (format != PIPE_FORMAT_A8_UNORM ||
+ screen->driver_workarounds.missing_a8_unorm)) {
+ if (util_format_is_alpha(format)) {
+ tmp.ui[0] = tmp.ui[3];
+ tmp.ui[1] = 0;
+ tmp.ui[2] = 0;
+ tmp.ui[3] = 0;
+ } else if (util_format_is_luminance(format)) {
+ tmp.ui[1] = 0;
+ tmp.ui[2] = 0;
+ tmp.f[3] = 1.0;
+ } else if (util_format_is_luminance_alpha(format)) {
+ tmp.ui[1] = tmp.ui[3];
+ tmp.ui[2] = 0;
+ tmp.f[3] = 1.0;
+ } else /* zink_format_is_red_alpha */ {
+ tmp.ui[1] = tmp.ui[3];
+ tmp.ui[2] = 0;
+ tmp.ui[3] = 0;
+ }
}
+
+ memcpy(dst, &tmp, sizeof(union pipe_color_union));
}
static bool
check_have_device_time(struct zink_screen *screen)
{
uint32_t num_domains = 0;
- VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, NULL);
+ VkTimeDomainEXT domains[8]; //current max is 4
+ VkResult result = VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, NULL);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkGetPhysicalDeviceCalibrateableTimeDomainsEXT failed (%s)", vk_Result_to_str(result));
+ }
assert(num_domains > 0);
+ assert(num_domains < ARRAY_SIZE(domains));
- VkTimeDomainEXT *domains = malloc(sizeof(VkTimeDomainEXT) * num_domains);
- VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, domains);
+ result = VKSCR(GetPhysicalDeviceCalibrateableTimeDomainsEXT)(screen->pdev, &num_domains, domains);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkGetPhysicalDeviceCalibrateableTimeDomainsEXT failed (%s)", vk_Result_to_str(result));
+ }
/* VK_TIME_DOMAIN_DEVICE_EXT is used for the ctx->get_timestamp hook and is the only one we really need */
for (unsigned i = 0; i < num_domains; i++) {
@@ -1333,10 +2011,29 @@ check_have_device_time(struct zink_screen *screen)
}
}
- free(domains);
return false;
}
+static void
+zink_error(const char *msg)
+{
+}
+
+static void
+zink_warn(const char *msg)
+{
+}
+
+static void
+zink_info(const char *msg)
+{
+}
+
+static void
+zink_msg(const char *msg)
+{
+}
+
static VKAPI_ATTR VkBool32 VKAPI_CALL
zink_debug_util_callback(
VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
@@ -1344,19 +2041,17 @@ zink_debug_util_callback(
const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData,
void *pUserData)
{
- const char *severity = "MSG";
-
// Pick message prefix and color to use.
// Only MacOS and Linux have been tested for color support
if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
- severity = "ERR";
+ zink_error(pCallbackData->pMessage);
} else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
- severity = "WRN";
+ zink_warn(pCallbackData->pMessage);
} else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
- severity = "NFO";
- }
+ zink_info(pCallbackData->pMessage);
+ } else
+ zink_msg(pCallbackData->pMessage);
- fprintf(stderr, "zink DEBUG: %s: '%s'\n", severity, pCallbackData->pMessage);
return VK_FALSE;
}
@@ -1380,12 +2075,14 @@ create_debug(struct zink_screen *screen)
VkDebugUtilsMessengerEXT vkDebugUtilsCallbackEXT = VK_NULL_HANDLE;
- VKSCR(CreateDebugUtilsMessengerEXT)(
- screen->instance,
- &vkDebugUtilsMessengerCreateInfoEXT,
- NULL,
- &vkDebugUtilsCallbackEXT
- );
+ VkResult result = VKSCR(CreateDebugUtilsMessengerEXT)(
+ screen->instance,
+ &vkDebugUtilsMessengerCreateInfoEXT,
+ NULL,
+ &vkDebugUtilsCallbackEXT);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateDebugUtilsMessengerEXT failed (%s)", vk_Result_to_str(result));
+ }
screen->debugUtilsCallbackHandle = vkDebugUtilsCallbackEXT;
@@ -1399,9 +2096,9 @@ zink_internal_setup_moltenvk(struct zink_screen *screen)
if (!screen->instance_info.have_MVK_moltenvk)
return true;
- GET_PROC_ADDR_INSTANCE_LOCAL(screen->instance, GetMoltenVKConfigurationMVK);
- GET_PROC_ADDR_INSTANCE_LOCAL(screen->instance, SetMoltenVKConfigurationMVK);
- GET_PROC_ADDR_INSTANCE_LOCAL(screen->instance, GetVersionStringsMVK);
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, screen->instance, GetMoltenVKConfigurationMVK);
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, screen->instance, SetMoltenVKConfigurationMVK);
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, screen->instance, GetVersionStringsMVK);
if (vk_GetVersionStringsMVK) {
char molten_version[64] = {0};
@@ -1430,26 +2127,97 @@ zink_internal_setup_moltenvk(struct zink_screen *screen)
}
static void
-check_device_needs_mesa_wsi(struct zink_screen *screen)
+check_vertex_formats(struct zink_screen *screen)
{
- if (
- /* Raspberry Pi 4 V3DV driver */
- (screen->info.props.vendorID == 0x14E4 &&
- screen->info.props.deviceID == 42) ||
- /* RADV */
- screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV_KHR
- ) {
- screen->needs_mesa_wsi = true;
- } else if (screen->info.driver_props.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR)
- screen->needs_mesa_flush_wsi = true;
-
+ /* from vbuf */
+ enum pipe_format format_list[] = {
+ /* not supported by vk
+ PIPE_FORMAT_R32_FIXED,
+ PIPE_FORMAT_R32G32_FIXED,
+ PIPE_FORMAT_R32G32B32_FIXED,
+ PIPE_FORMAT_R32G32B32A32_FIXED,
+ */
+ PIPE_FORMAT_R16_FLOAT,
+ PIPE_FORMAT_R16G16_FLOAT,
+ PIPE_FORMAT_R16G16B16_FLOAT,
+ PIPE_FORMAT_R16G16B16A16_FLOAT,
+ /* not supported by vk
+ PIPE_FORMAT_R64_FLOAT,
+ PIPE_FORMAT_R64G64_FLOAT,
+ PIPE_FORMAT_R64G64B64_FLOAT,
+ PIPE_FORMAT_R64G64B64A64_FLOAT,
+ PIPE_FORMAT_R32_UNORM,
+ PIPE_FORMAT_R32G32_UNORM,
+ PIPE_FORMAT_R32G32B32_UNORM,
+ PIPE_FORMAT_R32G32B32A32_UNORM,
+ PIPE_FORMAT_R32_SNORM,
+ PIPE_FORMAT_R32G32_SNORM,
+ PIPE_FORMAT_R32G32B32_SNORM,
+ PIPE_FORMAT_R32G32B32A32_SNORM,
+ PIPE_FORMAT_R32_USCALED,
+ PIPE_FORMAT_R32G32_USCALED,
+ PIPE_FORMAT_R32G32B32_USCALED,
+ PIPE_FORMAT_R32G32B32A32_USCALED,
+ PIPE_FORMAT_R32_SSCALED,
+ PIPE_FORMAT_R32G32_SSCALED,
+ PIPE_FORMAT_R32G32B32_SSCALED,
+ PIPE_FORMAT_R32G32B32A32_SSCALED,
+ */
+ PIPE_FORMAT_R16_UNORM,
+ PIPE_FORMAT_R16G16_UNORM,
+ PIPE_FORMAT_R16G16B16_UNORM,
+ PIPE_FORMAT_R16G16B16A16_UNORM,
+ PIPE_FORMAT_R16_SNORM,
+ PIPE_FORMAT_R16G16_SNORM,
+ PIPE_FORMAT_R16G16B16_SNORM,
+ PIPE_FORMAT_R16G16B16_SINT,
+ PIPE_FORMAT_R16G16B16_UINT,
+ PIPE_FORMAT_R16G16B16A16_SNORM,
+ PIPE_FORMAT_R16_USCALED,
+ PIPE_FORMAT_R16G16_USCALED,
+ PIPE_FORMAT_R16G16B16_USCALED,
+ PIPE_FORMAT_R16G16B16A16_USCALED,
+ PIPE_FORMAT_R16_SSCALED,
+ PIPE_FORMAT_R16G16_SSCALED,
+ PIPE_FORMAT_R16G16B16_SSCALED,
+ PIPE_FORMAT_R16G16B16A16_SSCALED,
+ PIPE_FORMAT_R8_UNORM,
+ PIPE_FORMAT_R8G8_UNORM,
+ PIPE_FORMAT_R8G8B8_UNORM,
+ PIPE_FORMAT_R8G8B8A8_UNORM,
+ PIPE_FORMAT_R8_SNORM,
+ PIPE_FORMAT_R8G8_SNORM,
+ PIPE_FORMAT_R8G8B8_SNORM,
+ PIPE_FORMAT_R8G8B8A8_SNORM,
+ PIPE_FORMAT_R8_USCALED,
+ PIPE_FORMAT_R8G8_USCALED,
+ PIPE_FORMAT_R8G8B8_USCALED,
+ PIPE_FORMAT_R8G8B8A8_USCALED,
+ PIPE_FORMAT_R8_SSCALED,
+ PIPE_FORMAT_R8G8_SSCALED,
+ PIPE_FORMAT_R8G8B8_SSCALED,
+ PIPE_FORMAT_R8G8B8A8_SSCALED,
+ };
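+ /* if a vertex format is unsupported but its decomposed form is, flag that attributes may need decomposing */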
+ for (unsigned i = 0; i < ARRAY_SIZE(format_list); i++) {
+ if (zink_is_format_supported(&screen->base, format_list[i], PIPE_BUFFER, 0, 0, PIPE_BIND_VERTEX_BUFFER))
+ continue;
+ if (util_format_get_nr_components(format_list[i]) == 1)
+ continue;
+ enum pipe_format decomposed = zink_decompose_vertex_format(format_list[i]);
+ if (zink_is_format_supported(&screen->base, decomposed, PIPE_BUFFER, 0, 0, PIPE_BIND_VERTEX_BUFFER)) {
+ screen->need_decompose_attrs = true;
+ mesa_logw("zink: this application would be much faster if %s supported vertex format %s", screen->info.props.deviceName, util_format_name(format_list[i]));
+ }
+ }
}
static void
populate_format_props(struct zink_screen *screen)
{
for (unsigned i = 0; i < PIPE_FORMAT_COUNT; i++) {
- VkFormat format = zink_get_format(screen, i);
+ VkFormat format;
+retry:
+ format = zink_get_format(screen, i);
if (!format)
continue;
if (VKSCR(GetPhysicalDeviceFormatProperties2)) {
@@ -1465,8 +2233,29 @@ populate_format_props(struct zink_screen *screen)
mod_props.pDrmFormatModifierProperties = mods;
props.pNext = &mod_props;
}
+ VkFormatProperties3 props3 = {0};
+ if (screen->info.have_KHR_format_feature_flags2 || screen->info.have_vulkan13) {
+ props3.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3;
+ props3.pNext = props.pNext;
+ props.pNext = &props3;
+ }
+
VKSCR(GetPhysicalDeviceFormatProperties2)(screen->pdev, format, &props);
- screen->format_props[i] = props.formatProperties;
+
+ if (screen->info.have_KHR_format_feature_flags2 || screen->info.have_vulkan13) {
+ screen->format_props[i].linearTilingFeatures = props3.linearTilingFeatures;
+ screen->format_props[i].optimalTilingFeatures = props3.optimalTilingFeatures;
+ screen->format_props[i].bufferFeatures = props3.bufferFeatures;
+
+ if (props3.linearTilingFeatures & VK_FORMAT_FEATURE_2_LINEAR_COLOR_ATTACHMENT_BIT_NV)
+ screen->format_props[i].linearTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
+ } else {
+ // MoltenVK is a 1.2 API
+ screen->format_props[i].linearTilingFeatures = props.formatProperties.linearTilingFeatures;
+ screen->format_props[i].optimalTilingFeatures = props.formatProperties.optimalTilingFeatures;
+ screen->format_props[i].bufferFeatures = props.formatProperties.bufferFeatures;
+ }
+
if (screen->info.have_EXT_image_drm_format_modifier && mod_props.drmFormatModifierCount) {
screen->modifier_props[i].drmFormatModifierCount = mod_props.drmFormatModifierCount;
screen->modifier_props[i].pDrmFormatModifierProperties = ralloc_array(screen, VkDrmFormatModifierPropertiesEXT, mod_props.drmFormatModifierCount);
@@ -1475,9 +2264,80 @@ populate_format_props(struct zink_screen *screen)
screen->modifier_props[i].pDrmFormatModifierProperties[j] = mod_props.pDrmFormatModifierProperties[j];
}
}
- } else
- VKSCR(GetPhysicalDeviceFormatProperties)(screen->pdev, format, &screen->format_props[i]);
+ } else {
+ VkFormatProperties props = {0};
+ VKSCR(GetPhysicalDeviceFormatProperties)(screen->pdev, format, &props);
+ screen->format_props[i].linearTilingFeatures = props.linearTilingFeatures;
+ screen->format_props[i].optimalTilingFeatures = props.optimalTilingFeatures;
+ screen->format_props[i].bufferFeatures = props.bufferFeatures;
+ }
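+ /* probe native A8_UNORM support: if the driver reports no features for it, mark it missing and retry with emulation */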
+ if (i == PIPE_FORMAT_A8_UNORM && !screen->driver_workarounds.missing_a8_unorm) {
+ if (!screen->format_props[i].linearTilingFeatures &&
+ !screen->format_props[i].optimalTilingFeatures &&
+ !screen->format_props[i].bufferFeatures) {
+ screen->driver_workarounds.missing_a8_unorm = true;
+ goto retry;
+ }
+ }
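+ /* emulated alpha formats lose color attachment, storage image, and buffer features */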
+ if (zink_format_is_emulated_alpha(i)) {
+ VkFormatFeatureFlags blocked = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ screen->format_props[i].linearTilingFeatures &= ~blocked;
+ screen->format_props[i].optimalTilingFeatures &= ~blocked;
+ screen->format_props[i].bufferFeatures = 0;
+ }
+ }
+ check_vertex_formats(screen);
+ VkImageFormatProperties image_props;
+ VkResult ret = VKSCR(GetPhysicalDeviceImageFormatProperties)(screen->pdev, VK_FORMAT_D32_SFLOAT,
+ VK_IMAGE_TYPE_1D,
+ VK_IMAGE_TILING_OPTIMAL,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+ 0, &image_props);
+ if (ret != VK_SUCCESS && ret != VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ mesa_loge("ZINK: vkGetPhysicalDeviceImageFormatProperties failed (%s)", vk_Result_to_str(ret));
+ }
+ screen->need_2D_zs = ret != VK_SUCCESS;
+
+ if (screen->info.feats.features.sparseResidencyImage2D)
+ screen->need_2D_sparse = !screen->base.get_sparse_texture_virtual_page_size(&screen->base, PIPE_TEXTURE_1D, false, PIPE_FORMAT_R32_FLOAT, 0, 16, NULL, NULL, NULL);
+}
+
+static void
+setup_renderdoc(struct zink_screen *screen)
+{
+#ifdef HAVE_RENDERDOC_APP_H
+ const char *capture_id = debug_get_option("ZINK_RENDERDOC", NULL);
+ if (!capture_id)
+ return;
+ void *renderdoc = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD);
+ /* not loaded */
+ if (!renderdoc)
+ return;
+
+ pRENDERDOC_GetAPI get_api = dlsym(renderdoc, "RENDERDOC_GetAPI");
+ if (!get_api)
+ return;
+
+ /* need synchronous dispatch for renderdoc coherency */
+ screen->threaded_submit = false;
+ get_api(eRENDERDOC_API_Version_1_0_0, (void*)&screen->renderdoc_api);
+ screen->renderdoc_api->SetActiveWindow(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(screen->instance), NULL);
+
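+ /* ZINK_RENDERDOC accepts "all", a single frame number, or a start:end frame range */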
+ int count = sscanf(capture_id, "%u:%u", &screen->renderdoc_capture_start, &screen->renderdoc_capture_end);
+ if (count != 2) {
+ count = sscanf(capture_id, "%u", &screen->renderdoc_capture_start);
+ if (!count) {
+ if (!strcmp(capture_id, "all")) {
+ screen->renderdoc_capture_all = true;
+ } else {
+ printf("`ZINK_RENDERDOC` usage: ZINK_RENDERDOC=all|frame_no[:end_frame_no]\n");
+ abort();
+ }
+ }
+ screen->renderdoc_capture_end = screen->renderdoc_capture_start;
}
+ p_atomic_set(&screen->renderdoc_frame, 1);
+#endif
}
bool
@@ -1485,135 +2345,185 @@ zink_screen_init_semaphore(struct zink_screen *screen)
{
VkSemaphoreCreateInfo sci = {0};
VkSemaphoreTypeCreateInfo tci = {0};
- VkSemaphore sem;
sci.pNext = &tci;
sci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
tci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
tci.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
- if (VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem) == VK_SUCCESS) {
- /* semaphore signal values can never decrease,
- * so we need a new semaphore anytime we overflow
- */
- if (screen->prev_sem)
- VKSCR(DestroySemaphore)(screen->dev, screen->prev_sem, NULL);
- screen->prev_sem = screen->sem;
- screen->sem = sem;
- return true;
- }
- screen->info.have_KHR_timeline_semaphore = false;
- return false;
+ return VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &screen->sem) == VK_SUCCESS;
}
-bool
-zink_screen_timeline_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout)
+VkSemaphore
+zink_create_exportable_semaphore(struct zink_screen *screen)
{
- VkSemaphoreWaitInfo wi = {0};
+ VkExportSemaphoreCreateInfo eci = {
+ VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+ NULL,
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT
+ };
+ VkSemaphoreCreateInfo sci = {
+ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ &eci,
+ 0
+ };
- if (zink_screen_check_last_finished(screen, batch_id))
- return true;
+ VkSemaphore sem = VK_NULL_HANDLE;
+ if (util_dynarray_contains(&screen->fd_semaphores, VkSemaphore)) {
+ simple_mtx_lock(&screen->semaphores_lock);
+ if (util_dynarray_contains(&screen->fd_semaphores, VkSemaphore))
+ sem = util_dynarray_pop(&screen->fd_semaphores, VkSemaphore);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ }
+ if (sem)
+ return sem;
+ VkResult ret = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem);
+ return ret == VK_SUCCESS ? sem : VK_NULL_HANDLE;
+}
- wi.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
- wi.semaphoreCount = 1;
- /* handle batch_id overflow */
- wi.pSemaphores = batch_id > screen->curr_batch ? &screen->prev_sem : &screen->sem;
- uint64_t batch_id64 = batch_id;
- wi.pValues = &batch_id64;
- bool success = false;
- if (screen->device_lost)
- return true;
- VkResult ret = VKSCR(WaitSemaphores)(screen->dev, &wi, timeout);
- success = zink_screen_handle_vkresult(screen, ret);
+VkSemaphore
+zink_screen_export_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res)
+{
+ VkSemaphore sem = VK_NULL_HANDLE;
+#if defined(HAVE_LIBDRM) && (DETECT_OS_LINUX || DETECT_OS_BSD)
+ struct dma_buf_export_sync_file export = {
+ .flags = DMA_BUF_SYNC_RW,
+ .fd = -1,
+ };
- if (success)
- zink_screen_update_last_finished(screen, batch_id);
+ int fd = -1;
+ if (res->obj->is_aux) {
+ fd = os_dupfd_cloexec(res->obj->handle);
+ } else {
+ VkMemoryGetFdInfoKHR fd_info = {0};
+ fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
+ fd_info.memory = zink_bo_get_mem(res->obj->bo);
+ fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd);
+ }
- return success;
-}
+ if (unlikely(fd < 0)) {
+ mesa_loge("MESA: Unable to get a valid memory fd");
+ return VK_NULL_HANDLE;
+ }
-struct noop_submit_info {
- struct zink_screen *screen;
- VkFence fence;
-};
+ int ret = drmIoctl(fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &export);
+ if (ret) {
+ if (errno == ENOTTY || errno == EBADF || errno == ENOSYS) {
+ assert(!"how did this fail?");
+ return VK_NULL_HANDLE;
+ } else {
+ mesa_loge("MESA: failed to import sync file '%s'", strerror(errno));
+ return VK_NULL_HANDLE;
+ }
+ }
-static void
-noop_submit(void *data, void *gdata, int thread_index)
-{
- struct noop_submit_info *n = data;
- VkSubmitInfo si = {0};
- si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
- if (n->screen->vk.QueueSubmit(n->screen->threaded ? n->screen->thread_queue : n->screen->queue,
- 1, &si, n->fence) != VK_SUCCESS) {
- debug_printf("ZINK: vkQueueSubmit() failed\n");
- n->screen->device_lost = true;
+ sem = zink_create_exportable_semaphore(screen);
+
+ const VkImportSemaphoreFdInfoKHR sdi = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
+ .semaphore = sem,
+ .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ .fd = export.fd,
+ };
+ bool success = VKSCR(ImportSemaphoreFdKHR)(screen->dev, &sdi) == VK_SUCCESS;
+ close(fd);
+ if (!success) {
+ VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
+ return VK_NULL_HANDLE;
}
+#endif
+ return sem;
}
bool
-zink_screen_batch_id_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout)
+zink_screen_import_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res, VkSemaphore sem)
{
- if (zink_screen_check_last_finished(screen, batch_id))
- return true;
-
- if (screen->info.have_KHR_timeline_semaphore)
- return zink_screen_timeline_wait(screen, batch_id, timeout);
-
- if (!timeout)
- return false;
-
- uint32_t new_id = 0;
- while (!new_id)
- new_id = p_atomic_inc_return(&screen->curr_batch);
- VkResult ret;
- struct noop_submit_info n;
- uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
- uint64_t remaining = PIPE_TIMEOUT_INFINITE;
- VkFenceCreateInfo fci = {0};
- struct util_queue_fence fence;
- util_queue_fence_init(&fence);
- fci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-
- if (VKSCR(CreateFence)(screen->dev, &fci, NULL, &n.fence) != VK_SUCCESS)
+#if defined(HAVE_LIBDRM) && (DETECT_OS_LINUX || DETECT_OS_BSD)
+ const VkSemaphoreGetFdInfoKHR get_fd_info = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+ .semaphore = sem,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ };
+ int sync_file_fd = -1;
+ VkResult result = VKSCR(GetSemaphoreFdKHR)(screen->dev, &get_fd_info, &sync_file_fd);
+ if (result != VK_SUCCESS) {
return false;
+ }
- n.screen = screen;
- if (screen->threaded) {
- /* must use thread dispatch for sanity */
- util_queue_add_job(&screen->flush_queue, &n, &fence, noop_submit, NULL, 0);
- util_queue_fence_wait(&fence);
+ bool ret = false;
+ int fd;
+ if (res->obj->is_aux) {
+ fd = os_dupfd_cloexec(res->obj->handle);
} else {
- noop_submit(&n, NULL, 0);
+ VkMemoryGetFdInfoKHR fd_info = {0};
+ fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
+ fd_info.memory = zink_bo_get_mem(res->obj->bo);
+ fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ if (VKSCR(GetMemoryFdKHR)(screen->dev, &fd_info, &fd) != VK_SUCCESS)
+ fd = -1;
}
- if (timeout != PIPE_TIMEOUT_INFINITE) {
- int64_t time_ns = os_time_get_nano();
- remaining = abs_timeout > time_ns ? abs_timeout - time_ns : 0;
+ if (fd != -1) {
+ struct dma_buf_import_sync_file import = {
+ .flags = DMA_BUF_SYNC_RW,
+ .fd = sync_file_fd,
+ };
+ int ioctl_ret = drmIoctl(fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE, &import);
+ if (ioctl_ret) {
+ if (errno == ENOTTY || errno == EBADF || errno == ENOSYS) {
+ assert(!"how did this fail?");
+ } else {
+ ret = true;
+ }
+ }
+ close(fd);
}
+ close(sync_file_fd);
+ return ret;
+#else
+ return true;
+#endif
+}
- if (remaining)
- ret = VKSCR(WaitForFences)(screen->dev, 1, &n.fence, VK_TRUE, remaining);
- else
- ret = VKSCR(GetFenceStatus)(screen->dev, n.fence);
- VKSCR(DestroyFence)(screen->dev, n.fence, NULL);
- bool success = zink_screen_handle_vkresult(screen, ret);
+bool
+zink_screen_timeline_wait(struct zink_screen *screen, uint64_t batch_id, uint64_t timeout)
+{
+ VkSemaphoreWaitInfo wi = {0};
+
+ if (zink_screen_check_last_finished(screen, batch_id))
+ return true;
+
+ wi.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
+ wi.semaphoreCount = 1;
+ wi.pSemaphores = &screen->sem;
+ wi.pValues = &batch_id;
+ bool success = false;
+ if (screen->device_lost)
+ return true;
+ VkResult ret = VKSCR(WaitSemaphores)(screen->dev, &wi, timeout);
+ success = zink_screen_handle_vkresult(screen, ret);
if (success)
- zink_screen_update_last_finished(screen, new_id);
+ zink_screen_update_last_finished(screen, batch_id);
return success;
}
static uint32_t
-zink_get_loader_version(void)
+zink_get_loader_version(struct zink_screen *screen)
{
uint32_t loader_version = VK_API_VERSION_1_0;
// Get the Loader version
- GET_PROC_ADDR_INSTANCE_LOCAL(NULL, EnumerateInstanceVersion);
+ GET_PROC_ADDR_INSTANCE_LOCAL(screen, NULL, EnumerateInstanceVersion);
if (vk_EnumerateInstanceVersion) {
uint32_t loader_version_temp = VK_API_VERSION_1_0;
- if (VK_SUCCESS == (*vk_EnumerateInstanceVersion)(&loader_version_temp)) {
+ VkResult result = (*vk_EnumerateInstanceVersion)(&loader_version_temp);
+ if (VK_SUCCESS == result) {
loader_version = loader_version_temp;
+ } else {
+ mesa_loge("ZINK: vkEnumerateInstanceVersion failed (%s)", vk_Result_to_str(result));
}
}
@@ -1638,11 +2548,11 @@ zink_query_memory_info(struct pipe_screen *pscreen, struct pipe_memory_info *inf
if (mem.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
/* VRAM */
info->total_device_memory += mem.memoryProperties.memoryHeaps[i].size / 1024;
- info->avail_device_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024;
+ info->avail_device_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024;
} else {
/* GART */
info->total_staging_memory += mem.memoryProperties.memoryHeaps[i].size / 1024;
- info->avail_staging_memory += (budget.heapBudget[i] - budget.heapUsage[i]) / 1024;
+ info->avail_staging_memory += (mem.memoryProperties.memoryHeaps[i].size - budget.heapUsage[i]) / 1024;
}
}
/* evictions not yet supported in vulkan */
@@ -1668,8 +2578,12 @@ zink_query_dmabuf_modifiers(struct pipe_screen *pscreen, enum pipe_format format
{
struct zink_screen *screen = zink_screen(pscreen);
*count = screen->modifier_props[format].drmFormatModifierCount;
- for (int i = 0; i < MIN2(max, *count); i++)
+ for (int i = 0; i < MIN2(max, *count); i++) {
+ if (external_only)
+ external_only[i] = 0;
+
modifiers[i] = screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier;
+ }
}
static bool
@@ -1689,7 +2603,115 @@ zink_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier,
for (unsigned i = 0; i < screen->modifier_props[format].drmFormatModifierCount; i++)
if (screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier == modifier)
return screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifierPlaneCount;
- return 0;
+ return util_format_get_num_planes(format);
+}
+
+static int
+zink_get_sparse_texture_virtual_page_size(struct pipe_screen *pscreen,
+ enum pipe_texture_target target,
+ bool multi_sample,
+ enum pipe_format pformat,
+ unsigned offset, unsigned size,
+ int *x, int *y, int *z)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ static const int page_size_2d[][3] = {
+ { 256, 256, 1 }, /* 8bpp */
+ { 256, 128, 1 }, /* 16bpp */
+ { 128, 128, 1 }, /* 32bpp */
+ { 128, 64, 1 }, /* 64bpp */
+ { 64, 64, 1 }, /* 128bpp */
+ };
+ static const int page_size_3d[][3] = {
+ { 64, 32, 32 }, /* 8bpp */
+ { 32, 32, 32 }, /* 16bpp */
+ { 32, 32, 16 }, /* 32bpp */
+ { 32, 16, 16 }, /* 64bpp */
+ { 16, 16, 16 }, /* 128bpp */
+ };
+ /* Only support one type of page size. */
+ if (offset != 0)
+ return 0;
+
+ /* reject multisample if 2x isn't supported; assume none are */
+ if (multi_sample && !screen->info.feats.features.sparseResidency2Samples)
+ return 0;
+
+ VkFormat format = zink_get_format(screen, pformat);
+ bool is_zs = util_format_is_depth_or_stencil(pformat);
+ VkImageType type;
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ type = (screen->need_2D_sparse || (screen->need_2D_zs && is_zs)) ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
+ break;
+
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ type = VK_IMAGE_TYPE_2D;
+ break;
+
+ case PIPE_TEXTURE_3D:
+ type = VK_IMAGE_TYPE_3D;
+ break;
+
+ case PIPE_BUFFER:
+ goto hack_it_up;
+
+ default:
+ return 0;
+ }
+
+ VkImageUsageFlags use_flags = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT;
+ use_flags |= is_zs ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ VkImageUsageFlags flags = screen->format_props[pformat].optimalTilingFeatures & use_flags;
+ VkSparseImageFormatProperties props[4]; //planar?
+ unsigned prop_count = ARRAY_SIZE(props);
+ VKSCR(GetPhysicalDeviceSparseImageFormatProperties)(screen->pdev, format, type,
+ multi_sample ? VK_SAMPLE_COUNT_2_BIT : VK_SAMPLE_COUNT_1_BIT,
+ flags,
+ VK_IMAGE_TILING_OPTIMAL,
+ &prop_count, props);
+ if (!prop_count) {
+ /* format may not support storage; try without */
+ flags &= ~VK_IMAGE_USAGE_STORAGE_BIT;
+ prop_count = ARRAY_SIZE(props);
+ VKSCR(GetPhysicalDeviceSparseImageFormatProperties)(screen->pdev, format, type,
+ multi_sample ? VK_SAMPLE_COUNT_2_BIT : VK_SAMPLE_COUNT_1_BIT,
+ flags,
+ VK_IMAGE_TILING_OPTIMAL,
+ &prop_count, props);
+ if (!prop_count)
+ return 0;
+ }
+
+ if (size) {
+ if (x)
+ *x = props[0].imageGranularity.width;
+ if (y)
+ *y = props[0].imageGranularity.height;
+ if (z)
+ *z = props[0].imageGranularity.depth;
+ }
+
+ return 1;
+hack_it_up:
+ {
+ const int (*page_sizes)[3] = target == PIPE_TEXTURE_3D ? page_size_3d : page_size_2d;
+ int blk_size = util_format_get_blocksize(pformat);
+
+ if (size) {
+ unsigned index = util_logbase2(blk_size);
+ if (x) *x = page_sizes[index][0];
+ if (y) *y = page_sizes[index][1];
+ if (z) *z = page_sizes[index][2];
+ }
+ }
+ return 1;
}
static VkDevice
@@ -1697,17 +2719,27 @@ zink_create_logical_device(struct zink_screen *screen)
{
VkDevice dev = VK_NULL_HANDLE;
- VkDeviceQueueCreateInfo qci = {0};
+ VkDeviceQueueCreateInfo qci[2] = {0};
+ uint32_t queues[3] = {
+ screen->gfx_queue,
+ screen->sparse_queue,
+ };
float dummy = 0.0f;
- qci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
- qci.queueFamilyIndex = screen->gfx_queue;
- qci.queueCount = screen->threaded && screen->max_queues > 1 ? 2 : 1;
- qci.pQueuePriorities = &dummy;
+ for (unsigned i = 0; i < ARRAY_SIZE(qci); i++) {
+ qci[i].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+ qci[i].queueFamilyIndex = queues[i];
+ qci[i].queueCount = 1;
+ qci[i].pQueuePriorities = &dummy;
+ }
+
+ unsigned num_queues = 1;
+ if (screen->sparse_queue != screen->gfx_queue)
+ num_queues++;
VkDeviceCreateInfo dci = {0};
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
- dci.queueCreateInfoCount = 1;
- dci.pQueueCreateInfos = &qci;
+ dci.queueCreateInfoCount = num_queues;
+ dci.pQueueCreateInfos = qci;
/* extensions don't have bool members in pEnabledFeatures.
* this requires us to pass the whole VkPhysicalDeviceFeatures2 struct
*/
@@ -1720,26 +2752,27 @@ zink_create_logical_device(struct zink_screen *screen)
dci.ppEnabledExtensionNames = screen->info.extensions;
dci.enabledExtensionCount = screen->info.num_extensions;
- vkCreateDevice(screen->pdev, &dci, NULL, &dev);
+ VkResult result = VKSCR(CreateDevice)(screen->pdev, &dci, NULL, &dev);
+ if (result != VK_SUCCESS)
+ mesa_loge("ZINK: vkCreateDevice failed (%s)", vk_Result_to_str(result));
+
return dev;
}
static void
-pre_hash_descriptor_states(struct zink_screen *screen)
-{
- VkImageViewCreateInfo null_info = {.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
- VkBufferViewCreateInfo null_binfo = {.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO};
- screen->null_descriptor_hashes.image_view = _mesa_hash_data(&null_info, sizeof(VkImageViewCreateInfo));
- screen->null_descriptor_hashes.buffer_view = _mesa_hash_data(&null_binfo, sizeof(VkBufferViewCreateInfo));
-}
-
-static void
check_base_requirements(struct zink_screen *screen)
{
+ if (zink_debug & ZINK_DEBUG_QUIET)
+ return;
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_V3DV) {
+ /* v3dv doesn't support straddling i/o, but zink doesn't do that so this is effectively supported:
+ * don't spam errors in this case
+ */
+ screen->info.feats12.scalarBlockLayout = true;
+ screen->info.have_EXT_scalar_block_layout = true;
+ }
if (!screen->info.feats.features.logicOp ||
!screen->info.feats.features.fillModeNonSolid ||
- !screen->info.feats.features.wideLines ||
- !screen->info.feats.features.largePoints ||
!screen->info.feats.features.shaderClipDistance ||
!(screen->info.feats12.scalarBlockLayout ||
screen->info.have_EXT_scalar_block_layout) ||
@@ -1754,16 +2787,18 @@ check_base_requirements(struct zink_screen *screen)
fprintf(stderr, "%s ", #X)
CHECK_OR_PRINT(feats.features.logicOp);
CHECK_OR_PRINT(feats.features.fillModeNonSolid);
- CHECK_OR_PRINT(feats.features.wideLines);
- CHECK_OR_PRINT(feats.features.largePoints);
CHECK_OR_PRINT(feats.features.shaderClipDistance);
if (!screen->info.feats12.scalarBlockLayout && !screen->info.have_EXT_scalar_block_layout)
- printf("scalarBlockLayout OR EXT_scalar_block_layout ");
+ fprintf(stderr, "scalarBlockLayout OR EXT_scalar_block_layout ");
CHECK_OR_PRINT(have_KHR_maintenance1);
CHECK_OR_PRINT(have_EXT_custom_border_color);
CHECK_OR_PRINT(have_EXT_line_rasterization);
fprintf(stderr, "\n");
}
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_V3DV) {
+ screen->info.feats12.scalarBlockLayout = false;
+ screen->info.have_EXT_scalar_block_layout = false;
+ }
}
static void
@@ -1777,43 +2812,544 @@ zink_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count,
*height = screen->maxSampleLocationGridSize[idx].height;
}
+static void
+init_driver_workarounds(struct zink_screen *screen)
+{
+ /* enable implicit sync for all non-mesa drivers */
+ screen->driver_workarounds.implicit_sync = true;
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_RADV:
+ case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ case VK_DRIVER_ID_MESA_TURNIP:
+ case VK_DRIVER_ID_MESA_V3DV:
+ case VK_DRIVER_ID_MESA_PANVK:
+ case VK_DRIVER_ID_MESA_VENUS:
+ screen->driver_workarounds.implicit_sync = false;
+ break;
+ default:
+ break;
+ }
+ /* TODO: maybe compile multiple variants for different set counts for compact mode? */
+ if (screen->info.props.limits.maxBoundDescriptorSets < ZINK_DESCRIPTOR_ALL_TYPES ||
+ zink_debug & (ZINK_DEBUG_COMPACT | ZINK_DEBUG_NOSHOBJ))
+ screen->info.have_EXT_shader_object = false;
+ /* EDS2 is only used with EDS1 */
+ if (!screen->info.have_EXT_extended_dynamic_state) {
+ screen->info.have_EXT_extended_dynamic_state2 = false;
+ /* CWE usage needs EDS1 */
+ screen->info.have_EXT_color_write_enable = false;
+ }
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
+ /* this completely breaks xfb somehow */
+ screen->info.have_EXT_extended_dynamic_state2 = false;
+ /* EDS3 is only used with EDS2 */
+ if (!screen->info.have_EXT_extended_dynamic_state2)
+ screen->info.have_EXT_extended_dynamic_state3 = false;
+ /* EXT_vertex_input_dynamic_state is only used with EDS2 and above */
+ if (!screen->info.have_EXT_extended_dynamic_state2)
+ screen->info.have_EXT_vertex_input_dynamic_state = false;
+ if (screen->info.line_rast_feats.stippledRectangularLines &&
+ screen->info.line_rast_feats.stippledBresenhamLines &&
+ screen->info.line_rast_feats.stippledSmoothLines &&
+ !screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable)
+ screen->info.have_EXT_extended_dynamic_state3 = false;
+ if (!screen->info.dynamic_state3_feats.extendedDynamicState3PolygonMode ||
+ !screen->info.dynamic_state3_feats.extendedDynamicState3DepthClampEnable ||
+ !screen->info.dynamic_state3_feats.extendedDynamicState3DepthClipNegativeOneToOne ||
+ !screen->info.dynamic_state3_feats.extendedDynamicState3DepthClipEnable ||
+ !screen->info.dynamic_state3_feats.extendedDynamicState3ProvokingVertexMode ||
+ !screen->info.dynamic_state3_feats.extendedDynamicState3LineRasterizationMode)
+ screen->info.have_EXT_extended_dynamic_state3 = false;
+ else if (screen->info.dynamic_state3_feats.extendedDynamicState3SampleMask &&
+ screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToCoverageEnable &&
+ (!screen->info.feats.features.alphaToOne || screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToOneEnable) &&
+ screen->info.dynamic_state3_feats.extendedDynamicState3ColorBlendEnable &&
+ screen->info.dynamic_state3_feats.extendedDynamicState3RasterizationSamples &&
+ screen->info.dynamic_state3_feats.extendedDynamicState3ColorWriteMask &&
+ screen->info.dynamic_state3_feats.extendedDynamicState3ColorBlendEquation &&
+ screen->info.dynamic_state3_feats.extendedDynamicState3LogicOpEnable &&
+ screen->info.dynamic_state2_feats.extendedDynamicState2LogicOp)
+ screen->have_full_ds3 = true;
+ if (screen->info.have_EXT_graphics_pipeline_library)
+ screen->info.have_EXT_graphics_pipeline_library = screen->info.have_EXT_extended_dynamic_state &&
+ screen->info.have_EXT_extended_dynamic_state2 &&
+ ((zink_debug & ZINK_DEBUG_GPL) ||
+ screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) &&
+ screen->info.have_EXT_extended_dynamic_state3 &&
+ screen->info.have_KHR_dynamic_rendering &&
+ screen->info.have_EXT_non_seamless_cube_map &&
+ (!(zink_debug & ZINK_DEBUG_GPL) ||
+ screen->info.gpl_props.graphicsPipelineLibraryFastLinking ||
+ screen->is_cpu);
+ screen->driver_workarounds.broken_l4a4 = screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_TURNIP) {
+ /* performance */
+ screen->info.border_color_feats.customBorderColorWithoutFormat = VK_FALSE;
+ }
+ if (!screen->info.have_KHR_maintenance5)
+ screen->driver_workarounds.missing_a8_unorm = true;
+
+ if ((!screen->info.have_EXT_line_rasterization ||
+ !screen->info.line_rast_feats.stippledBresenhamLines) &&
+ screen->info.feats.features.geometryShader &&
+ screen->info.feats.features.sampleRateShading) {
+ /* we're using stippledBresenhamLines as a proxy for all of these, to
+ * avoid accidentally changing behavior on VK-drivers where we don't
+ * want to add emulation.
+ */
+ screen->driver_workarounds.no_linestipple = true;
+ }
+
+ if (screen->info.driver_props.driverID ==
+ VK_DRIVER_ID_IMAGINATION_PROPRIETARY) {
+ assert(screen->info.feats.features.geometryShader);
+ screen->driver_workarounds.no_linesmooth = true;
+ }
+
+ /* This is a workaround for the lack of
+ * gl_PointSize + glPolygonMode(..., GL_LINE) support in the Imagination
+ * proprietary driver.
+ */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
+ screen->driver_workarounds.no_hw_gl_point = true;
+ break;
+ default:
+ screen->driver_workarounds.no_hw_gl_point = false;
+ break;
+ }
+
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
+ screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV)
+ screen->driver_workarounds.z24_unscaled_bias = 1<<23;
+ else
+ screen->driver_workarounds.z24_unscaled_bias = 1<<24;
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
+ screen->driver_workarounds.z16_unscaled_bias = 1<<15;
+ else
+ screen->driver_workarounds.z16_unscaled_bias = 1<<16;
+ /* these drivers don't use VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, so it can always be set */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_RADV:
+ case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ case VK_DRIVER_ID_MESA_VENUS:
+ case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
+ case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
+ case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
+ screen->driver_workarounds.always_feedback_loop = screen->info.have_EXT_attachment_feedback_loop_layout;
+ break;
+ default:
+ break;
+ }
+ /* these drivers don't use VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, so it can always be set */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ case VK_DRIVER_ID_MESA_VENUS:
+ case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
+ case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
+ screen->driver_workarounds.always_feedback_loop_zs = screen->info.have_EXT_attachment_feedback_loop_layout;
+ break;
+ default:
+ break;
+ }
+ /* use same mechanics if dynamic state is supported */
+ screen->driver_workarounds.always_feedback_loop |= screen->info.have_EXT_attachment_feedback_loop_dynamic_state;
+ screen->driver_workarounds.always_feedback_loop_zs |= screen->info.have_EXT_attachment_feedback_loop_dynamic_state;
+
+ /* these drivers cannot handle OOB gl_Layer values, and therefore need clamping in shader.
+ * TODO: Vulkan extension that details whether vulkan driver can handle OOB layer values
+ */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
+ screen->driver_workarounds.needs_sanitised_layer = true;
+ break;
+ default:
+ screen->driver_workarounds.needs_sanitised_layer = false;
+ break;
+ }
+ /* these drivers will produce undefined results when using swizzle 1 with combined z/s textures
+ * TODO: use a future device property when available
+ */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
+ case VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA:
+ screen->driver_workarounds.needs_zs_shader_swizzle = true;
+ break;
+ default:
+ screen->driver_workarounds.needs_zs_shader_swizzle = false;
+ break;
+ }
+
+ /* When robust contexts are advertised but robustImageAccess2 is not available */
+ screen->driver_workarounds.lower_robustImageAccess2 =
+ !screen->info.rb2_feats.robustImageAccess2 &&
+ screen->info.feats.features.robustBufferAccess &&
+ screen->info.rb_image_feats.robustImageAccess;
+
+ /* once more testing has been done, use the #if 0 block */
+ unsigned illegal = ZINK_DEBUG_RP | ZINK_DEBUG_NORP;
+ if ((zink_debug & illegal) == illegal) {
+ mesa_loge("Cannot specify ZINK_DEBUG=rp and ZINK_DEBUG=norp");
+ abort();
+ }
+
+ /* these drivers benefit from renderpass optimization */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ case VK_DRIVER_ID_MESA_TURNIP:
+ case VK_DRIVER_ID_MESA_PANVK:
+ case VK_DRIVER_ID_MESA_V3DV:
+ case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
+ case VK_DRIVER_ID_QUALCOMM_PROPRIETARY:
+ case VK_DRIVER_ID_BROADCOM_PROPRIETARY:
+ case VK_DRIVER_ID_ARM_PROPRIETARY:
+ screen->driver_workarounds.track_renderpasses = true; //screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard
+ break;
+ default:
+ break;
+ }
+ if (zink_debug & ZINK_DEBUG_RP)
+ screen->driver_workarounds.track_renderpasses = true;
+ else if (zink_debug & ZINK_DEBUG_NORP)
+ screen->driver_workarounds.track_renderpasses = false;
+
+ /* these drivers can't optimize non-overlapping copy ops */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_TURNIP:
+ case VK_DRIVER_ID_QUALCOMM_PROPRIETARY:
+ screen->driver_workarounds.broken_cache_semantics = true;
+ break;
+ default:
+ break;
+ }
+
+ /* these drivers can successfully do INVALID <-> LINEAR dri3 modifier swap */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_TURNIP:
+ case VK_DRIVER_ID_MESA_VENUS:
+ screen->driver_workarounds.can_do_invalid_linear_modifier = true;
+ break;
+ default:
+ break;
+ }
+
+ /* these drivers have no difference between unoptimized and optimized shader compilation */
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ screen->driver_workarounds.disable_optimized_compile = true;
+ break;
+ default:
+ if (zink_debug & ZINK_DEBUG_NOOPT)
+ screen->driver_workarounds.disable_optimized_compile = true;
+ break;
+ }
+
+ switch (screen->info.driver_props.driverID) {
+ case VK_DRIVER_ID_MESA_RADV:
+ case VK_DRIVER_ID_AMD_OPEN_SOURCE:
+ case VK_DRIVER_ID_AMD_PROPRIETARY:
+ /* this has bad perf on AMD */
+ screen->info.have_KHR_push_descriptor = false;
+ break;
+ default:
+ break;
+ }
+
+ if (!screen->resizable_bar)
+ screen->info.have_EXT_host_image_copy = false;
+}
+
+static void
+fixup_driver_props(struct zink_screen *screen)
+{
+ VkPhysicalDeviceProperties2 props = {
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
+ };
+ if (screen->info.have_EXT_host_image_copy) {
+ /* fill in layouts */
+ screen->info.hic_props.pNext = props.pNext;
+ props.pNext = &screen->info.hic_props;
+ screen->info.hic_props.pCopySrcLayouts = ralloc_array(screen, VkImageLayout, screen->info.hic_props.copySrcLayoutCount);
+ screen->info.hic_props.pCopyDstLayouts = ralloc_array(screen, VkImageLayout, screen->info.hic_props.copyDstLayoutCount);
+ }
+ if (props.pNext)
+ screen->vk.GetPhysicalDeviceProperties2(screen->pdev, &props);
+
+ if (screen->info.have_EXT_host_image_copy) {
+ for (unsigned i = 0; i < screen->info.hic_props.copyDstLayoutCount; i++) {
+ if (screen->info.hic_props.pCopyDstLayouts[i] == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+ screen->can_hic_shader_read = true;
+ break;
+ }
+ }
+ }
+}
+
+static void
+init_optimal_keys(struct zink_screen *screen)
+{
+ /* assume that anyone who knows enough to enable optimal_keys on turnip doesn't care about missing line stipple */
+ if (zink_debug & ZINK_DEBUG_OPTIMAL_KEYS && screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_TURNIP)
+ zink_debug |= ZINK_DEBUG_QUIET;
+ screen->optimal_keys = !screen->need_decompose_attrs &&
+ screen->info.have_EXT_non_seamless_cube_map &&
+ screen->info.have_EXT_provoking_vertex &&
+ !screen->driconf.inline_uniforms &&
+ !screen->driver_workarounds.no_linestipple &&
+ !screen->driver_workarounds.no_linesmooth &&
+ !screen->driver_workarounds.no_hw_gl_point &&
+ !screen->driver_workarounds.lower_robustImageAccess2 &&
+ !screen->driconf.emulate_point_smooth &&
+ !screen->driver_workarounds.needs_zs_shader_swizzle;
+ if (!screen->optimal_keys && zink_debug & ZINK_DEBUG_OPTIMAL_KEYS && !(zink_debug & ZINK_DEBUG_QUIET)) {
+ fprintf(stderr, "The following criteria are preventing optimal_keys enablement:\n");
+ if (screen->need_decompose_attrs)
+ fprintf(stderr, "missing vertex attribute formats\n");
+ if (screen->driconf.inline_uniforms)
+ fprintf(stderr, "uniform inlining must be disabled (set ZINK_INLINE_UNIFORMS=0 in your env)\n");
+ if (screen->driconf.emulate_point_smooth)
+ fprintf(stderr, "smooth point emulation is enabled\n");
+ if (screen->driver_workarounds.needs_zs_shader_swizzle)
+ fprintf(stderr, "Z/S shader swizzle workaround is enabled\n");
+ CHECK_OR_PRINT(have_EXT_line_rasterization);
+ CHECK_OR_PRINT(line_rast_feats.stippledBresenhamLines);
+ CHECK_OR_PRINT(feats.features.geometryShader);
+ CHECK_OR_PRINT(feats.features.sampleRateShading);
+ CHECK_OR_PRINT(have_EXT_non_seamless_cube_map);
+ CHECK_OR_PRINT(have_EXT_provoking_vertex);
+ if (screen->driver_workarounds.no_linesmooth)
+ fprintf(stderr, "driver does not support smooth lines\n");
+ if (screen->driver_workarounds.no_hw_gl_point)
+ fprintf(stderr, "driver does not support hardware GL_POINT\n");
+ CHECK_OR_PRINT(rb2_feats.robustImageAccess2);
+ CHECK_OR_PRINT(feats.features.robustBufferAccess);
+ CHECK_OR_PRINT(rb_image_feats.robustImageAccess);
+ fprintf(stderr, "\n");
+ mesa_logw("zink: force-enabling optimal_keys despite missing features. Good luck!");
+ }
+ if (zink_debug & ZINK_DEBUG_OPTIMAL_KEYS)
+ screen->optimal_keys = true;
+ if (!screen->optimal_keys)
+ screen->info.have_EXT_graphics_pipeline_library = false;
+
+ if (!screen->optimal_keys ||
+ !screen->info.have_KHR_maintenance5 ||
+ /* EXT_shader_object needs either dynamic feedback loop or per-app enablement */
+ (!screen->driconf.zink_shader_object_enable && !screen->info.have_EXT_attachment_feedback_loop_dynamic_state))
+ screen->info.have_EXT_shader_object = false;
+ if (screen->info.have_EXT_shader_object)
+ screen->have_full_ds3 = true;
+ if (zink_debug & ZINK_DEBUG_DGC) {
+ if (!screen->optimal_keys) {
+ mesa_loge("zink: can't DGC without optimal_keys!");
+ zink_debug &= ~ZINK_DEBUG_DGC;
+ } else {
+ screen->info.have_EXT_multi_draw = false;
+ screen->info.have_EXT_shader_object = false;
+ screen->info.have_EXT_graphics_pipeline_library = false;
+ screen->info.have_EXT_vertex_input_dynamic_state = false;
+ }
+ }
+}
+
+static struct disk_cache *
+zink_get_disk_shader_cache(struct pipe_screen *_screen)
+{
+ struct zink_screen *screen = zink_screen(_screen);
+
+ return screen->disk_cache;
+}
+
+VkSemaphore
+zink_create_semaphore(struct zink_screen *screen)
+{
+ VkSemaphoreCreateInfo sci = {
+ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ NULL,
+ 0
+ };
+ VkSemaphore sem = VK_NULL_HANDLE;
+ if (util_dynarray_contains(&screen->semaphores, VkSemaphore)) {
+ simple_mtx_lock(&screen->semaphores_lock);
+ if (util_dynarray_contains(&screen->semaphores, VkSemaphore))
+ sem = util_dynarray_pop(&screen->semaphores, VkSemaphore);
+ simple_mtx_unlock(&screen->semaphores_lock);
+ }
+ if (sem)
+ return sem;
+ VkResult ret = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem);
+ return ret == VK_SUCCESS ? sem : VK_NULL_HANDLE;
+}
+
+void
+zink_screen_lock_context(struct zink_screen *screen)
+{
+ simple_mtx_lock(&screen->copy_context_lock);
+ if (!screen->copy_context)
+ screen->copy_context = zink_context(screen->base.context_create(&screen->base, NULL, ZINK_CONTEXT_COPY_ONLY));
+ if (!screen->copy_context) {
+ mesa_loge("zink: failed to create copy context");
+ /* realistically there's nothing that can be done here */
+ }
+}
+
+void
+zink_screen_unlock_context(struct zink_screen *screen)
+{
+ simple_mtx_unlock(&screen->copy_context_lock);
+}
+
+static bool
+init_layouts(struct zink_screen *screen)
+{
+ if (screen->info.have_EXT_descriptor_indexing) {
+ VkDescriptorSetLayoutBinding bindings[4];
+ const unsigned num_bindings = 4;
+ VkDescriptorSetLayoutCreateInfo dcslci = {0};
+ dcslci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+ dcslci.pNext = NULL;
+ VkDescriptorSetLayoutBindingFlagsCreateInfo fci = {0};
+ VkDescriptorBindingFlags flags[4];
+ dcslci.pNext = &fci;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
+ else
+ dcslci.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT;
+ fci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO;
+ fci.bindingCount = num_bindings;
+ fci.pBindingFlags = flags;
+ for (unsigned i = 0; i < num_bindings; i++) {
+ flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT;
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB)
+ flags[i] |= VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT;
+ }
+ /* there is exactly 1 bindless descriptor set per context, and it has 4 bindings, 1 for each descriptor type */
+ for (unsigned i = 0; i < num_bindings; i++) {
+ bindings[i].binding = i;
+ bindings[i].descriptorType = zink_descriptor_type_from_bindless_index(i);
+ bindings[i].descriptorCount = ZINK_MAX_BINDLESS_HANDLES;
+ bindings[i].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
+ bindings[i].pImmutableSamplers = NULL;
+ }
+
+ dcslci.bindingCount = num_bindings;
+ dcslci.pBindings = bindings;
+ VkResult result = VKSCR(CreateDescriptorSetLayout)(screen->dev, &dcslci, 0, &screen->bindless_layout);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateDescriptorSetLayout failed (%s)", vk_Result_to_str(result));
+ return false;
+ }
+ }
+
+ screen->gfx_push_constant_layout = zink_pipeline_layout_create(screen, NULL, 0, false, 0);
+ return !!screen->gfx_push_constant_layout;
+}
+
+static int
+zink_screen_get_fd(struct pipe_screen *pscreen)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+
+ return screen->drm_fd;
+}
+
static struct zink_screen *
-zink_internal_create_screen(const struct pipe_screen_config *config)
+zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev_major, int64_t dev_minor)
{
+ if (getenv("ZINK_USE_LAVAPIPE")) {
+ mesa_loge("ZINK_USE_LAVAPIPE is obsolete. Use LIBGL_ALWAYS_SOFTWARE\n");
+ return NULL;
+ }
+
struct zink_screen *screen = rzalloc(NULL, struct zink_screen);
- if (!screen)
+ if (!screen) {
+ if (!config->implicit_driver_load)
+ mesa_loge("ZINK: failed to allocate screen");
return NULL;
+ }
- util_cpu_detect();
- screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1);
- if (screen->threaded)
- util_queue_init(&screen->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL);
+ screen->implicitly_loaded = config->implicit_driver_load;
+ screen->drm_fd = -1;
+ glsl_type_singleton_init_or_ref();
zink_debug = debug_get_option_zink_debug();
- screen->descriptor_mode = debug_get_option_zink_descriptor_mode();
- if (screen->descriptor_mode > ZINK_DESCRIPTOR_MODE_NOTEMPLATES) {
- printf("Specify exactly one descriptor mode.\n");
- abort();
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_AUTO)
+ zink_descriptor_mode = debug_get_option_zink_descriptor_mode();
+
+ screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1);
+ if (zink_debug & ZINK_DEBUG_FLUSHSYNC)
+ screen->threaded_submit = false;
+ else
+ screen->threaded_submit = screen->threaded;
+ screen->abort_on_hang = debug_get_bool_option("ZINK_HANG_ABORT", false);
+
+
+ u_trace_state_init();
+
+ screen->loader_lib = util_dl_open(VK_LIBNAME);
+ if (!screen->loader_lib) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to load "VK_LIBNAME);
+ goto fail;
}
- screen->instance_info.loader_version = zink_get_loader_version();
- screen->instance = zink_create_instance(&screen->instance_info);
+ screen->vk_GetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)util_dl_get_proc_address(screen->loader_lib, "vkGetInstanceProcAddr");
+ screen->vk_GetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)util_dl_get_proc_address(screen->loader_lib, "vkGetDeviceProcAddr");
+ if (!screen->vk_GetInstanceProcAddr ||
+ !screen->vk_GetDeviceProcAddr) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to get proc address");
+ goto fail;
+ }
+
+ screen->instance_info.loader_version = zink_get_loader_version(screen);
+ if (config) {
+ driParseConfigFiles(config->options, config->options_info, 0, "zink",
+ NULL, NULL, NULL, 0, NULL, 0);
+ screen->driconf.dual_color_blend_by_location = driQueryOptionb(config->options, "dual_color_blend_by_location");
+ //screen->driconf.inline_uniforms = driQueryOptionb(config->options, "radeonsi_inline_uniforms");
+ screen->driconf.emulate_point_smooth = driQueryOptionb(config->options, "zink_emulate_point_smooth");
+ screen->driconf.zink_shader_object_enable = driQueryOptionb(config->options, "zink_shader_object_enable");
+ }
- if (!screen->instance)
+ if (!zink_create_instance(screen, dev_major > 0 && dev_major < 255))
goto fail;
- vk_instance_dispatch_table_load(&screen->vk.instance, &vkGetInstanceProcAddr, screen->instance);
- vk_physical_device_dispatch_table_load(&screen->vk.physical_device, &vkGetInstanceProcAddr, screen->instance);
+ if (zink_debug & ZINK_DEBUG_VALIDATION) {
+ if (!screen->instance_info.have_layer_KHRONOS_validation &&
+ !screen->instance_info.have_layer_LUNARG_standard_validation) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("Failed to load validation layer");
+ goto fail;
+ }
+ }
+
+ vk_instance_uncompacted_dispatch_table_load(&screen->vk.instance,
+ screen->vk_GetInstanceProcAddr,
+ screen->instance);
+ vk_physical_device_uncompacted_dispatch_table_load(&screen->vk.physical_device,
+ screen->vk_GetInstanceProcAddr,
+ screen->instance);
zink_verify_instance_extensions(screen);
if (screen->instance_info.have_EXT_debug_utils &&
- (zink_debug & ZINK_DEBUG_VALIDATION) && !create_debug(screen))
- debug_printf("ZINK: failed to setup debug utils\n");
+ (zink_debug & ZINK_DEBUG_VALIDATION) && !create_debug(screen)) {
+ if (!screen->implicitly_loaded)
+ debug_printf("ZINK: failed to setup debug utils\n");
+ }
- choose_pdev(screen);
- if (screen->pdev == VK_NULL_HANDLE)
+ choose_pdev(screen, dev_major, dev_minor);
+ if (screen->pdev == VK_NULL_HANDLE) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to choose pdev");
goto fail;
+ }
+ screen->is_cpu = screen->info.props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
update_queue_props(screen);
@@ -1821,65 +3357,171 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
VK_FORMAT_X8_D24_UNORM_PACK32);
screen->have_D24_UNORM_S8_UINT = zink_is_depth_format_supported(screen,
VK_FORMAT_D24_UNORM_S8_UINT);
+ screen->have_D32_SFLOAT_S8_UINT = zink_is_depth_format_supported(screen,
+ VK_FORMAT_D32_SFLOAT_S8_UINT);
if (!zink_get_physical_device_info(screen)) {
- debug_printf("ZINK: failed to detect features\n");
+ if (!screen->implicitly_loaded)
+ debug_printf("ZINK: failed to detect features\n");
goto fail;
}
- /* Some Vulkan implementations have special requirements for WSI
- * allocations.
- */
- check_device_needs_mesa_wsi(screen);
+ memset(&screen->heap_map, UINT8_MAX, sizeof(screen->heap_map));
+ for (enum zink_heap i = 0; i < ZINK_HEAP_MAX; i++) {
+ for (unsigned j = 0; j < screen->info.mem_props.memoryTypeCount; j++) {
+ VkMemoryPropertyFlags domains = vk_domain_from_heap(i);
+ if ((screen->info.mem_props.memoryTypes[j].propertyFlags & domains) == domains) {
+ screen->heap_map[i][screen->heap_count[i]++] = j;
+ }
+ }
+ }
+ /* iterate again to check for missing heaps */
+ for (enum zink_heap i = 0; i < ZINK_HEAP_MAX; i++) {
+ /* not found: use compatible heap */
+ if (screen->heap_map[i][0] == UINT8_MAX) {
+ /* only cached mem has a failure case for now */
+ assert(i == ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED || i == ZINK_HEAP_DEVICE_LOCAL_LAZY ||
+ i == ZINK_HEAP_DEVICE_LOCAL_VISIBLE);
+ if (i == ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED) {
+ memcpy(screen->heap_map[i], screen->heap_map[ZINK_HEAP_HOST_VISIBLE_COHERENT], screen->heap_count[ZINK_HEAP_HOST_VISIBLE_COHERENT]);
+ screen->heap_count[i] = screen->heap_count[ZINK_HEAP_HOST_VISIBLE_COHERENT];
+ } else {
+ memcpy(screen->heap_map[i], screen->heap_map[ZINK_HEAP_DEVICE_LOCAL], screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]);
+ screen->heap_count[i] = screen->heap_count[ZINK_HEAP_DEVICE_LOCAL];
+ }
+ }
+ }
+ {
+ uint64_t biggest_vis_vram = 0;
+ for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL_VISIBLE]; i++)
+ biggest_vis_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL_VISIBLE][i]].heapIndex].size);
+ uint64_t biggest_vram = 0;
+ for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]; i++)
+ biggest_vram = MAX2(biggest_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][i]].heapIndex].size);
+ /* determine if vis vram is roughly equal to total vram */
+ if (biggest_vis_vram > biggest_vram * 0.9)
+ screen->resizable_bar = true;
+ }
+
+ setup_renderdoc(screen);
+ if (screen->threaded_submit && !util_queue_init(&screen->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("zink: Failed to create flush queue.\n");
+ goto fail;
+ }
zink_internal_setup_moltenvk(screen);
+ if (!screen->info.have_KHR_timeline_semaphore && !screen->info.feats12.timelineSemaphore) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("zink: KHR_timeline_semaphore is required");
+ goto fail;
+ }
+ if (zink_debug & ZINK_DEBUG_DGC) {
+ if (!screen->info.have_NV_device_generated_commands) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("zink: can't use DGC without NV_device_generated_commands");
+ goto fail;
+ }
+ }
+
+ if (zink_debug & ZINK_DEBUG_MEM) {
+ simple_mtx_init(&screen->debug_mem_lock, mtx_plain);
+ screen->debug_mem_sizes = _mesa_hash_table_create(screen, _mesa_hash_string, _mesa_key_string_equal);
+ }
+
+ fixup_driver_props(screen);
+
+ init_driver_workarounds(screen);
screen->dev = zink_create_logical_device(screen);
if (!screen->dev)
goto fail;
- init_queue(screen);
- if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
- screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
- screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
- /* this has bad perf on AMD */
- screen->info.have_KHR_push_descriptor = false;
+ vk_device_uncompacted_dispatch_table_load(&screen->vk.device,
+ screen->vk_GetDeviceProcAddr,
+ screen->dev);
- vk_device_dispatch_table_load(&screen->vk.device, &vkGetDeviceProcAddr, screen->dev);
+ init_queue(screen);
zink_verify_device_extensions(screen);
+ /* descriptor set indexing is determined by 'compact' descriptor mode:
+ * by default, 6 sets are used to provide more granular updating
+ * in compact mode, a maximum of 4 sets are used, with like-types combined
+ */
+ if ((zink_debug & ZINK_DEBUG_COMPACT) ||
+ screen->info.props.limits.maxBoundDescriptorSets < ZINK_MAX_DESCRIPTOR_SETS) {
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = 0;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UBO] = 1;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SSBO] = 1;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = 2;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_IMAGE] = 2;
+ screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS] = 3;
+ screen->compact_descriptors = true;
+ } else {
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = 0;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_UBO] = 1;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = 2;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_SSBO] = 3;
+ screen->desc_set_id[ZINK_DESCRIPTOR_TYPE_IMAGE] = 4;
+ screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS] = 5;
+ }
+
if (screen->info.have_EXT_calibrated_timestamps && !check_have_device_time(screen))
goto fail;
screen->have_triangle_fans = true;
-#if defined(VK_EXTX_PORTABILITY_SUBSET_EXTENSION_NAME)
- if (screen->info.have_EXTX_portability_subset) {
- screen->have_triangle_fans = (VK_TRUE == screen->info.portability_subset_extx_feats.triangleFans);
+#if defined(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME)
+ if (screen->info.have_KHR_portability_subset) {
+ screen->have_triangle_fans = (VK_TRUE == screen->info.portability_subset_feats.triangleFans);
}
-#endif // VK_EXTX_PORTABILITY_SUBSET_EXTENSION_NAME
+#endif // VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME
check_base_requirements(screen);
util_live_shader_cache_init(&screen->shaders, zink_create_gfx_shader_state, zink_delete_shader_state);
screen->base.get_name = zink_get_name;
+ if (screen->instance_info.have_KHR_external_memory_capabilities) {
+ screen->base.get_device_uuid = zink_get_device_uuid;
+ screen->base.get_driver_uuid = zink_get_driver_uuid;
+ }
+ if (screen->info.have_KHR_external_memory_win32) {
+ screen->base.get_device_luid = zink_get_device_luid;
+ screen->base.get_device_node_mask = zink_get_device_node_mask;
+ }
+ screen->base.set_max_shader_compiler_threads = zink_set_max_shader_compiler_threads;
+ screen->base.is_parallel_shader_compilation_finished = zink_is_parallel_shader_compilation_finished;
screen->base.get_vendor = zink_get_vendor;
screen->base.get_device_vendor = zink_get_device_vendor;
screen->base.get_compute_param = zink_get_compute_param;
+ screen->base.get_timestamp = zink_get_timestamp;
screen->base.query_memory_info = zink_query_memory_info;
screen->base.get_param = zink_get_param;
screen->base.get_paramf = zink_get_paramf;
screen->base.get_shader_param = zink_get_shader_param;
screen->base.get_compiler_options = zink_get_compiler_options;
screen->base.get_sample_pixel_grid = zink_get_sample_pixel_grid;
+ screen->base.is_compute_copy_faster = zink_is_compute_copy_faster;
screen->base.is_format_supported = zink_is_format_supported;
- screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers;
- screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported;
- screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes;
+ screen->base.driver_thread_add_job = zink_driver_thread_add_job;
+ if (screen->info.have_EXT_image_drm_format_modifier && screen->info.have_EXT_external_memory_dma_buf) {
+ screen->base.query_dmabuf_modifiers = zink_query_dmabuf_modifiers;
+ screen->base.is_dmabuf_modifier_supported = zink_is_dmabuf_modifier_supported;
+ screen->base.get_dmabuf_modifier_planes = zink_get_dmabuf_modifier_planes;
+ }
+#if defined(_WIN32)
+ if (screen->info.have_KHR_external_memory_win32)
+ screen->base.create_fence_win32 = zink_create_fence_win32;
+#endif
screen->base.context_create = zink_context_create;
screen->base.flush_frontbuffer = zink_flush_frontbuffer;
screen->base.destroy = zink_destroy_screen;
screen->base.finalize_nir = zink_shader_finalize;
+ screen->base.get_disk_shader_cache = zink_get_disk_shader_cache;
+ screen->base.get_sparse_texture_virtual_page_size = zink_get_sparse_texture_virtual_page_size;
+ screen->base.get_driver_query_group_info = zink_get_driver_query_group_info;
+ screen->base.get_driver_query_info = zink_get_driver_query_info;
+ screen->base.set_damage_region = zink_set_damage_region;
if (screen->info.have_EXT_sample_locations) {
VkMultisamplePropertiesEXT prop;
@@ -1895,94 +3537,225 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
if (!zink_screen_resource_init(&screen->base))
goto fail;
- zink_bo_init(screen);
+ if (!zink_bo_init(screen)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to initialize suballocator");
+ goto fail;
+ }
zink_screen_fence_init(&screen->base);
+ if (zink_debug & ZINK_DEBUG_IOOPT)
+ screen->driver_workarounds.io_opt = true;
zink_screen_init_compiler(screen);
- disk_cache_init(screen);
+ if (!disk_cache_init(screen)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to initialize disk cache");
+ goto fail;
+ }
+ if (!util_queue_init(&screen->cache_get_thread, "zcfq", 8, 4,
+ UTIL_QUEUE_INIT_RESIZE_IF_FULL, screen))
+ goto fail;
populate_format_props(screen);
- pre_hash_descriptor_states(screen);
slab_create_parent(&screen->transfer_pool, sizeof(struct zink_transfer), 16);
+ slab_create(&screen->present_mempool, sizeof(struct zink_kopper_present_info), 16);
-#if WITH_XMLCONFIG
- if (config) {
- driParseConfigFiles(config->options, config->options_info, 0, "zink",
- NULL, NULL, NULL, 0, NULL, 0);
- screen->driconf.dual_color_blend_by_location = driQueryOptionb(config->options, "dual_color_blend_by_location");
- //screen->driconf.inline_uniforms = driQueryOptionb(config->options, "radeonsi_inline_uniforms");
- }
-#endif
- screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", false);
+ screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", screen->is_cpu) && !(zink_debug & ZINK_DEBUG_DGC);
screen->total_video_mem = get_video_mem(screen);
screen->clamp_video_mem = screen->total_video_mem * 0.8;
- if (!os_get_total_physical_memory(&screen->total_mem))
+ if (!os_get_total_physical_memory(&screen->total_mem)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to get total physical memory");
goto fail;
+ }
- if (debug_get_bool_option("ZINK_NO_TIMELINES", false))
- screen->info.have_KHR_timeline_semaphore = false;
- if (screen->info.have_KHR_timeline_semaphore)
- zink_screen_init_semaphore(screen);
+ if (!zink_screen_init_semaphore(screen)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("zink: failed to create timeline semaphore");
+ goto fail;
+ }
- memset(&screen->heap_map, UINT8_MAX, sizeof(screen->heap_map));
- for (enum zink_heap i = 0; i < ZINK_HEAP_MAX; i++) {
- for (unsigned j = 0; j < screen->info.mem_props.memoryTypeCount; j++) {
- VkMemoryPropertyFlags domains = vk_domain_from_heap(i);
- if ((screen->info.mem_props.memoryTypes[j].propertyFlags & domains) == domains) {
- assert(screen->heap_map[i] == UINT8_MAX);
- screen->heap_map[i] = j;
- break;
+ bool can_db = true;
+ {
+ if (!screen->info.have_EXT_descriptor_buffer) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("Cannot use db descriptor mode without EXT_descriptor_buffer");
+ goto fail;
}
+ can_db = false;
}
-
- /* not found: use compatible heap */
- if (screen->heap_map[i] == UINT8_MAX) {
- /* only cached mem has a failure case for now */
- assert(i == ZINK_HEAP_HOST_VISIBLE_CACHED);
- screen->heap_map[i] = screen->heap_map[ZINK_HEAP_HOST_VISIBLE_COHERENT];
+ if (!screen->resizable_bar) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("Cannot use db descriptor mode without resizable bar");
+ goto fail;
+ }
+ can_db = false;
+ }
+ if (!screen->info.have_EXT_non_seamless_cube_map) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("Cannot use db descriptor mode without EXT_non_seamless_cube_map");
+ goto fail;
+ }
+ can_db = false;
+ }
+ if (!screen->info.rb2_feats.nullDescriptor) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("Cannot use db descriptor mode without robustness2.nullDescriptor");
+ goto fail;
+ }
+ can_db = false;
+ }
+ if (ZINK_FBFETCH_DESCRIPTOR_SIZE < screen->info.db_props.inputAttachmentDescriptorSize) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("Cannot use db descriptor mode with inputAttachmentDescriptorSize(%u) > %u", (unsigned)screen->info.db_props.inputAttachmentDescriptorSize, ZINK_FBFETCH_DESCRIPTOR_SIZE);
+ goto fail;
+ }
+ mesa_logw("zink: bug detected: inputAttachmentDescriptorSize(%u) > %u", (unsigned)screen->info.db_props.inputAttachmentDescriptorSize, ZINK_FBFETCH_DESCRIPTOR_SIZE);
+ can_db = false;
+ }
+ if (screen->info.db_props.maxDescriptorBufferBindings < 2 || screen->info.db_props.maxSamplerDescriptorBufferBindings < 2) {
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ /* allow for testing, but disable bindless */
+ mesa_logw("Cannot use bindless and db descriptor mode with (maxDescriptorBufferBindings||maxSamplerDescriptorBufferBindings) < 2");
+ } else {
+ can_db = false;
+ }
}
}
- {
- unsigned vis_vram = screen->heap_map[ZINK_HEAP_DEVICE_LOCAL_VISIBLE];
- unsigned vram = screen->heap_map[ZINK_HEAP_DEVICE_LOCAL];
- /* determine if vis vram is roughly equal to total vram */
- if (screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[vis_vram].heapIndex].size >
- screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[vram].heapIndex].size * 0.9)
- screen->resizable_bar = true;
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_AUTO) {
+ /* descriptor buffer is not performant with virt yet */
+ if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_VENUS)
+ zink_descriptor_mode = ZINK_DESCRIPTOR_MODE_LAZY;
+ else
+ zink_descriptor_mode = can_db ? ZINK_DESCRIPTOR_MODE_DB : ZINK_DESCRIPTOR_MODE_LAZY;
}
-
- if (!screen->info.have_KHR_imageless_framebuffer) {
- simple_mtx_init(&screen->framebuffer_mtx, mtx_plain);
- _mesa_hash_table_init(&screen->framebuffer_cache, screen, hash_framebuffer_state, equals_framebuffer_state);
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ const uint32_t sampler_size = MAX2(screen->info.db_props.combinedImageSamplerDescriptorSize, screen->info.db_props.robustUniformTexelBufferDescriptorSize);
+ const uint32_t image_size = MAX2(screen->info.db_props.storageImageDescriptorSize, screen->info.db_props.robustStorageTexelBufferDescriptorSize);
+ if (screen->compact_descriptors) {
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_UBO] = screen->info.db_props.robustUniformBufferDescriptorSize +
+ screen->info.db_props.robustStorageBufferDescriptorSize;
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = sampler_size + image_size;
+ } else {
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_UBO] = screen->info.db_props.robustUniformBufferDescriptorSize;
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW] = sampler_size;
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_SSBO] = screen->info.db_props.robustStorageBufferDescriptorSize;
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_IMAGE] = image_size;
+ }
+ screen->db_size[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = screen->info.db_props.robustUniformBufferDescriptorSize;
+ screen->info.have_KHR_push_descriptor = false;
+ screen->base_descriptor_size = MAX4(screen->db_size[0], screen->db_size[1], screen->db_size[2], screen->db_size[3]);
}
- zink_screen_init_descriptor_funcs(screen, false);
+ simple_mtx_init(&screen->free_batch_states_lock, mtx_plain);
+ simple_mtx_init(&screen->dt_lock, mtx_plain);
+
util_idalloc_mt_init_tc(&screen->buffer_ids);
+ simple_mtx_init(&screen->semaphores_lock, mtx_plain);
+ util_dynarray_init(&screen->semaphores, screen);
+ util_dynarray_init(&screen->fd_semaphores, screen);
+
+ util_vertex_state_cache_init(&screen->vertex_state_cache,
+ zink_create_vertex_state, zink_vertex_state_destroy);
+ screen->base.create_vertex_state = zink_cache_create_vertex_state;
+ screen->base.vertex_state_destroy = zink_cache_vertex_state_destroy;
+
+ zink_synchronization_init(screen);
+
+ zink_init_screen_pipeline_libs(screen);
+
+ if (!init_layouts(screen)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to initialize layouts");
+ goto fail;
+ }
+
+ if (!zink_descriptor_layouts_init(screen)) {
+ if (!screen->implicitly_loaded)
+ mesa_loge("ZINK: failed to initialize descriptor layouts");
+ goto fail;
+ }
+
+ simple_mtx_init(&screen->copy_context_lock, mtx_plain);
+
+ init_optimal_keys(screen);
+
+ screen->screen_id = p_atomic_inc_return(&num_screens);
+ zink_tracing = screen->instance_info.have_EXT_debug_utils &&
+ (u_trace_is_enabled(U_TRACE_TYPE_PERFETTO) || u_trace_is_enabled(U_TRACE_TYPE_MARKERS));
+
+ screen->frame_marker_emitted = zink_screen_debug_marker_begin(screen, "frame");
+
return screen;
fail:
- ralloc_free(screen);
+ zink_destroy_screen(&screen->base);
return NULL;
}
struct pipe_screen *
-zink_create_screen(struct sw_winsys *winsys)
+zink_create_screen(struct sw_winsys *winsys, const struct pipe_screen_config *config)
{
- struct zink_screen *ret = zink_internal_create_screen(NULL);
+ struct zink_screen *ret = zink_internal_create_screen(config, -1, -1);
if (ret) {
- ret->winsys = winsys;
ret->drm_fd = -1;
}
return &ret->base;
}
+static inline int
+zink_render_rdev(int fd, int64_t *dev_major, int64_t *dev_minor)
+{
+ int ret = 0;
+ *dev_major = *dev_minor = -1;
+#ifdef HAVE_LIBDRM
+ struct stat stx;
+ drmDevicePtr dev;
+
+ if (fd == -1)
+ return 0;
+
+ if (drmGetDevice2(fd, 0, &dev))
+ return -1;
+
+ if (!(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
+ ret = -1;
+ goto free_device;
+ }
+
+ if (stat(dev->nodes[DRM_NODE_RENDER], &stx)) {
+ ret = -1;
+ goto free_device;
+ }
+
+ *dev_major = major(stx.st_rdev);
+ *dev_minor = minor(stx.st_rdev);
+
+free_device:
+ drmFreeDevice(&dev);
+#endif //HAVE_LIBDRM
+
+ return ret;
+}
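/* A hedged sketch of how the dev_major/dev_minor pair above is expected to be
 * consumed during physical-device selection: matched against the render node
 * reported by VK_EXT_physical_device_drm. "drm_props" here is a hypothetical
 * VkPhysicalDeviceDrmPropertiesEXT filled from vkGetPhysicalDeviceProperties2():
 *
 *    if (drm_props.hasRender &&
 *        drm_props.renderMajor == dev_major &&
 *        drm_props.renderMinor == dev_minor)
 *       ; // this VkPhysicalDevice backs the DRM fd passed to zink_drm_create_screen()
 */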
+
struct pipe_screen *
zink_drm_create_screen(int fd, const struct pipe_screen_config *config)
{
- struct zink_screen *ret = zink_internal_create_screen(config);
+ int64_t dev_major, dev_minor;
+ struct zink_screen *ret;
+
+ if (zink_render_rdev(fd, &dev_major, &dev_minor))
+ return NULL;
+
+ ret = zink_internal_create_screen(config, dev_major, dev_minor);
if (ret)
ret->drm_fd = os_dupfd_cloexec(fd);
@@ -2003,3 +3776,35 @@ void zink_stub_function_not_loaded()
mesa_loge("ZINK: a Vulkan function was called without being loaded");
abort();
}
+
+bool
+zink_screen_debug_marker_begin(struct zink_screen *screen, const char *fmt, ...)
+{
+ if (!zink_tracing)
+ return false;
+
+ char *name;
+ va_list va;
+ va_start(va, fmt);
+ int ret = vasprintf(&name, fmt, va);
+ va_end(va);
+
+ if (ret == -1)
+ return false;
+
+ VkDebugUtilsLabelEXT info = { 0 };
+ info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
+ info.pLabelName = name;
+
+ VKSCR(QueueBeginDebugUtilsLabelEXT)(screen->queue, &info);
+
+ free(name);
+ return true;
+}
+
+void
+zink_screen_debug_marker_end(struct zink_screen *screen, bool emitted)
+{
+ if (emitted)
+ VKSCR(QueueEndDebugUtilsLabelEXT)(screen->queue);
+}
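/* A minimal usage sketch of the begin/end pair above, assuming a hypothetical
 * call site; the bool returned by _begin() must be handed back to _end() so the
 * label is only popped when it was actually pushed (tracing enabled and
 * vasprintf succeeded):
 *
 *    bool emitted = zink_screen_debug_marker_begin(screen, "blit %ux%u", width, height);
 *    // ...record and submit the work being labeled...
 *    zink_screen_debug_marker_end(screen, emitted);
 */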
diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h
index 4a30ef5adba..c907bc6e85d 100644
--- a/src/gallium/drivers/zink/zink_screen.h
+++ b/src/gallium/drivers/zink/zink_screen.h
@@ -24,186 +24,54 @@
#ifndef ZINK_SCREEN_H
#define ZINK_SCREEN_H
-#include "zink_device_info.h"
-#include "zink_instance.h"
-#include "vk_dispatch_table.h"
+#include "zink_types.h"
-#include "util/u_idalloc.h"
-#include "pipe/p_screen.h"
-#include "util/slab.h"
-#include "compiler/nir/nir.h"
-#include "util/disk_cache.h"
-#include "util/log.h"
-#include "util/simple_mtx.h"
-#include "util/u_queue.h"
-#include "util/u_live_shader_cache.h"
-#include "pipebuffer/pb_cache.h"
-#include "pipebuffer/pb_slab.h"
-#include <vulkan/vulkan.h>
-extern uint32_t zink_debug;
-struct hash_table;
-
-struct zink_batch_state;
-struct zink_context;
-struct zink_descriptor_layout_key;
-struct zink_program;
-struct zink_shader;
-enum zink_descriptor_type;
-
-/* this is the spec minimum */
-#define ZINK_SPARSE_BUFFER_PAGE_SIZE (64 * 1024)
-
-#define ZINK_DEBUG_NIR 0x1
-#define ZINK_DEBUG_SPIRV 0x2
-#define ZINK_DEBUG_TGSI 0x4
-#define ZINK_DEBUG_VALIDATION 0x8
-
-#define NUM_SLAB_ALLOCATORS 3
-
-enum zink_descriptor_mode {
- ZINK_DESCRIPTOR_MODE_AUTO,
- ZINK_DESCRIPTOR_MODE_LAZY,
- ZINK_DESCRIPTOR_MODE_NOFALLBACK,
- ZINK_DESCRIPTOR_MODE_NOTEMPLATES,
-};
-
-struct zink_modifier_prop {
- uint32_t drmFormatModifierCount;
- VkDrmFormatModifierPropertiesEXT* pDrmFormatModifierProperties;
-};
-
-struct zink_screen {
- struct pipe_screen base;
- bool threaded;
- uint32_t curr_batch; //the current batch id
- uint32_t last_finished; //this is racy but ultimately doesn't matter
- VkSemaphore sem;
- VkSemaphore prev_sem;
- struct util_queue flush_queue;
-
- unsigned buffer_rebind_counter;
-
- bool device_lost;
- struct sw_winsys *winsys;
- int drm_fd;
-
- struct hash_table framebuffer_cache;
- simple_mtx_t framebuffer_mtx;
-
- struct slab_parent_pool transfer_pool;
- struct disk_cache *disk_cache;
- struct util_queue cache_put_thread;
- struct util_queue cache_get_thread;
-
- struct util_live_shader_cache shaders;
-
- struct {
- struct pb_cache bo_cache;
- struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
- unsigned min_alloc_size;
- struct hash_table *bo_export_table;
- simple_mtx_t bo_export_table_lock;
- uint32_t next_bo_unique_id;
- } pb;
- uint8_t heap_map[VK_MAX_MEMORY_TYPES];
- bool resizable_bar;
-
- uint64_t total_video_mem;
- uint64_t clamp_video_mem;
- uint64_t total_mem;
-
- VkInstance instance;
- struct zink_instance_info instance_info;
-
- VkPhysicalDevice pdev;
- uint32_t vk_version, spirv_version;
- struct util_idalloc_mt buffer_ids;
-
- struct zink_device_info info;
- struct nir_shader_compiler_options nir_options;
-
- bool have_X8_D24_UNORM_PACK32;
- bool have_D24_UNORM_S8_UINT;
- bool have_triangle_fans;
-
- uint32_t gfx_queue;
- uint32_t max_queues;
- uint32_t timestamp_valid_bits;
- VkDevice dev;
- VkQueue queue; //gfx+compute
- VkQueue thread_queue; //gfx+compute
- VkDebugUtilsMessengerEXT debugUtilsCallbackHandle;
-
- uint32_t cur_custom_border_color_samplers;
-
- bool needs_mesa_wsi;
- bool needs_mesa_flush_wsi;
-
- struct vk_dispatch_table vk;
-
- bool (*descriptor_program_init)(struct zink_context *ctx, struct zink_program *pg);
- void (*descriptor_program_deinit)(struct zink_screen *screen, struct zink_program *pg);
- void (*descriptors_update)(struct zink_context *ctx, bool is_compute);
- void (*context_update_descriptor_states)(struct zink_context *ctx, bool is_compute);
- void (*context_invalidate_descriptor_state)(struct zink_context *ctx, enum pipe_shader_type shader,
- enum zink_descriptor_type type,
- unsigned start, unsigned count);
- bool (*batch_descriptor_init)(struct zink_screen *screen, struct zink_batch_state *bs);
- void (*batch_descriptor_reset)(struct zink_screen *screen, struct zink_batch_state *bs);
- void (*batch_descriptor_deinit)(struct zink_screen *screen, struct zink_batch_state *bs);
- bool (*descriptors_init)(struct zink_context *ctx);
- void (*descriptors_deinit)(struct zink_context *ctx);
- enum zink_descriptor_mode descriptor_mode;
-
- struct {
- bool dual_color_blend_by_location;
- bool inline_uniforms;
- } driconf;
+#ifdef __cplusplus
+extern "C" {
+#endif
- VkFormatProperties format_props[PIPE_FORMAT_COUNT];
- struct zink_modifier_prop modifier_props[PIPE_FORMAT_COUNT];
- struct {
- uint32_t image_view;
- uint32_t buffer_view;
- } null_descriptor_hashes;
+struct util_dl_library;
- VkExtent2D maxSampleLocationGridSize[5];
-};
+void
+zink_init_screen_pipeline_libs(struct zink_screen *screen);
/* update last_finished to account for batch_id wrapping */
static inline void
-zink_screen_update_last_finished(struct zink_screen *screen, uint32_t batch_id)
+zink_screen_update_last_finished(struct zink_screen *screen, uint64_t batch_id)
{
+ const uint32_t check_id = (uint32_t)batch_id;
/* last_finished may have wrapped */
if (screen->last_finished < UINT_MAX / 2) {
/* last_finished has wrapped, batch_id has not */
- if (batch_id > UINT_MAX / 2)
+ if (check_id > UINT_MAX / 2)
return;
- } else if (batch_id < UINT_MAX / 2) {
+ } else if (check_id < UINT_MAX / 2) {
/* batch_id has wrapped, last_finished has not */
- screen->last_finished = batch_id;
+ screen->last_finished = check_id;
return;
}
/* neither have wrapped */
- screen->last_finished = MAX2(batch_id, screen->last_finished);
+ screen->last_finished = MAX2(check_id, screen->last_finished);
}
/* check a batch_id against last_finished while accounting for wrapping */
static inline bool
zink_screen_check_last_finished(struct zink_screen *screen, uint32_t batch_id)
{
+ const uint32_t check_id = (uint32_t)batch_id;
+ assert(check_id);
/* last_finished may have wrapped */
if (screen->last_finished < UINT_MAX / 2) {
/* last_finished has wrapped, batch_id has not */
- if (batch_id > UINT_MAX / 2)
+ if (check_id > UINT_MAX / 2)
return true;
- } else if (batch_id < UINT_MAX / 2) {
+ } else if (check_id < UINT_MAX / 2) {
/* batch_id has wrapped, last_finished has not */
return false;
}
- return screen->last_finished >= batch_id;
+ return screen->last_finished >= check_id;
}
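/* Worked example of the wrap handling in the two helpers above, using the real
 * 32-bit ids (UINT_MAX / 2 is the halfway point):
 * - last_finished = 5, check_id = 0xfffffff0: last_finished has wrapped past the
 *   old id, so the check returns true (already finished) and the update ignores it.
 * - last_finished = 0xfffffff0, check_id = 5: check_id has wrapped and is newer,
 *   so the check returns false and the update stores 5 directly.
 * - both ids on the same side of the halfway point: plain >= / MAX2 applies.
 */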
bool
@@ -219,6 +87,10 @@ zink_screen_handle_vkresult(struct zink_screen *screen, VkResult ret)
break;
case VK_ERROR_DEVICE_LOST:
screen->device_lost = true;
+ mesa_loge("zink: DEVICE LOST!\n");
+ /* if nothing can save us, abort */
+ if (screen->abort_on_hang && !screen->robust_ctx_count)
+ abort();
FALLTHROUGH;
default:
success = false;
@@ -227,56 +99,92 @@ zink_screen_handle_vkresult(struct zink_screen *screen, VkResult ret)
return success;
}
-static inline struct zink_screen *
-zink_screen(struct pipe_screen *pipe)
+typedef const char *(*zink_vkflags_func)(uint64_t);
+
+static inline unsigned
+zink_string_vkflags_unroll(char *buf, size_t bufsize, uint64_t flags, zink_vkflags_func func)
{
- return (struct zink_screen *)pipe;
+ bool first = true;
+ unsigned idx = 0;
+ u_foreach_bit64(bit, flags) {
+ if (!first)
+ buf[idx++] = '|';
+ idx += snprintf(&buf[idx], bufsize - idx, "%s", func((BITFIELD64_BIT(bit))));
+ first = false;
+ }
+ return idx;
}
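/* A minimal usage sketch, assuming a hypothetical callback flag_bit_name() that
 * returns the name of a single flag bit (any function matching zink_vkflags_func
 * works); set bits are emitted lowest-first, joined with '|':
 *
 *    char buf[1024];
 *    zink_string_vkflags_unroll(buf, sizeof(buf),
 *                               VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
 *                               flag_bit_name);
 */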
+#define VRAM_ALLOC_LOOP(RET, DOIT, ...) \
+ do { \
+ unsigned _us[] = {0, 1000, 10000, 500000, 1000000}; \
+ for (unsigned _i = 0; _i < ARRAY_SIZE(_us); _i++) { \
+ RET = DOIT; \
+ if (RET == VK_SUCCESS || RET != VK_ERROR_OUT_OF_DEVICE_MEMORY) \
+ break; \
+ os_time_sleep(_us[_i]); \
+ } \
+ __VA_ARGS__ \
+ } while (0)
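/* A minimal usage sketch of VRAM_ALLOC_LOOP, assuming hypothetical locals
 * "mai" (a filled VkMemoryAllocateInfo) and "mem"; the allocation is retried
 * with increasing sleeps while the device keeps reporting
 * VK_ERROR_OUT_OF_DEVICE_MEMORY, and the trailing statements run once after
 * the loop:
 *
 *    VkResult ret;
 *    VRAM_ALLOC_LOOP(ret,
 *       VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &mem),
 *       if (!zink_screen_handle_vkresult(screen, ret))
 *          mem = VK_NULL_HANDLE;
 *    );
 */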
-struct mem_cache_entry {
- VkDeviceMemory mem;
- void *map;
-};
+VkSemaphore
+zink_create_semaphore(struct zink_screen *screen);
-#define VKCTX(fn) zink_screen(ctx->base.screen)->vk.fn
-#define VKSCR(fn) screen->vk.fn
+void
+zink_screen_lock_context(struct zink_screen *screen);
+void
+zink_screen_unlock_context(struct zink_screen *screen);
+
+VkSemaphore
+zink_create_exportable_semaphore(struct zink_screen *screen);
+VkSemaphore
+zink_screen_export_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res);
+bool
+zink_screen_import_dmabuf_semaphore(struct zink_screen *screen, struct zink_resource *res, VkSemaphore sem);
VkFormat
zink_get_format(struct zink_screen *screen, enum pipe_format format);
-bool
-zink_screen_batch_id_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout);
+void
+zink_convert_color(const struct zink_screen *screen, enum pipe_format format,
+ union pipe_color_union *dst,
+ const union pipe_color_union *src);
bool
-zink_screen_timeline_wait(struct zink_screen *screen, uint32_t batch_id, uint64_t timeout);
+zink_screen_timeline_wait(struct zink_screen *screen, uint64_t batch_id, uint64_t timeout);
bool
zink_is_depth_format_supported(struct zink_screen *screen, VkFormat format);
-#define GET_PROC_ADDR_INSTANCE_LOCAL(instance, x) PFN_vk##x vk_##x = (PFN_vk##x)vkGetInstanceProcAddr(instance, "vk"#x)
+#define GET_PROC_ADDR_INSTANCE_LOCAL(screen, instance, x) PFN_vk##x vk_##x = (PFN_vk##x)(screen)->vk_GetInstanceProcAddr(instance, "vk"#x)
void
-zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg);
+zink_screen_update_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread);
void
-zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg);
+zink_screen_get_pipeline_cache(struct zink_screen *screen, struct zink_program *pg, bool in_thread);
void
-zink_screen_init_descriptor_funcs(struct zink_screen *screen, bool fallback);
+zink_stub_function_not_loaded(void);
+bool
+zink_screen_debug_marker_begin(struct zink_screen *screen, const char *fmt, ...);
void
-zink_stub_function_not_loaded(void);
+zink_screen_debug_marker_end(struct zink_screen *screen, bool emitted);
-#define warn_missing_feature(feat) \
+#define warn_missing_feature(warned, feat) \
do { \
- static bool warned = false; \
if (!warned) { \
- fprintf(stderr, "WARNING: Incorrect rendering will happen, " \
- "because the Vulkan device doesn't support " \
- "the %s feature\n", feat); \
+ if (!(zink_debug & ZINK_DEBUG_QUIET)) \
+ mesa_logw("WARNING: Incorrect rendering will happen " \
+ "because the Vulkan device doesn't support " \
+ "the '%s' feature\n", feat); \
warned = true; \
} \
} while (0)
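/* The "warned" flag now lives at the call site rather than inside the macro,
 * so each missing feature gets its own once-only warning. A minimal usage
 * sketch, assuming a hypothetical wideLines check:
 *
 *    static bool warned_wide_lines;
 *    if (!screen->info.feats.features.wideLines)
 *       warn_missing_feature(warned_wide_lines, "wideLines");
 */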
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h
index 318728e87d8..1dab2447fd8 100644
--- a/src/gallium/drivers/zink/zink_shader_keys.h
+++ b/src/gallium/drivers/zink/zink_shader_keys.h
@@ -29,9 +29,11 @@
#include "compiler/shader_info.h"
struct zink_vs_key_base {
- bool clip_halfz;
- bool push_drawid;
- bool last_vertex_stage;
+ bool last_vertex_stage : 1;
+ bool clip_halfz : 1;
+ bool push_drawid : 1;
+ bool robust_access : 1;
+ uint8_t pad : 4;
};
struct zink_vs_key {
@@ -55,14 +57,64 @@ struct zink_vs_key {
unsigned size;
};
-struct zink_fs_key {
+struct zink_gs_key {
+ struct zink_vs_key_base base;
+ uint8_t pad;
+ bool lower_line_stipple : 1;
+ bool lower_line_smooth : 1;
+ bool lower_gl_point : 1;
+ bool line_rectangular : 1;
+ unsigned lower_pv_mode : 2;
+ // not hashed
+ unsigned size;
+};
+
+struct zink_zs_swizzle {
+ uint8_t s[4];
+};
+
+struct zink_zs_swizzle_key {
+ /* Mask of sampler views with zs_view, i.e. ones that have swizzles other than GL_RED for depth */
+ uint32_t mask;
+ struct zink_zs_swizzle swizzle[32];
+};
+
+struct zink_fs_key_base {
+ bool point_coord_yinvert : 1;
+ bool samples : 1;
+ bool force_dual_color_blend : 1;
+ bool force_persample_interp : 1;
+ bool fbfetch_ms : 1;
+ bool shadow_needs_shader_swizzle : 1; //append zink_zs_swizzle_key after the key data
+ uint8_t pad : 2;
uint8_t coord_replace_bits;
- bool coord_replace_yinvert;
- bool samples;
- bool force_dual_color_blend;
+};
+
+struct zink_fs_key {
+ struct zink_fs_key_base base;
+ /* non-optimal bits after this point */
+ bool lower_line_stipple : 1;
+ bool lower_line_smooth : 1;
+ bool lower_point_smooth : 1;
+ bool robust_access : 1;
+ uint16_t pad2 : 12;
+};
+
+struct zink_tcs_key {
+ uint8_t patch_vertices;
+};
+
+/* when adding a new field, make sure
+ * ctx->compute_pipeline_state.key.size is set in zink_context_create.
+ */
+struct zink_cs_key {
+ bool robust_access : 1;
+ uint32_t pad : 31;
};
struct zink_shader_key_base {
+ bool needs_zs_shader_swizzle;
+ uint32_t nonseamless_cube_mask;
uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
};
@@ -73,16 +125,54 @@ struct zink_shader_key_base {
*/
struct zink_shader_key {
union {
- /* reuse vs key for now with tes/gs since we only use clip_halfz */
+ /* reuse vs key for now with tes since we only use clip_halfz */
struct zink_vs_key vs;
struct zink_vs_key_base vs_base;
+ struct zink_tcs_key tcs;
+ struct zink_gs_key gs;
struct zink_fs_key fs;
+ struct zink_fs_key_base fs_base;
+ struct zink_cs_key cs;
} key;
struct zink_shader_key_base base;
unsigned inline_uniforms:1;
uint32_t size;
};
+union zink_shader_key_optimal {
+ struct {
+ struct zink_vs_key_base vs_base;
+ struct zink_tcs_key tcs;
+ struct zink_fs_key_base fs;
+ };
+ struct {
+ uint8_t vs_bits;
+ uint8_t tcs_bits;
+ uint16_t fs_bits;
+ };
+ uint32_t val;
+};
+
+/* the default key has only last_vertex_stage set */
+#define ZINK_SHADER_KEY_OPTIMAL_DEFAULT (1<<0)
+/* Ignore patch_vertices bits that would only be used if we had to generate the missing TCS */
+static inline uint32_t
+zink_shader_key_optimal_no_tcs(uint32_t key)
+{
+ union zink_shader_key_optimal k;
+ k.val = key;
+ k.tcs_bits = 0;
+ return k.val;
+}
+#define ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(key) (zink_shader_key_optimal_no_tcs(key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT)
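/* A minimal sketch of how the packed key is meant to be consulted, assuming a
 * hypothetical 32-bit value "key_val" taken from the pipeline state: vs_bits,
 * tcs_bits and fs_bits alias the three sub-keys, so comparing the whole key is
 * a single integer compare:
 *
 *    if (ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(key_val)) {
 *       // only last_vertex_stage is set (tcs bits ignored), so the
 *       // precompiled default pipeline variant can be used
 *    }
 */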
+
+static inline const struct zink_fs_key_base *
+zink_fs_key_base(const struct zink_shader_key *key)
+{
+ assert(key);
+ return &key->key.fs.base;
+}
+
static inline const struct zink_fs_key *
zink_fs_key(const struct zink_shader_key *key)
{
@@ -103,6 +193,25 @@ zink_vs_key(const struct zink_shader_key *key)
return &key->key.vs;
}
+static inline const struct zink_gs_key *
+zink_gs_key(const struct zink_shader_key *key)
+{
+ assert(key);
+ return &key->key.gs;
+}
+
+static inline const struct zink_tcs_key *
+zink_tcs_key(const struct zink_shader_key *key)
+{
+ assert(key);
+ return &key->key.tcs;
+}
+static inline const struct zink_cs_key *
+zink_cs_key(const struct zink_shader_key *key)
+{
+ assert(key);
+ return &key->key.cs;
+}
#endif
diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c
index 68e8d413ff9..4298086f6ee 100644
--- a/src/gallium/drivers/zink/zink_state.c
+++ b/src/gallium/drivers/zink/zink_state.c
@@ -31,6 +31,8 @@
#include "compiler/shader_enums.h"
#include "util/u_dual_blend.h"
#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "vk_format.h"
#include <math.h>
@@ -47,20 +49,21 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
ves->hw_state.hash = _mesa_hash_pointer(ves);
int buffer_map[PIPE_MAX_ATTRIBS];
- for (int i = 0; i < ARRAY_SIZE(buffer_map); ++i)
- buffer_map[i] = -1;
+ for (int j = 0; j < ARRAY_SIZE(buffer_map); ++j)
+ buffer_map[j] = -1;
int num_bindings = 0;
unsigned num_decomposed = 0;
uint32_t size8 = 0;
uint32_t size16 = 0;
uint32_t size32 = 0;
+ uint16_t strides[PIPE_MAX_ATTRIBS];
for (i = 0; i < num_elements; ++i) {
const struct pipe_vertex_element *elem = elements + i;
int binding = elem->vertex_buffer_index;
if (buffer_map[binding] < 0) {
- ves->binding_map[num_bindings] = binding;
+ ves->hw_state.binding_map[num_bindings] = binding;
buffer_map[binding] = num_bindings++;
}
binding = buffer_map[binding];
@@ -102,6 +105,7 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
ves->decomposed_attrs_without_w |= BITFIELD_BIT(i);
ves->decomposed_attrs_without_w_size = size;
}
+ ves->has_decomposed_attrs = true;
}
if (screen->info.have_EXT_vertex_input_dynamic_state) {
@@ -109,32 +113,37 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
ves->hw_state.dynattribs[i].binding = binding;
ves->hw_state.dynattribs[i].location = i;
ves->hw_state.dynattribs[i].format = format;
+ strides[binding] = elem->src_stride;
assert(ves->hw_state.dynattribs[i].format != VK_FORMAT_UNDEFINED);
ves->hw_state.dynattribs[i].offset = elem->src_offset;
} else {
ves->hw_state.attribs[i].binding = binding;
ves->hw_state.attribs[i].location = i;
ves->hw_state.attribs[i].format = format;
+ ves->hw_state.b.strides[binding] = elem->src_stride;
assert(ves->hw_state.attribs[i].format != VK_FORMAT_UNDEFINED);
ves->hw_state.attribs[i].offset = elem->src_offset;
+ ves->min_stride[binding] = MAX2(ves->min_stride[binding], elem->src_offset + vk_format_get_blocksize(format));
}
}
assert(num_decomposed + num_elements <= PIPE_MAX_ATTRIBS);
- u_foreach_bit(i, ves->decomposed_attrs | ves->decomposed_attrs_without_w) {
- const struct pipe_vertex_element *elem = elements + i;
+ u_foreach_bit(attr_index, ves->decomposed_attrs | ves->decomposed_attrs_without_w) {
+ const struct pipe_vertex_element *elem = elements + attr_index;
const struct util_format_description *desc = util_format_description(elem->src_format);
unsigned size = 1;
- if (size32 & BITFIELD_BIT(i))
+ if (size32 & BITFIELD_BIT(attr_index))
size = 4;
- else if (size16 & BITFIELD_BIT(i))
+ else if (size16 & BITFIELD_BIT(attr_index))
size = 2;
+ else
+ assert(size8 & BITFIELD_BIT(attr_index));
for (unsigned j = 1; j < desc->nr_channels; j++) {
if (screen->info.have_EXT_vertex_input_dynamic_state) {
- memcpy(&ves->hw_state.dynattribs[num_elements], &ves->hw_state.dynattribs[i], sizeof(VkVertexInputAttributeDescription2EXT));
+ memcpy(&ves->hw_state.dynattribs[num_elements], &ves->hw_state.dynattribs[attr_index], sizeof(VkVertexInputAttributeDescription2EXT));
ves->hw_state.dynattribs[num_elements].location = num_elements;
ves->hw_state.dynattribs[num_elements].offset += j * size;
} else {
- memcpy(&ves->hw_state.attribs[num_elements], &ves->hw_state.attribs[i], sizeof(VkVertexInputAttributeDescription));
+ memcpy(&ves->hw_state.attribs[num_elements], &ves->hw_state.attribs[attr_index], sizeof(VkVertexInputAttributeDescription));
ves->hw_state.attribs[num_elements].location = num_elements;
ves->hw_state.attribs[num_elements].offset += j * size;
}
@@ -144,22 +153,23 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
ves->hw_state.num_bindings = num_bindings;
ves->hw_state.num_attribs = num_elements;
if (screen->info.have_EXT_vertex_input_dynamic_state) {
- for (int i = 0; i < num_bindings; ++i) {
- ves->hw_state.dynbindings[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT;
- ves->hw_state.dynbindings[i].binding = ves->bindings[i].binding;
- ves->hw_state.dynbindings[i].inputRate = ves->bindings[i].inputRate;
- if (ves->divisor[i])
- ves->hw_state.dynbindings[i].divisor = ves->divisor[i];
+ for (int j = 0; j < num_bindings; ++j) {
+ ves->hw_state.dynbindings[j].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT;
+ ves->hw_state.dynbindings[j].binding = ves->bindings[j].binding;
+ ves->hw_state.dynbindings[j].inputRate = ves->bindings[j].inputRate;
+ ves->hw_state.dynbindings[j].stride = strides[j];
+ if (ves->divisor[j])
+ ves->hw_state.dynbindings[j].divisor = ves->divisor[j];
else
- ves->hw_state.dynbindings[i].divisor = 1;
+ ves->hw_state.dynbindings[j].divisor = 1;
}
} else {
- for (int i = 0; i < num_bindings; ++i) {
- ves->hw_state.b.bindings[i].binding = ves->bindings[i].binding;
- ves->hw_state.b.bindings[i].inputRate = ves->bindings[i].inputRate;
- if (ves->divisor[i]) {
- ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].divisor = ves->divisor[i];
- ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].binding = ves->bindings[i].binding;
+ for (int j = 0; j < num_bindings; ++j) {
+ ves->hw_state.b.bindings[j].binding = ves->bindings[j].binding;
+ ves->hw_state.b.bindings[j].inputRate = ves->bindings[j].inputRate;
+ if (ves->divisor[j]) {
+ ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].divisor = ves->divisor[j];
+ ves->hw_state.b.divisors[ves->hw_state.b.divisors_present].binding = ves->bindings[j].binding;
ves->hw_state.b.divisors_present++;
}
}
@@ -173,12 +183,16 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx,
{
struct zink_context *ctx = zink_context(pctx);
struct zink_gfx_pipeline_state *state = &ctx->gfx_pipeline_state;
+ zink_flush_dgc_if_enabled(ctx);
ctx->element_state = cso;
if (cso) {
if (state->element_state != &ctx->element_state->hw_state) {
ctx->vertex_state_changed = !zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state;
ctx->vertex_buffers_dirty = ctx->element_state->hw_state.num_bindings > 0;
}
+ state->element_state = &ctx->element_state->hw_state;
+ if (zink_screen(pctx->screen)->optimal_keys)
+ return;
const struct zink_vs_key *vs = zink_get_vs_key(ctx);
uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
switch (vs->size) {
@@ -218,7 +232,6 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx,
key->key.vs.size = size;
key->size += 2 * size;
}
- state->element_state = &ctx->element_state->hw_state;
} else {
state->element_state = NULL;
ctx->vertex_buffers_dirty = false;
@@ -272,21 +285,6 @@ blend_factor(enum pipe_blendfactor factor)
}
-static bool
-need_blend_constants(enum pipe_blendfactor factor)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return true;
-
- default:
- return false;
- }
-}
-
static VkBlendOp
blend_op(enum pipe_blend_func func)
{
@@ -362,8 +360,7 @@ zink_create_blend_state(struct pipe_context *pctx,
*/
cso->alpha_to_coverage = blend_state->alpha_to_coverage;
cso->alpha_to_one = blend_state->alpha_to_one;
-
- cso->need_blend_constants = false;
+ cso->num_rts = blend_state->max_rt + 1;
for (int i = 0; i < blend_state->max_rt + 1; ++i) {
const struct pipe_rt_blend_state *rt = blend_state->rt;
@@ -380,12 +377,6 @@ zink_create_blend_state(struct pipe_context *pctx,
att.srcAlphaBlendFactor = blend_factor(fix_blendfactor(rt->alpha_src_factor, cso->alpha_to_one));
att.dstAlphaBlendFactor = blend_factor(fix_blendfactor(rt->alpha_dst_factor, cso->alpha_to_one));
att.alphaBlendOp = blend_op(rt->alpha_func);
-
- if (need_blend_constants(rt->rgb_src_factor) ||
- need_blend_constants(rt->rgb_dst_factor) ||
- need_blend_constants(rt->alpha_src_factor) ||
- need_blend_constants(rt->alpha_dst_factor))
- cso->need_blend_constants = true;
}
if (rt->colormask & PIPE_MASK_R)
@@ -397,7 +388,20 @@ zink_create_blend_state(struct pipe_context *pctx,
if (rt->colormask & PIPE_MASK_A)
att.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT;
+ cso->wrmask |= (rt->colormask << i);
+ if (rt->blend_enable)
+ cso->enables |= BITFIELD_BIT(i);
+
cso->attachments[i] = att;
+
+ cso->ds3.enables[i] = att.blendEnable;
+ cso->ds3.eq[i].alphaBlendOp = att.alphaBlendOp;
+ cso->ds3.eq[i].dstAlphaBlendFactor = att.dstAlphaBlendFactor;
+ cso->ds3.eq[i].srcAlphaBlendFactor = att.srcAlphaBlendFactor;
+ cso->ds3.eq[i].colorBlendOp = att.colorBlendOp;
+ cso->ds3.eq[i].dstColorBlendFactor = att.dstColorBlendFactor;
+ cso->ds3.eq[i].srcColorBlendFactor = att.srcColorBlendFactor;
+ cso->ds3.wrmask[i] = att.colorWriteMask;
}
cso->dual_src_blend = util_blend_state_is_dual(blend_state, 0);
@@ -408,18 +412,47 @@ static void
zink_bind_blend_state(struct pipe_context *pctx, void *cso)
{
struct zink_context *ctx = zink_context(pctx);
+ struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_gfx_pipeline_state* state = &zink_context(pctx)->gfx_pipeline_state;
+ zink_flush_dgc_if_enabled(ctx);
struct zink_blend_state *blend = cso;
+ struct zink_blend_state *old_blend = state->blend_state;
if (state->blend_state != cso) {
state->blend_state = cso;
- state->blend_id = blend ? blend->hash : 0;
- state->dirty = true;
- bool force_dual_color_blend = zink_screen(pctx->screen)->driconf.dual_color_blend_by_location &&
- blend && blend->dual_src_blend && state->blend_state->attachments[1].blendEnable;
- if (force_dual_color_blend != zink_get_fs_key(ctx)->force_dual_color_blend)
- zink_set_fs_key(ctx)->force_dual_color_blend = force_dual_color_blend;
+ if (!screen->have_full_ds3) {
+ state->blend_id = blend ? blend->hash : 0;
+ state->dirty = true;
+ }
+ bool force_dual_color_blend = screen->driconf.dual_color_blend_by_location &&
+ blend && blend->dual_src_blend && state->blend_state->attachments[0].blendEnable;
+ if (force_dual_color_blend != zink_get_fs_base_key(ctx)->force_dual_color_blend)
+ zink_set_fs_base_key(ctx)->force_dual_color_blend = force_dual_color_blend;
ctx->blend_state_changed = true;
+
+ if (cso && screen->have_full_ds3) {
+#define STATE_CHECK(NAME, FLAG) \
+ if ((!old_blend || old_blend->NAME != blend->NAME)) \
+ ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_##FLAG)
+
+ STATE_CHECK(alpha_to_coverage, A2C);
+ if (screen->info.dynamic_state3_feats.extendedDynamicState3AlphaToOneEnable) {
+ STATE_CHECK(alpha_to_one, A21);
+ }
+ STATE_CHECK(enables, ON);
+ STATE_CHECK(wrmask, WRITE);
+ if (old_blend && blend->num_rts == old_blend->num_rts) {
+ if (memcmp(blend->ds3.eq, old_blend->ds3.eq, blend->num_rts * sizeof(blend->ds3.eq[0])))
+ ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_EQ);
+ } else {
+ ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_EQ);
+ }
+ STATE_CHECK(logicop_enable, LOGIC_ON);
+ STATE_CHECK(logicop_func, LOGIC);
+
+#undef STATE_CHECK
+ }
+
}
}
@@ -516,7 +549,7 @@ zink_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *cso)
{
struct zink_context *ctx = zink_context(pctx);
- bool prev_zwrite = ctx->dsa_state ? ctx->dsa_state->hw_state.depth_write : false;
+ zink_flush_dgc_if_enabled(ctx);
ctx->dsa_state = cso;
if (cso) {
@@ -527,10 +560,8 @@ zink_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *cso)
ctx->dsa_state_changed = true;
}
}
- if (prev_zwrite != (ctx->dsa_state ? ctx->dsa_state->hw_state.depth_write : false)) {
- ctx->rp_changed = true;
- zink_batch_no_rp(ctx);
- }
+ if (!ctx->track_renderpasses && !ctx->blitting)
+ ctx->rp_tc_info_updated = true;
}
static void
@@ -558,18 +589,6 @@ line_width(float width, float granularity, const float range[2])
return CLAMP(width, range[0], range[1]);
}
-#define warn_line_feature(feat) \
- do { \
- static bool warned = false; \
- if (!warned) { \
- fprintf(stderr, "WARNING: Incorrect rendering will happen, " \
- "because the Vulkan device doesn't support " \
- "the %s feature of " \
- "VK_EXT_line_rasterization\n", feat); \
- warned = true; \
- } \
- } while (0)
-
static void *
zink_create_rasterizer_state(struct pipe_context *pctx,
const struct pipe_rasterizer_state *rs_state)
@@ -582,83 +601,70 @@ zink_create_rasterizer_state(struct pipe_context *pctx,
state->base = *rs_state;
state->base.line_stipple_factor++;
- state->hw_state.line_stipple_enable = rs_state->line_stipple_enable;
+
+ state->hw_state.line_stipple_enable =
+ rs_state->line_stipple_enable &&
+ !screen->driver_workarounds.no_linestipple;
assert(rs_state->depth_clip_far == rs_state->depth_clip_near);
- state->hw_state.depth_clamp = rs_state->depth_clip_near == 0;
- state->hw_state.rasterizer_discard = rs_state->rasterizer_discard;
- state->hw_state.force_persample_interp = rs_state->force_persample_interp;
+ state->hw_state.depth_clip = rs_state->depth_clip_near;
+ state->hw_state.depth_clamp = rs_state->depth_clamp;
state->hw_state.pv_last = !rs_state->flatshade_first;
state->hw_state.clip_halfz = rs_state->clip_halfz;
assert(rs_state->fill_front <= PIPE_POLYGON_MODE_POINT);
if (rs_state->fill_back != rs_state->fill_front)
debug_printf("BUG: vulkan doesn't support different front and back fill modes\n");
- state->hw_state.polygon_mode = rs_state->fill_front; // same values
- state->hw_state.cull_mode = rs_state->cull_face; // same bits
+
+ if (rs_state->fill_front == PIPE_POLYGON_MODE_POINT &&
+ screen->driver_workarounds.no_hw_gl_point) {
+ state->hw_state.polygon_mode = VK_POLYGON_MODE_FILL;
+ state->cull_mode = VK_CULL_MODE_NONE;
+ } else {
+ state->hw_state.polygon_mode = rs_state->fill_front; // same values
+ state->cull_mode = rs_state->cull_face; // same bits
+ }
state->front_face = rs_state->front_ccw ?
VK_FRONT_FACE_COUNTER_CLOCKWISE :
VK_FRONT_FACE_CLOCKWISE;
- VkPhysicalDeviceLineRasterizationFeaturesEXT *line_feats =
- &screen->info.line_rast_feats;
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
-
- if (rs_state->line_stipple_enable) {
- if (screen->info.have_EXT_line_rasterization) {
- if (rs_state->line_rectangular) {
- if (rs_state->line_smooth) {
- if (line_feats->stippledSmoothLines)
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
- else
- warn_line_feature("stippledSmoothLines");
- } else if (line_feats->stippledRectangularLines)
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
- else
- warn_line_feature("stippledRectangularLines");
- } else if (line_feats->stippledBresenhamLines)
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
- else {
- warn_line_feature("stippledBresenhamLines");
-
- /* no suitable mode that supports line stippling */
- state->base.line_stipple_factor = 0;
- state->base.line_stipple_pattern = UINT16_MAX;
- }
- }
+ state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
+ if (rs_state->line_rectangular) {
+ if (rs_state->line_smooth &&
+ !screen->driver_workarounds.no_linesmooth)
+ state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
+ else
+ state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
} else {
- if (screen->info.have_EXT_line_rasterization) {
- if (rs_state->line_rectangular) {
- if (rs_state->line_smooth) {
- if (line_feats->smoothLines)
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
- else
- warn_line_feature("smoothLines");
- } else if (line_feats->rectangularLines)
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
- else
- warn_line_feature("rectangularLines");
- } else if (line_feats->bresenhamLines)
- state->hw_state.line_mode =
- VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
- else
- warn_line_feature("bresenhamLines");
- }
- state->base.line_stipple_factor = 0;
+ state->hw_state.line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
+ }
+ state->dynamic_line_mode = state->hw_state.line_mode;
+ switch (state->hw_state.line_mode) {
+ case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
+ if (!screen->info.line_rast_feats.rectangularLines)
+ state->dynamic_line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
+ break;
+ case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
+ if (!screen->info.line_rast_feats.smoothLines)
+ state->dynamic_line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
+ break;
+ case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
+ if (!screen->info.line_rast_feats.bresenhamLines)
+ state->dynamic_line_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
+ break;
+ default: break;
+ }
+
+ if (!rs_state->line_stipple_enable) {
+ state->base.line_stipple_factor = 1;
state->base.line_stipple_pattern = UINT16_MAX;
}
- state->offset_point = rs_state->offset_point;
- state->offset_line = rs_state->offset_line;
- state->offset_tri = rs_state->offset_tri;
+ state->offset_fill = util_get_offset(rs_state, rs_state->fill_front);
state->offset_units = rs_state->offset_units;
+ if (!rs_state->offset_units_unscaled)
+ state->offset_units *= 2;
state->offset_clamp = rs_state->offset_clamp;
state->offset_scale = rs_state->offset_scale;
@@ -674,9 +680,16 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso)
{
struct zink_context *ctx = zink_context(pctx);
struct zink_screen *screen = zink_screen(pctx->screen);
+ struct zink_rasterizer_state *prev_state = ctx->rast_state;
bool point_quad_rasterization = ctx->rast_state ? ctx->rast_state->base.point_quad_rasterization : false;
bool scissor = ctx->rast_state ? ctx->rast_state->base.scissor : false;
bool pv_last = ctx->rast_state ? ctx->rast_state->hw_state.pv_last : false;
+ bool force_persample_interp = ctx->gfx_pipeline_state.force_persample_interp;
+ bool clip_halfz = ctx->rast_state ? ctx->rast_state->hw_state.clip_halfz : false;
+ bool rasterizer_discard = ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false;
+ bool half_pixel_center = ctx->rast_state ? ctx->rast_state->base.half_pixel_center : true;
+ float line_width = ctx->rast_state ? ctx->rast_state->base.line_width : 1.0;
+ zink_flush_dgc_if_enabled(ctx);
ctx->rast_state = cso;
if (ctx->rast_state) {
@@ -685,26 +698,83 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso)
/* without this prop, change in pv mode requires new rp */
!screen->info.pv_props.provokingVertexModePerPipeline)
zink_batch_no_rp(ctx);
- uint32_t rast_bits = 0;
- memcpy(&rast_bits, &ctx->rast_state->hw_state, sizeof(struct zink_rasterizer_hw_state));
- ctx->gfx_pipeline_state.rast_state = rast_bits & BITFIELD_MASK(ZINK_RAST_HW_STATE_SIZE);
+ memcpy(&ctx->gfx_pipeline_state.dyn_state3, &ctx->rast_state->hw_state, sizeof(struct zink_rasterizer_hw_state));
- ctx->gfx_pipeline_state.dirty = true;
+ ctx->gfx_pipeline_state.dirty |= !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3;
ctx->rast_state_changed = true;
- if (zink_get_last_vertex_key(ctx)->clip_halfz != ctx->rast_state->base.clip_halfz) {
- zink_set_last_vertex_key(ctx)->clip_halfz = ctx->rast_state->base.clip_halfz;
+ if (clip_halfz != ctx->rast_state->base.clip_halfz) {
+ if (screen->info.have_EXT_depth_clip_control)
+ ctx->gfx_pipeline_state.dirty = true;
+ else
+ zink_set_last_vertex_key(ctx)->clip_halfz = ctx->rast_state->base.clip_halfz;
ctx->vp_state_changed = true;
}
+ if (screen->info.have_EXT_extended_dynamic_state3) {
+#define STATE_CHECK(NAME, FLAG) \
+ if (cso && (!prev_state || prev_state->NAME != ctx->rast_state->NAME)) \
+ ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_RAST_##FLAG)
+
+ if (!screen->driver_workarounds.no_linestipple) {
+ if (ctx->rast_state->base.line_stipple_enable) {
+ STATE_CHECK(base.line_stipple_factor, STIPPLE);
+ STATE_CHECK(base.line_stipple_pattern, STIPPLE);
+ } else {
+ ctx->ds3_states &= ~BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE);
+ }
+ if (screen->info.dynamic_state3_feats.extendedDynamicState3LineStippleEnable) {
+ STATE_CHECK(hw_state.line_stipple_enable, STIPPLE_ON);
+ }
+ }
+ STATE_CHECK(hw_state.depth_clip, CLIP);
+ STATE_CHECK(hw_state.depth_clamp, CLAMP);
+ STATE_CHECK(hw_state.polygon_mode, POLYGON);
+ STATE_CHECK(hw_state.clip_halfz, HALFZ);
+ STATE_CHECK(hw_state.pv_last, PV);
+ STATE_CHECK(dynamic_line_mode, LINE);
+
+#undef STATE_CHECK
+ }
+
+ if (fabs(ctx->rast_state->base.line_width - line_width) > FLT_EPSILON)
+ ctx->line_width_changed = true;
+
+ bool lower_gl_point = screen->driver_workarounds.no_hw_gl_point;
+ lower_gl_point &= ctx->rast_state->base.fill_front == PIPE_POLYGON_MODE_POINT;
+ if (zink_get_gs_key(ctx)->lower_gl_point != lower_gl_point)
+ zink_set_gs_key(ctx)->lower_gl_point = lower_gl_point;
+
if (ctx->gfx_pipeline_state.dyn_state1.front_face != ctx->rast_state->front_face) {
ctx->gfx_pipeline_state.dyn_state1.front_face = ctx->rast_state->front_face;
ctx->gfx_pipeline_state.dirty |= !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state;
}
- if (ctx->rast_state->base.point_quad_rasterization != point_quad_rasterization)
+ if (ctx->gfx_pipeline_state.dyn_state1.cull_mode != ctx->rast_state->cull_mode) {
+ ctx->gfx_pipeline_state.dyn_state1.cull_mode = ctx->rast_state->cull_mode;
+ ctx->gfx_pipeline_state.dirty |= !zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state;
+ }
+ if (!ctx->primitives_generated_active)
+ zink_set_rasterizer_discard(ctx, false);
+ else if (rasterizer_discard != ctx->rast_state->base.rasterizer_discard)
+ zink_set_null_fs(ctx);
+
+ if (ctx->rast_state->base.point_quad_rasterization ||
+ ctx->rast_state->base.point_quad_rasterization != point_quad_rasterization)
zink_set_fs_point_coord_key(ctx);
if (ctx->rast_state->base.scissor != scissor)
ctx->scissor_changed = true;
+
+ if (ctx->rast_state->base.force_persample_interp != force_persample_interp) {
+ zink_set_fs_base_key(ctx)->force_persample_interp = ctx->rast_state->base.force_persample_interp;
+ ctx->gfx_pipeline_state.dirty = true;
+ }
+ ctx->gfx_pipeline_state.force_persample_interp = ctx->rast_state->base.force_persample_interp;
+
+ if (ctx->rast_state->base.half_pixel_center != half_pixel_center)
+ ctx->vp_state_changed = true;
+
+ if (!screen->optimal_keys)
+ zink_update_gs_key_rectangular_line(ctx);
}
}
@@ -714,6 +784,65 @@ zink_delete_rasterizer_state(struct pipe_context *pctx, void *rs_state)
FREE(rs_state);
}
+struct pipe_vertex_state *
+zink_create_vertex_state(struct pipe_screen *pscreen,
+ struct pipe_vertex_buffer *buffer,
+ const struct pipe_vertex_element *elements,
+ unsigned num_elements,
+ struct pipe_resource *indexbuf,
+ uint32_t full_velem_mask)
+{
+ struct zink_vertex_state *zstate = CALLOC_STRUCT(zink_vertex_state);
+ if (!zstate) {
+ mesa_loge("ZINK: failed to allocate zstate!");
+ return NULL;
+ }
+
+ util_init_pipe_vertex_state(pscreen, buffer, elements, num_elements, indexbuf, full_velem_mask,
+ &zstate->b);
+
+ /* Initialize the vertex element state in state->element.
+ * Do it by creating a vertex element state object and copying it there.
+ */
+ struct zink_context ctx;
+ ctx.base.screen = pscreen;
+ struct zink_vertex_elements_state *elems = zink_create_vertex_elements_state(&ctx.base, num_elements, elements);
+ zstate->velems = *elems;
+ zink_delete_vertex_elements_state(&ctx.base, elems);
+
+ return &zstate->b;
+}
+
+void
+zink_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate)
+{
+ pipe_vertex_buffer_unreference(&vstate->input.vbuffer);
+ pipe_resource_reference(&vstate->input.indexbuf, NULL);
+ FREE(vstate);
+}
+
+struct pipe_vertex_state *
+zink_cache_create_vertex_state(struct pipe_screen *pscreen,
+ struct pipe_vertex_buffer *buffer,
+ const struct pipe_vertex_element *elements,
+ unsigned num_elements,
+ struct pipe_resource *indexbuf,
+ uint32_t full_velem_mask)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+
+ return util_vertex_state_cache_get(pscreen, buffer, elements, num_elements, indexbuf,
+ full_velem_mask, &screen->vertex_state_cache);
+}
+
+void
+zink_cache_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+
+ util_vertex_state_destroy(pscreen, &screen->vertex_state_cache, vstate);
+}
+
void
zink_context_state_init(struct pipe_context *pctx)
{
diff --git a/src/gallium/drivers/zink/zink_state.h b/src/gallium/drivers/zink/zink_state.h
index 1254498377c..71dc6457170 100644
--- a/src/gallium/drivers/zink/zink_state.h
+++ b/src/gallium/drivers/zink/zink_state.h
@@ -24,98 +24,38 @@
#ifndef ZINK_STATE_H
#define ZINK_STATE_H
-#include <vulkan/vulkan.h>
+#include "zink_types.h"
-#include "pipe/p_state.h"
-
-struct zink_vertex_elements_hw_state {
- uint32_t hash;
- union {
- VkVertexInputAttributeDescription attribs[PIPE_MAX_ATTRIBS];
- VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS];
- };
- union {
- struct {
- VkVertexInputBindingDivisorDescriptionEXT divisors[PIPE_MAX_ATTRIBS];
- VkVertexInputBindingDescription bindings[PIPE_MAX_ATTRIBS]; // combination of element_state and stride
- uint8_t divisors_present;
- } b;
- VkVertexInputBindingDescription2EXT dynbindings[PIPE_MAX_ATTRIBS];
- };
- uint32_t num_bindings, num_attribs;
-};
-
-struct zink_vertex_elements_state {
- struct {
- uint32_t binding;
- VkVertexInputRate inputRate;
- } bindings[PIPE_MAX_ATTRIBS];
- uint32_t divisor[PIPE_MAX_ATTRIBS];
- uint8_t binding_map[PIPE_MAX_ATTRIBS];
- uint32_t decomposed_attrs;
- unsigned decomposed_attrs_size;
- uint32_t decomposed_attrs_without_w;
- unsigned decomposed_attrs_without_w_size;
- struct zink_vertex_elements_hw_state hw_state;
-};
-
-struct zink_rasterizer_hw_state {
- unsigned polygon_mode : 2; //VkPolygonMode
- unsigned cull_mode : 2; //VkCullModeFlags
- unsigned line_mode : 2; //VkLineRasterizationModeEXT
- bool depth_clamp:1;
- bool rasterizer_discard:1;
- bool pv_last:1;
- bool line_stipple_enable:1;
- bool force_persample_interp:1;
- bool clip_halfz:1;
-};
-#define ZINK_RAST_HW_STATE_SIZE 12
-
-
-struct zink_rasterizer_state {
- struct pipe_rasterizer_state base;
- bool offset_point, offset_line, offset_tri;
- float offset_units, offset_clamp, offset_scale;
- float line_width;
- VkFrontFace front_face;
- struct zink_rasterizer_hw_state hw_state;
-};
-
-struct zink_blend_state {
- uint32_t hash;
- VkPipelineColorBlendAttachmentState attachments[PIPE_MAX_COLOR_BUFS];
-
- VkBool32 logicop_enable;
- VkLogicOp logicop_func;
-
- VkBool32 alpha_to_coverage;
- VkBool32 alpha_to_one;
-
- bool need_blend_constants;
- bool dual_src_blend;
-};
-
-struct zink_depth_stencil_alpha_hw_state {
- VkBool32 depth_test;
- VkCompareOp depth_compare_op;
+#ifdef __cplusplus
+extern "C" {
+#endif
- VkBool32 depth_bounds_test;
- float min_depth_bounds, max_depth_bounds;
+void
+zink_context_state_init(struct pipe_context *pctx);
- VkBool32 stencil_test;
- VkStencilOpState stencil_front;
- VkStencilOpState stencil_back;
- VkBool32 depth_write;
-};
+struct pipe_vertex_state *
+zink_create_vertex_state(struct pipe_screen *pscreen,
+ struct pipe_vertex_buffer *buffer,
+ const struct pipe_vertex_element *elements,
+ unsigned num_elements,
+ struct pipe_resource *indexbuf,
+ uint32_t full_velem_mask);
+void
+zink_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate);
+struct pipe_vertex_state *
+zink_cache_create_vertex_state(struct pipe_screen *pscreen,
+ struct pipe_vertex_buffer *buffer,
+ const struct pipe_vertex_element *elements,
+ unsigned num_elements,
+ struct pipe_resource *indexbuf,
+ uint32_t full_velem_mask);
+void
+zink_cache_vertex_state_destroy(struct pipe_screen *pscreen, struct pipe_vertex_state *vstate);
-struct zink_depth_stencil_alpha_state {
- struct pipe_depth_stencil_alpha_state base;
- struct zink_depth_stencil_alpha_hw_state hw_state;
-};
-void
-zink_context_state_init(struct pipe_context *pctx);
+#ifdef __cplusplus
+}
+#endif
#endif
diff --git a/src/gallium/drivers/zink/zink_surface.c b/src/gallium/drivers/zink/zink_surface.c
index f3cfacb1d14..a24ca83ac86 100644
--- a/src/gallium/drivers/zink/zink_surface.c
+++ b/src/gallium/drivers/zink/zink_surface.c
@@ -23,9 +23,11 @@
#include "zink_context.h"
#include "zink_framebuffer.h"
+#include "zink_format.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_surface.h"
+#include "zink_kopper.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
@@ -37,17 +39,19 @@ create_ivci(struct zink_screen *screen,
const struct pipe_surface *templ,
enum pipe_texture_target target)
{
- VkImageViewCreateInfo ivci = {0};
+ VkImageViewCreateInfo ivci;
+ /* zero holes since this is hashed */
+ memset(&ivci, 0, sizeof(VkImageViewCreateInfo));
ivci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
ivci.image = res->obj->image;
switch (target) {
case PIPE_TEXTURE_1D:
- ivci.viewType = VK_IMAGE_VIEW_TYPE_1D;
+ ivci.viewType = res->need_2D ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
break;
case PIPE_TEXTURE_1D_ARRAY:
- ivci.viewType = VK_IMAGE_VIEW_TYPE_1D_ARRAY;
+ ivci.viewType = res->need_2D ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
break;
case PIPE_TEXTURE_2D:
@@ -75,7 +79,7 @@ create_ivci(struct zink_screen *screen,
unreachable("unsupported target");
}
- ivci.format = zink_get_format(screen, templ->format);
+ ivci.format = res->base.b.format == PIPE_FORMAT_A8_UNORM ? res->format : zink_get_format(screen, templ->format);
assert(ivci.format != VK_FORMAT_UNDEFINED);
/* TODO: it's currently illegal to use non-identity swizzles for framebuffer attachments,
@@ -96,51 +100,112 @@ create_ivci(struct zink_screen *screen,
ivci.subresourceRange.levelCount = 1;
ivci.subresourceRange.baseArrayLayer = templ->u.tex.first_layer;
ivci.subresourceRange.layerCount = 1 + templ->u.tex.last_layer - templ->u.tex.first_layer;
+ assert(ivci.viewType != VK_IMAGE_VIEW_TYPE_3D || ivci.subresourceRange.baseArrayLayer == 0);
+ assert(ivci.viewType != VK_IMAGE_VIEW_TYPE_3D || ivci.subresourceRange.layerCount == 1);
+ /* ensure cube image types get clamped to 2D/2D_ARRAY as expected for partial views */
ivci.viewType = zink_surface_clamp_viewtype(ivci.viewType, templ->u.tex.first_layer, templ->u.tex.last_layer, res->base.b.array_size);
return ivci;
}
+/* this is used for framebuffer attachments to set up imageless framebuffers */
+static void
+init_surface_info(struct zink_screen *screen, struct zink_surface *surface, struct zink_resource *res, VkImageViewCreateInfo *ivci)
+{
+ VkImageViewUsageCreateInfo *usage_info = (VkImageViewUsageCreateInfo *)ivci->pNext;
+ surface->info.flags = res->obj->vkflags;
+ surface->info.usage = usage_info ? usage_info->usage : res->obj->vkusage;
+ surface->info.width = surface->base.width;
+ surface->info.height = surface->base.height;
+ surface->info.layerCount = ivci->subresourceRange.layerCount;
+ surface->info.format[0] = ivci->format;
+ if (res->obj->dt) {
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (zink_kopper_has_srgb(cdt))
+ surface->info.format[1] = ivci->format == cdt->formats[0] ? cdt->formats[1] : cdt->formats[0];
+ } else {
+ enum pipe_format srgb = util_format_is_srgb(surface->base.format) ? util_format_linear(surface->base.format) : util_format_srgb(surface->base.format);
+ if (srgb == surface->base.format)
+ srgb = PIPE_FORMAT_NONE;
+ if (srgb) {
+ VkFormat format = zink_get_format(screen, srgb);
+ if (format)
+ surface->info.format[1] = format;
+ }
+ }
+}
+
+static void
+init_pipe_surface_info(struct pipe_context *pctx, struct pipe_surface *psurf, const struct pipe_surface *templ, const struct pipe_resource *pres)
+{
+ unsigned int level = templ->u.tex.level;
+ psurf->context = pctx;
+ psurf->format = templ->format;
+ psurf->width = u_minify(pres->width0, level);
+ assert(psurf->width);
+ psurf->height = u_minify(pres->height0, level);
+ assert(psurf->height);
+ psurf->nr_samples = templ->nr_samples;
+ psurf->u.tex.level = level;
+ psurf->u.tex.first_layer = templ->u.tex.first_layer;
+ psurf->u.tex.last_layer = templ->u.tex.last_layer;
+}
+
+static void
+apply_view_usage_for_format(struct zink_screen *screen, struct zink_resource *res, struct zink_surface *surface, enum pipe_format format, VkImageViewCreateInfo *ivci)
+{
+ VkFormatFeatureFlags feats = res->linear ?
+ screen->format_props[format].linearTilingFeatures :
+ screen->format_props[format].optimalTilingFeatures;
+ VkImageUsageFlags attachment = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
+ surface->usage_info.usage = res->obj->vkusage & ~attachment;
+ if (res->obj->modifier_aspect) {
+ feats = res->obj->vkfeats;
+ /* intersect format features for current modifier */
+ for (unsigned i = 0; i < screen->modifier_props[format].drmFormatModifierCount; i++) {
+ if (res->obj->modifier == screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier)
+ feats &= screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifierTilingFeatures;
+ }
+ }
+ /* if the format features don't support framebuffer attachment, use VkImageViewUsageCreateInfo to remove it */
+ if ((res->obj->vkusage & attachment) &&
+ !(feats & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
+ ivci->pNext = &surface->usage_info;
+ }
+}
+
static struct zink_surface *
create_surface(struct pipe_context *pctx,
struct pipe_resource *pres,
const struct pipe_surface *templ,
- VkImageViewCreateInfo *ivci)
+ VkImageViewCreateInfo *ivci,
+ bool actually)
{
struct zink_screen *screen = zink_screen(pctx->screen);
struct zink_resource *res = zink_resource(pres);
- unsigned int level = templ->u.tex.level;
struct zink_surface *surface = CALLOC_STRUCT(zink_surface);
if (!surface)
return NULL;
+ surface->usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
+ surface->usage_info.pNext = NULL;
+ apply_view_usage_for_format(screen, res, surface, templ->format, ivci);
+
pipe_resource_reference(&surface->base.texture, pres);
pipe_reference_init(&surface->base.reference, 1);
- surface->base.context = pctx;
- surface->base.format = templ->format;
- surface->base.width = u_minify(pres->width0, level);
- assert(surface->base.width);
- surface->base.height = u_minify(pres->height0, level);
- assert(surface->base.height);
- surface->base.nr_samples = templ->nr_samples;
- surface->base.u.tex.level = level;
- surface->base.u.tex.first_layer = templ->u.tex.first_layer;
- surface->base.u.tex.last_layer = templ->u.tex.last_layer;
+ init_pipe_surface_info(pctx, &surface->base, templ, pres);
surface->obj = zink_resource(pres)->obj;
- util_dynarray_init(&surface->framebuffer_refs, NULL);
- util_dynarray_init(&surface->desc_set_refs.refs, NULL);
- surface->info.flags = res->obj->vkflags;
- surface->info.usage = res->obj->vkusage;
- surface->info.width = surface->base.width;
- surface->info.height = surface->base.height;
- surface->info.layerCount = ivci->subresourceRange.layerCount;
- surface->info.format = ivci->format;
- surface->info_hash = _mesa_hash_data(&surface->info, sizeof(surface->info));
+ init_surface_info(screen, surface, res, ivci);
- if (VKSCR(CreateImageView)(screen->dev, ivci, NULL,
- &surface->image_view) != VK_SUCCESS) {
+ if (!actually)
+ return surface;
+ assert(ivci->image);
+ VkResult result = VKSCR(CreateImageView)(screen->dev, ivci, NULL,
+ &surface->image_view);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: vkCreateImageView failed (%s)", vk_Result_to_str(result));
FREE(surface);
return NULL;
}
@@ -154,7 +219,20 @@ hash_ivci(const void *key)
return _mesa_hash_data((char*)key + offsetof(VkImageViewCreateInfo, flags), sizeof(VkImageViewCreateInfo) - offsetof(VkImageViewCreateInfo, flags));
}
-struct pipe_surface *
+static struct zink_surface *
+do_create_surface(struct pipe_context *pctx, struct pipe_resource *pres, const struct pipe_surface *templ, VkImageViewCreateInfo *ivci, uint32_t hash, bool actually)
+{
+ /* create a new surface */
+ struct zink_surface *surface = create_surface(pctx, pres, templ, ivci, actually);
+ /* only transient surfaces have nr_samples set */
+ surface->base.nr_samples = zink_screen(pctx->screen)->info.have_EXT_multisampled_render_to_single_sampled ? templ->nr_samples : 0;
+ surface->hash = hash;
+ surface->ivci = *ivci;
+ return surface;
+}
+
+/* get a cached surface for a shader descriptor */
+struct zink_surface *
zink_get_surface(struct zink_context *ctx,
struct pipe_resource *pres,
const struct pipe_surface *templ,
@@ -168,10 +246,11 @@ zink_get_surface(struct zink_context *ctx,
struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(&res->surface_cache, hash, ivci);
if (!entry) {
- /* create a new surface */
- surface = create_surface(&ctx->base, pres, templ, ivci);
- surface->hash = hash;
- surface->ivci = *ivci;
+ /* create a new surface, but don't actually create the imageview if mutable isn't set and the format is different;
+ * mutable will be set later and the imageview will be filled in
+ */
+ bool actually = !zink_format_needs_mutable(pres->format, templ->format) || (pres->bind & ZINK_BIND_MUTABLE);
+ surface = do_create_surface(&ctx->base, pres, templ, ivci, hash, actually);
entry = _mesa_hash_table_insert_pre_hashed(&res->surface_cache, hash, &surface->ivci, surface);
if (!entry) {
simple_mtx_unlock(&res->surface_mtx);
@@ -184,60 +263,144 @@ zink_get_surface(struct zink_context *ctx,
p_atomic_inc(&surface->base.reference.count);
}
simple_mtx_unlock(&res->surface_mtx);
- return &surface->base;
+
+ return surface;
}
-static struct pipe_surface *
-wrap_surface(struct pipe_context *pctx, struct pipe_surface *psurf)
+/* wrap a surface for use as a framebuffer attachment
+ * Takes ownership of surface */
+static struct zink_ctx_surface *
+wrap_surface(struct pipe_context *pctx,
+ struct zink_surface *surface,
+ const struct pipe_surface *templ)
{
struct zink_ctx_surface *csurf = CALLOC_STRUCT(zink_ctx_surface);
- csurf->base = *psurf;
+ if (!csurf) {
+ zink_surface_reference(zink_screen(pctx->screen), &surface, NULL);
+ return NULL;
+ }
+
+ csurf->base = *templ;
pipe_reference_init(&csurf->base.reference, 1);
- csurf->surf = (struct zink_surface*)psurf;
+ csurf->surf = surface;
csurf->base.context = pctx;
- return &csurf->base;
+ return csurf;
}
+/* this is the context hook, so only zink_ctx_surfaces will reach it */
+static void
+zink_surface_destroy(struct pipe_context *pctx,
+ struct pipe_surface *psurface)
+{
+ struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurface;
+ if (csurf->needs_mutable)
+ /* this has an extra resource ref */
+ pipe_resource_reference(&csurf->base.texture, NULL);
+ zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, NULL);
+ pipe_surface_release(pctx, (struct pipe_surface**)&csurf->transient);
+ FREE(csurf);
+}
+
+/* this is the context hook that returns a zink_ctx_surface */
static struct pipe_surface *
zink_create_surface(struct pipe_context *pctx,
struct pipe_resource *pres,
const struct pipe_surface *templ)
{
+ struct zink_resource *res = zink_resource(pres);
+ struct zink_screen *screen = zink_screen(pctx->screen);
+ bool is_array = templ->u.tex.last_layer != templ->u.tex.first_layer;
+ bool needs_mutable = false;
+ enum pipe_texture_target target_2d[] = {PIPE_TEXTURE_2D, PIPE_TEXTURE_2D_ARRAY};
+ if (!res->obj->dt && zink_format_needs_mutable(pres->format, templ->format)) {
+ /* mutable not set by default */
+ needs_mutable = !(res->base.b.bind & ZINK_BIND_MUTABLE);
+ /*
+ VUID-VkImageViewCreateInfo-image-07072
+ If image was created with the VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT flag and
+ format is a non-compressed format, the levelCount and layerCount members of
+ subresourceRange must both be 1
+
+ ...but this is allowed with a maintenance6 property
+ */
+ if (util_format_is_compressed(pres->format) && templ->u.tex.first_layer != templ->u.tex.last_layer &&
+ (!screen->info.have_KHR_maintenance6 || !screen->info.maint6_props.blockTexelViewCompatibleMultipleLayers))
+ return NULL;
+ }
- VkImageViewCreateInfo ivci = create_ivci(zink_screen(pctx->screen),
- zink_resource(pres), templ, pres->target);
- if (pres->target == PIPE_TEXTURE_3D)
- ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
+ if (!screen->threaded && needs_mutable) {
+ /* this is fine without tc */
+ needs_mutable = false;
+ zink_resource_object_init_mutable(zink_context(pctx), res);
+ }
- return wrap_surface(pctx, zink_get_surface(zink_context(pctx), pres, templ, &ivci));
-}
+ if (!zink_get_format(screen, templ->format))
+ return NULL;
-/* framebuffers are owned by their surfaces, so each time a surface that's part of a cached fb
- * is destroyed, it has to unref all the framebuffers it's attached to in order to avoid leaking
- * all the framebuffers
- *
- * surfaces are always batch-tracked, so it is impossible for a framebuffer to be destroyed
- * while it is in use
- */
-static void
-surface_clear_fb_refs(struct zink_screen *screen, struct pipe_surface *psurface)
-{
- struct zink_surface *surface = zink_surface(psurface);
- util_dynarray_foreach(&surface->framebuffer_refs, struct zink_framebuffer*, fb_ref) {
- struct zink_framebuffer *fb = *fb_ref;
- for (unsigned i = 0; i < fb->state.num_attachments; i++) {
- if (fb->surfaces[i] == psurface) {
- simple_mtx_lock(&screen->framebuffer_mtx);
- fb->surfaces[i] = NULL;
- _mesa_hash_table_remove_key(&screen->framebuffer_cache, &fb->state);
- zink_framebuffer_reference(screen, &fb, NULL);
- simple_mtx_unlock(&screen->framebuffer_mtx);
- break;
- }
+ VkImageViewCreateInfo ivci = create_ivci(screen, res, templ,
+ pres->target == PIPE_TEXTURE_3D ? target_2d[is_array] : pres->target);
+
+ struct zink_surface *surface = NULL;
+ if (res->obj->dt) {
+ /* don't cache swapchain surfaces. that's weird. */
+ surface = do_create_surface(pctx, pres, templ, &ivci, 0, false);
+ if (unlikely(!surface)) {
+ mesa_loge("ZINK: failed do_create_surface!");
+ return NULL;
}
+
+ surface->is_swapchain = true;
+ } else if (!needs_mutable) {
+ surface = zink_get_surface(zink_context(pctx), pres, templ, &ivci);
+ if (unlikely(!surface)) {
+ mesa_loge("ZINK: failed to get non-mutable surface!");
+ return NULL;
+ }
+ }
+
+ struct zink_ctx_surface *csurf = wrap_surface(pctx, surface, needs_mutable ? templ : &surface->base); /* move ownership of surface */
+ if (unlikely(!csurf)) {
+ mesa_loge("ZINK: failed to allocate csurf!");
+ return NULL;
+ }
+
+ csurf->needs_mutable = needs_mutable;
+ if (needs_mutable) {
+ pipe_resource_reference(&csurf->base.texture, pres);
+ init_pipe_surface_info(pctx, &csurf->base, templ, pres);
}
- util_dynarray_fini(&surface->framebuffer_refs);
+
+ if (templ->nr_samples && !screen->info.have_EXT_multisampled_render_to_single_sampled) {
+ /* transient fb attachment: not cached */
+ struct pipe_resource rtempl = *pres;
+ rtempl.nr_samples = templ->nr_samples;
+ rtempl.bind |= ZINK_BIND_TRANSIENT;
+ struct zink_resource *transient = zink_resource(pctx->screen->resource_create(pctx->screen, &rtempl));
+ if (unlikely(!transient)) {
+ mesa_loge("ZINK: failed to create transient resource!");
+ goto fail;
+ }
+
+ ivci.image = transient->obj->image;
+ struct zink_surface *tsurf = create_surface(pctx, &transient->base.b, templ, &ivci, true);
+ pipe_resource_reference((struct pipe_resource**)&transient, NULL);
+ if (unlikely(!tsurf)) {
+ mesa_loge("ZINK: failed to create transient surface!");
+ goto fail;
+ }
+
+ csurf->transient = wrap_surface(pctx, tsurf, &tsurf->base); /* move ownership of tsurf */
+ if (unlikely(!csurf->transient)) {
+ mesa_loge("ZINK: failed to wrap transient surface!");
+ goto fail;
+ }
+ }
+
+ return &csurf->base;
+fail:
+ zink_surface_destroy(pctx, &csurf->base);
+ return NULL;
}
void
@@ -245,55 +408,53 @@ zink_destroy_surface(struct zink_screen *screen, struct pipe_surface *psurface)
{
struct zink_surface *surface = zink_surface(psurface);
struct zink_resource *res = zink_resource(psurface->texture);
- simple_mtx_lock(&res->surface_mtx);
- struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci);
- assert(he);
- assert(he->data == surface);
- _mesa_hash_table_remove(&res->surface_cache, he);
- simple_mtx_unlock(&res->surface_mtx);
- if (!screen->info.have_KHR_imageless_framebuffer)
- surface_clear_fb_refs(screen, psurface);
- zink_descriptor_set_refs_clear(&surface->desc_set_refs, surface);
- util_dynarray_fini(&surface->framebuffer_refs);
+ if ((!psurface->nr_samples || screen->info.have_EXT_multisampled_render_to_single_sampled) && !surface->is_swapchain) {
+ simple_mtx_lock(&res->surface_mtx);
+ if (psurface->reference.count) {
+ /* a different context got a cache hit during deletion: this surface is alive again */
+ simple_mtx_unlock(&res->surface_mtx);
+ return;
+ }
+ struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci);
+ assert(he);
+ assert(he->data == surface);
+ _mesa_hash_table_remove(&res->surface_cache, he);
+ simple_mtx_unlock(&res->surface_mtx);
+ }
+ /* this surface is dead now */
+ simple_mtx_lock(&res->obj->view_lock);
+ /* imageviews are never destroyed directly to ensure lifetimes for in-use surfaces */
+ if (surface->is_swapchain) {
+ for (unsigned i = 0; i < surface->swapchain_size; i++)
+ util_dynarray_append(&res->obj->views, VkImageView, surface->swapchain[i]);
+ free(surface->swapchain);
+ } else
+ util_dynarray_append(&res->obj->views, VkImageView, surface->image_view);
+ simple_mtx_unlock(&res->obj->view_lock);
pipe_resource_reference(&psurface->texture, NULL);
- if (surface->simage_view)
- VKSCR(DestroyImageView)(screen->dev, surface->simage_view, NULL);
- VKSCR(DestroyImageView)(screen->dev, surface->image_view, NULL);
FREE(surface);
}
-static void
-zink_surface_destroy(struct pipe_context *pctx,
- struct pipe_surface *psurface)
-{
- struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurface;
- zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, NULL);
- FREE(csurf);
-}
-
+/* this is called when a surface is rebound for mutable/storage use */
bool
zink_rebind_surface(struct zink_context *ctx, struct pipe_surface **psurface)
{
struct zink_surface *surface = zink_surface(*psurface);
struct zink_resource *res = zink_resource((*psurface)->texture);
struct zink_screen *screen = zink_screen(ctx->base.screen);
- if (surface->simage_view)
+ if (surface->obj == res->obj)
return false;
- VkImageViewCreateInfo ivci = create_ivci(screen,
- zink_resource((*psurface)->texture), (*psurface), surface->base.texture->target);
+ assert(!res->obj->dt);
+ VkImageViewCreateInfo ivci = surface->ivci;
+ ivci.image = res->obj->image;
uint32_t hash = hash_ivci(&ivci);
simple_mtx_lock(&res->surface_mtx);
struct hash_entry *new_entry = _mesa_hash_table_search_pre_hashed(&res->surface_cache, hash, &ivci);
- if (zink_batch_usage_exists(surface->batch_uses))
- zink_batch_reference_surface(&ctx->batch, surface);
- surface_clear_fb_refs(screen, *psurface);
- zink_descriptor_set_refs_clear(&surface->desc_set_refs, surface);
if (new_entry) {
/* reuse existing surface; old one will be cleaned up naturally */
struct zink_surface *new_surface = new_entry->data;
simple_mtx_unlock(&res->surface_mtx);
- zink_batch_usage_set(&new_surface->batch_uses, ctx->batch.state);
zink_surface_reference(screen, (struct zink_surface**)psurface, new_surface);
return true;
}
@@ -301,8 +462,10 @@ zink_rebind_surface(struct zink_context *ctx, struct pipe_surface **psurface)
assert(entry);
_mesa_hash_table_remove(&res->surface_cache, entry);
VkImageView image_view;
- if (VKSCR(CreateImageView)(screen->dev, &ivci, NULL, &image_view) != VK_SUCCESS) {
- debug_printf("zink: failed to create new imageview");
+ apply_view_usage_for_format(screen, res, surface, surface->base.format, &ivci);
+ VkResult result = VKSCR(CreateImageView)(screen->dev, &ivci, NULL, &image_view);
+ if (result != VK_SUCCESS) {
+ mesa_loge("ZINK: failed to create new imageview (%s)", vk_Result_to_str(result));
simple_mtx_unlock(&res->surface_mtx);
return false;
}
@@ -310,18 +473,19 @@ zink_rebind_surface(struct zink_context *ctx, struct pipe_surface **psurface)
surface->ivci = ivci;
entry = _mesa_hash_table_insert_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci, surface);
assert(entry);
- surface->simage_view = surface->image_view;
+ simple_mtx_lock(&res->obj->view_lock);
+ util_dynarray_append(&res->obj->views, VkImageView, surface->image_view);
+ simple_mtx_unlock(&res->obj->view_lock);
surface->image_view = image_view;
surface->obj = zink_resource(surface->base.texture)->obj;
/* update for imageless fb */
surface->info.flags = res->obj->vkflags;
surface->info.usage = res->obj->vkusage;
- surface->info_hash = _mesa_hash_data(&surface->info, sizeof(surface->info));
- zink_batch_usage_set(&surface->batch_uses, ctx->batch.state);
simple_mtx_unlock(&res->surface_mtx);
return true;
}
+/* dummy surfaces are used for null framebuffer/descriptors */
struct pipe_surface *
zink_surface_create_null(struct zink_context *ctx, enum pipe_texture_target target, unsigned width, unsigned height, unsigned samples)
{
@@ -332,16 +496,18 @@ zink_surface_create_null(struct zink_context *ctx, enum pipe_texture_target targ
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
- templ.format = PIPE_FORMAT_R8_UINT;
+ templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
templ.target = target;
- templ.bind = PIPE_BIND_RENDER_TARGET;
+ templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ if (samples < 2)
+ templ.bind |= PIPE_BIND_SHADER_IMAGE;
templ.nr_samples = samples;
pres = ctx->base.screen->resource_create(ctx->base.screen, &templ);
if (!pres)
return NULL;
- surf_templ.format = PIPE_FORMAT_R8_UINT;
+ surf_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
surf_templ.nr_samples = 0;
struct pipe_surface *psurf = ctx->base.create_surface(&ctx->base, pres, &surf_templ);
pipe_resource_reference(&pres, NULL);
@@ -354,3 +520,43 @@ zink_context_surface_init(struct pipe_context *context)
context->create_surface = zink_create_surface;
context->surface_destroy = zink_surface_destroy;
}
+
+/* must be called before a swapchain image is used to ensure correct imageview is used */
+void
+zink_surface_swapchain_update(struct zink_context *ctx, struct zink_surface *surface)
+{
+ struct zink_screen *screen = zink_screen(ctx->base.screen);
+ struct zink_resource *res = zink_resource(surface->base.texture);
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (!cdt)
+ return; //dead swapchain
+ if (cdt->swapchain != surface->dt_swapchain) {
+ /* new swapchain: clear out previous swapchain imageviews/array and setup a new one;
+ * old views will be pruned normally in zink_batch or on object destruction
+ */
+ simple_mtx_lock(&res->obj->view_lock);
+ for (unsigned i = 0; i < surface->swapchain_size; i++)
+ util_dynarray_append(&res->obj->views, VkImageView, surface->swapchain[i]);
+ simple_mtx_unlock(&res->obj->view_lock);
+ free(surface->swapchain);
+ surface->swapchain_size = cdt->swapchain->num_images;
+ surface->swapchain = calloc(surface->swapchain_size, sizeof(VkImageView));
+ if (!surface->swapchain) {
+ mesa_loge("ZINK: failed to allocate surface->swapchain!");
+ return;
+ }
+ surface->base.width = res->base.b.width0;
+ surface->base.height = res->base.b.height0;
+ init_surface_info(screen, surface, res, &surface->ivci);
+ surface->dt_swapchain = cdt->swapchain;
+ }
+ if (!surface->swapchain[res->obj->dt_idx]) {
+ /* no current swapchain imageview exists: create it */
+ assert(res->obj->image && cdt->swapchain->images[res->obj->dt_idx].image == res->obj->image);
+ surface->ivci.image = res->obj->image;
+ assert(surface->ivci.image);
+ VKSCR(CreateImageView)(screen->dev, &surface->ivci, NULL, &surface->swapchain[res->obj->dt_idx]);
+ }
+ /* the current swapchain imageview is now the view for the current swapchain image */
+ surface->image_view = surface->swapchain[res->obj->dt_idx];
+}
diff --git a/src/gallium/drivers/zink/zink_surface.h b/src/gallium/drivers/zink/zink_surface.h
index 617084d39e8..9207b2e8b89 100644
--- a/src/gallium/drivers/zink/zink_surface.h
+++ b/src/gallium/drivers/zink/zink_surface.h
@@ -24,56 +24,7 @@
#ifndef ZINK_SURFACE_H
#define ZINK_SURFACE_H
-#include "pipe/p_state.h"
-#include "zink_batch.h"
-#include <vulkan/vulkan.h>
-
-struct pipe_context;
-
-struct zink_surface_info {
- VkImageCreateFlags flags;
- VkImageUsageFlags usage;
- uint32_t width;
- uint32_t height;
- uint32_t layerCount;
- VkFormat format;
-};
-
-struct zink_surface {
- struct pipe_surface base;
- VkImageViewCreateInfo ivci;
- struct zink_surface_info info; //TODO: union with fb refs
- uint32_t info_hash;
- VkImageView image_view;
- VkImageView simage_view;//old iview after storage replacement/rebind
- void *obj; //backing resource object
- uint32_t hash;
- struct zink_batch_usage *batch_uses;
- struct util_dynarray framebuffer_refs;
- struct zink_descriptor_refs desc_set_refs;
-};
-
-/* wrapper object that preserves the gallium expectation of having
- * pipe_surface::context match the context used to create the surface
- */
-struct zink_ctx_surface {
- struct pipe_surface base;
- struct zink_surface *surf;
-};
-
-/* use this cast for framebuffer surfaces */
-static inline struct zink_surface *
-zink_csurface(struct pipe_surface *psurface)
-{
- return psurface ? ((struct zink_ctx_surface *)psurface)->surf : NULL;
-}
-
-/* use this cast for internal surfaces */
-static inline struct zink_surface *
-zink_surface(struct pipe_surface *psurface)
-{
- return (struct zink_surface *)psurface;
-}
+#include "zink_types.h"
void
zink_destroy_surface(struct zink_screen *screen, struct pipe_surface *psurface);
@@ -100,12 +51,13 @@ create_ivci(struct zink_screen *screen,
const struct pipe_surface *templ,
enum pipe_texture_target target);
-struct pipe_surface *
+struct zink_surface *
zink_get_surface(struct zink_context *ctx,
struct pipe_resource *pres,
const struct pipe_surface *templ,
VkImageViewCreateInfo *ivci);
+/* cube image types are clamped by gallium rules to 2D or 2D_ARRAY viewtypes if not using all layers */
static inline VkImageViewType
zink_surface_clamp_viewtype(VkImageViewType viewType, unsigned first_layer, unsigned last_layer, unsigned array_size)
{
@@ -113,14 +65,8 @@ zink_surface_clamp_viewtype(VkImageViewType viewType, unsigned first_layer, unsi
if (viewType == VK_IMAGE_VIEW_TYPE_CUBE || viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
if (first_layer == last_layer)
return VK_IMAGE_VIEW_TYPE_2D;
- if (layerCount % 6 == 0) {
- if (viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && layerCount == 6)
- return VK_IMAGE_VIEW_TYPE_CUBE;
- } else if (first_layer || layerCount != array_size)
+ if (layerCount % 6 != 0 && (first_layer || layerCount != array_size))
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
- } else if (viewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) {
- if (first_layer == last_layer)
- return VK_IMAGE_VIEW_TYPE_2D;
}
return viewType;
}
@@ -137,4 +83,7 @@ zink_rebind_ctx_surface(struct zink_context *ctx, struct pipe_surface **psurface
struct pipe_surface *
zink_surface_create_null(struct zink_context *ctx, enum pipe_texture_target target, unsigned width, unsigned height, unsigned samples);
+
+void
+zink_surface_swapchain_update(struct zink_context *ctx, struct zink_surface *surface);
#endif
diff --git a/src/gallium/drivers/zink/zink_synchronization.cpp b/src/gallium/drivers/zink/zink_synchronization.cpp
new file mode 100644
index 00000000000..78fce1e29e9
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_synchronization.cpp
@@ -0,0 +1,794 @@
+/*
+ * Copyright © 2023 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+ */
+
+#include "zink_batch.h"
+#include "zink_context.h"
+#include "zink_descriptors.h"
+#include "zink_resource.h"
+#include "zink_screen.h"
+
+
+static VkAccessFlags
+access_src_flags(VkImageLayout layout)
+{
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_UNDEFINED:
+ return VK_ACCESS_NONE;
+
+ case VK_IMAGE_LAYOUT_GENERAL:
+ return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+
+ case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
+ return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return VK_ACCESS_SHADER_READ_BIT;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+ return VK_ACCESS_TRANSFER_READ_BIT;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+ return VK_ACCESS_TRANSFER_WRITE_BIT;
+
+ case VK_IMAGE_LAYOUT_PREINITIALIZED:
+ return VK_ACCESS_HOST_WRITE_BIT;
+
+ case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
+ return VK_ACCESS_NONE;
+
+ default:
+ unreachable("unexpected layout");
+ }
+}
+
+static VkAccessFlags
+access_dst_flags(VkImageLayout layout)
+{
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_UNDEFINED:
+ return VK_ACCESS_NONE;
+
+ case VK_IMAGE_LAYOUT_GENERAL:
+ return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+
+ case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
+ return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+
+ case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return VK_ACCESS_SHADER_READ_BIT;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+ return VK_ACCESS_TRANSFER_READ_BIT;
+
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ return VK_ACCESS_SHADER_READ_BIT;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+ return VK_ACCESS_TRANSFER_WRITE_BIT;
+
+ case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
+ return VK_ACCESS_NONE;
+
+ default:
+ unreachable("unexpected layout");
+ }
+}
+
+static VkPipelineStageFlags
+pipeline_dst_stage(VkImageLayout layout)
+{
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+ return VK_PIPELINE_STAGE_TRANSFER_BIT;
+ case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+ return VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ case VK_IMAGE_LAYOUT_GENERAL:
+ return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+ case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+
+ default:
+ return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+ }
+}
+
+#define ALL_READ_ACCESS_FLAGS \
+ (VK_ACCESS_INDIRECT_COMMAND_READ_BIT | \
+ VK_ACCESS_INDEX_READ_BIT | \
+ VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | \
+ VK_ACCESS_UNIFORM_READ_BIT | \
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | \
+ VK_ACCESS_SHADER_READ_BIT | \
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | \
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | \
+ VK_ACCESS_TRANSFER_READ_BIT |\
+ VK_ACCESS_HOST_READ_BIT |\
+ VK_ACCESS_MEMORY_READ_BIT |\
+ VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |\
+ VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT |\
+ VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT |\
+ VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR |\
+ VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR |\
+ VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT |\
+ VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV |\
+ VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR |\
+ VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)
+
+
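+/* note: any access bit outside ALL_READ_ACCESS_FLAGS counts as a write; e.g. both
+ * VK_ACCESS_SHADER_WRITE_BIT and VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT make
+ * zink_resource_access_is_write() return true
+ */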
+bool
+zink_resource_access_is_write(VkAccessFlags flags)
+{
+ return (flags & ~ALL_READ_ACCESS_FLAGS) > 0;
+}
+
+static bool
+zink_resource_image_needs_barrier(struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+{
+ return res->layout != new_layout || (res->obj->access_stage & pipeline) != pipeline ||
+ (res->obj->access & flags) != flags ||
+ zink_resource_access_is_write(res->obj->access) ||
+ zink_resource_access_is_write(flags);
+}
+
+void
+zink_resource_image_barrier_init(VkImageMemoryBarrier *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+{
+ if (!pipeline)
+ pipeline = pipeline_dst_stage(new_layout);
+ if (!flags)
+ flags = access_dst_flags(new_layout);
+
+ VkImageSubresourceRange isr = {
+ res->aspect,
+ 0, VK_REMAINING_MIP_LEVELS,
+ 0, VK_REMAINING_ARRAY_LAYERS
+ };
+ *imb = VkImageMemoryBarrier {
+ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ NULL,
+ res->obj->access ? res->obj->access : access_src_flags(res->layout),
+ flags,
+ res->layout,
+ new_layout,
+ VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED,
+ res->obj->image,
+ isr
+ };
+}
+
+void
+zink_resource_image_barrier2_init(VkImageMemoryBarrier2 *imb, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+{
+ if (!pipeline)
+ pipeline = pipeline_dst_stage(new_layout);
+ if (!flags)
+ flags = access_dst_flags(new_layout);
+
+ VkImageSubresourceRange isr = {
+ res->aspect,
+ 0, VK_REMAINING_MIP_LEVELS,
+ 0, VK_REMAINING_ARRAY_LAYERS
+ };
+ *imb = VkImageMemoryBarrier2 {
+ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ NULL,
+ res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ res->obj->access ? res->obj->access : access_src_flags(res->layout),
+ pipeline,
+ flags,
+ res->layout,
+ new_layout,
+ VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED,
+ res->obj->image,
+ isr
+ };
+}
+
+static inline bool
+is_shader_pipline_stage(VkPipelineStageFlags pipeline)
+{
+ return pipeline & GFX_SHADER_BITS;
+}
+
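+/* queue a deferred rebind barrier for whichever pipeline (gfx/compute) is not
+ * covered by the stages of the barrier being emitted now
+ */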
+static void
+resource_check_defer_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkPipelineStageFlags pipeline)
+{
+ assert(res->obj->is_buffer);
+ if (res->bind_count[0] - res->so_bind_count > 0) {
+ if ((res->vbo_bind_mask && !(pipeline & VK_PIPELINE_STAGE_VERTEX_INPUT_BIT)) ||
+ (util_bitcount(res->vbo_bind_mask) != res->bind_count[0] && !is_shader_pipline_stage(pipeline)))
+ /* gfx rebind */
+ _mesa_set_add(ctx->need_barriers[0], res);
+ }
+ if (res->bind_count[1] && !(pipeline & VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT))
+ /* compute rebind */
+ _mesa_set_add(ctx->need_barriers[1], res);
+}
+
+static inline bool
+unordered_res_exec(const struct zink_context *ctx, const struct zink_resource *res, bool is_write)
+{
+ /* if all usage is unordered, keep unordered */
+ if (res->obj->unordered_read && res->obj->unordered_write)
+ return true;
+ /* if testing write access but have any ordered read access, cannot promote */
+ if (is_write && zink_batch_usage_matches(res->obj->bo->reads.u, ctx->batch.state) && !res->obj->unordered_read)
+ return false;
+ /* if write access is unordered or nonexistent, always promote */
+ return res->obj->unordered_write || !zink_batch_usage_matches(res->obj->bo->writes.u, ctx->batch.state);
+}
+
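+/* returns whether an op touching 'res' may execute unordered; images with
+ * unflushed ordered usage are conservatively kept ordered (see TODO below)
+ */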
+static ALWAYS_INLINE bool
+check_unordered_exec(struct zink_context *ctx, struct zink_resource *res, bool is_write)
+{
+ if (res) {
+ if (!res->obj->is_buffer) {
+ /* TODO: figure out how to link up unordered layout -> ordered layout and delete these conditionals */
+ if (zink_resource_usage_is_unflushed(res) && !res->obj->unordered_read && !res->obj->unordered_write)
+ return false;
+ }
+ return unordered_res_exec(ctx, res, is_write);
+ }
+ return true;
+}
+
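+/* select the cmdbuf for an op on src/dst: the reordered cmdbuf when both resources
+ * can execute unordered, otherwise the main cmdbuf. the reordered cmdbuf is
+ * (presumably) submitted ahead of the main one, so anything recorded there must not
+ * depend on same-batch ordered work
+ */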
+VkCommandBuffer
+zink_get_cmdbuf(struct zink_context *ctx, struct zink_resource *src, struct zink_resource *dst)
+{
+ bool unordered_exec = !ctx->no_reorder;
+
+ unordered_exec &= check_unordered_exec(ctx, src, false) &&
+ check_unordered_exec(ctx, dst, true);
+
+ if (src)
+ src->obj->unordered_read = unordered_exec;
+ if (dst)
+ dst->obj->unordered_write = unordered_exec;
+
+ if (!unordered_exec || ctx->unordered_blitting)
+ zink_batch_no_rp(ctx);
+
+ if (unordered_exec) {
+ ctx->batch.state->has_barriers = true;
+ ctx->batch.has_work = true;
+ return ctx->batch.state->reordered_cmdbuf;
+ }
+ return ctx->batch.state->cmdbuf;
+}
+
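+/* for images bound on the "other" pipeline (gfx vs compute), queue a deferred barrier
+ * so the layout can be fixed up before that pipeline uses the image again
+ */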
+static void
+resource_check_defer_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout layout, VkPipelineStageFlags pipeline)
+{
+ assert(!res->obj->is_buffer);
+ assert(!ctx->blitting);
+
+ bool is_compute = pipeline == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ /* if this is a non-shader barrier and there are binds, always queue a shader barrier */
+ bool is_shader = is_shader_pipline_stage(pipeline);
+ if ((is_shader || !res->bind_count[is_compute]) &&
+ /* if no layout change is needed between gfx and compute, do nothing */
+ !res->bind_count[!is_compute] && (!is_compute || !res->fb_bind_count))
+ return;
+
+ if (res->bind_count[!is_compute] && is_shader) {
+ /* if the layout is the same between gfx and compute, do nothing */
+ if (layout == zink_descriptor_util_image_layout_eval(ctx, res, !is_compute))
+ return;
+ }
+ /* queue a layout change if a layout change will be needed */
+ if (res->bind_count[!is_compute])
+ _mesa_set_add(ctx->need_barriers[!is_compute], res);
+ /* also queue a layout change if this is a non-shader layout */
+ if (res->bind_count[is_compute] && !is_shader)
+ _mesa_set_add(ctx->need_barriers[is_compute], res);
+}
+
+enum barrier_type {
+ barrier_default,
+ barrier_KHR_synchronzation2
+};
+
+template <barrier_type BARRIER_API>
+struct emit_memory_barrier {
+ static void for_image(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout,
+ VkAccessFlags flags, VkPipelineStageFlags pipeline, bool completed, VkCommandBuffer cmdbuf,
+ bool *queue_import)
+ {
+ VkImageMemoryBarrier imb;
+ zink_resource_image_barrier_init(&imb, res, new_layout, flags, pipeline);
+ if (!res->obj->access_stage || completed)
+ imb.srcAccessMask = 0;
+ if (res->obj->needs_zs_evaluate)
+ imb.pNext = &res->obj->zs_evaluate;
+ res->obj->needs_zs_evaluate = false;
+ if (res->queue != zink_screen(ctx->base.screen)->gfx_queue && res->queue != VK_QUEUE_FAMILY_IGNORED) {
+ imb.srcQueueFamilyIndex = res->queue;
+ imb.dstQueueFamilyIndex = zink_screen(ctx->base.screen)->gfx_queue;
+ res->queue = VK_QUEUE_FAMILY_IGNORED;
+ *queue_import = true;
+ }
+ VKCTX(CmdPipelineBarrier)(
+ cmdbuf,
+ res->obj->access_stage ? res->obj->access_stage : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ pipeline,
+ 0,
+ 0, NULL,
+ 0, NULL,
+ 1, &imb
+ );
+ }
+
+ static void for_buffer(struct zink_context *ctx, struct zink_resource *res,
+ VkPipelineStageFlags pipeline,
+ VkAccessFlags flags,
+ bool unordered,
+ bool usage_matches,
+ VkPipelineStageFlags stages,
+ VkCommandBuffer cmdbuf)
+ {
+ VkMemoryBarrier bmb;
+ bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+ bmb.pNext = NULL;
+ if (unordered) {
+ stages = usage_matches ? res->obj->unordered_access_stage : stages;
+ bmb.srcAccessMask = usage_matches ? res->obj->unordered_access : res->obj->access;
+ } else {
+ bmb.srcAccessMask = res->obj->access;
+ }
+ bmb.dstAccessMask = flags;
+ VKCTX(CmdPipelineBarrier)(
+ cmdbuf,
+ stages,
+ pipeline,
+ 0,
+ 1, &bmb,
+ 0, NULL,
+ 0, NULL);
+ }
+};
+
+
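+/* KHR_synchronization2 path: same logic as the base template, but src/dst stage masks
+ * travel in the VkImageMemoryBarrier2/VkMemoryBarrier2 structs and are submitted via
+ * VkDependencyInfo + vkCmdPipelineBarrier2 instead of the legacy entrypoint
+ */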
+template <>
+struct emit_memory_barrier<barrier_KHR_synchronzation2> {
+ static void for_image(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout,
+ VkAccessFlags flags, VkPipelineStageFlags pipeline, bool completed, VkCommandBuffer cmdbuf,
+ bool *queue_import)
+ {
+ VkImageMemoryBarrier2 imb;
+ zink_resource_image_barrier2_init(&imb, res, new_layout, flags, pipeline);
+ if (!res->obj->access_stage || completed)
+ imb.srcAccessMask = 0;
+ if (res->obj->needs_zs_evaluate)
+ imb.pNext = &res->obj->zs_evaluate;
+ res->obj->needs_zs_evaluate = false;
+ if (res->queue != zink_screen(ctx->base.screen)->gfx_queue && res->queue != VK_QUEUE_FAMILY_IGNORED) {
+ imb.srcQueueFamilyIndex = res->queue;
+ imb.dstQueueFamilyIndex = zink_screen(ctx->base.screen)->gfx_queue;
+ res->queue = VK_QUEUE_FAMILY_IGNORED;
+ *queue_import = true;
+ }
+ VkDependencyInfo dep = {
+ VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ NULL,
+ 0,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ 1,
+ &imb
+ };
+ VKCTX(CmdPipelineBarrier2)(cmdbuf, &dep);
+ }
+
+ static void for_buffer(struct zink_context *ctx, struct zink_resource *res,
+ VkPipelineStageFlags pipeline,
+ VkAccessFlags flags,
+ bool unordered,
+ bool usage_matches,
+ VkPipelineStageFlags stages,
+ VkCommandBuffer cmdbuf)
+ {
+ VkMemoryBarrier2 bmb;
+ bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
+ bmb.pNext = NULL;
+ if (unordered) {
+ bmb.srcStageMask = usage_matches ? res->obj->unordered_access_stage : stages;
+ bmb.srcAccessMask = usage_matches ? res->obj->unordered_access : res->obj->access;
+ } else {
+ bmb.srcStageMask = stages;
+ bmb.srcAccessMask = res->obj->access;
+ }
+ bmb.dstStageMask = pipeline;
+ bmb.dstAccessMask = flags;
+ VkDependencyInfo dep = {
+ VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ NULL,
+ 0,
+ 1,
+ &bmb,
+ 0,
+ NULL,
+ 0,
+ NULL
+ };
+ VKCTX(CmdPipelineBarrier2)(cmdbuf, &dep);
+ }
+};
+
+template <bool UNSYNCHRONIZED>
+struct update_unordered_access_and_get_cmdbuf {
+ /* use the base template to make the true and false cases below more explicit */
+};
+
+template <>
+struct update_unordered_access_and_get_cmdbuf<true> {
+ static VkCommandBuffer apply(struct zink_context *ctx, struct zink_resource *res, bool usage_matches, bool is_write)
+ {
+ assert(!usage_matches);
+ res->obj->unordered_write = true;
+ res->obj->unordered_read = true;
+ ctx->batch.state->has_unsync = true;
+ return ctx->batch.state->unsynchronized_cmdbuf;
+ }
+};
+
+template <>
+struct update_unordered_access_and_get_cmdbuf<false> {
+ static VkCommandBuffer apply(struct zink_context *ctx, struct zink_resource *res, bool usage_matches, bool is_write)
+ {
+ VkCommandBuffer cmdbuf;
+ if (!usage_matches) {
+ res->obj->unordered_write = true;
+ if (is_write || zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, ZINK_RESOURCE_ACCESS_RW))
+ res->obj->unordered_read = true;
+ }
+ if (zink_resource_usage_matches(res, ctx->batch.state) && !ctx->unordered_blitting &&
+ /* if current batch usage exists with ordered non-transfer access, never promote
+ * this avoids layout desync
+ */
+ (!res->obj->unordered_read || !res->obj->unordered_write)) {
+ cmdbuf = ctx->batch.state->cmdbuf;
+ res->obj->unordered_write = false;
+ res->obj->unordered_read = false;
+ /* it's impossible to detect this from the caller
+ * there should be no valid case where this barrier can occur inside a renderpass
+ */
+ zink_batch_no_rp(ctx);
+ } else {
+ cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
+ /* force subsequent barriers to be ordered to avoid layout desync */
+ if (cmdbuf != ctx->batch.state->reordered_cmdbuf) {
+ res->obj->unordered_write = false;
+ res->obj->unordered_read = false;
+ }
+ }
+ return cmdbuf;
+ }
+};
+
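+/* image barrier emission, templated on the barrier API and on UNSYNCHRONIZED:
+ * the unsynchronized variant records into the unsynchronized cmdbuf and skips the
+ * deferred-barrier bookkeeping done for normal barriers
+ */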
+template <barrier_type BARRIER_API, bool UNSYNCHRONIZED>
+void
+zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+{
+ if (!pipeline)
+ pipeline = pipeline_dst_stage(new_layout);
+ if (!flags)
+ flags = access_dst_flags(new_layout);
+
+ bool is_write = zink_resource_access_is_write(flags);
+ if (is_write && zink_is_swapchain(res))
+ zink_kopper_set_readback_needs_update(res);
+ if (!res->obj->needs_zs_evaluate && !zink_resource_image_needs_barrier(res, new_layout, flags, pipeline) &&
+ (res->queue == zink_screen(ctx->base.screen)->gfx_queue || res->queue == VK_QUEUE_FAMILY_IGNORED))
+ return;
+ enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
+ bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+ bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+ VkCommandBuffer cmdbuf = update_unordered_access_and_get_cmdbuf<UNSYNCHRONIZED>::apply(ctx, res, usage_matches, is_write);
+
+ assert(new_layout);
+ bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "image_barrier(%s->%s)", vk_ImageLayout_to_str(res->layout), vk_ImageLayout_to_str(new_layout));
+ bool queue_import = false;
+ emit_memory_barrier<BARRIER_API>::for_image(ctx, res, new_layout, flags, pipeline, completed, cmdbuf, &queue_import);
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+
+ if (!UNSYNCHRONIZED)
+ resource_check_defer_image_barrier(ctx, res, new_layout, pipeline);
+
+ if (is_write)
+ res->obj->last_write = flags;
+
+ res->obj->access = flags;
+ res->obj->access_stage = pipeline;
+ res->layout = new_layout;
+
+ if (new_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
+ zink_resource_copies_reset(res);
+
+ if (res->obj->exportable)
+ simple_mtx_lock(&ctx->batch.state->exportable_lock);
+ if (res->obj->dt) {
+ struct kopper_displaytarget *cdt = res->obj->dt;
+ if (cdt->swapchain->num_acquires && res->obj->dt_idx != UINT32_MAX) {
+ cdt->swapchain->images[res->obj->dt_idx].layout = res->layout;
+ }
+ } else if (res->obj->exportable) {
+ struct pipe_resource *pres = NULL;
+ bool found = false;
+ _mesa_set_search_or_add(&ctx->batch.state->dmabuf_exports, res, &found);
+ if (!found) {
+ pipe_resource_reference(&pres, &res->base.b);
+ }
+ }
+ if (res->obj->exportable && queue_import) {
+ for (struct zink_resource *r = res; r; r = zink_resource(r->base.b.next)) {
+ VkSemaphore sem = zink_screen_export_dmabuf_semaphore(zink_screen(ctx->base.screen), r);
+ if (sem)
+ util_dynarray_append(&ctx->batch.state->fd_wait_semaphores, VkSemaphore, sem);
+ }
+ }
+ if (res->obj->exportable)
+ simple_mtx_unlock(&ctx->batch.state->exportable_lock);
+}
+
+bool
+zink_check_unordered_transfer_access(struct zink_resource *res, unsigned level, const struct pipe_box *box)
+{
+ /* always barrier against previous non-transfer writes */
+ bool non_transfer_write = res->obj->last_write && res->obj->last_write != VK_ACCESS_TRANSFER_WRITE_BIT;
+ /* must barrier if clobbering a previous write */
+ bool transfer_clobber = res->obj->last_write == VK_ACCESS_TRANSFER_WRITE_BIT && zink_resource_copy_box_intersects(res, level, box);
+ return non_transfer_write || transfer_clobber;
+}
+
+bool
+zink_check_valid_buffer_src_access(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size)
+{
+ return res->obj->access && util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !unordered_res_exec(ctx, res, false);
+}
+
+void
+zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box, bool unsync)
+{
+ if (res->obj->copies_need_reset)
+ zink_resource_copies_reset(res);
+ /* skip TRANSFER_DST barrier if no intersection from previous copies */
+ if (res->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ||
+ zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics ||
+ zink_check_unordered_transfer_access(res, level, box)) {
+ if (unsync)
+ zink_screen(ctx->base.screen)->image_barrier_unsync(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+ else
+ zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+ } else {
+ res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT;
+ res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ }
+ zink_resource_copy_box_add(ctx, res, level, box);
+}
+
+bool
+zink_resource_buffer_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size)
+{
+ if (res->obj->copies_need_reset)
+ zink_resource_copies_reset(res);
+ bool unordered = true;
+ struct pipe_box box;
+ u_box_3d((int)offset, 0, 0, (int)size, 0, 0, &box);
+ bool can_unordered_write = unordered_res_exec(ctx, res, true);
+ /* must barrier if something read the valid buffer range */
+ bool valid_read = (res->obj->access || res->obj->unordered_access) &&
+ util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !can_unordered_write;
+ if (valid_read || zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics || zink_check_unordered_transfer_access(res, 0, &box)) {
+ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+ unordered = res->obj->unordered_write;
+ } else {
+ res->obj->unordered_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT;
+ res->obj->unordered_access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ ctx->batch.state->unordered_write_access |= VK_ACCESS_TRANSFER_WRITE_BIT;
+ ctx->batch.state->unordered_write_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+ if (!zink_resource_usage_matches(res, ctx->batch.state)) {
+ res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ res->obj->ordered_access_is_copied = true;
+ }
+ }
+ zink_resource_copy_box_add(ctx, res, 0, &box);
+ /* this return value implies that the caller could do an unordered op on this resource */
+ return unordered;
+}
+
+VkPipelineStageFlags
+zink_pipeline_flags_from_stage(VkShaderStageFlagBits stage)
+{
+ switch (stage) {
+ case VK_SHADER_STAGE_VERTEX_BIT:
+ return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
+ case VK_SHADER_STAGE_FRAGMENT_BIT:
+ return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ case VK_SHADER_STAGE_GEOMETRY_BIT:
+ return VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+ return VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT;
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+ return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+ case VK_SHADER_STAGE_COMPUTE_BIT:
+ return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ default:
+ unreachable("unknown shader stage bit");
+ }
+}
+
+ALWAYS_INLINE static VkPipelineStageFlags
+pipeline_access_stage(VkAccessFlags flags)
+{
+ if (flags & (VK_ACCESS_UNIFORM_READ_BIT |
+ VK_ACCESS_SHADER_READ_BIT |
+ VK_ACCESS_SHADER_WRITE_BIT))
+ return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+ VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ return VK_PIPELINE_STAGE_TRANSFER_BIT;
+}
+
+ALWAYS_INLINE static bool
+buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline, bool unordered)
+{
+ return zink_resource_access_is_write(unordered ? res->obj->unordered_access : res->obj->access) ||
+ zink_resource_access_is_write(flags) ||
+ ((unordered ? res->obj->unordered_access_stage : res->obj->access_stage) & pipeline) != pipeline ||
+ ((unordered ? res->obj->unordered_access : res->obj->access) & flags) != flags;
+}
+
+
+
+template <barrier_type BARRIER_API>
+void
+zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+{
+ if (!pipeline)
+ pipeline = pipeline_access_stage(flags);
+
+ bool is_write = zink_resource_access_is_write(flags);
+ enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
+ bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+ bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+ if (!usage_matches) {
+ res->obj->unordered_write = true;
+ if (is_write || zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, ZINK_RESOURCE_ACCESS_RW))
+ res->obj->unordered_read = true;
+ }
+ bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
+ bool unordered = unordered_res_exec(ctx, res, is_write);
+ if (!buffer_needs_barrier(res, flags, pipeline, unordered))
+ return;
+ if (completed) {
+ /* reset access on complete */
+ res->obj->access = VK_ACCESS_NONE;
+ res->obj->access_stage = VK_PIPELINE_STAGE_NONE;
+ res->obj->last_write = VK_ACCESS_NONE;
+ } else if (unordered && unordered_usage_matches && res->obj->ordered_access_is_copied) {
+ /* always reset propagated access to avoid weirdness */
+ res->obj->access = VK_ACCESS_NONE;
+ res->obj->access_stage = VK_PIPELINE_STAGE_NONE;
+ } else if (!unordered && !unordered_usage_matches) {
+ /* reset unordered access on first ordered barrier */
+ res->obj->unordered_access = VK_ACCESS_NONE;
+ res->obj->unordered_access_stage = VK_PIPELINE_STAGE_NONE;
+ }
+ if (!usage_matches) {
+ /* reset unordered on first new cmdbuf barrier */
+ res->obj->unordered_access = VK_ACCESS_NONE;
+ res->obj->unordered_access_stage = VK_PIPELINE_STAGE_NONE;
+ res->obj->ordered_access_is_copied = false;
+ }
+ /* unordered barriers can be skipped when:
+ * - there is no current-batch unordered access AND previous batch usage is not write access
+ * - there is current-batch unordered access AND the unordered access is not write access
+ */
+ bool can_skip_unordered = !unordered ? false : !zink_resource_access_is_write(!unordered_usage_matches ? res->obj->access : res->obj->unordered_access);
+ /* ordered barriers can be skipped if both:
+ * - there is no current access
+ * - there is no current-batch unordered access
+ */
+ bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+ if (ctx->no_reorder)
+ can_skip_unordered = can_skip_ordered = false;
+
+ if (!can_skip_unordered && !can_skip_ordered) {
+ VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
+ bool marker = false;
+ if (unlikely(zink_tracing)) {
+ char buf[4096];
+ zink_string_vkflags_unroll(buf, sizeof(buf), flags, (zink_vkflags_func)vk_AccessFlagBits_to_str);
+ marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "buffer_barrier(%s)", buf);
+ }
+
+ VkPipelineStageFlags stages = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
+ emit_memory_barrier<BARRIER_API>::for_buffer(ctx, res, pipeline, flags, unordered, usage_matches, stages, cmdbuf);
+
+ zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+ }
+
+ resource_check_defer_buffer_barrier(ctx, res, pipeline);
+
+ if (is_write)
+ res->obj->last_write = flags;
+ if (unordered) {
+ /* these should get automatically emitted during submission */
+ res->obj->unordered_access = flags;
+ res->obj->unordered_access_stage = pipeline;
+ if (is_write) {
+ ctx->batch.state->unordered_write_access |= flags;
+ ctx->batch.state->unordered_write_stages |= pipeline;
+ }
+ }
+ if (!unordered || !usage_matches || res->obj->ordered_access_is_copied) {
+ res->obj->access = flags;
+ res->obj->access_stage = pipeline;
+ res->obj->ordered_access_is_copied = unordered;
+ }
+ if (pipeline != VK_PIPELINE_STAGE_TRANSFER_BIT && is_write)
+ zink_resource_copies_reset(res);
+}
+
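+/* installs the barrier entrypoints on the screen; callers then go through the hooks,
+ * e.g. zink_screen(ctx->base.screen)->image_barrier(ctx, res, layout, access, stage)
+ * as in the transfer-dst helpers above
+ */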
+void
+zink_synchronization_init(struct zink_screen *screen)
+{
+ if (screen->info.have_vulkan13 || screen->info.have_KHR_synchronization2) {
+ screen->buffer_barrier = zink_resource_buffer_barrier<barrier_KHR_synchronzation2>;
+ screen->image_barrier = zink_resource_image_barrier<barrier_KHR_synchronzation2, false>;
+ screen->image_barrier_unsync = zink_resource_image_barrier<barrier_KHR_synchronzation2, true>;
+ } else {
+ screen->buffer_barrier = zink_resource_buffer_barrier<barrier_default>;
+ screen->image_barrier = zink_resource_image_barrier<barrier_default, false>;
+ screen->image_barrier_unsync = zink_resource_image_barrier<barrier_default, true>;
+ }
+}
diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h
new file mode 100644
index 00000000000..403f600c793
--- /dev/null
+++ b/src/gallium/drivers/zink/zink_types.h
@@ -0,0 +1,2068 @@
+/*
+ * Copyright © 2022 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+ */
+
+#ifndef ZINK_TYPES_H
+#define ZINK_TYPES_H
+
+#include <vulkan/vulkan_core.h>
+
+#include "compiler/nir/nir.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipebuffer/pb_cache.h"
+#include "pipebuffer/pb_slab.h"
+
+#include "util/disk_cache.h"
+#include "util/hash_table.h"
+#include "util/list.h"
+#include "util/log.h"
+#include "util/rwlock.h"
+#include "util/set.h"
+#include "util/simple_mtx.h"
+#include "util/slab.h"
+#include "util/u_dynarray.h"
+#include "util/u_idalloc.h"
+#include "util/u_live_shader_cache.h"
+#include "util/u_queue.h"
+#include "util/u_range.h"
+#include "util/u_threaded_context.h"
+#include "util/u_transfer.h"
+#include "util/u_vertex_state_cache.h"
+
+#include "vk_util.h"
+
+#include "zink_device_info.h"
+#include "zink_instance.h"
+#include "zink_shader_keys.h"
+#include "vk_dispatch_table.h"
+
+#ifdef HAVE_RENDERDOC_APP_H
+#include "renderdoc_app.h"
+#endif
+
+/* the descriptor binding id for fbfetch/input attachment */
+#define ZINK_FBFETCH_BINDING 5
+#define ZINK_GFX_SHADER_COUNT 5
+
+/* number of descriptors to allocate in a pool */
+#define MAX_LAZY_DESCRIPTORS 500
+/* explicit clamping because descriptor caching used to exist */
+#define ZINK_MAX_SHADER_IMAGES 32
+/* total number of bindless ids that can be allocated */
+#define ZINK_MAX_BINDLESS_HANDLES 1024
+
+/* enum zink_descriptor_type */
+#define ZINK_MAX_DESCRIPTOR_SETS 6
+#define ZINK_MAX_DESCRIPTORS_PER_TYPE (32 * ZINK_GFX_SHADER_COUNT)
+/* Descriptor size reported by lavapipe. */
+#define ZINK_FBFETCH_DESCRIPTOR_SIZE 280
+
+/* suballocator defines */
+#define NUM_SLAB_ALLOCATORS 3
+#define MIN_SLAB_ORDER 8
+
+
+/* this is the spec minimum */
+#define ZINK_SPARSE_BUFFER_PAGE_SIZE (64 * 1024)
+
+/* flag to create screen->copy_context */
+#define ZINK_CONTEXT_COPY_ONLY (1<<30)
+
+/* convenience macros for accessing dispatch table functions */
+#define VKCTX(fn) zink_screen(ctx->base.screen)->vk.fn
+#define VKSCR(fn) screen->vk.fn
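+/* e.g. VKCTX(CmdPipelineBarrier2)(cmdbuf, &dep) in context code,
+ * VKSCR(CreateImageView)(screen->dev, &ivci, NULL, &view) in screen code
+ */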
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern uint32_t zink_debug;
+extern bool zink_tracing;
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** enums */
+
+/* features for draw/program templates */
+typedef enum {
+ ZINK_NO_MULTIDRAW,
+ ZINK_MULTIDRAW,
+} zink_multidraw;
+
+typedef enum {
+ ZINK_NO_DYNAMIC_STATE,
+ ZINK_DYNAMIC_STATE,
+ ZINK_DYNAMIC_STATE2,
+ ZINK_DYNAMIC_VERTEX_INPUT2,
+ ZINK_DYNAMIC_STATE3,
+ ZINK_DYNAMIC_VERTEX_INPUT,
+} zink_dynamic_state;
+
+typedef enum {
+ ZINK_PIPELINE_NO_DYNAMIC_STATE,
+ ZINK_PIPELINE_DYNAMIC_STATE,
+ ZINK_PIPELINE_DYNAMIC_STATE2,
+ ZINK_PIPELINE_DYNAMIC_STATE2_PCP,
+ ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2,
+ ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT2_PCP,
+ ZINK_PIPELINE_DYNAMIC_STATE3,
+ ZINK_PIPELINE_DYNAMIC_STATE3_PCP,
+ ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT,
+ ZINK_PIPELINE_DYNAMIC_VERTEX_INPUT_PCP,
+} zink_pipeline_dynamic_state;
+
+enum zink_blit_flags {
+ ZINK_BLIT_NORMAL = 1 << 0,
+ ZINK_BLIT_SAVE_FS = 1 << 1,
+ ZINK_BLIT_SAVE_FB = 1 << 2,
+ ZINK_BLIT_SAVE_TEXTURES = 1 << 3,
+ ZINK_BLIT_NO_COND_RENDER = 1 << 4,
+ ZINK_BLIT_SAVE_FS_CONST_BUF = 1 << 5,
+};
+
+/* descriptor types; also the ordering of the sets
+ * ...except that ZINK_DESCRIPTOR_BASE_TYPES is actually ZINK_DESCRIPTOR_TYPE_UNIFORMS,
+ * and all base type values are thus +1 to get the set id (using screen->desc_set_id[idx])
+ */
+enum zink_descriptor_type {
+ ZINK_DESCRIPTOR_TYPE_UBO,
+ ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW,
+ ZINK_DESCRIPTOR_TYPE_SSBO,
+ ZINK_DESCRIPTOR_TYPE_IMAGE,
+ ZINK_DESCRIPTOR_BASE_TYPES, /**< the count/iterator for basic descriptor types */
+ ZINK_DESCRIPTOR_BINDLESS,
+ ZINK_DESCRIPTOR_ALL_TYPES,
+ ZINK_DESCRIPTOR_TYPE_UNIFORMS = ZINK_DESCRIPTOR_BASE_TYPES, /**< this is aliased for convenience */
+ ZINK_DESCRIPTOR_NON_BINDLESS_TYPES = ZINK_DESCRIPTOR_BASE_TYPES + 1, /**< for struct sizing */
+};
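+/* e.g. under this scheme ZINK_DESCRIPTOR_TYPE_UBO (0) lands one set past the
+ * uniform/push set; the actual id is always looked up via screen->desc_set_id[]
+ */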
+
+enum zink_descriptor_mode {
+ ZINK_DESCRIPTOR_MODE_AUTO,
+ ZINK_DESCRIPTOR_MODE_LAZY,
+ ZINK_DESCRIPTOR_MODE_DB,
+};
+
+/* the current mode */
+extern enum zink_descriptor_mode zink_descriptor_mode;
+
+/* indexing for descriptor template management */
+enum zink_descriptor_size_index {
+ ZDS_INDEX_UBO,
+ ZDS_INDEX_COMBINED_SAMPLER,
+ ZDS_INDEX_UNIFORM_TEXELS,
+ ZDS_INDEX_SAMPLER,
+ ZDS_INDEX_STORAGE_BUFFER,
+ ZDS_INDEX_STORAGE_IMAGE,
+ ZDS_INDEX_STORAGE_TEXELS,
+ ZDS_INDEX_MAX,
+};
+
+/* indexing for descriptor template management in COMPACT mode */
+enum zink_descriptor_size_index_compact {
+ ZDS_INDEX_COMP_UBO,
+ ZDS_INDEX_COMP_STORAGE_BUFFER,
+ ZDS_INDEX_COMP_COMBINED_SAMPLER,
+ ZDS_INDEX_COMP_UNIFORM_TEXELS,
+ ZDS_INDEX_COMP_SAMPLER,
+ ZDS_INDEX_COMP_STORAGE_IMAGE,
+ ZDS_INDEX_COMP_STORAGE_TEXELS,
+};
+
+enum zink_resource_access {
+ ZINK_RESOURCE_ACCESS_READ = 1,
+ ZINK_RESOURCE_ACCESS_WRITE = 32,
+ ZINK_RESOURCE_ACCESS_RW = ZINK_RESOURCE_ACCESS_READ | ZINK_RESOURCE_ACCESS_WRITE,
+};
+
+
+/* zink heaps are based on vulkan memory types, but are not a 1-to-1 mapping to vulkan memory type indices and have no direct relation to vulkan memory heaps */
+enum zink_heap {
+ ZINK_HEAP_DEVICE_LOCAL,
+ ZINK_HEAP_DEVICE_LOCAL_SPARSE,
+ ZINK_HEAP_DEVICE_LOCAL_LAZY,
+ ZINK_HEAP_DEVICE_LOCAL_VISIBLE,
+ ZINK_HEAP_HOST_VISIBLE_COHERENT,
+ ZINK_HEAP_HOST_VISIBLE_COHERENT_CACHED,
+ ZINK_HEAP_MAX,
+};
+
+enum zink_alloc_flag {
+ ZINK_ALLOC_SPARSE = 1<<0,
+ ZINK_ALLOC_NO_SUBALLOC = 1<<1,
+};
+
+enum zink_debug {
+ ZINK_DEBUG_NIR = (1<<0),
+ ZINK_DEBUG_SPIRV = (1<<1),
+ ZINK_DEBUG_TGSI = (1<<2),
+ ZINK_DEBUG_VALIDATION = (1<<3),
+ ZINK_DEBUG_SYNC = (1<<4),
+ ZINK_DEBUG_COMPACT = (1<<5),
+ ZINK_DEBUG_NOREORDER = (1<<6),
+ ZINK_DEBUG_GPL = (1<<7),
+ ZINK_DEBUG_SHADERDB = (1<<8),
+ ZINK_DEBUG_RP = (1<<9),
+ ZINK_DEBUG_NORP = (1<<10),
+ ZINK_DEBUG_MAP = (1<<11),
+ ZINK_DEBUG_FLUSHSYNC = (1<<12),
+ ZINK_DEBUG_NOSHOBJ = (1<<13),
+ ZINK_DEBUG_OPTIMAL_KEYS = (1<<14),
+ ZINK_DEBUG_NOOPT = (1<<15),
+ ZINK_DEBUG_NOBGC = (1<<16),
+ ZINK_DEBUG_DGC = (1<<17),
+ ZINK_DEBUG_MEM = (1<<18),
+ ZINK_DEBUG_QUIET = (1<<19),
+ ZINK_DEBUG_IOOPT = (1<<20),
+ ZINK_DEBUG_NOPC = (1<<21),
+};
+
+enum zink_pv_emulation_primitive {
+ ZINK_PVE_PRIMITIVE_NONE = 0,
+ ZINK_PVE_PRIMITIVE_SIMPLE = 1,
+ /* when triangle or quad strips are used and the gs outputs triangles */
+ ZINK_PVE_PRIMITIVE_TRISTRIP = 2,
+ ZINK_PVE_PRIMITIVE_FAN = 3,
+};
+
+enum zink_dgc_buffer {
+ ZINK_DGC_VBO,
+ ZINK_DGC_IB,
+ ZINK_DGC_PSO,
+ ZINK_DGC_PUSH,
+ ZINK_DGC_DRAW,
+ ZINK_DGC_MAX,
+};
+
+/** fence types */
+struct tc_unflushed_batch_token;
+
+/* an async fence created for tc */
+struct zink_tc_fence {
+ struct pipe_reference reference;
+ /* enables distinction between tc fence submission and vk queue submission */
+ uint32_t submit_count;
+ /* when the tc fence is signaled for use */
+ struct util_queue_fence ready;
+ struct tc_unflushed_batch_token *tc_token;
+ /* for deferred flushes */
+ struct pipe_context *deferred_ctx;
+ /* multiple tc fences may point to a real fence */
+ struct zink_fence *fence;
+ /* for use with semaphore/imported fences */
+ VkSemaphore sem;
+};
+
+/* a fence is actually a zink_batch_state, but these are split out for logical consistency */
+struct zink_fence {
+ uint64_t batch_id;
+ bool submitted;
+ bool completed;
+ struct util_dynarray mfences;
+};
+
+
+/** state types */
+
+struct zink_vertex_elements_hw_state {
+ uint32_t hash;
+ uint32_t num_bindings, num_attribs;
+ /* VK_EXT_vertex_input_dynamic_state uses different types */
+ union {
+ VkVertexInputAttributeDescription attribs[PIPE_MAX_ATTRIBS];
+ VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS];
+ };
+ union {
+ struct {
+ VkVertexInputBindingDivisorDescriptionEXT divisors[PIPE_MAX_ATTRIBS];
+ VkVertexInputBindingDescription bindings[PIPE_MAX_ATTRIBS]; // combination of element_state and stride
+ VkDeviceSize strides[PIPE_MAX_ATTRIBS];
+ uint8_t divisors_present;
+ } b;
+ VkVertexInputBindingDescription2EXT dynbindings[PIPE_MAX_ATTRIBS];
+ };
+ uint8_t binding_map[PIPE_MAX_ATTRIBS];
+};
+
+struct zink_vertex_elements_state {
+ /* decomposed attributes read only a single component for format compatibility */
+ bool has_decomposed_attrs;
+ struct {
+ uint32_t binding;
+ VkVertexInputRate inputRate;
+ } bindings[PIPE_MAX_ATTRIBS];
+ uint32_t divisor[PIPE_MAX_ATTRIBS];
+ uint32_t min_stride[PIPE_MAX_ATTRIBS]; //for dynamic_state1
+ uint32_t decomposed_attrs;
+ unsigned decomposed_attrs_size;
+ uint32_t decomposed_attrs_without_w;
+ unsigned decomposed_attrs_without_w_size;
+ struct zink_vertex_elements_hw_state hw_state;
+};
+
+/* for vertex state draws */
+struct zink_vertex_state {
+ struct pipe_vertex_state b;
+ struct zink_vertex_elements_state velems;
+};
+
+struct zink_rasterizer_hw_state {
+ unsigned polygon_mode : 2; //VkPolygonMode
+ unsigned line_mode : 2; //VkLineRasterizationModeEXT
+ unsigned depth_clip:1;
+ unsigned depth_clamp:1;
+ unsigned pv_last:1;
+ unsigned line_stipple_enable:1;
+ unsigned clip_halfz:1;
+};
+
+struct zink_rasterizer_state {
+ struct pipe_rasterizer_state base;
+ bool offset_fill;
+ float offset_units, offset_clamp, offset_scale;
+ float line_width;
+ VkFrontFace front_face;
+ VkCullModeFlags cull_mode;
+ VkLineRasterizationModeEXT dynamic_line_mode;
+ struct zink_rasterizer_hw_state hw_state;
+};
+
+struct zink_blend_state {
+ uint32_t hash;
+ unsigned num_rts;
+ VkPipelineColorBlendAttachmentState attachments[PIPE_MAX_COLOR_BUFS];
+
+ struct {
+ VkBool32 enables[PIPE_MAX_COLOR_BUFS];
+ VkColorBlendEquationEXT eq[PIPE_MAX_COLOR_BUFS];
+ VkColorComponentFlags wrmask[PIPE_MAX_COLOR_BUFS];
+ } ds3;
+
+ VkBool32 logicop_enable;
+ VkLogicOp logicop_func;
+
+ VkBool32 alpha_to_coverage;
+ VkBool32 alpha_to_one;
+
+ uint32_t wrmask;
+ uint8_t enables;
+
+ bool dual_src_blend;
+};
+
+struct zink_depth_stencil_alpha_hw_state {
+ VkBool32 depth_test;
+ VkCompareOp depth_compare_op;
+
+ VkBool32 depth_bounds_test;
+ float min_depth_bounds, max_depth_bounds;
+
+ VkBool32 stencil_test;
+ VkStencilOpState stencil_front;
+ VkStencilOpState stencil_back;
+
+ VkBool32 depth_write;
+};
+
+struct zink_depth_stencil_alpha_state {
+ struct pipe_depth_stencil_alpha_state base;
+ struct zink_depth_stencil_alpha_hw_state hw_state;
+};
+
+
+/** descriptor types */
+
+/* zink_descriptor_layout objects are cached: this is the key for one */
+struct zink_descriptor_layout_key {
+ unsigned num_bindings;
+ VkDescriptorSetLayoutBinding *bindings;
+};
+
+struct zink_descriptor_layout {
+ VkDescriptorSetLayout layout;
+};
+
+/* descriptor pools are cached: zink_descriptor_pool_key::id is the id for a type of pool */
+struct zink_descriptor_pool_key {
+ unsigned use_count;
+ unsigned num_type_sizes;
+ unsigned id;
+ VkDescriptorPoolSize sizes[4];
+ struct zink_descriptor_layout_key *layout;
+};
+
+/* a template used for updating descriptor buffers */
+struct zink_descriptor_template {
+ uint16_t stride; //the stride between mem pointers
+ uint16_t db_size; //the size of the entry in the buffer
+ unsigned count; //the number of descriptors
+ size_t offset; //the offset of the base host pointer to update from
+};
+
+/* ctx->dd; created at context creation */
+struct zink_descriptor_data {
+ bool bindless_bound;
+ bool bindless_init;
+ bool has_fbfetch;
+ bool push_state_changed[2]; //gfx, compute
+ uint8_t state_changed[2]; //gfx, compute
+ struct zink_descriptor_layout_key *push_layout_keys[2]; //gfx, compute
+ struct zink_descriptor_layout *push_dsl[2]; //gfx, compute
+ VkDescriptorUpdateTemplate push_template[2]; //gfx, compute
+
+ struct zink_descriptor_layout *dummy_dsl;
+
+ union {
+ struct {
+ VkDescriptorPool bindless_pool;
+ VkDescriptorSet bindless_set;
+ } t;
+ struct {
+ struct zink_resource *bindless_db;
+ uint8_t *bindless_db_map;
+ struct pipe_transfer *bindless_db_xfer;
+ uint32_t bindless_db_offsets[4];
+ unsigned max_db_size;
+ unsigned size_enlarge_scale;
+ } db;
+ };
+
+ struct zink_program *pg[2]; //gfx, compute
+
+ VkDescriptorUpdateTemplateEntry push_entries[MESA_SHADER_STAGES]; //gfx+fbfetch
+ VkDescriptorUpdateTemplateEntry compute_push_entry;
+
+ /* push descriptor layout size and binding offsets */
+ uint32_t db_size[2]; //gfx, compute
+ uint32_t db_offset[ZINK_GFX_SHADER_COUNT + 1]; //gfx + fbfetch
+ /* compute offset is always 0 */
+};
+
+/* pg->dd; created at program creation */
+struct zink_program_descriptor_data {
+ bool bindless;
+ bool fbfetch;
+ /* bitmask of ubo0 usage for stages */
+ uint8_t push_usage;
+ /* bitmask of which sets are used by the program */
+ uint8_t binding_usage;
+ /* all the pool keys for the program */
+ struct zink_descriptor_pool_key *pool_key[ZINK_DESCRIPTOR_BASE_TYPES]; //push set doesn't need one
+ /* all the layouts for the program */
+ struct zink_descriptor_layout *layouts[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES];
+ /* all the templates for the program */
+ union {
+ VkDescriptorUpdateTemplate templates[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES];
+ struct zink_descriptor_template *db_template[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES];
+ };
+ uint32_t db_size[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //the total size of the layout
+ uint32_t *db_offset[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //the offset of each binding in the layout
+};
+
+struct zink_descriptor_pool {
+ /* the current index of 'sets' */
+ unsigned set_idx;
+ /* number of sets allocated */
+ unsigned sets_alloc;
+ VkDescriptorPool pool;
+ /* sets are lazily allocated */
+ VkDescriptorSet sets[MAX_LAZY_DESCRIPTORS];
+};
+
+/* a zink_descriptor_pool_key matches up to this struct */
+struct zink_descriptor_pool_multi {
+ /* for flagging when overflowed pools must be destroyed instead of reused */
+ bool reinit_overflow;
+ /* flips to split usable overflow from in-use overflow (see the sketch below this struct) */
+ unsigned overflow_idx;
+ /* zink_descriptor_pool objects that have exceeded MAX_LAZY_DESCRIPTORS sets */
+ struct util_dynarray overflowed_pools[2];
+ /* the current pool; may be null */
+ struct zink_descriptor_pool *pool;
+ /* pool key for convenience */
+ const struct zink_descriptor_pool_key *pool_key;
+};
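+
+/* Illustrative sketch only: one plausible reading of the comments above is
+ * that pools which fill up get appended to the overflow array selected by
+ * overflow_idx, and the index is flipped when the batch state is recycled so
+ * that previously in-use overflow becomes usable again (the exact driver
+ * logic is not shown here):
+ *
+ *    util_dynarray_append(&mpool->overflowed_pools[mpool->overflow_idx],
+ *                         struct zink_descriptor_pool *, full_pool);
+ *    ...
+ *    mpool->overflow_idx = !mpool->overflow_idx;
+ */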
+
+/* bs->dd; created on batch state creation */
+struct zink_batch_descriptor_data {
+ /* pools have fbfetch initialized */
+ bool has_fbfetch;
+ /* are descriptor buffers bound */
+ bool db_bound;
+ /* real size of 'pools' */
+ unsigned pool_size[ZINK_DESCRIPTOR_BASE_TYPES];
+ /* this array is sized based on the max zink_descriptor_pool_key::id used by the batch; members may be NULL */
+ struct util_dynarray pools[ZINK_DESCRIPTOR_BASE_TYPES];
+ struct zink_descriptor_pool_multi push_pool[2]; //gfx, compute
+ /* the current program (for descriptor updating) */
+ struct zink_program *pg[2]; //gfx, compute
+ /* the current pipeline compatibility id (for pipeline compatibility rules) */
+ uint32_t compat_id[2]; //gfx, compute
+ /* the current set layout */
+ VkDescriptorSetLayout dsl[2][ZINK_DESCRIPTOR_BASE_TYPES]; //gfx, compute
+ union {
+ /* the current set for a given type; used for rebinding if pipeline compat id changes and current set must be rebound */
+ VkDescriptorSet sets[2][ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //gfx, compute
+ uint64_t cur_db_offset[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //gfx, compute; the current offset of a descriptor buffer for rebinds
+ };
+ /* mask of push descriptor usage */
+ unsigned push_usage[2]; //gfx, compute
+
+ struct zink_resource *db; //the descriptor buffer for a given type
+ uint8_t *db_map; //the host map for the buffer
+ struct pipe_transfer *db_xfer; //the transfer map for the buffer
+ uint64_t db_offset; //the "next" offset that will be used when the buffer is updated
+};
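+
+/* Illustrative sketch only: when descriptor buffers are bound (db_bound),
+ * db_offset behaves like a bump allocator into the host-mapped buffer; a
+ * write of `size` bytes at alignment `align` could look roughly like this
+ * (names other than the struct members are hypothetical):
+ *
+ *    uint64_t offset = align64(bs->dd.db_offset, align);
+ *    memcpy(bs->dd.db_map + offset, payload, size);
+ *    bs->dd.db_offset = offset + size;
+ */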
+
+/** batch types */
+/* zink_batch_usage concepts:
+ * - batch "usage" is an indicator of when and how a BO was accessed
+ * - batch "tracking" is the batch state(s) containing an extra ref for a BO
+ *
+ * - usage prevents a BO from being mapped while it has pending+conflicting access
+ * - usage affects pipeline barrier generation for synchronizing reads and writes
+ * - usage MUST be removed before context destruction to avoid crashing during BO
+ * reclaiming in suballocator
+ *
+ * - tracking prevents a BO from being destroyed early
+ * - tracking enables usage to be pruned
+ *
+ *
+ * tracking is added:
+ * - any time a BO is used in a "one-off" operation (e.g., blit, index buffer, indirect buffer)
+ * - any time a descriptor is unbound
+ * - when a buffer is replaced (IFF: resource is bound as a descriptor or usage previously existed)
+ *
+ * tracking is removed:
+ * - in zink_reset_batch_state()
+ *
+ * usage is added:
+ * - any time a BO is used in a "one-off" operation (e.g., blit, index buffer, indirect buffer)
+ * - any time a descriptor is bound
+ * - any time a descriptor is unbound (IFF: usage previously existed)
+ * - for all bound descriptors on the first draw/dispatch after a flush (zink_update_descriptor_refs)
+ *
+ * usage is removed:
+ * - when tracking is removed (IFF: BO usage == tracking, i.e., this is the last batch that a BO was active on)
+ */
+struct zink_batch_usage {
+ uint32_t usage;
+ /* this is a monotonic int used to disambiguate internal fences from their tc fence references */
+ uint32_t submit_count;
+ cnd_t flush;
+ mtx_t mtx;
+ bool unflushed;
+};
+
+struct zink_bo_usage {
+ uint32_t submit_count;
+ struct zink_batch_usage *u;
+};
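+
+/* Illustrative sketch only: the "usage" rules above are what gate CPU maps.
+ * Before mapping a BO for a conflicting access, a caller can check whether
+ * its read/write usage belongs to a batch that is still pending and, if so,
+ * flush and wait; the helper names below are hypothetical:
+ *
+ *    if (bo_usage_is_pending(&bo->writes) ||
+ *        (map_for_write && bo_usage_is_pending(&bo->reads)))
+ *       flush_and_wait(ctx);
+ */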
+
+struct zink_batch_obj_list {
+ unsigned max_buffers;
+ unsigned num_buffers;
+ struct zink_resource_object **objs;
+};
+
+struct zink_batch_state {
+ struct zink_fence fence;
+ struct zink_batch_state *next;
+
+ struct zink_batch_usage usage;
+ struct zink_context *ctx;
+ VkCommandPool cmdpool;
+ VkCommandBuffer cmdbuf;
+ VkCommandBuffer reordered_cmdbuf;
+ VkCommandPool unsynchronized_cmdpool;
+ VkCommandBuffer unsynchronized_cmdbuf;
+ VkSemaphore signal_semaphore; //external signal semaphore
+ struct util_dynarray signal_semaphores; //external signal semaphores
+ struct util_dynarray wait_semaphores; //external wait semaphores
+ struct util_dynarray wait_semaphore_stages; //external wait semaphores
+ struct util_dynarray fd_wait_semaphores; //dmabuf wait semaphores
+ struct util_dynarray fd_wait_semaphore_stages; //dmabuf wait semaphores
+ struct util_dynarray fences; //zink_tc_fence refs
+
+ VkSemaphore present;
+ struct zink_resource *swapchain;
+ struct util_dynarray acquires;
+ struct util_dynarray acquire_flags;
+
+ struct {
+ struct util_dynarray pipelines;
+ struct util_dynarray layouts;
+ } dgc;
+
+ VkAccessFlags unordered_write_access;
+ VkPipelineStageFlags unordered_write_stages;
+
+ simple_mtx_t exportable_lock;
+
+ struct util_queue_fence flush_completed;
+
+ struct set programs;
+ struct set dmabuf_exports;
+
+#define BUFFER_HASHLIST_SIZE 32768
+ /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo isn't part of
+ * any buffer list, or else the index where the bo could be found.
+ * Since 1) hash collisions between two different bos can happen and
+ * 2) a single hashlist is shared by the three buffer lists, this is only
+ * a hint. batch_find_resource uses this hint to speed up buffer lookups
+ * (see the illustrative lookup sketch after this struct).
+ */
+ int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];
+ struct zink_batch_obj_list real_objs;
+ struct zink_batch_obj_list slab_objs;
+ struct zink_batch_obj_list sparse_objs;
+ struct zink_resource_object *last_added_obj;
+ struct util_dynarray swapchain_obj; //this doesn't have a zink_bo and must be handled differently
+
+ struct util_dynarray unref_resources;
+ struct util_dynarray bindless_releases[2];
+
+ struct util_dynarray zombie_samplers;
+
+ struct set active_queries; /* zink_query objects which were active at some point in this batch */
+ struct util_dynarray dead_querypools;
+
+ struct util_dynarray freed_sparse_backing_bos;
+
+ struct zink_batch_descriptor_data dd;
+
+ VkDeviceSize resource_size;
+
+ bool is_device_lost;
+ bool has_barriers;
+ bool has_unsync;
+};
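+
+/* Illustrative sketch only: the buffer_indices_hashlist lookup referenced in
+ * the comment above. The cached index is only a hint, so it must be verified
+ * against the buffer list before being trusted; `hash`, `list`, and `obj`
+ * are hypothetical names:
+ *
+ *    int idx = bs->buffer_indices_hashlist[hash & (BUFFER_HASHLIST_SIZE - 1)];
+ *    if (idx >= 0 && idx < (int)list->num_buffers && list->objs[idx] == obj)
+ *       return idx;             // hint was correct
+ *    // otherwise fall back to a linear search and refresh the hint
+ */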
+
+static inline struct zink_batch_state *
+zink_batch_state(struct zink_fence *fence)
+{
+ return (struct zink_batch_state *)fence;
+}
+
+struct zink_batch {
+ struct zink_batch_state *state;
+
+ struct zink_batch_usage *last_batch_usage;
+ struct zink_resource *swapchain;
+
+ unsigned work_count;
+
+ simple_mtx_t ref_lock;
+
+ bool has_work;
+ bool last_was_compute;
+ bool in_rp; //renderpass is currently active
+};
+
+
+/** bo types */
+struct bo_export {
+ /** File descriptor associated with a handle export. */
+ int drm_fd;
+
+ /** GEM handle in drm_fd */
+ uint32_t gem_handle;
+
+ struct list_head link;
+};
+
+struct zink_bo {
+ struct pb_buffer base;
+
+ union {
+ struct {
+ void *cpu_ptr; /* for user_ptr and permanent maps */
+ int map_count;
+ struct list_head exports;
+ simple_mtx_t export_lock;
+
+ bool is_user_ptr;
+ bool use_reusable_pool;
+
+ /* Whether buffer_get_handle or buffer_from_handle has been called;
+ * it can only transition from false to true. Protected by lock.
+ */
+ bool is_shared;
+ } real;
+ struct {
+ struct pb_slab_entry entry;
+ struct zink_bo *real;
+ } slab;
+ struct {
+ uint32_t num_va_pages;
+ uint32_t num_backing_pages;
+
+ struct list_head backing;
+
+ /* Commitment information for each page of the virtual memory area. */
+ struct zink_sparse_commitment *commitments;
+ } sparse;
+ } u;
+
+ VkDeviceMemory mem;
+ uint64_t offset;
+
+ uint32_t unique_id;
+ const char *name;
+
+ simple_mtx_t lock;
+
+ struct zink_bo_usage reads;
+ struct zink_bo_usage writes;
+
+ struct pb_cache_entry cache_entry[];
+};
+
+static inline struct zink_bo *
+zink_bo(struct pb_buffer *pbuf)
+{
+ return (struct zink_bo*)pbuf;
+}
+
+/** clear types */
+struct zink_framebuffer_clear_data {
+ union {
+ union pipe_color_union color;
+ struct {
+ float depth;
+ unsigned stencil;
+ uint8_t bits : 2; // PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL
+ } zs;
+ };
+ struct pipe_scissor_state scissor;
+ bool has_scissor;
+ bool conditional;
+};
+
+struct zink_framebuffer_clear {
+ struct util_dynarray clears;
+};
+
+
+/** compiler types */
+struct zink_shader_info {
+ uint16_t stride[PIPE_MAX_SO_BUFFERS];
+ uint32_t sampler_mask;
+ bool have_sparse;
+ bool have_vulkan_memory_model;
+ bool have_workgroup_memory_explicit_layout;
+ struct {
+ uint8_t flush_denorms:3; // 16, 32, 64
+ uint8_t preserve_denorms:3; // 16, 32, 64
+ bool denorms_32_bit_independence:1;
+ bool denorms_all_independence:1;
+ } float_controls;
+ unsigned bindless_set_idx;
+};
+
+enum zink_rast_prim {
+ ZINK_PRIM_POINTS,
+ ZINK_PRIM_LINES,
+ ZINK_PRIM_TRIANGLES,
+ ZINK_PRIM_MAX,
+};
+
+struct zink_shader_object {
+ union {
+ VkShaderEXT obj;
+ VkShaderModule mod;
+ };
+ struct spirv_shader *spirv;
+};
+
+struct zink_shader {
+ struct util_live_shader base;
+ uint32_t hash;
+ struct blob blob;
+ struct shader_info info;
+
+ struct zink_shader_info sinfo;
+
+ struct {
+ int index;
+ int binding;
+ VkDescriptorType type;
+ unsigned char size;
+ } bindings[ZINK_DESCRIPTOR_BASE_TYPES][ZINK_MAX_DESCRIPTORS_PER_TYPE];
+ size_t num_bindings[ZINK_DESCRIPTOR_BASE_TYPES];
+ uint32_t ubos_used; // bitfield of which ubo indices are used
+ uint32_t ssbos_used; // bitfield of which ssbo indices are used
+ uint64_t arrayed_inputs; //mask of locations using arrayed io
+ uint64_t arrayed_outputs; //mask of locations using arrayed io
+ uint64_t flat_flags;
+ bool bindless;
+ bool can_inline;
+ bool has_uniforms;
+ bool has_edgeflags;
+ bool needs_inlining;
+ bool uses_sample;
+ struct spirv_shader *spirv;
+
+ struct {
+ struct util_queue_fence fence;
+ struct zink_shader_object obj;
+ VkDescriptorSetLayout dsl;
+ VkPipelineLayout layout;
+ VkPipeline gpl;
+ VkDescriptorSetLayoutBinding *bindings;
+ unsigned num_bindings;
+ struct zink_descriptor_template *db_template;
+ unsigned db_size;
+ unsigned *db_offset;
+ } precompile;
+
+ simple_mtx_t lock;
+ struct set *programs;
+ struct util_dynarray pipeline_libs;
+
+ union {
+ struct {
+ struct zink_shader *generated_tcs; // a generated shader that this shader "owns"; only valid in the tes stage
+ struct zink_shader *generated_gs[MESA_PRIM_COUNT][ZINK_PRIM_MAX]; // generated shaders that this shader "owns"
+ struct zink_shader *parent; // for a generated gs this points to the shader that "owns" it
+
+ bool is_generated; // if this is a driver-created shader (e.g., tcs)
+ } non_fs;
+
+ struct {
+ /* Bitmask of textures that have shadow sampling result components
+ * other than RED accessed. This is a subset of !is_new_style_shadow
+ * (GLSL <1.30, ARB_fp) shadow sampling usage.
+ */
+ uint32_t legacy_shadow_mask;
+ nir_variable *fbfetch; //for fs output
+ } fs;
+ };
+};
+
+
+/** pipeline types */
+struct zink_pipeline_dynamic_state1 {
+ uint8_t front_face; //VkFrontFace:1
+ uint8_t cull_mode; //VkCullModeFlags:2
+ uint16_t num_viewports;
+ struct zink_depth_stencil_alpha_hw_state *depth_stencil_alpha_state; //must be last
+};
+
+struct zink_pipeline_dynamic_state2 {
+ bool primitive_restart;
+ bool rasterizer_discard;
+ uint16_t vertices_per_patch; //5 bits
+};
+
+#define zink_pipeline_dynamic_state3 zink_rasterizer_hw_state
+
+struct zink_gfx_pipeline_state {
+ /* order matches zink_gfx_output_key */
+ unsigned force_persample_interp:1;
+ uint32_t rast_samples:6;
+ uint32_t min_samples:6;
+ uint32_t feedback_loop : 1;
+ uint32_t feedback_loop_zs : 1;
+ uint32_t rast_attachment_order : 1;
+ uint32_t rp_state : 16;
+ VkSampleMask sample_mask;
+ uint32_t blend_id;
+
+ /* Pre-hashed value for table lookup, invalid when zero.
+ * Members from this point on are not included in the pipeline state hash
+ * key (see the illustrative hashing sketch after this struct). */
+ uint32_t hash;
+ bool dirty;
+
+ struct zink_pipeline_dynamic_state1 dyn_state1;
+
+ struct zink_pipeline_dynamic_state2 dyn_state2;
+ struct zink_pipeline_dynamic_state3 dyn_state3;
+
+ union {
+ VkShaderModule modules[MESA_SHADER_STAGES - 1];
+ uint32_t optimal_key;
+ };
+ bool modules_changed;
+
+ uint32_t vertex_hash;
+
+ uint32_t final_hash;
+
+ uint32_t _pad2;
+ /* order matches zink_gfx_input_key */
+ union {
+ struct {
+ unsigned idx:8;
+ bool uses_dynamic_stride;
+ };
+ uint32_t input;
+ };
+ uint32_t vertex_buffers_enabled_mask;
+ uint32_t vertex_strides[PIPE_MAX_ATTRIBS];
+ struct zink_vertex_elements_hw_state *element_state;
+ struct zink_zs_swizzle_key *shadow;
+ bool sample_locations_enabled;
+ enum mesa_prim shader_rast_prim, rast_prim; /* reduced type or max for unknown */
+ union {
+ struct {
+ struct zink_shader_key key[5];
+ struct zink_shader_key last_vertex;
+ } shader_keys;
+ struct {
+ union zink_shader_key_optimal key;
+ } shader_keys_optimal;
+ };
+ struct zink_blend_state *blend_state;
+ struct zink_render_pass *render_pass;
+ struct zink_render_pass *next_render_pass; //will be used next time rp is begun
+ VkFormat rendering_formats[PIPE_MAX_COLOR_BUFS];
+ VkPipelineRenderingCreateInfo rendering_info;
+ VkPipeline pipeline;
+ enum mesa_prim gfx_prim_mode; //pending mode
+};
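+
+/* Illustrative sketch only: the hash-key layout described above is typically
+ * exploited by hashing just the prefix of the struct that precedes the hash
+ * member, e.g. with a generic data hash such as _mesa_hash_data (whether that
+ * is the exact function used here is an assumption):
+ *
+ *    state->hash = _mesa_hash_data(state,
+ *                                  offsetof(struct zink_gfx_pipeline_state, hash));
+ */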
+
+struct zink_compute_pipeline_state {
+ /* Pre-hashed value for table lookup, invalid when zero.
+ * Members after this point are not included in pipeline state hash key */
+ uint32_t hash;
+ uint32_t final_hash;
+ bool dirty;
+ uint32_t local_size[3];
+ uint32_t variable_shared_mem;
+
+ uint32_t module_hash;
+ VkShaderModule module;
+ bool module_changed;
+
+ struct zink_shader_key key;
+
+ VkPipeline pipeline;
+};
+
+
+/** program types */
+
+/* create_gfx_pushconst must be kept in sync with this struct */
+struct zink_gfx_push_constant {
+ unsigned draw_mode_is_indexed;
+ unsigned draw_id;
+ unsigned framebuffer_is_layered;
+ float default_inner_level[2];
+ float default_outer_level[4];
+ uint32_t line_stipple_pattern;
+ float viewport_scale[2];
+ float line_width;
+};
+
+/* The order of the enums MUST match the order of the zink_gfx_push_constant
+ * members.
+ */
+enum zink_gfx_push_constant_member {
+ ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED,
+ ZINK_GFX_PUSHCONST_DRAW_ID,
+ ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED,
+ ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL,
+ ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL,
+ ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN,
+ ZINK_GFX_PUSHCONST_VIEWPORT_SCALE,
+ ZINK_GFX_PUSHCONST_LINE_WIDTH,
+ ZINK_GFX_PUSHCONST_MAX
+};
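+
+/* Illustrative sketch only: because the enum above mirrors the member order of
+ * struct zink_gfx_push_constant, an offset/size table built with offsetof()
+ * and sizeof() can be indexed by the enum when updating a single member via
+ * vkCmdPushConstants (the table name is hypothetical):
+ *
+ *    static const struct { size_t offset, size; } pushconst_layout[] = {
+ *       [ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED] =
+ *          { offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed),
+ *            sizeof(unsigned) },
+ *       [ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL] =
+ *          { offsetof(struct zink_gfx_push_constant, default_outer_level),
+ *            sizeof(float[4]) },
+ *       // ... one entry per member, in declaration order ...
+ *    };
+ */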
+
+/* a zink_shader_module allows a compiled shader variant to be reused directly
+ * between programs, e.g., in the case where only one shader is being swapped
+ * out, which allows the shader-key path to be skipped entirely
+ */
+struct zink_shader_module {
+ struct zink_shader_object obj;
+ uint32_t hash;
+ bool shobj;
+ bool default_variant;
+ bool has_nonseamless;
+ bool needs_zs_shader_swizzle;
+ uint8_t num_uniforms;
+ uint8_t key_size;
+ uint8_t key[0]; /* | key | uniforms | zs shader swizzle | */
+};
+
+struct zink_program {
+ struct pipe_reference reference;
+ struct zink_context *ctx;
+ unsigned char sha1[20];
+ struct util_queue_fence cache_fence;
+ struct u_rwlock pipeline_cache_lock;
+ VkPipelineCache pipeline_cache;
+ size_t pipeline_cache_size;
+ struct zink_batch_usage *batch_uses;
+ bool is_compute;
+ bool can_precompile;
+ bool uses_shobj; //whether shader objects are used; programs CANNOT mix shader objects and shader modules
+
+ struct zink_program_descriptor_data dd;
+
+ uint32_t compat_id;
+ VkPipelineLayout layout;
+ VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_ALL_TYPES]; // one for each type + push + bindless
+ unsigned num_dsl;
+
+ bool removed;
+};
+
+#define STAGE_MASK_OPTIMAL (1<<16)
+#define STAGE_MASK_OPTIMAL_SHADOW (1<<17)
+typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b);
+
+struct zink_gfx_library_key {
+ uint32_t optimal_key; //equals_pipeline_lib_optimal
+ VkShaderModule modules[ZINK_GFX_SHADER_COUNT];
+ VkPipeline pipeline;
+};
+
+struct zink_gfx_input_key {
+ union {
+ struct {
+ unsigned idx:8;
+ bool uses_dynamic_stride;
+ };
+ uint32_t input;
+ };
+ uint32_t vertex_buffers_enabled_mask;
+ uint32_t vertex_strides[PIPE_MAX_ATTRIBS];
+ struct zink_vertex_elements_hw_state *element_state;
+ VkPipeline pipeline;
+};
+
+struct zink_gfx_output_key {
+ /* order matches the start of zink_gfx_pipeline_state */
+ union {
+ struct {
+ unsigned force_persample_interp:1;
+ uint32_t rast_samples:6;
+ uint32_t min_samples:6;
+ uint32_t feedback_loop : 1;
+ uint32_t feedback_loop_zs : 1;
+ uint32_t rast_attachment_order : 1;
+ uint32_t rp_state : 16;
+ };
+ uint32_t key;
+ };
+
+ /* TODO: compress these */
+ VkSampleMask sample_mask;
+ uint32_t blend_id;
+ VkPipeline pipeline;
+};
+
+struct zink_gfx_pipeline_cache_entry {
+ struct zink_gfx_pipeline_state state;
+ VkPipeline pipeline;
+ struct zink_gfx_program *prog;
+ /* GPL only */
+ struct util_queue_fence fence;
+ union {
+ struct {
+ struct zink_gfx_input_key *ikey;
+ struct zink_gfx_library_key *gkey;
+ struct zink_gfx_output_key *okey;
+ VkPipeline unoptimized_pipeline;
+ } gpl;
+ struct zink_shader_object shobjs[ZINK_GFX_SHADER_COUNT];
+ };
+};
+
+struct zink_gfx_lib_cache {
+ /* for hashing */
+ struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT];
+ unsigned refcount;
+ bool removed; //once removed from cache
+ uint8_t stages_present;
+
+ simple_mtx_t lock;
+ struct set libs; //zink_gfx_library_key -> VkPipeline
+};
+
+struct zink_gfx_program {
+ struct zink_program base;
+
+ bool is_separable; //not a full program
+
+ uint32_t stages_present; //mask of stages present in this program
+ uint32_t stages_remaining; //mask of zink_shader remaining in this program
+ uint32_t gfx_hash; //from ctx->gfx_hash
+
+ struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT];
+ struct zink_shader *last_vertex_stage;
+ struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
+
+ /* full */
+ VkShaderEXT objects[ZINK_GFX_SHADER_COUNT];
+ uint32_t module_hash[ZINK_GFX_SHADER_COUNT];
+ struct blob blobs[ZINK_GFX_SHADER_COUNT];
+ struct util_dynarray shader_cache[ZINK_GFX_SHADER_COUNT][2][2]; //normal, nonseamless cubes, inline uniforms
+ unsigned inlined_variant_count[ZINK_GFX_SHADER_COUNT];
+ uint32_t default_variant_hash;
+ uint8_t inline_variants; //which stages are using inlined uniforms
+ bool needs_inlining; // whether this program requires some uniforms to be inlined
+ bool has_edgeflags;
+ bool optimal_keys;
+
+ /* separable */
+ struct zink_gfx_program *full_prog;
+
+ struct hash_table pipelines[2][11]; // [dynamic, renderpass][number of draw modes we support]
+ uint32_t last_variant_hash;
+
+ uint32_t last_finalized_hash[2][4]; //[dynamic, renderpass][primtype idx]
+ struct zink_gfx_pipeline_cache_entry *last_pipeline[2][4]; //[dynamic, renderpass][primtype idx]
+
+ struct zink_gfx_lib_cache *libs;
+};
+
+struct zink_compute_program {
+ struct zink_program base;
+
+ bool use_local_size;
+ bool has_variable_shared_mem;
+
+ unsigned scratch_size;
+
+ unsigned num_inlinable_uniforms;
+ nir_shader *nir; //only until precompile finishes
+
+ struct zink_shader_module *curr;
+
+ struct zink_shader_module *module; //base
+ struct util_dynarray shader_cache[2]; //nonseamless cubes, inline uniforms
+ unsigned inlined_variant_count;
+
+ struct zink_shader *shader;
+ struct hash_table pipelines;
+
+ simple_mtx_t cache_lock; //extra lock because threads are insane and sand was not meant to think
+
+ VkPipeline base_pipeline;
+};
+
+
+/** renderpass types */
+
+struct zink_rt_attrib {
+ VkFormat format;
+ VkSampleCountFlagBits samples;
+ bool clear_color;
+ union {
+ bool clear_stencil;
+ bool fbfetch;
+ };
+ bool invalid;
+ bool needs_write;
+ bool resolve;
+ bool feedback_loop;
+};
+
+struct zink_render_pass_state {
+ union {
+ struct {
+ uint8_t num_cbufs : 5; /* PIPE_MAX_COLOR_BUFS = 8 */
+ uint8_t have_zsbuf : 1;
+ uint8_t samples:1; //for fs samplemask
+ uint32_t num_zsresolves : 1;
+ uint32_t num_cresolves : 24; /* PIPE_MAX_COLOR_BUFS, but this is a struct hole */
+ };
+ uint32_t val; //for comparison
+ };
+ struct zink_rt_attrib rts[PIPE_MAX_COLOR_BUFS + 1];
+ unsigned num_rts;
+ uint32_t clears; //for extra verification and update flagging
+ uint16_t msaa_expand_mask;
+ uint16_t msaa_samples; //used with VK_EXT_multisampled_render_to_single_sampled
+};
+
+struct zink_pipeline_rt {
+ VkFormat format;
+ VkSampleCountFlagBits samples;
+};
+
+struct zink_render_pass_pipeline_state {
+ uint32_t num_attachments:14;
+ uint32_t msaa_samples : 8;
+ uint32_t fbfetch:1;
+ uint32_t color_read:1;
+ uint32_t depth_read:1;
+ uint32_t depth_write:1;
+ uint32_t num_cresolves:4;
+ uint32_t num_zsresolves:1;
+ bool samples:1; //for fs samplemask
+ struct zink_pipeline_rt attachments[PIPE_MAX_COLOR_BUFS + 1];
+ unsigned id;
+};
+
+struct zink_render_pass {
+ VkRenderPass render_pass;
+ struct zink_render_pass_state state;
+ unsigned pipeline_state;
+};
+
+
+/** resource types */
+struct zink_resource_object {
+ struct pipe_reference reference;
+
+ VkPipelineStageFlags access_stage;
+ VkAccessFlags access;
+ VkPipelineStageFlags unordered_access_stage;
+ VkAccessFlags unordered_access;
+ VkAccessFlags last_write;
+
+ /* 'access' is propagated from unordered_access to handle ops occurring
+ * in the ordered cmdbuf which can promote barriers to unordered
+ */
+ bool ordered_access_is_copied;
+ bool unordered_read;
+ bool unordered_write;
+ bool unsync_access;
+ bool copies_valid;
+ bool copies_need_reset; //for use with batch state resets
+
+ struct u_rwlock copy_lock;
+ struct util_dynarray copies[16]; //regions being copied to; for barrier omission
+
+ VkBuffer storage_buffer;
+ simple_mtx_t view_lock;
+ uint32_t view_prune_count; //how many views to prune
+ uint32_t view_prune_timeline; //when to prune
+ struct util_dynarray views;
+
+ union {
+ VkBuffer buffer;
+ VkImage image;
+ };
+ VkDeviceAddress bda;
+
+ VkSampleLocationsInfoEXT zs_evaluate;
+ bool needs_zs_evaluate;
+
+ bool storage_init; //layout was set for image
+ bool transfer_dst;
+ bool render_target;
+ bool is_buffer;
+ bool exportable;
+
+ /* TODO: this should be a union */
+ int handle;
+ struct zink_bo *bo;
+ // struct {
+ struct kopper_displaytarget *dt;
+ uint32_t dt_idx;
+ uint32_t last_dt_idx;
+ VkSemaphore present;
+ bool new_dt;
+ bool indefinite_acquire;
+ // }
+
+
+ VkDeviceSize offset, size, alignment;
+ uint64_t vkflags;
+ uint64_t vkusage;
+ VkFormatFeatureFlags vkfeats;
+ uint64_t modifier;
+ VkImageAspectFlags modifier_aspect;
+ VkSamplerYcbcrConversion sampler_conversion;
+ unsigned plane_offsets[3];
+ unsigned plane_strides[3];
+ unsigned plane_count;
+
+ bool host_visible;
+ bool coherent;
+ bool is_aux;
+};
+
+struct zink_resource {
+ struct threaded_resource base;
+
+ enum pipe_format internal_format:16;
+
+ struct zink_resource_object *obj;
+ uint32_t queue;
+ union {
+ struct {
+ struct util_range valid_buffer_range;
+ uint32_t vbo_bind_mask : PIPE_MAX_ATTRIBS;
+ uint8_t ubo_bind_count[2];
+ uint8_t ssbo_bind_count[2];
+ uint8_t vbo_bind_count;
+ uint8_t so_bind_count; //not counted in all_binds
+ bool so_valid;
+ uint32_t ubo_bind_mask[MESA_SHADER_STAGES];
+ uint32_t ssbo_bind_mask[MESA_SHADER_STAGES];
+ };
+ struct {
+ bool linear;
+ bool need_2D;
+ bool valid;
+ uint8_t fb_bind_count; //not counted in all_binds
+ uint16_t fb_binds; /* mask of attachment idx; zs is PIPE_MAX_COLOR_BUFS */
+ VkSparseImageMemoryRequirements sparse;
+ VkFormat format;
+ VkImageLayout layout;
+ VkImageAspectFlags aspect;
+ };
+ };
+ uint32_t sampler_binds[MESA_SHADER_STAGES];
+ uint32_t image_binds[MESA_SHADER_STAGES];
+ uint16_t sampler_bind_count[2]; //gfx, compute
+ uint16_t image_bind_count[2]; //gfx, compute
+ uint16_t write_bind_count[2]; //gfx, compute
+ union {
+ uint16_t bindless[2]; //tex, img
+ uint32_t all_bindless;
+ };
+ union {
+ uint16_t bind_count[2]; //gfx, compute
+ uint32_t all_binds;
+ };
+
+ VkPipelineStageFlagBits gfx_barrier;
+ VkAccessFlagBits barrier_access[2]; //gfx, compute
+
+ union {
+ struct {
+ struct hash_table bufferview_cache;
+ simple_mtx_t bufferview_mtx;
+ };
+ struct {
+ struct hash_table surface_cache;
+ simple_mtx_t surface_mtx;
+ };
+ };
+
+ VkRect2D damage;
+ bool use_damage;
+
+ bool copies_warned;
+ bool swapchain;
+ bool dmabuf;
+ unsigned dt_stride;
+
+ uint8_t modifiers_count;
+ uint64_t *modifiers;
+};
+
+static inline struct zink_resource *
+zink_resource(struct pipe_resource *r)
+{
+ return (struct zink_resource *)r;
+}
+
+
+struct zink_transfer {
+ struct threaded_transfer base;
+ struct pipe_resource *staging_res;
+ unsigned offset;
+ unsigned depthPitch;
+};
+
+
+/** screen types */
+struct zink_modifier_prop {
+ uint32_t drmFormatModifierCount;
+ VkDrmFormatModifierPropertiesEXT* pDrmFormatModifierProperties;
+};
+
+struct zink_format_props {
+ VkFormatFeatureFlags2 linearTilingFeatures;
+ VkFormatFeatureFlags2 optimalTilingFeatures;
+ VkFormatFeatureFlags2 bufferFeatures;
+};
+
+struct zink_screen {
+ struct pipe_screen base;
+
+ struct util_dl_library *loader_lib;
+ PFN_vkGetInstanceProcAddr vk_GetInstanceProcAddr;
+ PFN_vkGetDeviceProcAddr vk_GetDeviceProcAddr;
+
+ bool threaded;
+ bool threaded_submit;
+ bool is_cpu;
+ bool abort_on_hang;
+ bool frame_marker_emitted;
+ bool implicitly_loaded;
+ uint64_t curr_batch; //the current batch id
+ uint32_t last_finished;
+ VkSemaphore sem;
+ VkFence fence;
+ struct util_queue flush_queue;
+ simple_mtx_t copy_context_lock;
+ struct zink_context *copy_context;
+
+ struct zink_batch_state *free_batch_states; //unused batch states
+ struct zink_batch_state *last_free_batch_state; //for appending
+ simple_mtx_t free_batch_states_lock;
+
+ simple_mtx_t semaphores_lock;
+ struct util_dynarray semaphores;
+ struct util_dynarray fd_semaphores;
+
+ unsigned buffer_rebind_counter;
+ unsigned image_rebind_counter;
+ unsigned robust_ctx_count;
+
+ struct hash_table dts;
+ simple_mtx_t dt_lock;
+
+ bool device_lost;
+ int drm_fd;
+
+ struct slab_mempool present_mempool;
+ struct slab_parent_pool transfer_pool;
+ struct disk_cache *disk_cache;
+ struct util_queue cache_put_thread;
+ struct util_queue cache_get_thread;
+
+ /* there are 5 gfx stages, but VS and FS are assumed to always be present,
+ * thus only 3 stages need to be considered, giving 2^3 = 8 pipeline
+ * library caches.
+ */
+ struct set pipeline_libs[8];
+ simple_mtx_t pipeline_libs_lock[8];
+
+ simple_mtx_t desc_set_layouts_lock;
+ struct hash_table desc_set_layouts[ZINK_DESCRIPTOR_BASE_TYPES];
+ simple_mtx_t desc_pool_keys_lock;
+ struct set desc_pool_keys[ZINK_DESCRIPTOR_BASE_TYPES];
+ struct util_live_shader_cache shaders;
+
+ uint64_t db_size[ZINK_DESCRIPTOR_ALL_TYPES];
+ unsigned base_descriptor_size;
+ VkDescriptorSetLayout bindless_layout;
+
+ struct {
+ struct pb_cache bo_cache;
+ struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
+ unsigned min_alloc_size;
+ uint32_t next_bo_unique_id;
+ } pb;
+ uint8_t heap_map[ZINK_HEAP_MAX][VK_MAX_MEMORY_TYPES]; // mapping from zink heaps to memory type indices
+ uint8_t heap_count[ZINK_HEAP_MAX]; // number of memory types per zink heap
+ bool resizable_bar;
+
+ uint64_t total_video_mem;
+ uint64_t clamp_video_mem;
+ uint64_t total_mem;
+ uint64_t mapped_vram;
+
+ VkInstance instance;
+ struct zink_instance_info instance_info;
+
+ struct hash_table *debug_mem_sizes;
+ simple_mtx_t debug_mem_lock;
+
+ VkPhysicalDevice pdev;
+ uint32_t vk_version, spirv_version;
+ struct util_idalloc_mt buffer_ids;
+ struct util_vertex_state_cache vertex_state_cache;
+
+ struct zink_device_info info;
+ struct nir_shader_compiler_options nir_options;
+
+ bool optimal_keys;
+ bool have_full_ds3;
+ bool have_X8_D24_UNORM_PACK32;
+ bool have_D24_UNORM_S8_UINT;
+ bool have_D32_SFLOAT_S8_UINT;
+ bool have_triangle_fans;
+ bool need_decompose_attrs;
+ bool need_2D_zs;
+ bool need_2D_sparse;
+ bool can_hic_shader_read;
+
+ uint32_t gfx_queue;
+ uint32_t sparse_queue;
+ uint32_t max_queues;
+ uint32_t timestamp_valid_bits;
+ VkDevice dev;
+ VkQueue queue; //gfx+compute
+ VkQueue queue_sparse;
+ simple_mtx_t queue_lock;
+ VkDebugUtilsMessengerEXT debugUtilsCallbackHandle;
+
+ uint32_t cur_custom_border_color_samplers;
+
+ unsigned screen_id;
+
+#ifdef HAVE_RENDERDOC_APP_H
+ RENDERDOC_API_1_0_0 *renderdoc_api;
+ unsigned renderdoc_capture_start;
+ unsigned renderdoc_capture_end;
+ unsigned renderdoc_frame;
+ bool renderdoc_capturing;
+ bool renderdoc_capture_all;
+#endif
+
+ struct vk_uncompacted_dispatch_table vk;
+
+ void (*buffer_barrier)(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+ void (*image_barrier)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+ void (*image_barrier_unsync)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+
+ bool compact_descriptors; /**< toggled if descriptor set ids are compacted */
+ uint8_t desc_set_id[ZINK_MAX_DESCRIPTOR_SETS]; /**< converts enum zink_descriptor_type -> the actual set id */
+
+ struct {
+ bool dual_color_blend_by_location;
+ bool inline_uniforms;
+ bool emulate_point_smooth;
+ bool zink_shader_object_enable;
+ } driconf;
+
+ struct zink_format_props format_props[PIPE_FORMAT_COUNT];
+ struct zink_modifier_prop modifier_props[PIPE_FORMAT_COUNT];
+
+ VkExtent2D maxSampleLocationGridSize[5];
+ VkPipelineLayout gfx_push_constant_layout;
+
+ struct {
+ bool broken_l4a4;
+ /* https://gitlab.khronos.org/vulkan/vulkan/-/issues/3306
+ * HI TURNIP
+ */
+ bool broken_cache_semantics;
+ bool missing_a8_unorm;
+ bool implicit_sync;
+ bool disable_optimized_compile;
+ bool always_feedback_loop;
+ bool always_feedback_loop_zs;
+ bool needs_sanitised_layer;
+ bool track_renderpasses;
+ bool no_linestipple;
+ bool no_linesmooth;
+ bool no_hw_gl_point;
+ bool lower_robustImageAccess2;
+ bool needs_zs_shader_swizzle;
+ bool can_do_invalid_linear_modifier;
+ bool io_opt;
+ unsigned z16_unscaled_bias;
+ unsigned z24_unscaled_bias;
+ } driver_workarounds;
+};
+
+static inline struct zink_screen *
+zink_screen(struct pipe_screen *pipe)
+{
+ return (struct zink_screen *)pipe;
+}
+
+/** surface types */
+
+/* info for validating/creating imageless framebuffers */
+struct zink_surface_info {
+ VkImageCreateFlags flags;
+ VkImageUsageFlags usage;
+ uint32_t width;
+ uint32_t height;
+ uint32_t layerCount;
+ VkFormat format[2]; //base format, srgb format (for srgb framebuffer)
+};
+
+/* an imageview for a zink_resource:
+ * - may be a fb attachment, samplerview, or shader image
+ * - cached on the parent zink_resource_object
+ * - also handles swapchains
+ */
+struct zink_surface {
+ struct pipe_surface base;
+ /* all the info for creating a new imageview */
+ VkImageViewCreateInfo ivci;
+ VkImageViewUsageCreateInfo usage_info;
+ /* for framebuffer use */
+ struct zink_surface_info info;
+ bool is_swapchain;
+ /* the current imageview */
+ VkImageView image_view;
+ /* array of imageviews for swapchains, one for each image */
+ VkImageView *swapchain;
+ unsigned swapchain_size;
+ void *obj; //backing resource object; used to determine rebinds
+ void *dt_swapchain; //current swapchain object; used to determine swapchain rebinds
+ uint32_t hash; //for surface caching
+};
+
+/* wrapper object that preserves the gallium expectation of having
+ * pipe_surface::context match the context used to create the surface
+ */
+struct zink_ctx_surface {
+ struct pipe_surface base;
+ struct zink_surface *surf; //the actual surface
+ struct zink_ctx_surface *transient; //for use with EXT_multisample_render_to_texture
+ bool transient_init; //whether the transient surface has data
+ bool needs_mutable;
+};
+
+/* use this cast for framebuffer surfaces */
+static inline struct zink_surface *
+zink_csurface(struct pipe_surface *psurface)
+{
+ return psurface ? ((struct zink_ctx_surface *)psurface)->surf : NULL;
+}
+
+/* use this cast for checking transient framebuffer surfaces */
+static inline struct zink_surface *
+zink_transient_surface(struct pipe_surface *psurface)
+{
+ struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurface;
+ if (!csurf || !csurf->transient)
+    return NULL;
+ return csurf->transient->surf;
+}
+
+/* use this cast for internal surfaces */
+static inline struct zink_surface *
+zink_surface(struct pipe_surface *psurface)
+{
+ return (struct zink_surface *)psurface;
+}
+
+
+/** framebuffer types */
+struct zink_framebuffer_state {
+ uint32_t width;
+ uint16_t height;
+ uint32_t layers:6;
+ uint32_t samples:6;
+ uint32_t num_attachments:4;
+ struct zink_surface_info infos[PIPE_MAX_COLOR_BUFS + 1];
+};
+
+struct zink_framebuffer {
+ struct pipe_reference reference;
+
+ /* current objects */
+ VkFramebuffer fb;
+ struct zink_render_pass *rp;
+
+ struct zink_framebuffer_state state;
+ VkFramebufferAttachmentImageInfo infos[PIPE_MAX_COLOR_BUFS + 1];
+ struct hash_table objects;
+};
+
+
+/** context types */
+struct zink_sampler_state {
+ VkSampler sampler;
+ VkSampler sampler_clamped;
+ bool custom_border_color;
+ bool emulate_nonseamless;
+};
+
+struct zink_buffer_view {
+ struct pipe_reference reference;
+ struct pipe_resource *pres;
+ VkBufferViewCreateInfo bvci;
+ VkBufferView buffer_view;
+ uint32_t hash;
+};
+
+struct zink_sampler_view {
+ struct pipe_sampler_view base;
+ union {
+ struct zink_surface *image_view;
+ struct zink_buffer_view *buffer_view;
+ unsigned tbo_size;
+ };
+ struct zink_surface *cube_array;
+ /* Optional sampler view returning red (depth) in all channels, for shader rewrites. */
+ struct zink_surface *zs_view;
+ struct zink_zs_swizzle swizzle;
+};
+
+struct zink_image_view {
+ struct pipe_image_view base;
+ union {
+ struct zink_surface *surface;
+ struct zink_buffer_view *buffer_view;
+ };
+};
+
+static inline struct zink_sampler_view *
+zink_sampler_view(struct pipe_sampler_view *pview)
+{
+ return (struct zink_sampler_view *)pview;
+}
+
+struct zink_so_target {
+ struct pipe_stream_output_target base;
+ struct pipe_resource *counter_buffer;
+ VkDeviceSize counter_buffer_offset;
+ uint32_t stride;
+ bool counter_buffer_valid;
+};
+
+static inline struct zink_so_target *
+zink_so_target(struct pipe_stream_output_target *so_target)
+{
+ return (struct zink_so_target *)so_target;
+}
+
+struct zink_viewport_state {
+ struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS];
+ struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS];
+ uint8_t num_viewports;
+};
+
+struct zink_descriptor_db_info {
+ unsigned offset;
+ unsigned size;
+ enum pipe_format format;
+ struct pipe_resource *pres;
+};
+
+struct zink_descriptor_surface {
+ union {
+ struct zink_surface *surface;
+ struct zink_buffer_view *bufferview;
+ struct zink_descriptor_db_info db;
+ };
+ bool is_buffer;
+};
+
+struct zink_bindless_descriptor {
+ struct zink_descriptor_surface ds;
+ struct zink_sampler_state *sampler;
+ uint32_t handle;
+ uint32_t access; //PIPE_ACCESS_...
+};
+
+struct zink_rendering_info {
+ VkPipelineRenderingCreateInfo info;
+ unsigned id;
+};
+
+
+typedef void (*pipe_draw_vertex_state_func)(struct pipe_context *ctx,
+ struct pipe_vertex_state *vstate,
+ uint32_t partial_velem_mask,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws);
+typedef void (*pipe_launch_grid_func)(struct pipe_context *pipe, const struct pipe_grid_info *info);
+
+
+enum zink_ds3_state {
+ ZINK_DS3_RAST_STIPPLE,
+ ZINK_DS3_RAST_CLIP,
+ ZINK_DS3_RAST_CLAMP,
+ ZINK_DS3_RAST_POLYGON,
+ ZINK_DS3_RAST_HALFZ,
+ ZINK_DS3_RAST_PV,
+ ZINK_DS3_RAST_LINE,
+ ZINK_DS3_RAST_STIPPLE_ON,
+ ZINK_DS3_BLEND_A2C,
+ ZINK_DS3_BLEND_A21,
+ ZINK_DS3_BLEND_ON,
+ ZINK_DS3_BLEND_WRITE,
+ ZINK_DS3_BLEND_EQ,
+ ZINK_DS3_BLEND_LOGIC_ON,
+ ZINK_DS3_BLEND_LOGIC,
+};
+
+struct zink_context {
+ struct pipe_context base;
+ struct threaded_context *tc;
+ struct slab_child_pool transfer_pool;
+ struct slab_child_pool transfer_pool_unsync;
+ struct blitter_context *blitter;
+ struct util_debug_callback dbg;
+
+ unsigned flags;
+
+ pipe_draw_func draw_vbo[2]; //batch changed
+ pipe_draw_vertex_state_func draw_state[2]; //batch changed
+ pipe_launch_grid_func launch_grid[2]; //batch changed
+
+ struct pipe_device_reset_callback reset;
+
+ struct util_queue_fence unsync_fence; //unsignaled during unsync recording (blocks flush ops)
+ struct util_queue_fence flush_fence; //unsignaled during flush (blocks unsync ops)
+
+ struct zink_fence *deferred_fence;
+ struct zink_batch_state *last_batch_state; //the last command buffer submitted
+ struct zink_batch_state *batch_states; //list of submitted batch states: ordered by increasing timeline id
+ unsigned batch_states_count; //number of states in `batch_states`
+ struct zink_batch_state *free_batch_states; //unused batch states
+ struct zink_batch_state *last_free_batch_state; //for appending
+ bool oom_flush;
+ bool oom_stall;
+ bool track_renderpasses;
+ bool no_reorder;
+ struct zink_batch batch;
+
+ unsigned shader_has_inlinable_uniforms_mask;
+ unsigned inlinable_uniforms_valid_mask;
+
+ struct pipe_constant_buffer ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS];
+ struct pipe_shader_buffer ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS];
+ uint32_t writable_ssbos[MESA_SHADER_STAGES];
+ struct zink_image_view image_views[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES];
+
+ uint32_t transient_attachments;
+ struct pipe_framebuffer_state fb_state;
+ struct hash_table framebuffer_cache;
+
+ struct zink_vertex_elements_state *element_state;
+ struct zink_rasterizer_state *rast_state;
+ struct zink_depth_stencil_alpha_state *dsa_state;
+
+ bool pipeline_changed[2]; //gfx, compute
+
+ struct zink_shader *gfx_stages[ZINK_GFX_SHADER_COUNT];
+ struct zink_shader *last_vertex_stage;
+ bool shader_reads_drawid;
+ bool shader_reads_basevertex;
+ struct zink_gfx_pipeline_state gfx_pipeline_state;
+ /* there are 5 gfx stages, but VS and FS are assumed to be always present,
+ * thus only 3 stages need to be considered, giving 2^3 = 8 program caches.
+ */
+ struct hash_table program_cache[8];
+ simple_mtx_t program_lock[8];
+ uint32_t gfx_hash;
+ struct zink_gfx_program *curr_program;
+ struct set gfx_inputs;
+ struct set gfx_outputs;
+
+ struct zink_descriptor_data dd;
+
+ struct zink_compute_pipeline_state compute_pipeline_state;
+ struct zink_compute_program *curr_compute;
+
+ unsigned shader_stages : ZINK_GFX_SHADER_COUNT; /* mask of bound gfx shader stages */
+ uint8_t dirty_gfx_stages; /* mask of changed gfx shader stages */
+ bool last_vertex_stage_dirty;
+ bool compute_dirty;
+ bool is_generated_gs_bound;
+
+ struct {
+ VkRenderingAttachmentInfo attachments[PIPE_MAX_COLOR_BUFS + 2]; //+depth, +stencil
+ VkRenderingInfo info;
+ struct tc_renderpass_info tc_info;
+ } dynamic_fb;
+ uint32_t fb_layer_mismatch; //bitmask
+ unsigned depth_bias_scale_factor;
+ struct set rendering_state_cache[6]; //[util_logbase2_ceil(msrtss samplecount)]
+ struct set render_pass_state_cache;
+ struct hash_table *render_pass_cache;
+ VkExtent2D swapchain_size;
+ bool fb_changed;
+ bool rp_changed; //force renderpass restart
+ bool rp_layout_changed; //renderpass changed, maybe restart
+ bool rp_loadop_changed; //renderpass changed, don't restart
+ bool zsbuf_unused;
+ bool zsbuf_readonly;
+
+ struct zink_framebuffer *framebuffer;
+ struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1];
+ uint16_t clears_enabled;
+ uint16_t rp_clears_enabled;
+ uint16_t void_clears;
+ uint16_t fbfetch_outputs;
+ uint16_t feedback_loops;
+ struct zink_resource *needs_present;
+
+ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+ bool vertex_buffers_dirty;
+
+ struct zink_sampler_state *sampler_states[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *sampler_views[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS];
+
+ struct zink_viewport_state vp_state;
+ bool vp_state_changed;
+ bool scissor_changed;
+
+ float blend_constants[4];
+
+ bool sample_locations_changed;
+ VkSampleLocationEXT vk_sample_locations[PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE * PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE];
+ uint8_t sample_locations[2 * 4 * 8 * 16];
+
+ struct pipe_stencil_ref stencil_ref;
+
+ union {
+ struct {
+ float default_inner_level[2];
+ float default_outer_level[4];
+ };
+ float tess_levels[6];
+ };
+
+ struct zink_vk_query *curr_xfb_queries[PIPE_MAX_VERTEX_STREAMS];
+ struct zink_shader *null_fs;
+ struct zink_shader *saved_fs;
+
+ struct list_head query_pools;
+ struct list_head suspended_queries;
+ struct list_head primitives_generated_queries;
+ struct zink_query *vertices_query;
+ bool disable_fs;
+ bool disable_color_writes;
+ bool was_line_loop;
+ bool fs_query_active;
+ bool occlusion_query_active;
+ bool primitives_generated_active;
+ bool primitives_generated_suspended;
+ bool queries_disabled, render_condition_active;
+ bool queries_in_rp;
+ struct {
+ struct zink_query *query;
+ bool inverted;
+ bool active; //this is the internal vk state
+ } render_condition;
+ struct {
+ uint64_t render_passes;
+ } hud;
+
+ struct {
+ bool valid;
+ struct u_upload_mgr *upload[ZINK_DGC_MAX];
+ struct zink_resource *buffers[ZINK_DGC_MAX];
+ struct zink_gfx_program *last_prog;
+ uint8_t *maps[ZINK_DGC_MAX];
+ size_t bind_offsets[ZINK_DGC_MAX];
+ size_t cur_offsets[ZINK_DGC_MAX];
+ size_t max_size[ZINK_DGC_MAX];
+ struct util_dynarray pipelines;
+ struct util_dynarray tokens;
+ } dgc;
+
+ struct pipe_resource *dummy_vertex_buffer;
+ struct pipe_resource *dummy_xfb_buffer;
+ struct pipe_surface *dummy_surface[7];
+ struct zink_buffer_view *dummy_bufferview;
+
+ unsigned buffer_rebind_counter;
+ unsigned image_rebind_counter;
+
+ struct {
+ /* descriptor info */
+ uint8_t num_ubos[MESA_SHADER_STAGES];
+
+ uint8_t num_ssbos[MESA_SHADER_STAGES];
+ struct util_dynarray global_bindings;
+
+ VkDescriptorImageInfo textures[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS];
+ uint32_t emulate_nonseamless[MESA_SHADER_STAGES];
+ uint32_t cubes[MESA_SHADER_STAGES];
+ uint8_t num_samplers[MESA_SHADER_STAGES];
+ uint8_t num_sampler_views[MESA_SHADER_STAGES];
+
+ VkDescriptorImageInfo images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES];
+ uint8_t num_images[MESA_SHADER_STAGES];
+
+ union {
+ struct {
+ VkDescriptorBufferInfo ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS];
+ VkDescriptorBufferInfo ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS];
+ VkBufferView tbos[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS];
+ VkBufferView texel_images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES];
+ } t;
+ struct {
+ VkDescriptorAddressInfoEXT ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS];
+ VkDescriptorAddressInfoEXT ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS];
+ VkDescriptorAddressInfoEXT tbos[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS];
+ VkDescriptorAddressInfoEXT texel_images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES];
+ } db;
+ };
+
+ VkDescriptorImageInfo fbfetch;
+ uint8_t fbfetch_db[ZINK_FBFETCH_DESCRIPTOR_SIZE];
+
+ /* the current state of the zs swizzle data */
+ struct zink_zs_swizzle_key zs_swizzle[MESA_SHADER_STAGES];
+
+ struct zink_resource *descriptor_res[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS];
+
+ struct {
+ struct util_idalloc tex_slots; //img, buffer
+ struct util_idalloc img_slots; //img, buffer
+ struct hash_table tex_handles; //img, buffer
+ struct hash_table img_handles; //img, buffer
+ union {
+ struct {
+ VkBufferView *buffer_infos; //tex, img
+ } t;
+ struct {
+ VkDescriptorAddressInfoEXT *buffer_infos;
+ } db;
+ };
+ VkDescriptorImageInfo *img_infos; //tex, img
+ struct util_dynarray updates; //texture, img
+ struct util_dynarray resident; //texture, img
+ } bindless[2];
+ union {
+ bool bindless_dirty[2]; //tex, img
+ uint16_t any_bindless_dirty;
+ };
+ bool bindless_refs_dirty;
+ bool null_fbfetch_init;
+ } di;
+ void (*invalidate_descriptor_state)(struct zink_context *ctx, gl_shader_stage shader, enum zink_descriptor_type type, unsigned, unsigned);
+ struct set *need_barriers[2]; //gfx, compute
+ struct set update_barriers[2][2]; //[gfx, compute][current, next]
+ uint8_t barrier_set_idx[2];
+ unsigned memory_barrier;
+
+ uint32_t ds3_states;
+
+ uint32_t num_so_targets;
+ struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
+ bool dirty_so_targets;
+
+ bool gfx_dirty;
+
+ bool shobj_draw : 1; //using shader objects for draw
+ bool is_device_lost;
+ bool primitive_restart;
+ bool blitting : 1;
+ bool unordered_blitting : 1;
+ bool vertex_state_changed : 1;
+ bool blend_state_changed : 1;
+ bool blend_color_changed : 1;
+ bool sample_mask_changed : 1;
+ bool rast_state_changed : 1;
+ bool line_width_changed : 1;
+ bool dsa_state_changed : 1;
+ bool stencil_ref_changed : 1;
+ bool rasterizer_discard_changed : 1;
+ bool rp_tc_info_updated : 1;
+};
+
+static inline struct zink_context *
+zink_context(struct pipe_context *context)
+{
+ return (struct zink_context *)context;
+}
+
+#endif