1 files changed, 187 insertions, 49 deletions
diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index 91421db9b74..92d65f92133 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -26,6 +26,7 @@
 #define SHADER_INFO_H
 
 #include "util/bitset.h"
+#include "util/sha1/sha1.h"
 #include "shader_enums.h"
 #include <stdint.h>
 
@@ -33,11 +34,20 @@
 extern "C" {
 #endif
 
+#define MAX_XFB_BUFFERS        4
 #define MAX_INLINABLE_UNIFORMS 4
 
 struct spirv_supported_capabilities {
    bool address;
+   bool amd_fragment_mask;
+   bool amd_gcn_shader;
+   bool amd_image_gather_bias_lod;
+   bool amd_image_read_write_lod;
+   bool amd_shader_ballot;
+   bool amd_shader_explicit_vertex_parameter;
+   bool amd_trinary_minmax;
    bool atomic_storage;
+   bool cooperative_matrix;
    bool demote_to_helper_invocation;
    bool derivative_group;
    bool descriptor_array_dynamic_indexing;
@@ -45,70 +55,78 @@ struct spirv_supported_capabilities {
    bool descriptor_indexing;
    bool device_group;
    bool draw_parameters;
+   bool float_controls;
    bool float16_atomic_add;
    bool float16_atomic_min_max;
+   bool float16;
    bool float32_atomic_add;
    bool float32_atomic_min_max;
-   bool float64;
    bool float64_atomic_add;
    bool float64_atomic_min_max;
-   bool fragment_shader_sample_interlock;
+   bool float64;
+   bool fragment_barycentric;
+   bool fragment_density;
+   bool fragment_fully_covered;
    bool fragment_shader_pixel_interlock;
+   bool fragment_shader_sample_interlock;
    bool fragment_shading_rate;
    bool generic_pointers;
    bool geometry_streams;
+   bool groups;
+   bool image_atomic_int64;
    bool image_ms_array;
    bool image_read_without_format;
    bool image_write_without_format;
-   bool image_atomic_int64;
-   bool int8;
    bool int16;
-   bool int64;
    bool int64_atomics;
+   bool int64;
+   bool int8;
    bool integer_functions2;
-   bool kernel;
-   bool kernel_image;
    bool kernel_image_read_write;
+   bool kernel_image;
+   bool kernel;
+   bool linkage;
    bool literal_sampler;
    bool mesh_shading_nv;
+   bool mesh_shading;
    bool min_lod;
    bool multiview;
+   bool per_view_attributes_nv;
    bool physical_storage_buffer_address;
    bool post_depth_coverage;
    bool printf;
-   bool ray_tracing;
+   bool quad_control;
+   bool ray_cull_mask;
    bool ray_query;
+   bool ray_tracing;
    bool ray_traversal_primitive_culling;
+   bool ray_tracing_position_fetch;
    bool runtime_descriptor_array;
-   bool float_controls;
    bool shader_clock;
+   bool shader_enqueue;
+   bool shader_sm_builtins_nv;
    bool shader_viewport_index_layer;
+   bool shader_viewport_mask_nv;
    bool sparse_residency;
    bool stencil_export;
-   bool storage_8bit;
    bool storage_16bit;
+   bool storage_8bit;
    bool storage_image_ms;
    bool subgroup_arithmetic;
    bool subgroup_ballot;
    bool subgroup_basic;
+   bool subgroup_dispatch;
    bool subgroup_quad;
+   bool subgroup_rotate;
    bool subgroup_shuffle;
    bool subgroup_uniform_control_flow;
    bool subgroup_vote;
    bool tessellation;
    bool transform_feedback;
    bool variable_pointers;
-   bool vk_memory_model;
    bool vk_memory_model_device_scope;
+   bool vk_memory_model;
    bool workgroup_memory_explicit_layout;
-   bool float16;
-   bool amd_fragment_mask;
-   bool amd_gcn_shader;
-   bool amd_shader_ballot;
-   bool amd_trinary_minmax;
-   bool amd_image_read_write_lod;
-   bool amd_shader_explicit_vertex_parameter;
-   bool amd_image_gather_bias_lod;
 
    bool intel_subgroup_shuffle;
    bool intel_subgroup_buffer_block_io;
@@ -120,9 +138,12 @@ typedef struct shader_info {
    /* Descriptive name provided by the client; may be NULL */
    const char *label;
 
-   /* Shader is internal, and should be ignored by things like NIR_PRINT */
+   /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
    bool internal;
 
+   /* SHA1 of the original source, used by shader detection in drivers. */
+   uint8_t source_sha1[SHA1_DIGEST_LENGTH];
+
    /** The shader stage, such as MESA_SHADER_VERTEX. */
    gl_shader_stage stage:8;
 
@@ -148,6 +169,8 @@ typedef struct shader_info {
 
    /* Which inputs are actually read */
    uint64_t inputs_read;
+   /* Which inputs occupy 2 slots. */
+   uint64_t dual_slot_inputs;
    /* Which outputs are actually written */
    uint64_t outputs_written;
    /* Which outputs are actually read */
@@ -161,6 +184,9 @@ typedef struct shader_info {
    uint64_t per_primitive_inputs;
    uint64_t per_primitive_outputs;
 
+   /* Which I/O is per-view */
+   uint64_t per_view_outputs;
+
    /* Which 16-bit inputs and outputs are used corresponding to
     * VARYING_SLOT_VARn_16BIT.
     */
@@ -187,20 +213,23 @@ typedef struct shader_info {
    uint64_t patch_outputs_accessed_indirectly;
 
    /** Bitfield of which textures are used */
-   BITSET_DECLARE(textures_used, 32);
+   BITSET_DECLARE(textures_used, 128);
 
    /** Bitfield of which textures are used by texelFetch() */
-   BITSET_DECLARE(textures_used_by_txf, 32);
+   BITSET_DECLARE(textures_used_by_txf, 128);
+
+   /** Bitfield of which samplers are used */
+   BITSET_DECLARE(samplers_used, 32);
 
    /** Bitfield of which images are used */
-   uint32_t images_used;
+   BITSET_DECLARE(images_used, 64);
    /** Bitfield of which images are buffers. */
-   uint32_t image_buffers;
+   BITSET_DECLARE(image_buffers, 64);
    /** Bitfield of which images are MSAA. */
-   uint32_t msaa_images;
+   BITSET_DECLARE(msaa_images, 64);
 
    /* SPV_KHR_float_controls: execution mode for floating point ops */
-   uint16_t float_controls_execution_mode;
+   uint32_t float_controls_execution_mode;
 
    /**
     * Size of shared variables accessed by compute/task/mesh shaders.
@@ -208,10 +237,32 @@ typedef struct shader_info {
    unsigned shared_size;
 
    /**
+    * Size of task payload variables accessed by task/mesh shaders.
+    */
+   unsigned task_payload_size;
+
+   /**
+    * Number of ray tracing queries in the shader (counts all elements of all
+    * variables).
+    */
+   unsigned ray_queries;
+
+   /**
     * Local workgroup size used by compute/task/mesh shaders.
     */
    uint16_t workgroup_size[3];
 
+   enum gl_subgroup_size subgroup_size;
+   uint8_t num_subgroups;
+
+   /**
+    * Uses subgroup intrinsics which can communicate across a quad.
+    */
+   bool uses_wide_subgroup_intrinsics;
+
+   /* Transform feedback buffer strides in dwords, max. 1K - 4. */
+   uint8_t xfb_stride[MAX_XFB_BUFFERS];
+
    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
    uint8_t num_inlinable_uniforms:4;
 
@@ -224,6 +275,9 @@ typedef struct shader_info {
    /* Whether or not this shader ever uses textureGather() */
    bool uses_texture_gather:1;
 
+   /* Whether texture size, levels, or samples is queried. */
+   bool uses_resource_info_query:1;
+
    /**
     * True if this shader uses the fddx/fddy opcodes.
     *
@@ -231,6 +285,9 @@ typedef struct shader_info {
     */
    bool uses_fddx_fddy:1;
 
+   /** Has divergence analysis ever been run? */
+   bool divergence_analysis_run:1;
+
    /* Bitmask of bit-sizes used with ALU instructions. */
    uint8_t bit_sizes_float;
    uint8_t bit_sizes_int;
@@ -252,6 +309,11 @@ typedef struct shader_info {
     */
    bool io_lowered:1;
 
+   /** Has nir_lower_var_copies called. To avoid calling any
+    * lowering/optimization that would introduce any copy_deref later.
+    */
+   bool var_copies_lowered:1;
+
    /* Whether the shader writes memory, including transform feedback. */
    bool writes_memory:1;
 
@@ -262,6 +324,9 @@ typedef struct shader_info {
    bool uses_control_barrier : 1;
    bool uses_memory_barrier : 1;
 
+   /* Whether ARB_bindless_texture ops or variables are used */
+   bool uses_bindless : 1;
+
    /**
     * Shared memory types have explicit layout set.  Used for
     * SPV_KHR_workgroup_storage_explicit_layout.
@@ -279,9 +344,38 @@ typedef struct shader_info {
    bool workgroup_size_variable:1;
 
    /**
-     * Is this an ARB assembly-style program.
+    * Whether the shader uses printf instructions.
+    */
+   bool uses_printf:1;
+
+   /**
+    * VK_KHR_shader_maximal_reconvergence
+    */
+   bool maximally_reconverges:1;
+
+   /**
+     * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
+     *
+     * From the ARB_fragment_program specification:
+     *
+     *    "The following rules apply to multiplication:
+     *
+     *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
+     *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
+     *         *representable numbers (IEEE "not a number" and "infinity"
+     *         *encodings may be exceptions).
+     *      3. +1.0 * <x> = <x>, for all <x>.""
+     *
+     * However, in effect this was due to DX9 semantics implying that 0*x=0 even
+     * for inf/nan if the hardware generated them instead of float_min/max.  So,
+     * you should not have an exception for inf/nan to rule 2 above.
+     *
+     * One implementation of this behavior would be to flush all generated NaNs
+     * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
+     * generate NaNs, and the only way the GPU saw one was to possibly feed it
+     * in as a uniform.
      */
-   bool is_arb_asm;
+   bool use_legacy_math_rules;
 
    union {
       struct {
@@ -295,6 +389,9 @@ typedef struct shader_info {
           */
          uint8_t blit_sgprs_amd:4;
 
+         /* Software TES executing as HW VS */
+         bool tes_agx:1;
+
          /* True if the shader writes position in window space coordinates pre-transform */
          bool window_space_position:1;
 
@@ -303,11 +400,11 @@ typedef struct shader_info {
       } vs;
 
       struct {
-         /** The output primitive type (GL enum value) */
-         uint16_t output_primitive;
+         /** The output primitive type */
+         enum mesa_prim output_primitive;
 
-         /** The input primitive type (GL enum value) */
-         uint16_t input_primitive;
+         /** The input primitive type */
+         enum mesa_prim input_primitive;
 
          /** The maximum number of vertices the geometry shader might write. */
          uint16_t vertices_out;
@@ -329,21 +426,26 @@ typedef struct shader_info {
          bool uses_discard:1;
          bool uses_demote:1;
          bool uses_fbfetch_output:1;
+         bool fbfetch_coherent:1;
          bool color_is_dual_source:1;
 
          /**
-          * True if this fragment shader requires helper invocations.  This
-          * can be caused by the use of ALU derivative ops, texture
-          * instructions which do implicit derivatives, and the use of quad
-          * subgroup operations.
+          * True if this fragment shader requires full quad invocations.
           */
-         bool needs_quad_helper_invocations:1;
+         bool require_full_quads:1;
 
          /**
-          * True if this fragment shader requires helper invocations for
-          * all subgroup operations, not just quad ops and derivatives.
+          * Whether the derivative group must be equivalent to the quad group.
           */
-         bool needs_all_helper_invocations:1;
+         bool quad_derivatives:1;
+
+         /**
+          * True if this fragment shader requires helper invocations.  This
+          * can be caused by the use of ALU derivative ops, texture
+          * instructions which do implicit derivatives, the use of quad
+          * subgroup operations or if the shader requires full quads.
+          */
+         bool needs_quad_helper_invocations:1;
 
          /**
           * Whether any inputs are declared with the "sample" qualifier.
@@ -418,12 +520,19 @@ typedef struct shader_info {
           * shader.
           */
          unsigned advanced_blend_modes;
+
+         /**
+          * Defined by AMD_shader_early_and_late_fragment_tests.
+          */
+         bool early_and_late_fragment_tests:1;
+         enum gl_frag_stencil_layout stencil_front_layout:3;
+         enum gl_frag_stencil_layout stencil_back_layout:3;
       } fs;
 
       struct {
          uint16_t workgroup_size_hint[3];
 
-         uint8_t user_data_components_amd:3;
+         uint8_t user_data_components_amd:4;
 
          /*
           * Arrangement of invocations used to calculate derivatives in a compute
@@ -431,6 +540,17 @@ typedef struct shader_info {
           */
          enum gl_derivative_group derivative_group:2;
 
+         /*
+          * If the shader might run with shared mem on top of `shared_size`.
+          */
+         bool has_variable_shared_mem:1;
+
+         /**
+          * If the shader has any use of a cooperative matrix. From
+          * SPV_KHR_cooperative_matrix.
+          */
+         bool has_cooperative_matrix:1;
+
          /**
           * pointer size is:
           *   AddressingModelLogical:    0    (default)
@@ -439,19 +559,23 @@ typedef struct shader_info {
           */
          unsigned ptr_size;
 
-         /**
-          * Uses subgroup intrinsics which can communicate across a quad.
-          */
-         bool uses_wide_subgroup_intrinsics;
+         /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */
+         uint32_t shader_index;
+
+         /** Maximum size required by any output node payload array */
+         uint32_t node_payloads_size;
+
+         /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */
+         uint32_t workgroup_count[3];
       } cs;
 
       /* Applies to both TCS and TES. */
       struct {
-         uint16_t primitive_mode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
+         enum tess_primitive_mode _primitive_mode;
 
          /** The number of vertices in the TCS output patch. */
          uint8_t tcs_vertices_out;
-         enum gl_tess_spacing spacing:2;
+         unsigned spacing:2; /*gl_tess_spacing*/
 
          /** Is the vertex order counterclockwise? */
          bool ccw:1;
@@ -468,11 +592,25 @@ typedef struct shader_info {
          uint64_t tcs_cross_invocation_outputs_read;
       } tess;
 
-      /* Applies to MESH. */
+      /* Applies to MESH and TASK. */
       struct {
+         /* Bit mask of MS outputs that are used
+          * with an index that is NOT the local invocation index.
+          */
+         uint64_t ms_cross_invocation_output_access;
+
+         /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT)
+          * when they are known compile-time constants.
+          * 0 means they are not known.
+          */
+         uint32_t ts_mesh_dispatch_dimensions[3];
+
          uint16_t max_vertices_out;
          uint16_t max_primitives_out;
-         uint16_t primitive_type;  /* GL_POINTS, GL_LINES or GL_TRIANGLES. */
+         enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */
+
+         /* TODO: remove this when we stop supporting NV_mesh_shader. */
+         bool nv;
       } mesh;
    };
 } shader_info;