summaryrefslogtreecommitdiff
path: root/src/compiler/shader_info.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler/shader_info.h')
-rw-r--r--src/compiler/shader_info.h236
1 files changed, 187 insertions, 49 deletions
diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index 91421db9b74..92d65f92133 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -26,6 +26,7 @@
#define SHADER_INFO_H
#include "util/bitset.h"
+#include "util/sha1/sha1.h"
#include "shader_enums.h"
#include <stdint.h>
@@ -33,11 +34,20 @@
extern "C" {
#endif
+#define MAX_XFB_BUFFERS 4
#define MAX_INLINABLE_UNIFORMS 4
struct spirv_supported_capabilities {
bool address;
+ bool amd_fragment_mask;
+ bool amd_gcn_shader;
+ bool amd_image_gather_bias_lod;
+ bool amd_image_read_write_lod;
+ bool amd_shader_ballot;
+ bool amd_shader_explicit_vertex_parameter;
+ bool amd_trinary_minmax;
bool atomic_storage;
+ bool cooperative_matrix;
bool demote_to_helper_invocation;
bool derivative_group;
bool descriptor_array_dynamic_indexing;
@@ -45,70 +55,78 @@ struct spirv_supported_capabilities {
bool descriptor_indexing;
bool device_group;
bool draw_parameters;
+ bool float_controls;
bool float16_atomic_add;
bool float16_atomic_min_max;
+ bool float16;
bool float32_atomic_add;
bool float32_atomic_min_max;
- bool float64;
bool float64_atomic_add;
bool float64_atomic_min_max;
- bool fragment_shader_sample_interlock;
+ bool float64;
+ bool fragment_barycentric;
+ bool fragment_density;
+ bool fragment_fully_covered;
bool fragment_shader_pixel_interlock;
+ bool fragment_shader_sample_interlock;
bool fragment_shading_rate;
bool generic_pointers;
bool geometry_streams;
+ bool groups;
+ bool image_atomic_int64;
bool image_ms_array;
bool image_read_without_format;
bool image_write_without_format;
- bool image_atomic_int64;
- bool int8;
bool int16;
- bool int64;
bool int64_atomics;
+ bool int64;
+ bool int8;
bool integer_functions2;
- bool kernel;
- bool kernel_image;
bool kernel_image_read_write;
+ bool kernel_image;
+ bool kernel;
+ bool linkage;
bool literal_sampler;
bool mesh_shading_nv;
+ bool mesh_shading;
bool min_lod;
bool multiview;
+ bool per_view_attributes_nv;
bool physical_storage_buffer_address;
bool post_depth_coverage;
bool printf;
- bool ray_tracing;
+ bool quad_control;
+ bool ray_cull_mask;
bool ray_query;
+ bool ray_tracing;
bool ray_traversal_primitive_culling;
+ bool ray_tracing_position_fetch;
bool runtime_descriptor_array;
- bool float_controls;
bool shader_clock;
+ bool shader_enqueue;
+ bool shader_sm_builtins_nv;
bool shader_viewport_index_layer;
+ bool shader_viewport_mask_nv;
bool sparse_residency;
bool stencil_export;
- bool storage_8bit;
bool storage_16bit;
+ bool storage_8bit;
bool storage_image_ms;
bool subgroup_arithmetic;
bool subgroup_ballot;
bool subgroup_basic;
+ bool subgroup_dispatch;
bool subgroup_quad;
+ bool subgroup_rotate;
bool subgroup_shuffle;
bool subgroup_uniform_control_flow;
bool subgroup_vote;
bool tessellation;
bool transform_feedback;
bool variable_pointers;
- bool vk_memory_model;
bool vk_memory_model_device_scope;
+ bool vk_memory_model;
bool workgroup_memory_explicit_layout;
- bool float16;
- bool amd_fragment_mask;
- bool amd_gcn_shader;
- bool amd_shader_ballot;
- bool amd_trinary_minmax;
- bool amd_image_read_write_lod;
- bool amd_shader_explicit_vertex_parameter;
- bool amd_image_gather_bias_lod;
bool intel_subgroup_shuffle;
bool intel_subgroup_buffer_block_io;
@@ -120,9 +138,12 @@ typedef struct shader_info {
/* Descriptive name provided by the client; may be NULL */
const char *label;
- /* Shader is internal, and should be ignored by things like NIR_PRINT */
+ /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
bool internal;
+ /* SHA1 of the original source, used by shader detection in drivers. */
+ uint8_t source_sha1[SHA1_DIGEST_LENGTH];
+
/** The shader stage, such as MESA_SHADER_VERTEX. */
gl_shader_stage stage:8;
@@ -148,6 +169,8 @@ typedef struct shader_info {
/* Which inputs are actually read */
uint64_t inputs_read;
+ /* Which inputs occupy 2 slots. */
+ uint64_t dual_slot_inputs;
/* Which outputs are actually written */
uint64_t outputs_written;
/* Which outputs are actually read */
@@ -161,6 +184,9 @@ typedef struct shader_info {
uint64_t per_primitive_inputs;
uint64_t per_primitive_outputs;
+ /* Which I/O is per-view */
+ uint64_t per_view_outputs;
+
/* Which 16-bit inputs and outputs are used corresponding to
* VARYING_SLOT_VARn_16BIT.
*/
@@ -187,20 +213,23 @@ typedef struct shader_info {
uint64_t patch_outputs_accessed_indirectly;
/** Bitfield of which textures are used */
- BITSET_DECLARE(textures_used, 32);
+ BITSET_DECLARE(textures_used, 128);
/** Bitfield of which textures are used by texelFetch() */
- BITSET_DECLARE(textures_used_by_txf, 32);
+ BITSET_DECLARE(textures_used_by_txf, 128);
+
+ /** Bitfield of which samplers are used */
+ BITSET_DECLARE(samplers_used, 32);
/** Bitfield of which images are used */
- uint32_t images_used;
+ BITSET_DECLARE(images_used, 64);
/** Bitfield of which images are buffers. */
- uint32_t image_buffers;
+ BITSET_DECLARE(image_buffers, 64);
/** Bitfield of which images are MSAA. */
- uint32_t msaa_images;
+ BITSET_DECLARE(msaa_images, 64);
/* SPV_KHR_float_controls: execution mode for floating point ops */
- uint16_t float_controls_execution_mode;
+ uint32_t float_controls_execution_mode;
/**
* Size of shared variables accessed by compute/task/mesh shaders.
@@ -208,10 +237,32 @@ typedef struct shader_info {
unsigned shared_size;
/**
+ * Size of task payload variables accessed by task/mesh shaders.
+ */
+ unsigned task_payload_size;
+
+ /**
+ * Number of ray tracing queries in the shader (counts all elements of all
+ * variables).
+ */
+ unsigned ray_queries;
+
+ /**
* Local workgroup size used by compute/task/mesh shaders.
*/
uint16_t workgroup_size[3];
+ enum gl_subgroup_size subgroup_size;
+ uint8_t num_subgroups;
+
+ /**
+ * Uses subgroup intrinsics which can communicate across a quad.
+ */
+ bool uses_wide_subgroup_intrinsics;
+
+ /* Transform feedback buffer strides in dwords, max. 1K - 4. */
+ uint8_t xfb_stride[MAX_XFB_BUFFERS];
+
uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
uint8_t num_inlinable_uniforms:4;
@@ -224,6 +275,9 @@ typedef struct shader_info {
/* Whether or not this shader ever uses textureGather() */
bool uses_texture_gather:1;
+ /* Whether texture size, levels, or samples is queried. */
+ bool uses_resource_info_query:1;
+
/**
* True if this shader uses the fddx/fddy opcodes.
*
@@ -231,6 +285,9 @@ typedef struct shader_info {
*/
bool uses_fddx_fddy:1;
+ /** Has divergence analysis ever been run? */
+ bool divergence_analysis_run:1;
+
/* Bitmask of bit-sizes used with ALU instructions. */
uint8_t bit_sizes_float;
uint8_t bit_sizes_int;
@@ -252,6 +309,11 @@ typedef struct shader_info {
*/
bool io_lowered:1;
+ /** Has nir_lower_var_copies called. To avoid calling any
+ * lowering/optimization that would introduce any copy_deref later.
+ */
+ bool var_copies_lowered:1;
+
/* Whether the shader writes memory, including transform feedback. */
bool writes_memory:1;
@@ -262,6 +324,9 @@ typedef struct shader_info {
bool uses_control_barrier : 1;
bool uses_memory_barrier : 1;
+ /* Whether ARB_bindless_texture ops or variables are used */
+ bool uses_bindless : 1;
+
/**
* Shared memory types have explicit layout set. Used for
* SPV_KHR_workgroup_storage_explicit_layout.
@@ -279,9 +344,38 @@ typedef struct shader_info {
bool workgroup_size_variable:1;
/**
- * Is this an ARB assembly-style program.
+ * Whether the shader uses printf instructions.
+ */
+ bool uses_printf:1;
+
+ /**
+ * VK_KHR_shader_maximal_reconvergence
+ */
+ bool maximally_reconverges:1;
+
+ /**
+ * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
+ *
+ * From the ARB_fragment_program specification:
+ *
+ * "The following rules apply to multiplication:
+ *
+ * 1. <x> * <y> == <y> * <x>, for all <x> and <y>.
+ * 2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
+ * *representable numbers (IEEE "not a number" and "infinity"
+ * *encodings may be exceptions).
+ * 3. +1.0 * <x> = <x>, for all <x>.""
+ *
+ * However, in effect this was due to DX9 semantics implying that 0*x=0 even
+ * for inf/nan if the hardware generated them instead of float_min/max. So,
+ * you should not have an exception for inf/nan to rule 2 above.
+ *
+ * One implementation of this behavior would be to flush all generated NaNs
+ * to zero, at which point 0*Inf=Nan=0. Most DX9/ARB-asm hardware did not
+ * generate NaNs, and the only way the GPU saw one was to possibly feed it
+ * in as a uniform.
*/
- bool is_arb_asm;
+ bool use_legacy_math_rules;
union {
struct {
@@ -295,6 +389,9 @@ typedef struct shader_info {
*/
uint8_t blit_sgprs_amd:4;
+ /* Software TES executing as HW VS */
+ bool tes_agx:1;
+
/* True if the shader writes position in window space coordinates pre-transform */
bool window_space_position:1;
@@ -303,11 +400,11 @@ typedef struct shader_info {
} vs;
struct {
- /** The output primitive type (GL enum value) */
- uint16_t output_primitive;
+ /** The output primitive type */
+ enum mesa_prim output_primitive;
- /** The input primitive type (GL enum value) */
- uint16_t input_primitive;
+ /** The input primitive type */
+ enum mesa_prim input_primitive;
/** The maximum number of vertices the geometry shader might write. */
uint16_t vertices_out;
@@ -329,21 +426,26 @@ typedef struct shader_info {
bool uses_discard:1;
bool uses_demote:1;
bool uses_fbfetch_output:1;
+ bool fbfetch_coherent:1;
bool color_is_dual_source:1;
/**
- * True if this fragment shader requires helper invocations. This
- * can be caused by the use of ALU derivative ops, texture
- * instructions which do implicit derivatives, and the use of quad
- * subgroup operations.
+ * True if this fragment shader requires full quad invocations.
*/
- bool needs_quad_helper_invocations:1;
+ bool require_full_quads:1;
/**
- * True if this fragment shader requires helper invocations for
- * all subgroup operations, not just quad ops and derivatives.
+ * Whether the derivative group must be equivalent to the quad group.
*/
- bool needs_all_helper_invocations:1;
+ bool quad_derivatives:1;
+
+ /**
+ * True if this fragment shader requires helper invocations. This
+ * can be caused by the use of ALU derivative ops, texture
+ * instructions which do implicit derivatives, the use of quad
+ * subgroup operations or if the shader requires full quads.
+ */
+ bool needs_quad_helper_invocations:1;
/**
* Whether any inputs are declared with the "sample" qualifier.
@@ -418,12 +520,19 @@ typedef struct shader_info {
* shader.
*/
unsigned advanced_blend_modes;
+
+ /**
+ * Defined by AMD_shader_early_and_late_fragment_tests.
+ */
+ bool early_and_late_fragment_tests:1;
+ enum gl_frag_stencil_layout stencil_front_layout:3;
+ enum gl_frag_stencil_layout stencil_back_layout:3;
} fs;
struct {
uint16_t workgroup_size_hint[3];
- uint8_t user_data_components_amd:3;
+ uint8_t user_data_components_amd:4;
/*
* Arrangement of invocations used to calculate derivatives in a compute
@@ -431,6 +540,17 @@ typedef struct shader_info {
*/
enum gl_derivative_group derivative_group:2;
+ /*
+ * If the shader might run with shared mem on top of `shared_size`.
+ */
+ bool has_variable_shared_mem:1;
+
+ /**
+ * If the shader has any use of a cooperative matrix. From
+ * SPV_KHR_cooperative_matrix.
+ */
+ bool has_cooperative_matrix:1;
+
/**
* pointer size is:
* AddressingModelLogical: 0 (default)
@@ -439,19 +559,23 @@ typedef struct shader_info {
*/
unsigned ptr_size;
- /**
- * Uses subgroup intrinsics which can communicate across a quad.
- */
- bool uses_wide_subgroup_intrinsics;
+ /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */
+ uint32_t shader_index;
+
+ /** Maximum size required by any output node payload array */
+ uint32_t node_payloads_size;
+
+ /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */
+ uint32_t workgroup_count[3];
} cs;
/* Applies to both TCS and TES. */
struct {
- uint16_t primitive_mode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
+ enum tess_primitive_mode _primitive_mode;
/** The number of vertices in the TCS output patch. */
uint8_t tcs_vertices_out;
- enum gl_tess_spacing spacing:2;
+ unsigned spacing:2; /*gl_tess_spacing*/
/** Is the vertex order counterclockwise? */
bool ccw:1;
@@ -468,11 +592,25 @@ typedef struct shader_info {
uint64_t tcs_cross_invocation_outputs_read;
} tess;
- /* Applies to MESH. */
+ /* Applies to MESH and TASK. */
struct {
+ /* Bit mask of MS outputs that are used
+ * with an index that is NOT the local invocation index.
+ */
+ uint64_t ms_cross_invocation_output_access;
+
+ /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT)
+ * when they are known compile-time constants.
+ * 0 means they are not known.
+ */
+ uint32_t ts_mesh_dispatch_dimensions[3];
+
uint16_t max_vertices_out;
uint16_t max_primitives_out;
- uint16_t primitive_type; /* GL_POINTS, GL_LINES or GL_TRIANGLES. */
+ enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */
+
+ /* TODO: remove this when we stop supporting NV_mesh_shader. */
+ bool nv;
} mesh;
};
} shader_info;