diff options
Diffstat (limited to 'src/freedreno/ir3/ir3_compiler.h')
-rw-r--r-- | src/freedreno/ir3/ir3_compiler.h | 156 |
1 files changed, 146 insertions, 10 deletions
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 0a13f0465df..77d36767deb 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -27,16 +27,56 @@ #ifndef IR3_COMPILER_H_ #define IR3_COMPILER_H_ +#include "compiler/nir/nir.h" #include "util/disk_cache.h" #include "util/log.h" +#include "util/perf/cpu_trace.h" #include "freedreno_dev_info.h" #include "ir3.h" +BEGINC; + struct ir3_ra_reg_set; struct ir3_shader; +struct ir3_compiler_options { + /* If true, UBO/SSBO accesses are assumed to be bounds-checked as defined by + * VK_EXT_robustness2 and optimizations may have to be more conservative. + */ + bool robust_buffer_access2; + + /* If true, promote UBOs (except for constant data) to constants using ldc.k + * in the preamble. The driver should ignore everything in ubo_state except + * for the constant data UBO, which is excluded because the command pushing + * constants for it can be pre-baked when compiling the shader. + */ + bool push_ubo_with_preamble; + + /* If true, disable the shader cache. The driver is then responsible for + * caching. + */ + bool disable_cache; + + /* If >= 0, this specifies the bindless descriptor set + descriptor to use + * for txf_ms_fb + */ + int bindless_fb_read_descriptor; + int bindless_fb_read_slot; + + /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */ + bool storage_16bit; + + /* If base_vertex should be lowered in nir */ + bool lower_base_vertex; + + bool shared_push_consts; + + /* "dual_color_blend_by_location" workaround is enabled: */ + bool dual_color_blend_by_location; +}; + struct ir3_compiler { struct fd_device *dev; const struct fd_dev_id *dev_id; @@ -45,16 +85,21 @@ struct ir3_compiler { struct disk_cache *disk_cache; - /* If true, UBO accesses are assumed to be bounds-checked as defined by - * VK_EXT_robustness2 and optimizations may have to be more conservative. + struct nir_shader_compiler_options nir_options; + + /* + * Configuration options for things handled differently by turnip vs + * gallium */ - bool robust_ubo_access; + struct ir3_compiler_options options; /* * Configuration options for things that are handled differently on * different generations: */ + bool is_64bit; + /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate * so we need to use ldlv.u32 to load the varying directly: */ @@ -151,37 +196,118 @@ struct ir3_compiler { /* The number of total branch stack entries, divided by wave_granularity. */ uint32_t branchstack_size; + /* The byte increment of MEMSIZEPERITEM, the private memory per-fiber allocation. */ + uint32_t pvtmem_per_fiber_align; + /* Whether clip+cull distances are supported */ bool has_clip_cull; /* Whether private memory is supported */ bool has_pvtmem; + /* Whether SSBOs have descriptors for sampling with ISAM */ + bool has_isam_ssbo; + /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */ bool storage_16bit; + + /* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle + * instructions are supported which are necessary to support + * subgroup quad and arithmetic operations. + */ + bool has_getfiberid; + + /* Number of available predicate registers (p0.c) */ + uint32_t num_predicates; + + /* True if bitops (and.b, or.b, xor.b, not.b) can write to p0.c */ + bool bitops_can_write_predicates; + + /* True if braa/brao are available. */ + bool has_branch_and_or; + + /* True if predt/predf/prede are supported. */ + bool has_predication; + + /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */ + uint32_t max_variable_workgroup_size; + + bool has_dp2acc; + bool has_dp4acc; + + /* Type to use for 1b nir bools: */ + type_t bool_type; + + /* Whether compute invocation params are passed in via shared regfile or + * constbuf. a5xx+ has the shared regfile. + */ + bool has_shared_regfile; + + /* True if preamble instructions (shps, shpe, etc.) are supported */ + bool has_preamble; + + /* Where the shared consts start in constants file, in vec4's. */ + uint16_t shared_consts_base_offset; + + /* The size of shared consts for CS and FS(in vec4's). + * Also the size that is actually used on geometry stages (on a6xx). + */ + uint64_t shared_consts_size; + + /* Found on a6xx for geometry stages, that is different from + * actually used shared consts. + * + * TODO: Keep an eye on this for next gens. + */ + uint64_t geom_shared_consts_size_quirk; + + bool has_fs_tex_prefetch; + + bool stsc_duplication_quirk; + + bool load_shader_consts_via_preamble; + bool load_inline_uniforms_via_preamble_ldgk; + + /* True if there is a scalar ALU capable of executing a subset of + * cat2-cat4 instructions with a shared register destination. This also + * implies expanded MOV/COV capability when writing to shared registers, + * as MOV/COV is now executed on the scalar ALU except when reading from a + * normal register, as well as the ability for ldc to write to a shared + * register. + */ + bool has_scalar_alu; }; void ir3_compiler_destroy(struct ir3_compiler *compiler); struct ir3_compiler *ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, - bool robust_ubo_access); + const struct fd_dev_info *dev_info, + const struct ir3_compiler_options *options); void ir3_disk_cache_init(struct ir3_compiler *compiler); void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, struct ir3_shader *shader); -bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler, +struct ir3_shader_variant *ir3_retrieve_variant(struct blob_reader *blob, + struct ir3_compiler *compiler, + void *mem_ctx); +void ir3_store_variant(struct blob *blob, const struct ir3_shader_variant *v); +bool ir3_disk_cache_retrieve(struct ir3_shader *shader, struct ir3_shader_variant *v); -void ir3_disk_cache_store(struct ir3_compiler *compiler, +void ir3_disk_cache_store(struct ir3_shader *shader, struct ir3_shader_variant *v); +const nir_shader_compiler_options * +ir3_get_compiler_options(struct ir3_compiler *compiler); + int ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader *shader, struct ir3_shader_variant *so); /* gpu pointer size in units of 32bit registers/slots */ static inline unsigned ir3_pointer_size(struct ir3_compiler *compiler) { - return fd_dev_64b(compiler->dev_id) ? 2 : 1; + return compiler->is_64bit ? 2 : 1; } enum ir3_shader_debug { @@ -198,8 +324,12 @@ enum ir3_shader_debug { IR3_DBG_NOFP16 = BITFIELD_BIT(10), IR3_DBG_NOCACHE = BITFIELD_BIT(11), IR3_DBG_SPILLALL = BITFIELD_BIT(12), + IR3_DBG_NOPREAMBLE = BITFIELD_BIT(13), + IR3_DBG_SHADER_INTERNAL = BITFIELD_BIT(14), + IR3_DBG_FULLSYNC = BITFIELD_BIT(15), + IR3_DBG_FULLNOP = BITFIELD_BIT(16), - /* DEBUG-only options: */ + /* MESA_DEBUG-only options: */ IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), IR3_DBG_RAMSGS = BITFIELD_BIT(21), @@ -211,8 +341,11 @@ extern enum ir3_shader_debug ir3_shader_debug; extern const char *ir3_shader_override_path; static inline bool -shader_debug_enabled(gl_shader_stage type) +shader_debug_enabled(gl_shader_stage type, bool internal) { + if (internal) + return !!(ir3_shader_debug & IR3_DBG_SHADER_INTERNAL); + if (ir3_shader_debug & IR3_DBG_DISASM) return true; @@ -228,9 +361,10 @@ shader_debug_enabled(gl_shader_stage type) case MESA_SHADER_FRAGMENT: return !!(ir3_shader_debug & IR3_DBG_SHADER_FS); case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return !!(ir3_shader_debug & IR3_DBG_SHADER_CS); default: - debug_assert(0); + assert(0); return false; } } @@ -244,4 +378,6 @@ ir3_debug_print(struct ir3 *ir, const char *when) } } +ENDC; + #endif /* IR3_COMPILER_H_ */ |