Diffstat (limited to 'src/freedreno/ir3/ir3_compiler.h')
-rw-r--r--  src/freedreno/ir3/ir3_compiler.h | 156
1 file changed, 146 insertions(+), 10 deletions(-)
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 0a13f0465df..77d36767deb 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -27,16 +27,56 @@
#ifndef IR3_COMPILER_H_
#define IR3_COMPILER_H_
+#include "compiler/nir/nir.h"
#include "util/disk_cache.h"
#include "util/log.h"
+#include "util/perf/cpu_trace.h"
#include "freedreno_dev_info.h"
#include "ir3.h"
+BEGINC;
+
struct ir3_ra_reg_set;
struct ir3_shader;
+struct ir3_compiler_options {
+ /* If true, UBO/SSBO accesses are assumed to be bounds-checked as defined by
+ * VK_EXT_robustness2 and optimizations may have to be more conservative.
+ */
+ bool robust_buffer_access2;
+
+ /* If true, promote UBOs (except for constant data) to constants using ldc.k
+ * in the preamble. The driver should ignore everything in ubo_state except
+ * for the constant data UBO, which is excluded because the command pushing
+ * constants for it can be pre-baked when compiling the shader.
+ */
+ bool push_ubo_with_preamble;
+
+ /* If true, disable the shader cache. The driver is then responsible for
+ * caching.
+ */
+ bool disable_cache;
+
+ /* If >= 0, this specifies the bindless descriptor set + descriptor to use
+ * for txf_ms_fb
+ */
+ int bindless_fb_read_descriptor;
+ int bindless_fb_read_slot;
+
+ /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
+ bool storage_16bit;
+
+ /* Whether base_vertex should be lowered in NIR. */
+ bool lower_base_vertex;
+
+ /* True if push constants are uploaded via the shared consts area instead of
+  * per-stage consts.
+  */
+ bool shared_push_consts;
+
+ /* "dual_color_blend_by_location" workaround is enabled: */
+ bool dual_color_blend_by_location;
+};
+
struct ir3_compiler {
struct fd_device *dev;
const struct fd_dev_id *dev_id;
@@ -45,16 +85,21 @@ struct ir3_compiler {
struct disk_cache *disk_cache;
- /* If true, UBO accesses are assumed to be bounds-checked as defined by
- * VK_EXT_robustness2 and optimizations may have to be more conservative.
+ struct nir_shader_compiler_options nir_options;
+
+ /*
+ * Configuration options for things handled differently by turnip vs
+ * gallium
*/
- bool robust_ubo_access;
+ struct ir3_compiler_options options;
/*
* Configuration options for things that are handled differently on
* different generations:
*/
+ /* True if the GPU uses 64-bit pointers (see ir3_pointer_size()). */
+ bool is_64bit;
+
/* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
@@ -151,37 +196,118 @@ struct ir3_compiler {
/* The number of total branch stack entries, divided by wave_granularity. */
uint32_t branchstack_size;
+ /* The byte increment of MEMSIZEPERITEM, the private memory per-fiber allocation. */
+ uint32_t pvtmem_per_fiber_align;
+
/* Whether clip+cull distances are supported */
bool has_clip_cull;
/* Whether private memory is supported */
bool has_pvtmem;
+ /* Whether SSBOs have descriptors for sampling with ISAM */
+ bool has_isam_ssbo;
+
/* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
bool storage_16bit;
+
+ /* True if the getfiberid, getlast.w8, brcst.active, and quad_shuffle
+  * instructions are supported, which is necessary for subgroup quad and
+  * arithmetic operations.
+ */
+ bool has_getfiberid;
+
+ /* Number of available predicate registers (p0.c) */
+ uint32_t num_predicates;
+
+ /* True if bitops (and.b, or.b, xor.b, not.b) can write to p0.c */
+ bool bitops_can_write_predicates;
+
+ /* True if braa/brao are available. */
+ bool has_branch_and_or;
+
+ /* True if predt/predf/prede are supported. */
+ bool has_predication;
+
+ /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */
+ uint32_t max_variable_workgroup_size;
+
+ bool has_dp2acc;
+ bool has_dp4acc;
+
+ /* Type to use for 1b nir bools: */
+ type_t bool_type;
+
+ /* Whether compute invocation params are passed in via shared regfile or
+ * constbuf. a5xx+ has the shared regfile.
+ */
+ bool has_shared_regfile;
+
+ /* True if preamble instructions (shps, shpe, etc.) are supported */
+ bool has_preamble;
+
+ /* Where the shared consts start in constants file, in vec4's. */
+ uint16_t shared_consts_base_offset;
+
+ /* The size of shared consts for CS and FS (in vec4's).
+ * Also the size that is actually used on geometry stages (on a6xx).
+ */
+ uint64_t shared_consts_size;
+
+ /* Shared consts size found on a6xx for geometry stages, which differs from
+  * the shared consts size that is actually used.
+ *
+ * TODO: Keep an eye on this for next gens.
+ */
+ uint64_t geom_shared_consts_size_quirk;
+
+ bool has_fs_tex_prefetch;
+
+ bool stsc_duplication_quirk;
+
+ bool load_shader_consts_via_preamble;
+ bool load_inline_uniforms_via_preamble_ldgk;
+
+ /* True if there is a scalar ALU capable of executing a subset of
+ * cat2-cat4 instructions with a shared register destination. This also
+ * implies expanded MOV/COV capability when writing to shared registers,
+ * as MOV/COV is now executed on the scalar ALU except when reading from a
+ * normal register, as well as the ability for ldc to write to a shared
+ * register.
+ */
+ bool has_scalar_alu;
};
void ir3_compiler_destroy(struct ir3_compiler *compiler);
struct ir3_compiler *ir3_compiler_create(struct fd_device *dev,
const struct fd_dev_id *dev_id,
- bool robust_ubo_access);
+ const struct fd_dev_info *dev_info,
+ const struct ir3_compiler_options *options);
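
With the signature change, drivers now hand the compiler a fd_dev_info plus an
ir3_compiler_options block instead of a single robust_ubo_access flag. A rough
usage sketch (the option values and the dev/dev_id/dev_info handles are
illustrative assumptions, not part of this header):

    struct ir3_compiler_options options = {
       .robust_buffer_access2 = true,        /* e.g. VK_EXT_robustness2 enabled */
       .push_ubo_with_preamble = true,
       .bindless_fb_read_descriptor = -1,    /* < 0: no bindless txf_ms_fb descriptor */
       .bindless_fb_read_slot = -1,
    };

    struct ir3_compiler *compiler =
       ir3_compiler_create(dev, dev_id, dev_info, &options);
    if (!compiler)
       return NULL;   /* hypothetical error path */
    /* ... compile shaders ... */
    ir3_compiler_destroy(compiler);
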
void ir3_disk_cache_init(struct ir3_compiler *compiler);
void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
struct ir3_shader *shader);
-bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
+struct ir3_shader_variant *ir3_retrieve_variant(struct blob_reader *blob,
+ struct ir3_compiler *compiler,
+ void *mem_ctx);
+void ir3_store_variant(struct blob *blob, const struct ir3_shader_variant *v);
+bool ir3_disk_cache_retrieve(struct ir3_shader *shader,
struct ir3_shader_variant *v);
-void ir3_disk_cache_store(struct ir3_compiler *compiler,
+void ir3_disk_cache_store(struct ir3_shader *shader,
struct ir3_shader_variant *v);
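
The new ir3_store_variant()/ir3_retrieve_variant() pair lets a driver
serialize a compiled variant through a util/blob (for example for its own
pipeline cache), independent of the disk cache path. A rough round-trip
sketch, assuming Mesa's blob helpers and skipping error handling:

    struct blob blob;
    blob_init(&blob);
    ir3_store_variant(&blob, v);

    struct blob_reader reader;
    blob_reader_init(&reader, blob.data, blob.size);
    struct ir3_shader_variant *copy =
       ir3_retrieve_variant(&reader, compiler, NULL /* mem_ctx */);

    blob_finish(&blob);
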
+const nir_shader_compiler_options *
+ir3_get_compiler_options(struct ir3_compiler *compiler);
+
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
+ struct ir3_shader *shader,
struct ir3_shader_variant *so);
/* gpu pointer size in units of 32bit registers/slots */
static inline unsigned
ir3_pointer_size(struct ir3_compiler *compiler)
{
- return fd_dev_64b(compiler->dev_id) ? 2 : 1;
+ return compiler->is_64bit ? 2 : 1;
}
enum ir3_shader_debug {
@@ -198,8 +324,12 @@ enum ir3_shader_debug {
IR3_DBG_NOFP16 = BITFIELD_BIT(10),
IR3_DBG_NOCACHE = BITFIELD_BIT(11),
IR3_DBG_SPILLALL = BITFIELD_BIT(12),
+ IR3_DBG_NOPREAMBLE = BITFIELD_BIT(13),
+ IR3_DBG_SHADER_INTERNAL = BITFIELD_BIT(14),
+ IR3_DBG_FULLSYNC = BITFIELD_BIT(15),
+ IR3_DBG_FULLNOP = BITFIELD_BIT(16),
- /* DEBUG-only options: */
+ /* MESA_DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
IR3_DBG_RAMSGS = BITFIELD_BIT(21),
@@ -211,8 +341,11 @@ extern enum ir3_shader_debug ir3_shader_debug;
extern const char *ir3_shader_override_path;
static inline bool
-shader_debug_enabled(gl_shader_stage type)
+shader_debug_enabled(gl_shader_stage type, bool internal)
{
+ if (internal)
+ return !!(ir3_shader_debug & IR3_DBG_SHADER_INTERNAL);
+
if (ir3_shader_debug & IR3_DBG_DISASM)
return true;
@@ -228,9 +361,10 @@ shader_debug_enabled(gl_shader_stage type)
case MESA_SHADER_FRAGMENT:
return !!(ir3_shader_debug & IR3_DBG_SHADER_FS);
case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
return !!(ir3_shader_debug & IR3_DBG_SHADER_CS);
default:
- debug_assert(0);
+ assert(0);
return false;
}
}
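
The added "internal" parameter lets driver-internal (e.g. blit/meta) shaders
opt into debug output only when IR3_DBG_SHADER_INTERNAL is set, so they do not
flood regular shader dumps. A hypothetical call site, assuming the NIR shader
is at hand:

    /* nir->info.internal is set for driver-generated shaders; only dump them
     * when IR3_DBG_SHADER_INTERNAL was requested.
     */
    if (shader_debug_enabled(nir->info.stage, nir->info.internal)) {
       mesa_logi("dumping %s shader:",
                 _mesa_shader_stage_to_string(nir->info.stage));
       /* ... print NIR / disassembly ... */
    }
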
@@ -244,4 +378,6 @@ ir3_debug_print(struct ir3 *ir, const char *when)
}
}
+ENDC;
+
#endif /* IR3_COMPILER_H_ */