summary refs log tree commit diff
path: root/src/gallium/drivers/freedreno/a2xx/ir2_private.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno/a2xx/ir2_private.h')
-rw-r--r-- src/gallium/drivers/freedreno/a2xx/ir2_private.h 392
1 files changed, 392 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_private.h b/src/gallium/drivers/freedreno/a2xx/ir2_private.h
new file mode 100644
index 00000000000..d1fbacd908f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_private.h
@@ -0,0 +1,392 @@
+/*
+ * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Jonathan Marek <jonathan@marek.ca>
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "ir2.h"
+#include "fd2_program.h"
+#include "instr-a2xx.h"
+
+/*
+ * Kind of a source operand; selects how ir2_src.num is interpreted.
+ * The values must fit the 2-bit "type" field of struct ir2_src.
+ */
+enum ir2_src_type {
+	IR2_SRC_SSA = 0,	/* num = index of the producing instruction */
+	IR2_SRC_REG = 1,	/* num = index in ctx->reg */
+	IR2_SRC_INPUT = 2,	/* num = index in ctx->input */
+	IR2_SRC_CONST = 3,	/* num = constant index (C0, C1, ...) */
+};
+
+/*
+ * A source operand of an instruction. The "type" field selects which
+ * array/namespace "num" refers to (see get_reg_src() for the lookups).
+ */
+struct ir2_src {
+ /* num can mean different things
+ * ssa: index of instruction
+ * reg: index in ctx->reg array
+ * input: index in ctx->input array
+ * const: constant index (C0, C1, etc)
+ */
+ uint16_t num;
+ /* NOTE(review): presumably uses the relative 2-bits-per-channel encoding
+ * of swiz_set()/swiz_get() - confirm against users
+ */
+ uint8_t swizzle;
+ enum ir2_src_type type : 2;
+ /* source modifier flags; NOTE(review): by name, absolute value and
+ * negation - confirm against the assembler
+ */
+ uint8_t abs : 1;
+ uint8_t negate : 1;
+ /* unnamed padding: 2 + 1 + 1 + 4 = 8 bits */
+ uint8_t : 4;
+};
+
+/* Per-component state of a register; struct ir2_reg holds four of these. */
+struct ir2_reg_component {
+ uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */
+ bool alloc : 1; /* is it currently allocated */
+ uint8_t ref_count; /* for ra */
+};
+
+/*
+ * A register as seen by register allocation: the assigned hardware register
+ * plus the state of each of its (up to 4) components.
+ */
+struct ir2_reg {
+ uint8_t idx; /* assigned hardware register */
+ uint8_t ncomp; /* number of components; comp[] has room for 4 */
+
+ /* NOTE(review): loop_depth/initialized are not used in this header;
+ * their exact meaning is defined by the users - confirm there
+ */
+ uint8_t loop_depth;
+ bool initialized;
+ /* block_idx to free on (-1 = free on ref_count==0) */
+ int block_idx_free;
+ struct ir2_reg_component comp[4];
+};
+
+/*
+ * One IR instruction: its kind, sources, destination and the payload that
+ * is specific to the kind (fetch / alu / cf). Entries of type IR2_NONE are
+ * skipped by ir2_foreach_instr().
+ */
+struct ir2_instr {
+ /* NOTE(review): presumably the index of this entry in ctx->instr, which
+ * is what an IR2_SRC_SSA source's num indexes - confirm
+ */
+ unsigned idx;
+
+ unsigned block_idx;
+
+ enum {
+ IR2_NONE,
+ IR2_FETCH,
+ IR2_ALU,
+ IR2_CF,
+ } type : 2;
+
+ /* instruction needs to be emitted (for scheduling) */
+ bool need_emit : 1;
+
+ /* predicate value - (usually) same for entire block */
+ uint8_t pred : 2;
+
+ /* src */
+ uint8_t src_count; /* number of valid entries in src[] (see ir2_foreach_src) */
+ struct ir2_src src[4];
+
+ /* dst: is_ssa selects the active union member (see get_reg()) */
+ bool is_ssa;
+ union {
+ struct ir2_reg ssa;
+ struct ir2_reg *reg;
+ };
+
+ /* type-specific: which member is meaningful follows "type" */
+ union {
+ struct {
+ instr_fetch_opc_t opc : 5;
+ union {
+ struct {
+ uint8_t const_idx;
+ uint8_t const_idx_sel;
+ } vtx;
+ struct {
+ /* is_cube makes src_ncomp() report 3 source components instead of 2 */
+ bool is_cube : 1;
+ bool is_rect : 1;
+ uint8_t samp_id;
+ } tex;
+ };
+ } fetch;
+ struct {
+ /* store possible opcs, then we can choose vector/scalar instr */
+ instr_scalar_opc_t scalar_opc : 6;
+ instr_vector_opc_t vector_opc : 5;
+ /* same as nir */
+ uint8_t write_mask : 4;
+ bool saturate : 1;
+
+ /* export idx (-1 no export) */
+ int8_t export;
+
+ /* for scalarized 2 src instruction */
+ uint8_t src1_swizzle;
+ } alu;
+ struct {
+ /* jmp dst block_idx */
+ uint8_t block_idx;
+ } cf;
+ };
+};
+
+/*
+ * One scheduled slot: a pair of instruction pointers plus a register-state
+ * bitmask of the same size as ir2_context.reg_state (8 * 32 = 256 bits).
+ * NOTE(review): instr/instr_s are presumably the vector and scalar
+ * instruction sharing the slot - confirm against the scheduler.
+ */
+struct ir2_sched_instr {
+ uint32_t reg_state[8];
+ struct ir2_instr *instr, *instr_s;
+};
+
+/*
+ * Compiler state for one shader: the NIR input, the fixed-capacity
+ * instruction / register / schedule arrays and the register allocator
+ * bookkeeping. All arrays are statically sized; the *_count fields give the
+ * number of entries in use.
+ */
+struct ir2_context {
+ struct fd2_shader_stateobj *so;
+
+ unsigned block_idx, pred_idx;
+ uint8_t pred;
+ bool block_has_jump[64];
+
+ unsigned loop_last_block[64];
+ unsigned loop_depth;
+
+ nir_shader *nir;
+
+ /* ssa index of position output */
+ struct ir2_src position;
+
+ /* to translate SSA ids to instruction ids */
+ int16_t ssa_map[1024];
+
+ struct ir2_shader_info *info;
+ struct ir2_frag_linkage *f;
+
+ int prev_export;
+
+ /* RA state */
+ /* scanned by ir2_foreach_live_reg(), which skips NULL slots */
+ struct ir2_reg* live_regs[64];
+ uint32_t reg_state[256/32]; /* 64*4 bits */
+
+ /* inputs */
+ struct ir2_reg input[16 + 1]; /* 16 + param */
+
+ /* non-ssa regs */
+ struct ir2_reg reg[64];
+ unsigned reg_count;
+
+ /* instruction storage, walked by ir2_foreach_instr() up to instr_count */
+ struct ir2_instr instr[0x300];
+ unsigned instr_count;
+
+ struct ir2_sched_instr instr_sched[0x180];
+ unsigned instr_sched_count;
+};
+
+/*
+ * Entry points implemented in other files of the ir2 compiler; only the
+ * declarations are visible here.
+ * NOTE(review): "binning" presumably selects the binning-pass shader
+ * variant - confirm against the definitions.
+ */
+void assemble(struct ir2_context *ctx, bool binning);
+
+void ir2_nir_compile(struct ir2_context *ctx, bool binning);
+
+/* register allocation helpers (semantics defined with their implementations) */
+void ra_count_refs(struct ir2_context *ctx);
+void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
+ bool export, uint8_t export_writemask);
+void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
+void ra_block_free(struct ir2_context *ctx, unsigned block);
+
+/* utils */
+/*
+ * Precomputed swizzle values. Channel i occupies bits [2i+1:2i] and stores
+ * (component - i) & 3, the same relative encoding swiz_set()/swiz_get() use,
+ * so 0 is the identity swizzle. Names shorter than four letters leave the
+ * remaining channels at identity.
+ */
+enum {
+	IR2_SWIZZLE_Y = (1 << 0),
+	IR2_SWIZZLE_Z = (2 << 0),
+	IR2_SWIZZLE_W = (3 << 0),
+
+	IR2_SWIZZLE_ZW = (2 << 0) | (2 << 2),
+
+	IR2_SWIZZLE_XYW = (0 << 0) | (0 << 2) | (1 << 4),
+
+	IR2_SWIZZLE_XXXX = (0 << 0) | (3 << 2) | (2 << 4) | (1 << 6),
+	IR2_SWIZZLE_YYYY = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6),
+	IR2_SWIZZLE_ZZZZ = (2 << 0) | (1 << 2) | (0 << 4) | (3 << 6),
+	IR2_SWIZZLE_WWWW = (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6),
+	IR2_SWIZZLE_WYWW = (3 << 0) | (0 << 2) | (1 << 4) | (0 << 6),
+	IR2_SWIZZLE_XYXY = (0 << 0) | (0 << 2) | (2 << 4) | (2 << 6),
+	IR2_SWIZZLE_ZZXY = (2 << 0) | (1 << 2) | (2 << 4) | (2 << 6),
+	IR2_SWIZZLE_YXZZ = (1 << 0) | (3 << 2) | (0 << 4) | (3 << 6),
+};
+
+/*
+ * Report a compile failure: print the printf-style message to stdout, then
+ * trip an assertion. "ctx" is accepted but not used. When NDEBUG is defined
+ * the assert compiles away and execution continues after the message.
+ */
+#define compile_error(ctx, ...) ({ \
+	printf(__VA_ARGS__); \
+	assert(0); \
+})
+
+/*
+ * Build a source operand from its index, swizzle and type. All remaining
+ * fields (abs, negate) are zero.
+ */
+static inline struct ir2_src
+ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
+{
+	struct ir2_src src = { 0 };
+
+	src.num = num;
+	src.swizzle = swizzle;
+	src.type = type;
+	return src;
+}
+
+/* Declared in this shared header because ir2_assemble uses it too; defined
+ * elsewhere.
+ * NOTE(review): by name, presumably yields a source operand that reads as
+ * zero - confirm against the definition.
+ */
+struct ir2_src ir2_zero(struct ir2_context *ctx);
+
+/*
+ * Iterate "it" over the entries of (ctx)->instr[0 .. instr_count) whose
+ * type is not IR2_NONE. The loop condition first skips IR2_NONE entries and
+ * then tests for the end of the used range. "it" is declared by the macro;
+ * "ctx" is evaluated several times. Uses a GNU statement expression.
+ */
+#define ir2_foreach_instr(it, ctx) \
+ for (struct ir2_instr *it = (ctx)->instr; ({ \
+ while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \
+ it != &(ctx)->instr[(ctx)->instr_count]; }); it++)
+
+/*
+ * Iterate "it" over every non-NULL entry of (ctx)->live_regs[0 .. 64).
+ * The condition skips NULL slots, loads the current slot into "it" and
+ * stops at the end of the array. The step advances the slot cursor
+ * (__ptr), not "it": "it" is reloaded from the cursor on every pass, so
+ * stepping "it" would leave the loop stuck on the same slot whenever the
+ * body does not clear it (for example on "continue"). The body may still
+ * set the current slot to NULL. "ctx" is evaluated several times. Uses a
+ * GNU statement expression.
+ */
+#define ir2_foreach_live_reg(it, ctx) \
+	for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
+		while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
+		__ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); __ptr++)
+
+/*
+ * Iterate "it" over avail[0 .. avail_count). Both "avail" (array of
+ * struct ir2_instr pointers) and "avail_count" are taken from the scope of
+ * the macro's user. The bounds check runs before the element is loaded, so
+ * avail[avail_count] is never read.
+ */
+#define ir2_foreach_avail(it) \
+	for (struct ir2_instr **__instrp = avail, *it; \
+		__instrp != &avail[avail_count] && (it = *__instrp, 1); __instrp++)
+
+/*
+ * Iterate "it" over the valid sources of an instruction, i.e.
+ * (instr)->src[0 .. src_count). "instr" is parenthesized so any pointer
+ * expression (such as &local) can be passed; it is evaluated several times.
+ */
+#define ir2_foreach_src(it, instr) \
+	for (struct ir2_src *it = (instr)->src; \
+		it != &(instr)->src[(instr)->src_count]; it++)
+
+/* mask for register allocation
+ * 64 registers with 4 components each = 256 bits
+ */
+/* typedef struct {
+ uint64_t data[4];
+} regmask_t; */
+
+/*
+ * Test bit "num" of a bitmask stored as an array of 32-bit words.
+ * The shifted one is unsigned: shifting a signed 1 into bit 31 is undefined.
+ */
+static inline bool mask_isset(uint32_t * mask, unsigned num)
+{
+	return (mask[num / 32] & (UINT32_C(1) << (num % 32))) != 0;
+}
+
+/*
+ * Set bit "num" of a bitmask stored as an array of 32-bit words.
+ * The shifted one is unsigned: shifting a signed 1 into bit 31 is undefined.
+ */
+static inline void mask_set(uint32_t * mask, unsigned num)
+{
+	mask[num / 32] |= UINT32_C(1) << (num % 32);
+}
+
+/*
+ * Clear bit "num" of a bitmask stored as an array of 32-bit words.
+ * The shifted one is unsigned: shifting a signed 1 into bit 31 is undefined.
+ */
+static inline void mask_unset(uint32_t * mask, unsigned num)
+{
+	mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
+}
+
+/*
+ * Extract the 4-bit group belonging to register "num" from a register
+ * bitmask: each 32-bit word holds 8 registers with 4 bits apiece.
+ */
+static inline unsigned mask_reg(uint32_t * mask, unsigned num)
+{
+	const unsigned word = num / 8;
+	const unsigned shift = (num % 8) * 4;
+
+	return (mask[word] >> shift) & 0xf;
+}
+
+/* True when the instruction is an ALU instruction with an export index
+ * (alu.export >= 0; -1 means no export).
+ */
+static inline bool is_export(struct ir2_instr *instr)
+{
+	if (instr->type != IR2_ALU)
+		return false;
+
+	return instr->alu.export >= 0;
+}
+
+/*
+ * Map an export index to its allocation type: indices below 32 map to
+ * SQ_PARAMETER_PIXEL, 62 and above to SQ_POSITION, the rest to SQ_MEMORY.
+ */
+static inline instr_alloc_type_t export_buf(unsigned num)
+{
+	if (num < 32)
+		return SQ_PARAMETER_PIXEL;
+	if (num >= 62)
+		return SQ_POSITION;
+	return SQ_MEMORY;
+}
+
+/*
+ * Encode "channel i reads component c". The swizzle stores the component
+ * relative to the channel, (c - i) mod 4, in the channel's 2-bit field.
+ */
+static inline unsigned swiz_set(unsigned c, unsigned i)
+{
+	const unsigned rel = (c - i) & 3;
+
+	return rel << (i * 2);
+}
+
+/*
+ * Decode the component that channel i reads: take the channel's relative
+ * 2-bit value and add the channel index back, mod 4.
+ */
+static inline unsigned swiz_get(unsigned swiz, unsigned i)
+{
+	const unsigned shifted = swiz >> (i * 2);
+
+	return (shifted + i) & 3;
+}
+
+/*
+ * Compose two swizzles: for each channel, swiz1 picks a channel and swiz0
+ * is then looked up at that channel to find the final component.
+ */
+static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1)
+{
+	unsigned merged = 0;
+
+	for (unsigned chan = 0; chan < 4; chan++) {
+		const unsigned picked = swiz_get(swiz1, chan);
+		const unsigned comp = swiz_get(swiz0, picked);
+
+		merged |= swiz_set(comp, chan);
+	}
+
+	return merged;
+}
+
+/*
+ * In-place variant of swiz_merge(): replaces *swiz0 with the composition of
+ * *swiz0 and swiz1 (the result is truncated to 8 bits by the store).
+ */
+static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
+{
+	*swiz0 = swiz_merge(*swiz0, swiz1);
+}
+
+/*
+ * Destination register of an instruction: the embedded ssa register when
+ * is_ssa is set, otherwise the register the instruction points at.
+ */
+static inline struct ir2_reg * get_reg(struct ir2_instr *instr)
+{
+	if (instr->is_ssa)
+		return &instr->ssa;
+
+	return instr->reg;
+}
+
+/*
+ * Resolve a source operand to the register it reads, using src->num as an
+ * index into ctx->input, ctx->instr (its embedded ssa register) or ctx->reg
+ * depending on the source type. Any other type (e.g. constants) has no
+ * register and yields NULL.
+ */
+static inline struct ir2_reg *
+get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
+{
+	if (src->type == IR2_SRC_INPUT)
+		return &ctx->input[src->num];
+
+	if (src->type == IR2_SRC_SSA)
+		return &ctx->instr[src->num].ssa;
+
+	if (src->type == IR2_SRC_REG)
+		return &ctx->reg[src->num];
+
+	return NULL;
+}
+
+/*
+ * Number of components the instruction writes. SSA destinations and fetch
+ * instructions use the register's ncomp directly; for ALU instructions
+ * writing a non-SSA register it is the number of write_mask bits set among
+ * the register's first ncomp components.
+ */
+static inline unsigned dst_ncomp(struct ir2_instr *instr)
+{
+	if (instr->is_ssa)
+		return instr->ssa.ncomp;
+
+	const struct ir2_reg *dst = instr->reg;
+
+	if (instr->type == IR2_FETCH)
+		return dst->ncomp;
+
+	assert(instr->type == IR2_ALU);
+
+	unsigned written = 0;
+	for (int comp = 0; comp < dst->ncomp; comp++) {
+		if (instr->alu.write_mask & (1 << comp))
+			written++;
+	}
+	return written;
+}
+
+/*
+ * Number of components the instruction reads from its source registers.
+ * Fetches: 1 for VTX_FETCH and TEX_SET_TEX_LOD, 2 for TEX_FETCH (3 for cube
+ * maps); any other fetch opcode asserts. Scalar opcodes in the range
+ * PRED_SETEs..KILLONEs read 1 component. The dot-product / cube / predicate
+ * vector opcodes read a fixed count; everything else reads as many
+ * components as it writes.
+ */
+static inline unsigned src_ncomp(struct ir2_instr *instr)
+{
+	if (instr->type == IR2_FETCH) {
+		switch (instr->fetch.opc) {
+		case VTX_FETCH:
+		case TEX_SET_TEX_LOD:
+			return 1;
+		case TEX_FETCH:
+			if (instr->fetch.tex.is_cube)
+				return 3;
+			return 2;
+		default:
+			/* with NDEBUG this continues into the ALU checks below */
+			assert(0);
+		}
+	}
+
+	if (instr->alu.scalar_opc >= PRED_SETEs &&
+		instr->alu.scalar_opc <= KILLONEs)
+		return 1;
+
+	switch (instr->alu.vector_opc) {
+	case DOT2ADDv:
+		return 2;
+	case DOT3v:
+		return 3;
+	case DOT4v:
+	case CUBEv:
+	case PRED_SETE_PUSHv:
+		return 4;
+	default:
+		break;
+	}
+
+	return dst_ncomp(instr);
+}