summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
diff options
context:
space:
mode:
authorJonathan Marek <jonathan@marek.ca>2018-12-18 20:15:57 -0500
committerRob Clark <robdclark@gmail.com>2019-01-22 14:45:03 +0000
commit67610a0323ddfe0d7cced121abb43286b862b495 (patch)
tree994887050ff640f4c2db9f56ec55688d2f1412a4 /src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
parentda3ca69afadb142c1591c75c2bfd345c8d4337de (diff)
freedreno: a2xx: NIR backend
This patch replaces the a2xx TGSI compiler with a NIR compiler. It also adds several new features: -gl_FrontFacing, gl_FragCoord, gl_PointCoord, gl_PointSize -control flow (including loops) -texture related features (LOD/bias, cubemaps) -filling scalar ALU slot when possible Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Diffstat (limited to 'src/gallium/drivers/freedreno/a2xx/ir-a2xx.c')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/ir-a2xx.c809
1 files changed, 0 insertions, 809 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
deleted file mode 100644
index af9811864ff..00000000000
--- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
+++ /dev/null
@@ -1,809 +0,0 @@
-/*
- * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ir-a2xx.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#include "freedreno_util.h"
-#include "instr-a2xx.h"
-
-#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
-#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__)
-#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__)
-
-static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
- uint32_t idx, struct ir2_shader_info *info);
-
-static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n);
-static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg);
-static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg);
-static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg);
-
-/* simple allocator to carve allocations out of an up-front allocated heap,
- * so that we can free everything easily in one shot.
- */
-static void * ir2_alloc(struct ir2_shader *shader, int sz)
-{
- void *ptr = &shader->heap[shader->heap_idx];
- shader->heap_idx += align(sz, 4) / 4;
- return ptr;
-}
-
-static char * ir2_strdup(struct ir2_shader *shader, const char *str)
-{
- char *ptr = NULL;
- if (str) {
- int len = strlen(str);
- ptr = ir2_alloc(shader, len+1);
- memcpy(ptr, str, len);
- ptr[len] = '\0';
- }
- return ptr;
-}
-
-struct ir2_shader * ir2_shader_create(void)
-{
- DEBUG_MSG("");
- struct ir2_shader *shader = calloc(1, sizeof(struct ir2_shader));
- shader->max_reg = -1;
- return shader;
-}
-
-void ir2_shader_destroy(struct ir2_shader *shader)
-{
- DEBUG_MSG("");
- free(shader);
-}
-
-/* check if an instruction is a simple MOV
- */
-static struct ir2_instruction * simple_mov(struct ir2_instruction *instr,
- bool output)
-{
- struct ir2_src_register *src_reg = instr->src_reg;
- struct ir2_dst_register *dst_reg = &instr->dst_reg;
- struct ir2_register *reg;
- unsigned i;
-
- /* MAXv used for MOV */
- if (instr->instr_type != IR2_ALU_VECTOR ||
- instr->alu_vector.opc != MAXv)
- return NULL;
-
- /* non identical srcs */
- if (src_reg[0].num != src_reg[1].num)
- return NULL;
-
- /* flags */
- int flags = IR2_REG_NEGATE | IR2_REG_ABS;
- if (output)
- flags |= IR2_REG_INPUT | IR2_REG_CONST;
- if ((src_reg[0].flags & flags) || (src_reg[1].flags & flags))
- return NULL;
-
- /* clamping */
- if (instr->alu_vector.clamp)
- return NULL;
-
- /* swizzling */
- for (i = 0; i < 4; i++) {
- char swiz = (dst_reg->swizzle ? dst_reg->swizzle : "xyzw")[i];
- if (swiz == '_')
- continue;
-
- if (swiz != (src_reg[0].swizzle ? src_reg[0].swizzle : "xyzw")[i] ||
- swiz != (src_reg[1].swizzle ? src_reg[1].swizzle : "xyzw")[i])
- return NULL;
- }
-
- if (output)
- reg = &instr->shader->reg[src_reg[0].num];
- else
- reg = &instr->shader->reg[dst_reg->num];
-
- assert(reg->write_idx >= 0);
- if (reg->write_idx != reg->write_idx2)
- return NULL;
-
- if (!output)
- return instr;
-
- instr = instr->shader->instr[reg->write_idx];
- return instr->instr_type != IR2_ALU_VECTOR ? NULL : instr;
-}
-
-static int src_to_reg(struct ir2_instruction *instr,
- struct ir2_src_register *reg)
-{
- if (reg->flags & IR2_REG_CONST)
- return reg->num;
-
- return instr->shader->reg[reg->num].reg;
-}
-
-static int dst_to_reg(struct ir2_instruction *instr,
- struct ir2_dst_register *reg)
-{
- if (reg->flags & IR2_REG_EXPORT)
- return reg->num;
-
- return instr->shader->reg[reg->num].reg;
-}
-
-static bool mask_get(uint32_t *mask, unsigned index)
-{
- return !!(mask[index / 32] & 1 << index % 32);
-}
-
-static void mask_set(uint32_t *mask, struct ir2_register *reg, int index)
-{
- if (reg) {
- unsigned i;
- for (i = 0; i < ARRAY_SIZE(reg->regmask); i++)
- mask[i] |= reg->regmask[i];
- }
- if (index >= 0)
- mask[index / 32] |= 1 << index % 32;
-}
-
-static bool sets_pred(struct ir2_instruction *instr)
-{
- return instr->instr_type == IR2_ALU_SCALAR &&
- instr->alu_scalar.opc >= PRED_SETEs &&
- instr->alu_scalar.opc <= PRED_SET_RESTOREs;
-}
-
-
-
-void* ir2_shader_assemble(struct ir2_shader *shader,
- struct ir2_shader_info *info)
-{
- /* NOTES
- * blob compiler seems to always puts PRED_* instrs in a CF by
- * themselves, and wont combine EQ/NE in the same CF
- * (not doing this - doesn't seem to make a difference)
- *
- * TODO: implement scheduling for combining vector+scalar instructions
- * -some vector instructions can be replaced by scalar
- */
-
- /* first step:
- * 1. remove "NOP" MOV instructions generated by TGSI for input/output:
- * 2. track information for register allocation, and to remove
- * the dead code when some exports are not needed
- * 3. add additional instructions for a20x hw binning if needed
- * NOTE: modifies the shader instrs
- * this step could be done as instructions are added by compiler instead
- */
-
- /* mask of exports that must be generated
- * used to avoid calculating ps exports with hw binning
- */
- uint64_t export = ~0ull;
- /* bitmask of variables required for exports defined by "export" */
- uint32_t export_mask[REG_MASK/32+1] = {};
-
- unsigned idx, reg_idx;
- unsigned max_input = 0;
- int export_size = -1;
-
- for (idx = 0; idx < shader->instr_count; idx++) {
- struct ir2_instruction *instr = shader->instr[idx], *prev;
- struct ir2_dst_register dst_reg = instr->dst_reg;
-
- if (dst_reg.flags & IR2_REG_EXPORT) {
- if (dst_reg.num < 32)
- export_size++;
-
- if ((prev = simple_mov(instr, true))) {
- /* copy instruction but keep dst */
- *instr = *prev;
- instr->dst_reg = dst_reg;
- }
- }
-
- for (reg_idx = 0; reg_idx < instr->src_reg_count; reg_idx++) {
- struct ir2_src_register *src_reg = &instr->src_reg[reg_idx];
- struct ir2_register *reg;
- int num;
-
- if (src_reg->flags & IR2_REG_CONST)
- continue;
-
- num = src_reg->num;
- reg = &shader->reg[num];
- reg->read_idx = idx;
-
- if (src_reg->flags & IR2_REG_INPUT) {
- max_input = MAX2(max_input, num);
- } else {
- /* bypass simple mov used to set src_reg */
- assert(reg->write_idx >= 0);
- prev = shader->instr[reg->write_idx];
- if (simple_mov(prev, false)) {
- *src_reg = prev->src_reg[0];
- /* process same src_reg again */
- reg_idx -= 1;
- continue;
- }
- }
-
- /* update dependencies */
- uint32_t *mask = (dst_reg.flags & IR2_REG_EXPORT) ?
- export_mask : shader->reg[dst_reg.num].regmask;
- mask_set(mask, reg, num);
- if (sets_pred(instr))
- mask_set(export_mask, reg, num);
- }
- }
-
- /* second step:
- * emit instructions (with CFs) + RA
- */
- instr_cf_t cfs[128], *cf = cfs;
- uint32_t alufetch[3*256], *af = alufetch;
-
- /* RA is done on write, so inputs must be allocated here */
- for (reg_idx = 0; reg_idx <= max_input; reg_idx++)
- shader->reg[reg_idx].reg = reg_idx;
- info->max_reg = max_input;
-
- /* CF instr state */
- instr_cf_exec_t exec = { .opc = EXEC };
- instr_cf_alloc_t alloc = { .opc = ALLOC };
- bool need_alloc = 0;
- bool pos_export = 0;
-
- export_size = MAX2(export_size, 0);
-
- for (idx = 0; idx < shader->instr_count; idx++) {
- struct ir2_instruction *instr = shader->instr[idx];
- struct ir2_dst_register *dst_reg = &instr->dst_reg;
- unsigned num = dst_reg->num;
- struct ir2_register *reg;
-
- /* a2xx only has 64 registers, so we can use a single 64-bit mask */
- uint64_t regmask = 0ull;
-
- /* compute the current regmask */
- for (reg_idx = 0; (int) reg_idx <= shader->max_reg; reg_idx++) {
- reg = &shader->reg[reg_idx];
- if ((int) idx > reg->write_idx && idx < reg->read_idx)
- regmask |= (1ull << reg->reg);
- }
-
- if (dst_reg->flags & IR2_REG_EXPORT) {
- /* skip if export is not needed */
- if (!(export & (1ull << num)))
- continue;
-
- /* ALLOC CF:
- * want to alloc all < 32 at once
- * 32/33 and 62/63 come in pairs
- * XXX assuming all 3 types are never interleaved
- */
- if (num < 32) {
- alloc.size = export_size;
- alloc.buffer_select = SQ_PARAMETER_PIXEL;
- need_alloc = export_size >= 0;
- export_size = -1;
- } else if (num == 32 || num == 33) {
- alloc.size = 0;
- alloc.buffer_select = SQ_MEMORY;
- need_alloc = num != 33;
- } else {
- alloc.size = 0;
- alloc.buffer_select = SQ_POSITION;
- need_alloc = !pos_export;
- pos_export = true;
- }
-
- } else {
- /* skip if dst register not needed to compute exports */
- if (!mask_get(export_mask, num))
- continue;
-
- /* RA on first write */
- reg = &shader->reg[num];
- if (reg->write_idx == idx) {
- reg->reg = ffsll(~regmask) - 1;
- info->max_reg = MAX2(info->max_reg, reg->reg);
- }
- }
-
- if (exec.count == 6 || (exec.count && need_alloc)) {
- *cf++ = *(instr_cf_t*) &exec;
- exec.address += exec.count;
- exec.serialize = 0;
- exec.count = 0;
- }
-
- if (need_alloc) {
- *cf++ = *(instr_cf_t*) &alloc;
- need_alloc = false;
- }
-
- int ret = instr_emit(instr, af, idx, info); af += 3;
- assert(!ret);
-
- if (instr->instr_type == IR2_FETCH)
- exec.serialize |= 0x1 << exec.count * 2;
- if (instr->sync)
- exec.serialize |= 0x2 << exec.count * 2;
- exec.count += 1;
- }
-
-
- exec.opc = !export_size ? EXEC : EXEC_END;
- *cf++ = *(instr_cf_t*) &exec;
- exec.address += exec.count;
- exec.serialize = 0;
- exec.count = 0;
-
- /* GPU will hang without at least one pixel alloc */
- if (!export_size) {
- alloc.size = 0;
- alloc.buffer_select = SQ_PARAMETER_PIXEL;
- *cf++ = *(instr_cf_t*) &alloc;
-
- exec.opc = EXEC_END;
- *cf++ = *(instr_cf_t*) &exec;
- }
-
- unsigned num_cfs = cf - cfs;
-
- /* insert nop to get an even # of CFs */
- if (num_cfs % 2) {
- *cf++ = (instr_cf_t) { .opc = NOP };
- num_cfs++;
- }
-
- /* offset cf addrs */
- for (idx = 0; idx < num_cfs; idx++) {
- switch (cfs[idx].opc) {
- case EXEC:
- case EXEC_END:
- cfs[idx].exec.address += num_cfs / 2;
- break;
- default:
- break;
- /* XXX and any other address using cf that gets implemented */
- }
- }
-
- /* concatenate cfs+alufetchs */
- uint32_t cfdwords = num_cfs / 2 * 3;
- uint32_t alufetchdwords = exec.address * 3;
- info->sizedwords = cfdwords + alufetchdwords;
- uint32_t *dwords = malloc(info->sizedwords * 4);
- assert(dwords);
- memcpy(dwords, cfs, cfdwords * 4);
- memcpy(&dwords[cfdwords], alufetch, alufetchdwords * 4);
- return dwords;
-}
-
-struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader,
- int instr_type)
-{
- struct ir2_instruction *instr =
- ir2_alloc(shader, sizeof(struct ir2_instruction));
- DEBUG_MSG("%d", instr_type);
- instr->shader = shader;
- instr->idx = shader->instr_count;
- instr->pred = shader->pred;
- instr->instr_type = instr_type;
- shader->instr[shader->instr_count++] = instr;
- return instr;
-}
-
-
-/*
- * FETCH instructions:
- */
-
-static int instr_emit_fetch(struct ir2_instruction *instr,
- uint32_t *dwords, uint32_t idx,
- struct ir2_shader_info *info)
-{
- instr_fetch_t *fetch = (instr_fetch_t *)dwords;
- struct ir2_dst_register *dst_reg = &instr->dst_reg;
- struct ir2_src_register *src_reg = &instr->src_reg[0];
-
- memset(fetch, 0, sizeof(*fetch));
-
- fetch->opc = instr->fetch.opc;
-
- if (instr->fetch.opc == VTX_FETCH) {
- instr_fetch_vtx_t *vtx = &fetch->vtx;
-
- assert(instr->fetch.stride <= 0xff);
- assert(instr->fetch.fmt <= 0x3f);
- assert(instr->fetch.const_idx <= 0x1f);
- assert(instr->fetch.const_idx_sel <= 0x3);
-
- vtx->src_reg = src_to_reg(instr, src_reg);
- vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
- vtx->dst_reg = dst_to_reg(instr, dst_reg);
- vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
- vtx->must_be_one = 1;
- vtx->const_index = instr->fetch.const_idx;
- vtx->const_index_sel = instr->fetch.const_idx_sel;
- vtx->format_comp_all = !!instr->fetch.is_signed;
- vtx->num_format_all = !instr->fetch.is_normalized;
- vtx->format = instr->fetch.fmt;
- vtx->stride = instr->fetch.stride;
- vtx->offset = instr->fetch.offset;
-
- if (instr->pred != IR2_PRED_NONE) {
- vtx->pred_select = 1;
- vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
- }
-
- /* XXX seems like every FETCH but the first has
- * this bit set:
- */
- vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
- vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
- } else if (instr->fetch.opc == TEX_FETCH) {
- instr_fetch_tex_t *tex = &fetch->tex;
-
- assert(instr->fetch.const_idx <= 0x1f);
-
- tex->src_reg = src_to_reg(instr, src_reg);
- tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
- tex->dst_reg = dst_to_reg(instr, dst_reg);
- tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
- tex->const_idx = instr->fetch.const_idx;
- tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
- tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
- tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->use_comp_lod = 1;
- tex->use_reg_lod = !instr->fetch.is_cube;
- tex->sample_location = SAMPLE_CENTER;
- tex->tx_coord_denorm = instr->fetch.is_rect;
-
- if (instr->pred != IR2_PRED_NONE) {
- tex->pred_select = 1;
- tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
- }
-
- } else {
- ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
- return -1;
- }
-
- return 0;
-}
-
-/*
- * ALU instructions:
- */
-
-static int instr_emit_alu(struct ir2_instruction *instr_v,
- struct ir2_instruction *instr_s, uint32_t *dwords,
- struct ir2_shader_info *info)
-{
- instr_alu_t *alu = (instr_alu_t *)dwords;
- struct ir2_dst_register *vdst_reg, *sdst_reg;
- struct ir2_src_register *src1_reg, *src2_reg, *src3_reg;
- struct ir2_shader *shader = instr_v ? instr_v->shader : instr_s->shader;
- enum ir2_pred pred = IR2_PRED_NONE;
-
- memset(alu, 0, sizeof(*alu));
-
- vdst_reg = NULL;
- sdst_reg = NULL;
- src1_reg = NULL;
- src2_reg = NULL;
- src3_reg = NULL;
-
- if (instr_v) {
- vdst_reg = &instr_v->dst_reg;
- assert(instr_v->src_reg_count >= 2);
- src1_reg = &instr_v->src_reg[0];
- src2_reg = &instr_v->src_reg[1];
- if (instr_v->src_reg_count > 2)
- src3_reg = &instr_v->src_reg[2];
- pred = instr_v->pred;
- }
-
- if (instr_s) {
- sdst_reg = &instr_s->dst_reg;
- assert(instr_s->src_reg_count == 1);
- assert(!instr_v || vdst_reg->flags == sdst_reg->flags);
- assert(!instr_v || pred == instr_s->pred);
- if (src3_reg) {
- assert(src3_reg->flags == instr_s->src_reg[0].flags);
- assert(src3_reg->num == instr_s->src_reg[0].num);
- assert(!strcmp(src3_reg->swizzle, instr_s->src_reg[0].swizzle));
- }
- src3_reg = &instr_s->src_reg[0];
- pred = instr_s->pred;
- }
-
- if (vdst_reg) {
- assert((vdst_reg->flags & ~IR2_REG_EXPORT) == 0);
- assert(!vdst_reg->swizzle || (strlen(vdst_reg->swizzle) == 4));
- alu->vector_opc = instr_v->alu_vector.opc;
- alu->vector_write_mask = reg_alu_dst_swiz(vdst_reg);
- alu->vector_dest = dst_to_reg(instr_v, vdst_reg);
- } else {
- alu->vector_opc = MAXv;
- }
-
- if (sdst_reg) {
- alu->scalar_opc = instr_s->alu_scalar.opc;
- alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg);
- alu->scalar_dest = dst_to_reg(instr_s, sdst_reg);
- } else {
- /* not sure if this is required, but adreno compiler seems
- * to always set scalar opc to MAXs if it is not used:
- */
- alu->scalar_opc = MAXs;
- }
-
- alu->export_data =
- !!((instr_v ? vdst_reg : sdst_reg)->flags & IR2_REG_EXPORT);
-
- /* export32 has this bit set.. it seems to do more than just set
- * the base address of the constants used to zero
- * TODO make this less of a hack
- */
- if (alu->export_data && alu->vector_dest == 32) {
- assert(!instr_s);
- alu->relative_addr = 1;
- }
-
- if (src1_reg) {
- if (src1_reg->flags & IR2_REG_CONST) {
- assert(!(src1_reg->flags & IR2_REG_ABS));
- alu->src1_reg_const = src1_reg->num;
- } else {
- alu->src1_reg = shader->reg[src1_reg->num].reg;
- alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS);
- }
- alu->src1_swiz = reg_alu_src_swiz(src1_reg);
- alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE);
- alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST);
- } else {
- alu->src1_sel = 1;
- }
-
- if (src2_reg) {
- if (src2_reg->flags & IR2_REG_CONST) {
- assert(!(src2_reg->flags & IR2_REG_ABS));
- alu->src2_reg_const = src2_reg->num;
- } else {
- alu->src2_reg = shader->reg[src2_reg->num].reg;
- alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS);
- }
- alu->src2_swiz = reg_alu_src_swiz(src2_reg);
- alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE);
- alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST);
- } else {
- alu->src2_sel = 1;
- }
-
- if (src3_reg) {
- if (src3_reg->flags & IR2_REG_CONST) {
- assert(!(src3_reg->flags & IR2_REG_ABS));
- alu->src3_reg_const = src3_reg->num;
- } else {
- alu->src3_reg = shader->reg[src3_reg->num].reg;
- alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS);
- }
- alu->src3_swiz = reg_alu_src_swiz(src3_reg);
- alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE);
- alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST);
- } else {
- /* not sure if this is required, but adreno compiler seems
- * to always set register bank for 3rd src if unused:
- */
- alu->src3_sel = 1;
- }
-
- alu->vector_clamp = instr_v ? instr_v->alu_vector.clamp : 0;
- alu->scalar_clamp = instr_s ? instr_s->alu_scalar.clamp : 0;
-
- if (pred != IR2_PRED_NONE)
- alu->pred_select = (pred == IR2_PRED_EQ) ? 3 : 2;
-
- return 0;
-}
-
-static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
- uint32_t idx, struct ir2_shader_info *info)
-{
- switch (instr->instr_type) {
- case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
- case IR2_ALU_VECTOR: return instr_emit_alu(instr, NULL, dwords, info);
- case IR2_ALU_SCALAR: return instr_emit_alu(NULL, instr, dwords, info);
- }
- return -1;
-}
-
-struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr,
- int num, const char *swizzle, int flags)
-{
- if (!(flags & IR2_REG_EXPORT)) {
- struct ir2_register *reg = &instr->shader->reg[num];
-
- unsigned i;
- for (i = instr->shader->max_reg + 1; i <= num; i++)
- instr->shader->reg[i].write_idx = -1;
- instr->shader->max_reg = i - 1;
-
- if (reg->write_idx < 0)
- reg->write_idx = instr->idx;
- reg->write_idx2 = instr->idx;
- }
-
- struct ir2_dst_register *reg = &instr->dst_reg;
- reg->flags = flags;
- reg->num = num;
- reg->swizzle = ir2_strdup(instr->shader, swizzle);
- return reg;
-}
-
-struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr,
- int num, const char *swizzle, int flags)
-{
- assert(instr->src_reg_count + 1 <= ARRAY_SIZE(instr->src_reg));
- if (!(flags & IR2_REG_CONST)) {
- struct ir2_register *reg = &instr->shader->reg[num];
-
- reg->read_idx = instr->idx;
-
- unsigned i;
- for (i = instr->shader->max_reg + 1; i <= num; i++)
- instr->shader->reg[i].write_idx = -1;
- instr->shader->max_reg = i - 1;
- }
-
- struct ir2_src_register *reg = &instr->src_reg[instr->src_reg_count++];
- reg->flags = flags;
- reg->num = num;
- reg->swizzle = ir2_strdup(instr->shader, swizzle);
- return reg;
-}
-
-static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n)
-{
- uint32_t swiz = 0;
- int i;
-
- assert((reg->flags & ~IR2_REG_INPUT) == 0);
- assert(reg->swizzle);
-
- DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
-
- for (i = n-1; i >= 0; i--) {
- swiz <<= 2;
- switch (reg->swizzle[i]) {
- default:
- ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
- case 'x': swiz |= 0x0; break;
- case 'y': swiz |= 0x1; break;
- case 'z': swiz |= 0x2; break;
- case 'w': swiz |= 0x3; break;
- }
- }
-
- return swiz;
-}
-
-static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg)
-{
- uint32_t swiz = 0;
- int i;
-
- assert(reg->flags == 0);
- assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
-
- DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
-
- if (reg->swizzle) {
- for (i = 3; i >= 0; i--) {
- swiz <<= 3;
- switch (reg->swizzle[i]) {
- default:
- ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
- case 'x': swiz |= 0x0; break;
- case 'y': swiz |= 0x1; break;
- case 'z': swiz |= 0x2; break;
- case 'w': swiz |= 0x3; break;
- case '0': swiz |= 0x4; break;
- case '1': swiz |= 0x5; break;
- case '_': swiz |= 0x7; break;
- }
- }
- } else {
- swiz = 0x688;
- }
-
- return swiz;
-}
-
-/* actually, a write-mask */
-static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg)
-{
- uint32_t swiz = 0;
- int i;
-
- assert((reg->flags & ~IR2_REG_EXPORT) == 0);
- assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
-
- DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
-
- if (reg->swizzle) {
- for (i = 3; i >= 0; i--) {
- swiz <<= 1;
- if (reg->swizzle[i] == "xyzw"[i]) {
- swiz |= 0x1;
- } else if (reg->swizzle[i] != '_') {
- ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
- break;
- }
- }
- } else {
- swiz = 0xf;
- }
-
- return swiz;
-}
-
-static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg)
-{
- uint32_t swiz = 0;
- int i;
-
- assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
-
- DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
-
- if (reg->swizzle) {
- for (i = 3; i >= 0; i--) {
- swiz <<= 2;
- switch (reg->swizzle[i]) {
- default:
- ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
- case 'x': swiz |= (0x0 - i) & 0x3; break;
- case 'y': swiz |= (0x1 - i) & 0x3; break;
- case 'z': swiz |= (0x2 - i) & 0x3; break;
- case 'w': swiz |= (0x3 - i) & 0x3; break;
- }
- }
- } else {
- swiz = 0x0;
- }
-
- return swiz;
-}