/* * Copyright (c) 2012 Rob Clark * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "ir-a2xx.h" #include #include #include #include #include "freedreno_util.h" #include "instr-a2xx.h" #define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) #define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) #define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__) static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, uint32_t idx, struct ir2_shader_info *info); static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n); static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg); static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg); static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg); /* simple allocator to carve allocations out of an up-front allocated heap, * so that we can free everything easily in one shot. */ static void * ir2_alloc(struct ir2_shader *shader, int sz) { void *ptr = &shader->heap[shader->heap_idx]; shader->heap_idx += align(sz, 4) / 4; return ptr; } static char * ir2_strdup(struct ir2_shader *shader, const char *str) { char *ptr = NULL; if (str) { int len = strlen(str); ptr = ir2_alloc(shader, len+1); memcpy(ptr, str, len); ptr[len] = '\0'; } return ptr; } struct ir2_shader * ir2_shader_create(void) { DEBUG_MSG(""); struct ir2_shader *shader = calloc(1, sizeof(struct ir2_shader)); shader->max_reg = -1; return shader; } void ir2_shader_destroy(struct ir2_shader *shader) { DEBUG_MSG(""); free(shader); } /* check if an instruction is a simple MOV */ static struct ir2_instruction * simple_mov(struct ir2_instruction *instr, bool output) { struct ir2_src_register *src_reg = instr->src_reg; struct ir2_dst_register *dst_reg = &instr->dst_reg; struct ir2_register *reg; unsigned i; /* MAXv used for MOV */ if (instr->instr_type != IR2_ALU_VECTOR || instr->alu_vector.opc != MAXv) return NULL; /* non identical srcs */ if (src_reg[0].num != src_reg[1].num) return NULL; /* flags */ int flags = IR2_REG_NEGATE | IR2_REG_ABS; if (output) flags |= IR2_REG_INPUT | IR2_REG_CONST; if ((src_reg[0].flags & flags) || (src_reg[1].flags & flags)) return NULL; /* clamping */ if (instr->alu_vector.clamp) return NULL; /* swizzling */ for (i = 0; i < 4; i++) { char swiz = (dst_reg->swizzle ? dst_reg->swizzle : "xyzw")[i]; if (swiz == '_') continue; if (swiz != (src_reg[0].swizzle ? src_reg[0].swizzle : "xyzw")[i] || swiz != (src_reg[1].swizzle ? src_reg[1].swizzle : "xyzw")[i]) return NULL; } if (output) reg = &instr->shader->reg[src_reg[0].num]; else reg = &instr->shader->reg[dst_reg->num]; assert(reg->write_idx >= 0); if (reg->write_idx != reg->write_idx2) return NULL; if (!output) return instr; instr = instr->shader->instr[reg->write_idx]; return instr->instr_type != IR2_ALU_VECTOR ? NULL : instr; } static int src_to_reg(struct ir2_instruction *instr, struct ir2_src_register *reg) { if (reg->flags & IR2_REG_CONST) return reg->num; return instr->shader->reg[reg->num].reg; } static int dst_to_reg(struct ir2_instruction *instr, struct ir2_dst_register *reg) { if (reg->flags & IR2_REG_EXPORT) return reg->num; return instr->shader->reg[reg->num].reg; } static bool mask_get(uint32_t *mask, unsigned index) { return !!(mask[index / 32] & 1 << index % 32); } static void mask_set(uint32_t *mask, struct ir2_register *reg, int index) { if (reg) { unsigned i; for (i = 0; i < ARRAY_SIZE(reg->regmask); i++) mask[i] |= reg->regmask[i]; } if (index >= 0) mask[index / 32] |= 1 << index % 32; } static bool sets_pred(struct ir2_instruction *instr) { return instr->instr_type == IR2_ALU_SCALAR && instr->alu_scalar.opc >= PRED_SETEs && instr->alu_scalar.opc <= PRED_SET_RESTOREs; } void* ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info) { /* NOTES * blob compiler seems to always puts PRED_* instrs in a CF by * themselves, and wont combine EQ/NE in the same CF * (not doing this - doesn't seem to make a difference) * * TODO: implement scheduling for combining vector+scalar instructions * -some vector instructions can be replaced by scalar */ /* first step: * 1. remove "NOP" MOV instructions generated by TGSI for input/output: * 2. track information for register allocation, and to remove * the dead code when some exports are not needed * 3. add additional instructions for a20x hw binning if needed * NOTE: modifies the shader instrs * this step could be done as instructions are added by compiler instead */ /* mask of exports that must be generated * used to avoid calculating ps exports with hw binning */ uint64_t export = ~0ull; /* bitmask of variables required for exports defined by "export" */ uint32_t export_mask[REG_MASK/32+1] = {}; unsigned idx, reg_idx; unsigned max_input = 0; int export_size = -1; for (idx = 0; idx < shader->instr_count; idx++) { struct ir2_instruction *instr = shader->instr[idx], *prev; struct ir2_dst_register dst_reg = instr->dst_reg; if (dst_reg.flags & IR2_REG_EXPORT) { if (dst_reg.num < 32) export_size++; if ((prev = simple_mov(instr, true))) { /* copy instruction but keep dst */ *instr = *prev; instr->dst_reg = dst_reg; } } for (reg_idx = 0; reg_idx < instr->src_reg_count; reg_idx++) { struct ir2_src_register *src_reg = &instr->src_reg[reg_idx]; struct ir2_register *reg; int num; if (src_reg->flags & IR2_REG_CONST) continue; num = src_reg->num; reg = &shader->reg[num]; reg->read_idx = idx; if (src_reg->flags & IR2_REG_INPUT) { max_input = MAX2(max_input, num); } else { /* bypass simple mov used to set src_reg */ assert(reg->write_idx >= 0); prev = shader->instr[reg->write_idx]; if (simple_mov(prev, false)) { *src_reg = prev->src_reg[0]; /* process same src_reg again */ reg_idx -= 1; continue; } } /* update dependencies */ uint32_t *mask = (dst_reg.flags & IR2_REG_EXPORT) ? export_mask : shader->reg[dst_reg.num].regmask; mask_set(mask, reg, num); if (sets_pred(instr)) mask_set(export_mask, reg, num); } } /* second step: * emit instructions (with CFs) + RA */ instr_cf_t cfs[128], *cf = cfs; uint32_t alufetch[3*256], *af = alufetch; /* RA is done on write, so inputs must be allocated here */ for (reg_idx = 0; reg_idx <= max_input; reg_idx++) shader->reg[reg_idx].reg = reg_idx; info->max_reg = max_input; /* CF instr state */ instr_cf_exec_t exec = { .opc = EXEC }; instr_cf_alloc_t alloc = { .opc = ALLOC }; bool need_alloc = 0; bool pos_export = 0; export_size = MAX2(export_size, 0); for (idx = 0; idx < shader->instr_count; idx++) { struct ir2_instruction *instr = shader->instr[idx]; struct ir2_dst_register *dst_reg = &instr->dst_reg; unsigned num = dst_reg->num; struct ir2_register *reg; /* a2xx only has 64 registers, so we can use a single 64-bit mask */ uint64_t regmask = 0ull; /* compute the current regmask */ for (reg_idx = 0; (int) reg_idx <= shader->max_reg; reg_idx++) { reg = &shader->reg[reg_idx]; if ((int) idx > reg->write_idx && idx < reg->read_idx) regmask |= (1ull << reg->reg); } if (dst_reg->flags & IR2_REG_EXPORT) { /* skip if export is not needed */ if (!(export & (1ull << num))) continue; /* ALLOC CF: * want to alloc all < 32 at once * 32/33 and 62/63 come in pairs * XXX assuming all 3 types are never interleaved */ if (num < 32) { alloc.size = export_size; alloc.buffer_select = SQ_PARAMETER_PIXEL; need_alloc = export_size >= 0; export_size = -1; } else if (num == 32 || num == 33) { alloc.size = 0; alloc.buffer_select = SQ_MEMORY; need_alloc = num != 33; } else { alloc.size = 0; alloc.buffer_select = SQ_POSITION; need_alloc = !pos_export; pos_export = true; } } else { /* skip if dst register not needed to compute exports */ if (!mask_get(export_mask, num)) continue; /* RA on first write */ reg = &shader->reg[num]; if (reg->write_idx == idx) { reg->reg = ffsll(~regmask) - 1; info->max_reg = MAX2(info->max_reg, reg->reg); } } if (exec.count == 6 || (exec.count && need_alloc)) { *cf++ = *(instr_cf_t*) &exec; exec.address += exec.count; exec.serialize = 0; exec.count = 0; } if (need_alloc) { *cf++ = *(instr_cf_t*) &alloc; need_alloc = false; } int ret = instr_emit(instr, af, idx, info); af += 3; assert(!ret); if (instr->instr_type == IR2_FETCH) exec.serialize |= 0x1 << exec.count * 2; if (instr->sync) exec.serialize |= 0x2 << exec.count * 2; exec.count += 1; } exec.opc = !export_size ? EXEC : EXEC_END; *cf++ = *(instr_cf_t*) &exec; exec.address += exec.count; exec.serialize = 0; exec.count = 0; /* GPU will hang without at least one pixel alloc */ if (!export_size) { alloc.size = 0; alloc.buffer_select = SQ_PARAMETER_PIXEL; *cf++ = *(instr_cf_t*) &alloc; exec.opc = EXEC_END; *cf++ = *(instr_cf_t*) &exec; } unsigned num_cfs = cf - cfs; /* insert nop to get an even # of CFs */ if (num_cfs % 2) { *cf++ = (instr_cf_t) { .opc = NOP }; num_cfs++; } /* offset cf addrs */ for (idx = 0; idx < num_cfs; idx++) { switch (cfs[idx].opc) { case EXEC: case EXEC_END: cfs[idx].exec.address += num_cfs / 2; break; default: break; /* XXX and any other address using cf that gets implemented */ } } /* concatenate cfs+alufetchs */ uint32_t cfdwords = num_cfs / 2 * 3; uint32_t alufetchdwords = exec.address * 3; info->sizedwords = cfdwords + alufetchdwords; uint32_t *dwords = malloc(info->sizedwords * 4); assert(dwords); memcpy(dwords, cfs, cfdwords * 4); memcpy(&dwords[cfdwords], alufetch, alufetchdwords * 4); return dwords; } struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, int instr_type) { struct ir2_instruction *instr = ir2_alloc(shader, sizeof(struct ir2_instruction)); DEBUG_MSG("%d", instr_type); instr->shader = shader; instr->idx = shader->instr_count; instr->pred = shader->pred; instr->instr_type = instr_type; shader->instr[shader->instr_count++] = instr; return instr; } /* * FETCH instructions: */ static int instr_emit_fetch(struct ir2_instruction *instr, uint32_t *dwords, uint32_t idx, struct ir2_shader_info *info) { instr_fetch_t *fetch = (instr_fetch_t *)dwords; struct ir2_dst_register *dst_reg = &instr->dst_reg; struct ir2_src_register *src_reg = &instr->src_reg[0]; memset(fetch, 0, sizeof(*fetch)); fetch->opc = instr->fetch.opc; if (instr->fetch.opc == VTX_FETCH) { instr_fetch_vtx_t *vtx = &fetch->vtx; assert(instr->fetch.stride <= 0xff); assert(instr->fetch.fmt <= 0x3f); assert(instr->fetch.const_idx <= 0x1f); assert(instr->fetch.const_idx_sel <= 0x3); vtx->src_reg = src_to_reg(instr, src_reg); vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); vtx->dst_reg = dst_to_reg(instr, dst_reg); vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); vtx->must_be_one = 1; vtx->const_index = instr->fetch.const_idx; vtx->const_index_sel = instr->fetch.const_idx_sel; vtx->format_comp_all = !!instr->fetch.is_signed; vtx->num_format_all = !instr->fetch.is_normalized; vtx->format = instr->fetch.fmt; vtx->stride = instr->fetch.stride; vtx->offset = instr->fetch.offset; if (instr->pred != IR2_PRED_NONE) { vtx->pred_select = 1; vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; } /* XXX seems like every FETCH but the first has * this bit set: */ vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; vtx->reserved0 = (idx > 0) ? 0x2 : 0x3; } else if (instr->fetch.opc == TEX_FETCH) { instr_fetch_tex_t *tex = &fetch->tex; assert(instr->fetch.const_idx <= 0x1f); tex->src_reg = src_to_reg(instr, src_reg); tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); tex->dst_reg = dst_to_reg(instr, dst_reg); tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); tex->const_idx = instr->fetch.const_idx; tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; tex->min_filter = TEX_FILTER_USE_FETCH_CONST; tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; tex->use_comp_lod = 1; tex->use_reg_lod = !instr->fetch.is_cube; tex->sample_location = SAMPLE_CENTER; tex->tx_coord_denorm = instr->fetch.is_rect; if (instr->pred != IR2_PRED_NONE) { tex->pred_select = 1; tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; } } else { ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); return -1; } return 0; } /* * ALU instructions: */ static int instr_emit_alu(struct ir2_instruction *instr_v, struct ir2_instruction *instr_s, uint32_t *dwords, struct ir2_shader_info *info) { instr_alu_t *alu = (instr_alu_t *)dwords; struct ir2_dst_register *vdst_reg, *sdst_reg; struct ir2_src_register *src1_reg, *src2_reg, *src3_reg; struct ir2_shader *shader = instr_v ? instr_v->shader : instr_s->shader; enum ir2_pred pred = IR2_PRED_NONE; memset(alu, 0, sizeof(*alu)); vdst_reg = NULL; sdst_reg = NULL; src1_reg = NULL; src2_reg = NULL; src3_reg = NULL; if (instr_v) { vdst_reg = &instr_v->dst_reg; assert(instr_v->src_reg_count >= 2); src1_reg = &instr_v->src_reg[0]; src2_reg = &instr_v->src_reg[1]; if (instr_v->src_reg_count > 2) src3_reg = &instr_v->src_reg[2]; pred = instr_v->pred; } if (instr_s) { sdst_reg = &instr_s->dst_reg; assert(instr_s->src_reg_count == 1); assert(!instr_v || vdst_reg->flags == sdst_reg->flags); assert(!instr_v || pred == instr_s->pred); if (src3_reg) { assert(src3_reg->flags == instr_s->src_reg[0].flags); assert(src3_reg->num == instr_s->src_reg[0].num); assert(!strcmp(src3_reg->swizzle, instr_s->src_reg[0].swizzle)); } src3_reg = &instr_s->src_reg[0]; pred = instr_s->pred; } if (vdst_reg) { assert((vdst_reg->flags & ~IR2_REG_EXPORT) == 0); assert(!vdst_reg->swizzle || (strlen(vdst_reg->swizzle) == 4)); alu->vector_opc = instr_v->alu_vector.opc; alu->vector_write_mask = reg_alu_dst_swiz(vdst_reg); alu->vector_dest = dst_to_reg(instr_v, vdst_reg); } else { alu->vector_opc = MAXv; } if (sdst_reg) { alu->scalar_opc = instr_s->alu_scalar.opc; alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); alu->scalar_dest = dst_to_reg(instr_s, sdst_reg); } else { /* not sure if this is required, but adreno compiler seems * to always set scalar opc to MAXs if it is not used: */ alu->scalar_opc = MAXs; } alu->export_data = !!((instr_v ? vdst_reg : sdst_reg)->flags & IR2_REG_EXPORT); /* export32 has this bit set.. it seems to do more than just set * the base address of the constants used to zero * TODO make this less of a hack */ if (alu->export_data && alu->vector_dest == 32) { assert(!instr_s); alu->relative_addr = 1; } if (src1_reg) { if (src1_reg->flags & IR2_REG_CONST) { assert(!(src1_reg->flags & IR2_REG_ABS)); alu->src1_reg_const = src1_reg->num; } else { alu->src1_reg = shader->reg[src1_reg->num].reg; alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); } alu->src1_swiz = reg_alu_src_swiz(src1_reg); alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); } else { alu->src1_sel = 1; } if (src2_reg) { if (src2_reg->flags & IR2_REG_CONST) { assert(!(src2_reg->flags & IR2_REG_ABS)); alu->src2_reg_const = src2_reg->num; } else { alu->src2_reg = shader->reg[src2_reg->num].reg; alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); } alu->src2_swiz = reg_alu_src_swiz(src2_reg); alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); } else { alu->src2_sel = 1; } if (src3_reg) { if (src3_reg->flags & IR2_REG_CONST) { assert(!(src3_reg->flags & IR2_REG_ABS)); alu->src3_reg_const = src3_reg->num; } else { alu->src3_reg = shader->reg[src3_reg->num].reg; alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); } alu->src3_swiz = reg_alu_src_swiz(src3_reg); alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); } else { /* not sure if this is required, but adreno compiler seems * to always set register bank for 3rd src if unused: */ alu->src3_sel = 1; } alu->vector_clamp = instr_v ? instr_v->alu_vector.clamp : 0; alu->scalar_clamp = instr_s ? instr_s->alu_scalar.clamp : 0; if (pred != IR2_PRED_NONE) alu->pred_select = (pred == IR2_PRED_EQ) ? 3 : 2; return 0; } static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, uint32_t idx, struct ir2_shader_info *info) { switch (instr->instr_type) { case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); case IR2_ALU_VECTOR: return instr_emit_alu(instr, NULL, dwords, info); case IR2_ALU_SCALAR: return instr_emit_alu(NULL, instr, dwords, info); } return -1; } struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, int num, const char *swizzle, int flags) { if (!(flags & IR2_REG_EXPORT)) { struct ir2_register *reg = &instr->shader->reg[num]; unsigned i; for (i = instr->shader->max_reg + 1; i <= num; i++) instr->shader->reg[i].write_idx = -1; instr->shader->max_reg = i - 1; if (reg->write_idx < 0) reg->write_idx = instr->idx; reg->write_idx2 = instr->idx; } struct ir2_dst_register *reg = &instr->dst_reg; reg->flags = flags; reg->num = num; reg->swizzle = ir2_strdup(instr->shader, swizzle); return reg; } struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, int num, const char *swizzle, int flags) { assert(instr->src_reg_count + 1 <= ARRAY_SIZE(instr->src_reg)); if (!(flags & IR2_REG_CONST)) { struct ir2_register *reg = &instr->shader->reg[num]; reg->read_idx = instr->idx; unsigned i; for (i = instr->shader->max_reg + 1; i <= num; i++) instr->shader->reg[i].write_idx = -1; instr->shader->max_reg = i - 1; } struct ir2_src_register *reg = &instr->src_reg[instr->src_reg_count++]; reg->flags = flags; reg->num = num; reg->swizzle = ir2_strdup(instr->shader, swizzle); return reg; } static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n) { uint32_t swiz = 0; int i; assert((reg->flags & ~IR2_REG_INPUT) == 0); assert(reg->swizzle); DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); for (i = n-1; i >= 0; i--) { swiz <<= 2; switch (reg->swizzle[i]) { default: ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); case 'x': swiz |= 0x0; break; case 'y': swiz |= 0x1; break; case 'z': swiz |= 0x2; break; case 'w': swiz |= 0x3; break; } } return swiz; } static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg) { uint32_t swiz = 0; int i; assert(reg->flags == 0); assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); if (reg->swizzle) { for (i = 3; i >= 0; i--) { swiz <<= 3; switch (reg->swizzle[i]) { default: ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); case 'x': swiz |= 0x0; break; case 'y': swiz |= 0x1; break; case 'z': swiz |= 0x2; break; case 'w': swiz |= 0x3; break; case '0': swiz |= 0x4; break; case '1': swiz |= 0x5; break; case '_': swiz |= 0x7; break; } } } else { swiz = 0x688; } return swiz; } /* actually, a write-mask */ static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg) { uint32_t swiz = 0; int i; assert((reg->flags & ~IR2_REG_EXPORT) == 0); assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); if (reg->swizzle) { for (i = 3; i >= 0; i--) { swiz <<= 1; if (reg->swizzle[i] == "xyzw"[i]) { swiz |= 0x1; } else if (reg->swizzle[i] != '_') { ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); break; } } } else { swiz = 0xf; } return swiz; } static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg) { uint32_t swiz = 0; int i; assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); if (reg->swizzle) { for (i = 3; i >= 0; i--) { swiz <<= 2; switch (reg->swizzle[i]) { default: ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); case 'x': swiz |= (0x0 - i) & 0x3; break; case 'y': swiz |= (0x1 - i) & 0x3; break; case 'z': swiz |= (0x2 - i) & 0x3; break; case 'w': swiz |= (0x3 - i) & 0x3; break; } } } else { swiz = 0x0; } return swiz; }