From 67610a0323ddfe0d7cced121abb43286b862b495 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 18 Dec 2018 20:15:57 -0500 Subject: freedreno: a2xx: NIR backend This patch replaces the a2xx TGSI compiler with a NIR compiler. It also adds several new features: -gl_FrontFacing, gl_FragCoord, gl_PointCoord, gl_PointSize -control flow (including loops) -texture related features (LOD/bias, cubemaps) -filling scalar ALU slot when possible Signed-off-by: Jonathan Marek --- src/gallium/drivers/freedreno/a2xx/fd2_compiler.c | 1119 --------------------- 1 file changed, 1119 deletions(-) delete mode 100644 src/gallium/drivers/freedreno/a2xx/fd2_compiler.c (limited to 'src/gallium/drivers/freedreno/a2xx/fd2_compiler.c') diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c deleted file mode 100644 index 156bfc247c2..00000000000 --- a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c +++ /dev/null @@ -1,1119 +0,0 @@ -/* - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" - -#include "fd2_compiler.h" -#include "fd2_program.h" -#include "fd2_util.h" - -#include "instr-a2xx.h" -#include "ir-a2xx.h" - -struct fd2_compile_context { - struct fd_program_stateobj *prog; - struct fd2_shader_stateobj *so; - - struct tgsi_parse_context parser; - unsigned type; - - /* predicate stack: */ - int pred_depth; - enum ir2_pred pred_stack[8]; - - /* Internal-Temporary and Predicate register assignment: - * - * Some TGSI instructions which translate into multiple actual - * instructions need one or more temporary registers, which are not - * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). - * And some instructions (texture fetch) cannot write directly to - * output registers. We could be more clever and re-use dst or a - * src register in some cases. But for now don't try to be clever. - * Eventually we should implement an optimization pass that re- - * juggles the register usage and gets rid of unneeded temporaries. - * - * The predicate register must be valid across multiple TGSI - * instructions, but internal temporary's do not. For this reason, - * once the predicate register is requested, until it is no longer - * needed, it gets the first register slot after after the TGSI - * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the - * internal temporaries get the register slots above this. - */ - - int pred_reg; - int num_internal_temps; - - uint8_t num_regs[TGSI_FILE_COUNT]; - - /* maps input register idx to prog->export_linkage idx: */ - uint8_t input_export_idx[64]; - - /* maps output register idx to prog->export_linkage idx: */ - uint8_t output_export_idx[64]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - // TODO we can skip emit exports in the VS that the FS doesn't need.. - // and get rid perhaps of num_param.. - unsigned num_position, num_param; - unsigned position, psize; - - uint64_t need_sync; -}; - -static int -semantic_idx(struct tgsi_declaration_semantic *semantic) -{ - int idx = semantic->Name; - if (idx == TGSI_SEMANTIC_GENERIC) - idx = TGSI_SEMANTIC_COUNT + semantic->Index; - return idx; -} - -/* assign/get the input/export register # for given semantic idx as - * returned by semantic_idx(): - */ -static int -export_linkage(struct fd2_compile_context *ctx, int idx) -{ - struct fd_program_stateobj *prog = ctx->prog; - - /* if first time we've seen this export, assign the next available slot: */ - if (prog->export_linkage[idx] == 0xff) - prog->export_linkage[idx] = prog->num_exports++; - - return prog->export_linkage[idx]; -} - -static unsigned -compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so) -{ - unsigned ret; - - ctx->prog = prog; - ctx->so = so; - ctx->pred_depth = 0; - - ret = tgsi_parse_init(&ctx->parser, so->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - ctx->position = ~0; - ctx->psize = ~0; - ctx->num_position = 0; - ctx->num_param = 0; - ctx->need_sync = 0; - ctx->immediate_idx = 0; - ctx->pred_reg = -1; - ctx->num_internal_temps = 0; - - memset(ctx->num_regs, 0, sizeof(ctx->num_regs)); - memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx)); - memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx)); - - /* do first pass to extract declarations: */ - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - if (decl->Declaration.File == TGSI_FILE_OUTPUT) { - unsigned name = decl->Semantic.Name; - - assert(decl->Declaration.Semantic); // TODO is this ever not true? - - ctx->output_export_idx[decl->Range.First] = - semantic_idx(&decl->Semantic); - - if (ctx->type == PIPE_SHADER_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT]; - ctx->num_position++; - break; - case TGSI_SEMANTIC_PSIZE: - ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT]; - ctx->num_position++; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_GENERIC: - ctx->num_param++; - break; - default: - DBG("unknown VS semantic name: %s", - tgsi_semantic_names[name]); - assert(0); - } - } else { - switch (name) { - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_GENERIC: - ctx->num_param++; - break; - default: - DBG("unknown PS semantic name: %s", - tgsi_semantic_names[name]); - assert(0); - } - } - } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - ctx->input_export_idx[decl->Range.First] = - semantic_idx(&decl->Semantic); - } - ctx->num_regs[decl->Declaration.File] = - MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1); - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->num_immediates++; - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - default: - break; - } - } - - /* TGSI generated immediates are always entire vec4's, ones we - * generate internally are not: - */ - ctx->immediate_idx = ctx->so->num_immediates * 4; - - ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT]; - - tgsi_parse_free(&ctx->parser); - - return tgsi_parse_init(&ctx->parser, so->tokens); -} - -static void -compile_free(struct fd2_compile_context *ctx) -{ - tgsi_parse_free(&ctx->parser); -} - -static void -compile_vtx_fetch(struct fd2_compile_context *ctx) -{ - struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; - int i; - for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { - struct ir2_instruction *instr = ir2_instr_create( - ctx->so->ir, IR2_FETCH); - instr->fetch.opc = VTX_FETCH; - - ctx->need_sync |= 1 << (i+1); - - ir2_dst_create(instr, i+1, "xyzw", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - if (i == 0) - instr->sync = true; - - vfetch_instrs[i] = instr; - } - ctx->so->num_vfetch_instrs = i; -} - -/* - * For vertex shaders (VS): - * --- ------ ------------- - * - * Inputs: R1-R(num_input) - * Constants: C0-C(num_const-1) - * Immediates: C(num_const)-C(num_const+num_imm-1) - * Outputs: export0-export(n) and export62, export63 - * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) - * Temps: R(num_input+1)-R(num_input+num_temps) - * - * R0 could be clobbered after the vertex fetch instructions.. so we - * could use it for one of the temporaries. - * - * TODO: maybe the vertex fetch part could fetch first input into R0 as - * the last vtx fetch instruction, which would let us use the same - * register layout in either case.. although this is not what the blob - * compiler does. - * - * - * For frag shaders (PS): - * --- ---- ------------- - * - * Inputs: R0-R(num_input-1) - * Constants: same as VS - * Immediates: same as VS - * Outputs: export0-export(num_outputs) - * Temps: R(num_input)-R(num_input+num_temps-1) - * - * In either case, immediates are are postpended to the constants - * (uniforms). - * - */ - -static unsigned -get_temp_gpr(struct fd2_compile_context *ctx, int idx) -{ - unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; - if (ctx->type == PIPE_SHADER_VERTEX) - num++; - return num; -} - -static struct ir2_dst_register * -add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, - const struct tgsi_dst_register *dst) -{ - unsigned flags = 0, num = 0; - char swiz[5]; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - flags |= IR2_REG_EXPORT; - if (ctx->type == PIPE_SHADER_VERTEX) { - if (dst->Index == ctx->position) { - num = 62; - } else if (dst->Index == ctx->psize) { - num = 63; - } else { - num = export_linkage(ctx, - ctx->output_export_idx[dst->Index]); - } - } else { - num = dst->Index; - } - break; - case TGSI_FILE_TEMPORARY: - num = get_temp_gpr(ctx, dst->Index); - break; - default: - DBG("unsupported dst register file: %s", - tgsi_file_name(dst->File)); - assert(0); - break; - } - - swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; - swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; - swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; - swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_'; - swiz[4] = '\0'; - - return ir2_dst_create(alu, num, swiz, flags); -} - -static struct ir2_src_register * -add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, - const struct tgsi_src_register *src) -{ - static const char swiz_vals[] = { - 'x', 'y', 'z', 'w', - }; - char swiz[5]; - unsigned flags = 0, num = 0; - - switch (src->File) { - case TGSI_FILE_CONSTANT: - num = src->Index; - flags |= IR2_REG_CONST; - break; - case TGSI_FILE_INPUT: - if (ctx->type == PIPE_SHADER_VERTEX) { - num = src->Index + 1; - } else { - flags |= IR2_REG_INPUT; - num = export_linkage(ctx, - ctx->input_export_idx[src->Index]); - } - break; - case TGSI_FILE_TEMPORARY: - num = get_temp_gpr(ctx, src->Index); - break; - case TGSI_FILE_IMMEDIATE: - num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; - flags |= IR2_REG_CONST; - break; - default: - DBG("unsupported src register file: %s", - tgsi_file_name(src->File)); - assert(0); - break; - } - - if (src->Absolute) - flags |= IR2_REG_ABS; - if (src->Negate) - flags |= IR2_REG_NEGATE; - - swiz[0] = swiz_vals[src->SwizzleX]; - swiz[1] = swiz_vals[src->SwizzleY]; - swiz[2] = swiz_vals[src->SwizzleZ]; - swiz[3] = swiz_vals[src->SwizzleW]; - swiz[4] = '\0'; - - if ((ctx->need_sync & ((uint64_t)1 << num)) && - !(flags & IR2_REG_CONST)) { - alu->sync = true; - ctx->need_sync &= ~((uint64_t)1 << num); - } - - return ir2_reg_create(alu, num, swiz, flags); -} - -static void -add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - if (inst->Instruction.Saturate) { - alu->alu_vector.clamp = true; - } -} - -static void -add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - if (inst->Instruction.Saturate) { - alu->alu_scalar.clamp = true; - } -} - -static void -add_regs_vector_1(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_vector_2(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 2); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[1].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_vector_3(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 3); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[1].Register); - add_src_reg(ctx, alu, &inst->Src[2].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_scalar_1(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_scalar_clamp(inst, alu); -} - -/* - * Helpers for TGSI instructions that don't map to a single shader instr: - */ - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI op. - */ -static void -get_internal_temp(struct fd2_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) -{ - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - if (ctx->pred_reg != -1) - n++; - - tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; - - src_from_dst(tmp_src, tmp_dst); -} - -static void -get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - assert(ctx->pred_reg != -1); - - dst->File = TGSI_FILE_TEMPORARY; - dst->WriteMask = TGSI_WRITEMASK_W; - dst->Indirect = 0; - dst->Dimension = 0; - dst->Index = get_temp_gpr(ctx, ctx->pred_reg); - - if (src) { - src_from_dst(src, dst); - src->SwizzleX = TGSI_SWIZZLE_W; - src->SwizzleY = TGSI_SWIZZLE_W; - src->SwizzleZ = TGSI_SWIZZLE_W; - src->SwizzleW = TGSI_SWIZZLE_W; - } -} - -static void -push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) -{ - struct ir2_instruction *alu; - struct tgsi_dst_register pred_dst; - - if (ctx->pred_depth == 0) { - /* assign predicate register: */ - ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; - - get_predicate(ctx, &pred_dst, NULL); - - alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SETNEs); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, src); - } else { - struct tgsi_src_register pred_src; - - get_predicate(ctx, &pred_dst, &pred_src); - - alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, &pred_src); - add_src_reg(ctx, alu, src); - - // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make - // sure src reg is valid if it was calculated with a predicate - // condition.. - alu->pred = IR2_PRED_NONE; - } - - /* save previous pred state to restore in pop_predicate(): */ - ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; -} - -static void -pop_predicate(struct fd2_compile_context *ctx) -{ - /* restore previous predicate state: */ - ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; - - if (ctx->pred_depth != 0) { - struct ir2_instruction *alu; - struct tgsi_dst_register pred_dst; - struct tgsi_src_register pred_src; - - get_predicate(ctx, &pred_dst, &pred_src); - - alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SET_POPs); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, &pred_src); - alu->pred = IR2_PRED_NONE; - } else { - /* predicate register no longer needed: */ - ctx->pred_reg = -1; - } -} - -static void -get_immediate(struct fd2_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->num_immediates = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -/* POW(a,b) = EXP2(b * LOG2(a)) */ -static void -translate_pow(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct ir2_instruction *alu; - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - alu = ir2_instr_create_alu_s(ctx->so->ir, LOG_CLAMP); - add_dst_reg(ctx, alu, &tmp_dst); - add_src_reg(ctx, alu, &inst->Src[0].Register); - - alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, alu, &tmp_dst); - add_src_reg(ctx, alu, &tmp_src); - add_src_reg(ctx, alu, &inst->Src[1].Register); - - /* NOTE: some of the instructions, like EXP_IEEE, seem hard- - * coded to take their input from the w component. - */ - switch(inst->Dst[0].Register.WriteMask) { - case TGSI_WRITEMASK_X: - tmp_src.SwizzleW = TGSI_SWIZZLE_X; - break; - case TGSI_WRITEMASK_Y: - tmp_src.SwizzleW = TGSI_SWIZZLE_Y; - break; - case TGSI_WRITEMASK_Z: - tmp_src.SwizzleW = TGSI_SWIZZLE_Z; - break; - case TGSI_WRITEMASK_W: - tmp_src.SwizzleW = TGSI_SWIZZLE_W; - break; - default: - DBG("invalid writemask!"); - assert(0); - break; - } - - alu = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &tmp_src); - add_scalar_clamp(inst, alu); -} - -static void -translate_tex(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, unsigned opc) -{ - struct ir2_instruction *instr; - struct ir2_src_register *reg; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - const struct tgsi_src_register *coord; - bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || - inst->Instruction.Saturate; - int idx; - - if (using_temp || (opc == TGSI_OPCODE_TXP)) - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - if (opc == TGSI_OPCODE_TXP) { - static const char *swiz[] = { - [TGSI_SWIZZLE_X] = "xxxx", - [TGSI_SWIZZLE_Y] = "yyyy", - [TGSI_SWIZZLE_Z] = "zzzz", - [TGSI_SWIZZLE_W] = "wwww", - }; - - /* TXP - Projective Texture Lookup: - * - * coord.x = src0.x / src.w - * coord.y = src0.y / src.w - * coord.z = src0.z / src.w - * coord.w = src0.w - * bias = 0.0 - * - * dst = texture_sample(unit, coord, bias) - */ - - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[0].Register); - - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; - memcpy(add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle, - swiz[inst->Src[0].Register.SwizzleW], 4); - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; - add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; - add_src_reg(ctx, instr, &inst->Src[0].Register); - - coord = &tmp_src; - } else { - coord = &inst->Src[0].Register; - } - - instr = ir2_instr_create(ctx->so->ir, IR2_FETCH); - instr->fetch.opc = TEX_FETCH; - instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); - instr->fetch.is_rect = (inst->Texture.Texture == TGSI_TEXTURE_RECT); - assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? - - /* save off the tex fetch to be patched later with correct const_idx: */ - idx = ctx->so->num_tfetch_instrs++; - ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; - ctx->so->tfetch_instrs[idx].instr = instr; - - add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); - reg = add_src_reg(ctx, instr, coord); - - /* blob compiler always sets 3rd component to same as 1st for 2d: */ - if (inst->Texture.Texture == TGSI_TEXTURE_2D || inst->Texture.Texture == TGSI_TEXTURE_RECT) - reg->swizzle[2] = reg->swizzle[0]; - - /* dst register needs to be marked for sync: */ - ctx->need_sync |= 1 << instr->dst_reg.num; - - /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */ - instr->sync = true; - - if (using_temp) { - /* texture fetch can't write directly to export, so if tgsi - * is telling us the dst register is in output file, we load - * the texture to a temp and the use ALU instruction to move - * to output - */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); - add_src_reg(ctx, instr, &tmp_src); - add_vector_clamp(inst, instr); - } -} - -/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */ -/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */ -/* SEQ(a,b) = EQU((b - a), 1.0, 0.0) */ -/* SNE(a,b) = EQU((b - a), 0.0, 1.0) */ -static void -translate_sge_slt_seq_sne(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register tmp_const; - float c0, c1; - instr_vector_opc_t vopc; - - switch (opc) { - default: - assert(0); - case TGSI_OPCODE_SGE: - c0 = 1.0; - c1 = 0.0; - vopc = CNDGTEv; - break; - case TGSI_OPCODE_SLT: - c0 = 0.0; - c1 = 1.0; - vopc = CNDGTEv; - break; - case TGSI_OPCODE_SEQ: - c0 = 0.0; - c1 = 1.0; - vopc = CNDEv; - break; - case TGSI_OPCODE_SNE: - c0 = 1.0; - c1 = 0.0; - vopc = CNDEv; - break; - } - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; - add_src_reg(ctx, instr, &inst->Src[1].Register); - - instr = ir2_instr_create_alu_v(ctx->so->ir, vopc); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); - get_immediate(ctx, &tmp_const, fui(c1)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(c0)); - add_src_reg(ctx, instr, &tmp_const); -} - -/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ -static void -translate_lrp(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst1, tmp_dst2; - struct tgsi_src_register tmp_src1, tmp_src2; - struct tgsi_src_register tmp_const; - - get_internal_temp(ctx, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, &tmp_dst2, &tmp_src2); - - get_immediate(ctx, &tmp_const, fui(1.0)); - - /* tmp1 = (a * b) */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst1); - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[1].Register); - - /* tmp2 = (1 - a) */ - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &tmp_dst2); - add_src_reg(ctx, instr, &tmp_const); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; - - /* tmp2 = tmp2 * c */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst2); - add_src_reg(ctx, instr, &tmp_src2); - add_src_reg(ctx, instr, &inst->Src[2].Register); - - /* dst = tmp1 + tmp2 */ - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src1); - add_src_reg(ctx, instr, &tmp_src2); -} - -static void -translate_trig(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register tmp_const; - instr_scalar_opc_t op; - - switch (opc) { - default: - assert(0); - case TGSI_OPCODE_SIN: - op = SIN; - break; - case TGSI_OPCODE_COS: - op = COS; - break; - } - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - tmp_dst.WriteMask = TGSI_WRITEMASK_X; - tmp_src.SwizzleX = tmp_src.SwizzleY = - tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register); - get_immediate(ctx, &tmp_const, fui(0.159155)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(0.5)); - add_src_reg(ctx, instr, &tmp_const); - - instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &tmp_src); - add_src_reg(ctx, instr, &tmp_src); - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &tmp_src); - get_immediate(ctx, &tmp_const, fui(6.283185)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(-3.141593)); - add_src_reg(ctx, instr, &tmp_const); - - instr = ir2_instr_create_alu_s(ctx->so->ir, op); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); -} - -static void -translate_dp2(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct tgsi_src_register tmp_const; - struct ir2_instruction *instr; - /* DP2ADD c,a,b -> dot2(a,b) + c */ - /* for c we use the constant 0.0 */ - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT2ADDv); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[1].Register); - get_immediate(ctx, &tmp_const, fui(0.0f)); - add_src_reg(ctx, instr, &tmp_const); - add_vector_clamp(inst, instr); -} - -/* - * Main part of compiler/translator: - */ - -static void -translate_instruction(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - unsigned opc = inst->Instruction.Opcode; - struct ir2_instruction *instr; - - if (opc == TGSI_OPCODE_END) - return; - - /* TODO turn this into a table: */ - switch (opc) { - case TGSI_OPCODE_MOV: - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_RCP: - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_RSQ: - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIPSQ_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_SQRT: - instr = ir2_instr_create_alu_s(ctx->so->ir, SQRT_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_MUL: - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_ADD: - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_DP2: - translate_dp2(ctx, inst, opc); - break; - case TGSI_OPCODE_DP3: - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT3v); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_DP4: - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT4v); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_MIN: - instr = ir2_instr_create_alu_v(ctx->so->ir, MINv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_MAX: - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - translate_sge_slt_seq_sne(ctx, inst, opc); - break; - case TGSI_OPCODE_MAD: - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_regs_vector_3(ctx, inst, instr); - break; - case TGSI_OPCODE_LRP: - translate_lrp(ctx, inst, opc); - break; - case TGSI_OPCODE_FRC: - instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_FLR: - instr = ir2_instr_create_alu_v(ctx->so->ir, FLOORv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_EX2: - instr = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_POW: - translate_pow(ctx, inst); - break; - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - translate_trig(ctx, inst, opc); - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXP: - translate_tex(ctx, inst, opc); - break; - case TGSI_OPCODE_CMP: - instr = ir2_instr_create_alu_v(ctx->so->ir, CNDGTEv); - add_regs_vector_3(ctx, inst, instr); - instr->src_reg[0].flags ^= IR2_REG_NEGATE; /* src1 */ - break; - case TGSI_OPCODE_IF: - push_predicate(ctx, &inst->Src[0].Register); - ctx->so->ir->pred = IR2_PRED_EQ; - break; - case TGSI_OPCODE_ELSE: - ctx->so->ir->pred = IR2_PRED_NE; - break; - case TGSI_OPCODE_ENDIF: - pop_predicate(ctx); - break; - case TGSI_OPCODE_F2I: - instr = ir2_instr_create_alu_v(ctx->so->ir, TRUNCv); - add_regs_vector_1(ctx, inst, instr); - break; - default: - DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); - tgsi_dump(ctx->so->tokens, 0); - assert(0); - break; - } - - /* internal temporaries are only valid for the duration of a single - * TGSI instruction: - */ - ctx->num_internal_temps = 0; -} - -static void -compile_instructions(struct fd2_compile_context *ctx) -{ - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(ctx, - &ctx->parser.FullToken.FullInstruction); - break; - default: - break; - } - } -} - -int -fd2_compile_shader(struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so) -{ - struct fd2_compile_context ctx; - - ir2_shader_destroy(so->ir); - so->ir = ir2_shader_create(); - so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; - - if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) - return -1; - - if (ctx.type == PIPE_SHADER_VERTEX) { - compile_vtx_fetch(&ctx); - } else if (ctx.type == PIPE_SHADER_FRAGMENT) { - prog->num_exports = 0; - memset(prog->export_linkage, 0xff, - sizeof(prog->export_linkage)); - } - - compile_instructions(&ctx); - - compile_free(&ctx); - - return 0; -} - -- cgit v1.2.3