summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
diff options
context:
space:
mode:
authorJonathan Marek <jonathan@marek.ca>2018-12-18 20:15:57 -0500
committerRob Clark <robdclark@gmail.com>2019-01-22 14:45:03 +0000
commit67610a0323ddfe0d7cced121abb43286b862b495 (patch)
tree994887050ff640f4c2db9f56ec55688d2f1412a4 /src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
parentda3ca69afadb142c1591c75c2bfd345c8d4337de (diff)
freedreno: a2xx: NIR backend
This patch replaces the a2xx TGSI compiler with a NIR compiler. It also adds several new features: -gl_FrontFacing, gl_FragCoord, gl_PointCoord, gl_PointSize -control flow (including loops) -texture related features (LOD/bias, cubemaps) -filling scalar ALU slot when possible Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Diffstat (limited to 'src/gallium/drivers/freedreno/a2xx/fd2_compiler.c')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_compiler.c1119
1 files changed, 0 insertions, 1119 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
deleted file mode 100644
index 156bfc247c2..00000000000
--- a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
+++ /dev/null
@@ -1,1119 +0,0 @@
-/*
- * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <robclark@freedesktop.org>
- */
-
-#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_dump.h"
-
-#include "fd2_compiler.h"
-#include "fd2_program.h"
-#include "fd2_util.h"
-
-#include "instr-a2xx.h"
-#include "ir-a2xx.h"
-
-/* Per-shader state carried through one TGSI -> a2xx IR compile. */
-struct fd2_compile_context {
- /* program / shader state objects handed to compile_init(): */
- struct fd_program_stateobj *prog;
- struct fd2_shader_stateobj *so;
-
- /* TGSI token parser, and the processor type read from its header: */
- struct tgsi_parse_context parser;
- unsigned type;
-
- /* predicate stack: */
- int pred_depth;
- enum ir2_pred pred_stack[8];
-
- /* Internal-Temporary and Predicate register assignment:
- *
- * Some TGSI instructions which translate into multiple actual
- * instructions need one or more temporary registers, which are not
- * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
- * And some instructions (texture fetch) cannot write directly to
- * output registers. We could be more clever and re-use dst or a
- * src register in some cases. But for now don't try to be clever.
- * Eventually we should implement an optimization pass that re-
- * juggles the register usage and gets rid of unneeded temporaries.
- *
- * The predicate register must be valid across multiple TGSI
- * instructions, but internal temporaries need not be. For this
- * reason, once the predicate register is requested, until it is no
- * longer needed, it gets the first register slot after the TGSI
- * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
- * internal temporaries get the register slots above this.
- */
-
- /* -1 while no predicate register is assigned: */
- int pred_reg;
- /* reset to 0 after every translated TGSI instruction: */
- int num_internal_temps;
-
- /* per register file: highest declared index + 1 */
- uint8_t num_regs[TGSI_FILE_COUNT];
-
- /* maps input register idx to prog->export_linkage idx: */
- uint8_t input_export_idx[64];
-
- /* maps output register idx to prog->export_linkage idx: */
- uint8_t output_export_idx[64];
-
- /* idx/slot for last compiler generated immediate */
- unsigned immediate_idx;
-
- // TODO we can skip emit exports in the VS that the FS doesn't need..
- // and get rid perhaps of num_param..
- unsigned num_position, num_param;
- /* output register indices of position/psize, or ~0 if not declared: */
- unsigned position, psize;
-
- /* bitmask, one bit per register number, of registers written by a
- * fetch whose first non-constant read must set the sync flag:
- */
- uint64_t need_sync;
-};
-
-/* Flatten a TGSI semantic into one index: GENERIC semantics are placed
- * after all named semantics, offset by their semantic Index; every other
- * semantic maps to its Name.
- */
-static int
-semantic_idx(struct tgsi_declaration_semantic *semantic)
-{
-	if (semantic->Name == TGSI_SEMANTIC_GENERIC)
-		return TGSI_SEMANTIC_COUNT + semantic->Index;
-
-	return semantic->Name;
-}
-
-/* Look up the input/export register number bound to a semantic index
- * (as produced by semantic_idx()).  An entry of 0xff means "not yet
- * assigned"; in that case the next free export slot is allocated.
- */
-static int
-export_linkage(struct fd2_compile_context *ctx, int idx)
-{
-	struct fd_program_stateobj *p = ctx->prog;
-
-	if (p->export_linkage[idx] != 0xff)
-		return p->export_linkage[idx];
-
-	/* first use of this export: hand out the next slot */
-	p->export_linkage[idx] = p->num_exports++;
-
-	return p->export_linkage[idx];
-}
-
-/* Initialize the compile context and run a first pass over the TGSI
- * tokens to collect declarations and immediates.
- *
- * Returns the tgsi_parse_init() status; on success the parser has been
- * re-initialized so the caller can walk the tokens again from the start.
- */
-static unsigned
-compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog,
- struct fd2_shader_stateobj *so)
-{
- unsigned ret;
-
- ctx->prog = prog;
- ctx->so = so;
- ctx->pred_depth = 0;
-
- ret = tgsi_parse_init(&ctx->parser, so->tokens);
- if (ret != TGSI_PARSE_OK)
- return ret;
-
- ctx->type = ctx->parser.FullHeader.Processor.Processor;
- /* ~0 == "no such output declared": */
- ctx->position = ~0;
- ctx->psize = ~0;
- ctx->num_position = 0;
- ctx->num_param = 0;
- ctx->need_sync = 0;
- ctx->immediate_idx = 0;
- ctx->pred_reg = -1;
- ctx->num_internal_temps = 0;
-
- memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
- memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
- memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
-
- /* do first pass to extract declarations: */
- while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
- tgsi_parse_token(&ctx->parser);
-
- switch (ctx->parser.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION: {
- struct tgsi_full_declaration *decl =
- &ctx->parser.FullToken.FullDeclaration;
- if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
- unsigned name = decl->Semantic.Name;
-
- assert(decl->Declaration.Semantic); // TODO is this ever not true?
-
- /* only the first register of the declared range is recorded: */
- ctx->output_export_idx[decl->Range.First] =
- semantic_idx(&decl->Semantic);
-
- if (ctx->type == PIPE_SHADER_VERTEX) {
- switch (name) {
- case TGSI_SEMANTIC_POSITION:
- /* num_regs[OUTPUT] has not yet been bumped for this decl: */
- ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
- ctx->num_position++;
- break;
- case TGSI_SEMANTIC_PSIZE:
- ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
- ctx->num_position++;
- break;
- case TGSI_SEMANTIC_COLOR:
- case TGSI_SEMANTIC_GENERIC:
- ctx->num_param++;
- break;
- default:
- DBG("unknown VS semantic name: %s",
- tgsi_semantic_names[name]);
- assert(0);
- }
- } else {
- switch (name) {
- case TGSI_SEMANTIC_COLOR:
- case TGSI_SEMANTIC_GENERIC:
- ctx->num_param++;
- break;
- default:
- DBG("unknown PS semantic name: %s",
- tgsi_semantic_names[name]);
- assert(0);
- }
- }
- } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- ctx->input_export_idx[decl->Range.First] =
- semantic_idx(&decl->Semantic);
- }
- /* track highest declared index + 1 for every register file: */
- ctx->num_regs[decl->Declaration.File] =
- MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1);
- break;
- }
- case TGSI_TOKEN_TYPE_IMMEDIATE: {
- struct tgsi_full_immediate *imm =
- &ctx->parser.FullToken.FullImmediate;
- unsigned n = ctx->so->num_immediates++;
- /* copies 16 bytes -- presumably one vec4 of 32-bit values;
- * verify against the immediates[] declaration:
- */
- memcpy(ctx->so->immediates[n].val, imm->u, 16);
- break;
- }
- default:
- break;
- }
- }
-
- /* TGSI generated immediates are always entire vec4's, ones we
- * generate internally are not:
- */
- ctx->immediate_idx = ctx->so->num_immediates * 4;
-
- ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
-
- /* rewind: drop the first-pass parser and start a fresh one */
- tgsi_parse_free(&ctx->parser);
-
- return tgsi_parse_init(&ctx->parser, so->tokens);
-}
-
-/* Release the TGSI parser owned by the compile context. */
-static void
-compile_free(struct fd2_compile_context *ctx)
-{
-	struct tgsi_parse_context *parser = &ctx->parser;
-
-	tgsi_parse_free(parser);
-}
-
-/* Emit one VTX_FETCH per declared vertex-shader input.
- *
- * Input i is fetched into register i+1 (all four components), reading
- * from register 0.  Each destination register is flagged in need_sync
- * so the first instruction that reads it sets the sync flag.  The
- * created instructions are recorded in so->vfetch_instrs[].
- */
-static void
-compile_vtx_fetch(struct fd2_compile_context *ctx)
-{
-	struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
-	int i;
-	for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
-		struct ir2_instruction *instr = ir2_instr_create(
-				ctx->so->ir, IR2_FETCH);
-		instr->fetch.opc = VTX_FETCH;
-
-		/* need_sync is a 64-bit mask: shift a 64-bit one (a plain int
-		 * shift is wrong from bit 31 up), and make sure the register
-		 * number actually fits in the mask:
-		 */
-		assert(i + 1 < 64);
-		ctx->need_sync |= (uint64_t)1 << (i+1);
-
-		ir2_dst_create(instr, i+1, "xyzw", 0);
-		ir2_reg_create(instr, 0, "x", IR2_REG_INPUT);
-
-		/* only the first fetch carries the sync flag: */
-		if (i == 0)
-			instr->sync = true;
-
-		vfetch_instrs[i] = instr;
-	}
-	ctx->so->num_vfetch_instrs = i;
-}
-
-/*
- * For vertex shaders (VS):
- * --- ------ -------------
- *
- * Inputs: R1-R(num_input)
- * Constants: C0-C(num_const-1)
- * Immediates: C(num_const)-C(num_const+num_imm-1)
- * Outputs: export0-export(n) and export62, export63
- * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
- * Temps: R(num_input+1)-R(num_input+num_temps)
- *
- * R0 could be clobbered after the vertex fetch instructions.. so we
- * could use it for one of the temporaries.
- *
- * TODO: maybe the vertex fetch part could fetch first input into R0 as
- * the last vtx fetch instruction, which would let us use the same
- * register layout in either case.. although this is not what the blob
- * compiler does.
- *
- *
- * For frag shaders (PS):
- * --- ---- -------------
- *
- * Inputs: R0-R(num_input-1)
- * Constants: same as VS
- * Immediates: same as VS
- * Outputs: export0-export(num_outputs)
- * Temps: R(num_input)-R(num_input+num_temps-1)
- *
- * In either case, immediates are appended to the constants
- * (uniforms).
- *
- */
-
-/* Map a TGSI temporary index to a register number: temporaries sit
- * directly after the inputs, shifted up by one more in vertex shaders.
- */
-static unsigned
-get_temp_gpr(struct fd2_compile_context *ctx, int idx)
-{
-	unsigned first_temp = ctx->num_regs[TGSI_FILE_INPUT];
-
-	if (ctx->type == PIPE_SHADER_VERTEX)
-		first_temp += 1;
-
-	return first_temp + idx;
-}
-
-/* Attach the destination register described by a TGSI dst register to
- * an IR instruction.
- *
- * OUTPUT file registers become exports: in a vertex shader the position
- * and psize outputs use export 62 and 63 and everything else goes through
- * export_linkage(); otherwise the TGSI index is used directly.  TEMPORARY
- * file registers are mapped with get_temp_gpr().  The TGSI writemask is
- * turned into a swizzle string with '_' for masked-out components.
- */
-static struct ir2_dst_register *
-add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
-		const struct tgsi_dst_register *dst)
-{
-	static const unsigned mask_bits[4] = {
-		TGSI_WRITEMASK_X, TGSI_WRITEMASK_Y,
-		TGSI_WRITEMASK_Z, TGSI_WRITEMASK_W,
-	};
-	static const char comp_names[4] = { 'x', 'y', 'z', 'w' };
-	unsigned reg_flags = 0, reg_num = 0;
-	char mask_str[5];
-	int c;
-
-	if (dst->File == TGSI_FILE_OUTPUT) {
-		reg_flags |= IR2_REG_EXPORT;
-		if (ctx->type != PIPE_SHADER_VERTEX) {
-			reg_num = dst->Index;
-		} else if (dst->Index == ctx->position) {
-			reg_num = 62;
-		} else if (dst->Index == ctx->psize) {
-			reg_num = 63;
-		} else {
-			reg_num = export_linkage(ctx,
-					ctx->output_export_idx[dst->Index]);
-		}
-	} else if (dst->File == TGSI_FILE_TEMPORARY) {
-		reg_num = get_temp_gpr(ctx, dst->Index);
-	} else {
-		DBG("unsupported dst register file: %s",
-			tgsi_file_name(dst->File));
-		assert(0);
-	}
-
-	for (c = 0; c < 4; c++)
-		mask_str[c] = (dst->WriteMask & mask_bits[c]) ? comp_names[c] : '_';
-	mask_str[4] = '\0';
-
-	return ir2_dst_create(alu, reg_num, mask_str, reg_flags);
-}
-
-/* Attach the source register described by a TGSI src register to an IR
- * instruction.
- *
- * CONSTANT and IMMEDIATE files both become constant registers, with
- * immediates placed after the declared constants.  Vertex shader inputs
- * live in register Index+1; other inputs are looked up through
- * export_linkage().  TEMPORARY registers are mapped with get_temp_gpr().
- *
- * If the resulting non-constant register has a pending need_sync bit,
- * the instruction is marked sync and the bit is cleared.
- */
-static struct ir2_src_register *
-add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
-		const struct tgsi_src_register *src)
-{
-	static const char comp_names[] = {
-		'x', 'y', 'z', 'w',
-	};
-	unsigned reg_flags = 0, reg_num = 0;
-	uint64_t sync_bit;
-	char swz_str[5];
-
-	if (src->File == TGSI_FILE_CONSTANT) {
-		reg_num = src->Index;
-		reg_flags |= IR2_REG_CONST;
-	} else if (src->File == TGSI_FILE_INPUT) {
-		if (ctx->type == PIPE_SHADER_VERTEX) {
-			reg_num = src->Index + 1;
-		} else {
-			reg_flags |= IR2_REG_INPUT;
-			reg_num = export_linkage(ctx,
-					ctx->input_export_idx[src->Index]);
-		}
-	} else if (src->File == TGSI_FILE_TEMPORARY) {
-		reg_num = get_temp_gpr(ctx, src->Index);
-	} else if (src->File == TGSI_FILE_IMMEDIATE) {
-		reg_num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
-		reg_flags |= IR2_REG_CONST;
-	} else {
-		DBG("unsupported src register file: %s",
-			tgsi_file_name(src->File));
-		assert(0);
-	}
-
-	/* carry over source modifiers: */
-	if (src->Absolute)
-		reg_flags |= IR2_REG_ABS;
-	if (src->Negate)
-		reg_flags |= IR2_REG_NEGATE;
-
-	swz_str[0] = comp_names[src->SwizzleX];
-	swz_str[1] = comp_names[src->SwizzleY];
-	swz_str[2] = comp_names[src->SwizzleZ];
-	swz_str[3] = comp_names[src->SwizzleW];
-	swz_str[4] = '\0';
-
-	/* first read of a fetched register has to wait for the fetch: */
-	sync_bit = (uint64_t)1 << reg_num;
-	if ((ctx->need_sync & sync_bit) && !(reg_flags & IR2_REG_CONST)) {
-		alu->sync = true;
-		ctx->need_sync &= ~sync_bit;
-	}
-
-	return ir2_reg_create(alu, reg_num, swz_str, reg_flags);
-}
-
-/* Enable the vector-ALU clamp when the TGSI instruction saturates. */
-static void
-add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
-{
-	if (!inst->Instruction.Saturate)
-		return;
-
-	alu->alu_vector.clamp = true;
-}
-
-/* Enable the scalar-ALU clamp when the TGSI instruction saturates. */
-static void
-add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
-{
-	if (!inst->Instruction.Saturate)
-		return;
-
-	alu->alu_scalar.clamp = true;
-}
-
-/* Registers for a one-source TGSI op emitted as a vector ALU
- * instruction: the single source is added twice, filling both source
- * slots.
- */
-static void
-add_regs_vector_1(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
-{
-	const struct tgsi_src_register *only_src = &inst->Src[0].Register;
-	int slot;
-
-	assert(inst->Instruction.NumSrcRegs == 1);
-	assert(inst->Instruction.NumDstRegs == 1);
-
-	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
-	for (slot = 0; slot < 2; slot++)
-		add_src_reg(ctx, alu, only_src);
-	add_vector_clamp(inst, alu);
-}
-
-/* Registers for a two-source TGSI op emitted as a vector ALU
- * instruction: dst, then both sources in TGSI order, then clamp.
- */
-static void
-add_regs_vector_2(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
-{
-	int s;
-
-	assert(inst->Instruction.NumSrcRegs == 2);
-	assert(inst->Instruction.NumDstRegs == 1);
-
-	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
-	for (s = 0; s < 2; s++)
-		add_src_reg(ctx, alu, &inst->Src[s].Register);
-	add_vector_clamp(inst, alu);
-}
-
-/* Registers for a three-source TGSI op emitted as a vector ALU
- * instruction: dst, then all three sources in TGSI order, then clamp.
- */
-static void
-add_regs_vector_3(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
-{
-	int s;
-
-	assert(inst->Instruction.NumSrcRegs == 3);
-	assert(inst->Instruction.NumDstRegs == 1);
-
-	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
-	for (s = 0; s < 3; s++)
-		add_src_reg(ctx, alu, &inst->Src[s].Register);
-	add_vector_clamp(inst, alu);
-}
-
-/* Registers for a one-source TGSI op emitted as a scalar ALU
- * instruction: dst, the single source, then the scalar clamp.
- */
-static void
-add_regs_scalar_1(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
-{
-	const struct tgsi_dst_register *out = &inst->Dst[0].Register;
-	const struct tgsi_src_register *in = &inst->Src[0].Register;
-
-	assert(inst->Instruction.NumSrcRegs == 1);
-	assert(inst->Instruction.NumDstRegs == 1);
-
-	add_dst_reg(ctx, alu, out);
-	add_src_reg(ctx, alu, in);
-	add_scalar_clamp(inst, alu);
-}
-
-/*
- * Helpers for TGSI instructions that don't map to a single shader instr:
- */
-
-/* Fill in a src register that reads back what a dst register wrote:
- * same file/index/addressing, no modifiers, identity swizzle.
- */
-static void
-src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
-{
-	/* location is copied from the destination: */
-	src->File      = dst->File;
-	src->Index     = dst->Index;
-	src->Indirect  = dst->Indirect;
-	src->Dimension = dst->Dimension;
-
-	/* plain read: no abs/negate */
-	src->Negate    = 0;
-	src->Absolute  = 0;
-
-	/* .xyzw */
-	src->SwizzleW  = TGSI_SWIZZLE_W;
-	src->SwizzleZ  = TGSI_SWIZZLE_Z;
-	src->SwizzleY  = TGSI_SWIZZLE_Y;
-	src->SwizzleX  = TGSI_SWIZZLE_X;
-}
-
-/* Hand out the next internal temporary for the TGSI op currently being
- * translated, as a matching dst (all components written) and src
- * (identity swizzle) pair.  Internal temporaries are numbered after the
- * TGSI declared temporaries, skipping one slot while a predicate
- * register is assigned.
- */
-static void
-get_internal_temp(struct fd2_compile_context *ctx,
-		struct tgsi_dst_register *tmp_dst,
-		struct tgsi_src_register *tmp_src)
-{
-	int slot = ctx->num_internal_temps;
-
-	/* consume the slot: */
-	ctx->num_internal_temps++;
-
-	/* first slot above the TGSI temps belongs to the predicate: */
-	if (ctx->pred_reg != -1)
-		slot += 1;
-
-	tmp_dst->File      = TGSI_FILE_TEMPORARY;
-	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
-	tmp_dst->Indirect  = 0;
-	tmp_dst->Dimension = 0;
-	tmp_dst->Index     = ctx->num_regs[TGSI_FILE_TEMPORARY] + slot;
-
-	src_from_dst(tmp_src, tmp_dst);
-}
-
-/* Describe the predicate register as a TGSI dst (writing .w only) and,
- * if src is non-NULL, as a TGSI src reading .wwww.
- *
- * ctx->pred_reg must already be assigned (see push_predicate()).
- */
-static void
-get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst,
-		struct tgsi_src_register *src)
-{
-	assert(ctx->pred_reg != -1);
-
-	dst->File      = TGSI_FILE_TEMPORARY;
-	dst->WriteMask = TGSI_WRITEMASK_W;
-	dst->Indirect  = 0;
-	dst->Dimension = 0;
-	/* pred_reg is an index in the TGSI temporary space, just like the
-	 * indices produced by get_internal_temp().  add_dst_reg() and
-	 * add_src_reg() apply get_temp_gpr() themselves, so the index must
-	 * not be translated here as well: doing it twice moves the predicate
-	 * up into the slots handed out to internal temporaries.
-	 */
-	dst->Index     = ctx->pred_reg;
-
-	if (src) {
-		src_from_dst(src, dst);
-		src->SwizzleX  = TGSI_SWIZZLE_W;
-		src->SwizzleY  = TGSI_SWIZZLE_W;
-		src->SwizzleZ  = TGSI_SWIZZLE_W;
-		src->SwizzleW  = TGSI_SWIZZLE_W;
-	}
-}
-
-/* Enter a predicated region (TGSI IF) conditioned on src.
- *
- * At the outermost level the predicate register is assigned and set with
- * PRED_SETNEs.  For nested levels the existing predicate value is
- * multiplied with the new condition.  The shader's current pred state is
- * saved on pred_stack so pop_predicate() can restore it.
- */
-static void
-push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src)
-{
-	struct ir2_instruction *alu;
-	struct tgsi_dst_register pred_dst;
-
-	/* pred_stack has a fixed number of entries; catch IF nesting that
-	 * would write past its end:
-	 */
-	assert(ctx->pred_depth >= 0);
-	assert((size_t)ctx->pred_depth <
-			sizeof(ctx->pred_stack) / sizeof(ctx->pred_stack[0]));
-
-	if (ctx->pred_depth == 0) {
-		/* assign predicate register: */
-		ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];
-
-		get_predicate(ctx, &pred_dst, NULL);
-
-		alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SETNEs);
-		add_dst_reg(ctx, alu, &pred_dst);
-		add_src_reg(ctx, alu, src);
-	} else {
-		struct tgsi_src_register pred_src;
-
-		get_predicate(ctx, &pred_dst, &pred_src);
-
-		alu = ir2_instr_create_alu_v(ctx->so->ir, MULv);
-		add_dst_reg(ctx, alu, &pred_dst);
-		add_src_reg(ctx, alu, &pred_src);
-		add_src_reg(ctx, alu, src);
-
-		// XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
-		// sure src reg is valid if it was calculated with a predicate
-		// condition..
-		alu->pred = IR2_PRED_NONE;
-	}
-
-	/* save previous pred state to restore in pop_predicate(): */
-	ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
-}
-
-/* Leave a predicated region (TGSI ENDIF).
- *
- * Restores the pred state saved by the matching push_predicate().  When
- * still nested, a PRED_SET_POPs is emitted on the predicate register;
- * when the outermost level is left, the predicate register is released.
- */
-static void
-pop_predicate(struct fd2_compile_context *ctx)
-{
-	/* an ENDIF without a matching IF would index pred_stack[-1]: */
-	assert(ctx->pred_depth > 0);
-
-	/* restore previous predicate state: */
-	ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
-
-	if (ctx->pred_depth != 0) {
-		struct ir2_instruction *alu;
-		struct tgsi_dst_register pred_dst;
-		struct tgsi_src_register pred_src;
-
-		get_predicate(ctx, &pred_dst, &pred_src);
-
-		alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SET_POPs);
-		add_dst_reg(ctx, alu, &pred_dst);
-		add_src_reg(ctx, alu, &pred_src);
-		alu->pred = IR2_PRED_NONE;
-	} else {
-		/* predicate register no longer needed: */
-		ctx->pred_reg = -1;
-	}
-}
-
-/* Produce a TGSI src register that reads the 32-bit value val from the
- * immediates, broadcast to all four components.
- *
- * Existing immediate components are searched first, both for val itself
- * and for its negation (in which case the Negate modifier is set on the
- * returned register).  If neither is found, val is stored in the next
- * free immediate component.
- *
- * All callers in this file pass float bit patterns (fui()), so "negated"
- * means the same bits with the IEEE-754 sign bit flipped; integer
- * negation of the bit pattern would match an unrelated float.
- */
-static void
-get_immediate(struct fd2_compile_context *ctx,
-		struct tgsi_src_register *reg, uint32_t val)
-{
-	const uint32_t negated = val ^ 0x80000000u;
-	unsigned neg = 0, swiz = 0, idx = 0, i;
-	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
-	static const unsigned swiz2tgsi[] = {
-		TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
-	};
-
-	for (i = 0; i < ctx->immediate_idx; i++) {
-		swiz = i % 4;
-		idx  = i / 4;
-
-		if (ctx->so->immediates[idx].val[swiz] == val) {
-			neg = 0;
-			break;
-		}
-
-		if (ctx->so->immediates[idx].val[swiz] == negated) {
-			neg = 1;
-			break;
-		}
-	}
-
-	if (i == ctx->immediate_idx) {
-		/* need to generate a new immediate: */
-		swiz = i % 4;
-		idx  = i / 4;
-		neg  = 0;
-		ctx->so->immediates[idx].val[swiz] = val;
-		ctx->so->num_immediates = idx + 1;
-		ctx->immediate_idx++;
-	}
-
-	reg->File      = TGSI_FILE_IMMEDIATE;
-	reg->Indirect  = 0;
-	reg->Dimension = 0;
-	reg->Index     = idx;
-	reg->Absolute  = 0;
-	reg->Negate    = neg;
-	reg->SwizzleX  = swiz2tgsi[swiz];
-	reg->SwizzleY  = swiz2tgsi[swiz];
-	reg->SwizzleZ  = swiz2tgsi[swiz];
-	reg->SwizzleW  = swiz2tgsi[swiz];
-}
-
-/* POW(a,b) = EXP2(b * LOG2(a))
- *
- * Emitted as LOG_CLAMP into an internal temporary, a MULv with b, and an
- * EXP_IEEE into the real destination.  Only single-component writemasks
- * are supported.
- */
-static void
-translate_pow(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register scratch_dst;
-	struct tgsi_src_register scratch_src;
-	struct ir2_instruction *ins;
-	unsigned mask = inst->Dst[0].Register.WriteMask;
-
-	get_internal_temp(ctx, &scratch_dst, &scratch_src);
-
-	/* scratch = log2(a) */
-	ins = ir2_instr_create_alu_s(ctx->so->ir, LOG_CLAMP);
-	add_dst_reg(ctx, ins, &scratch_dst);
-	add_src_reg(ctx, ins, &inst->Src[0].Register);
-
-	/* scratch = scratch * b */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, MULv);
-	add_dst_reg(ctx, ins, &scratch_dst);
-	add_src_reg(ctx, ins, &scratch_src);
-	add_src_reg(ctx, ins, &inst->Src[1].Register);
-
-	/* NOTE: some of the instructions, like EXP_IEEE, seem hard-
-	 * coded to take their input from the w component.
-	 */
-	if (mask == TGSI_WRITEMASK_X) {
-		scratch_src.SwizzleW = TGSI_SWIZZLE_X;
-	} else if (mask == TGSI_WRITEMASK_Y) {
-		scratch_src.SwizzleW = TGSI_SWIZZLE_Y;
-	} else if (mask == TGSI_WRITEMASK_Z) {
-		scratch_src.SwizzleW = TGSI_SWIZZLE_Z;
-	} else if (mask == TGSI_WRITEMASK_W) {
-		scratch_src.SwizzleW = TGSI_SWIZZLE_W;
-	} else {
-		DBG("invalid writemask!");
-		assert(0);
-	}
-
-	/* dst = exp2(scratch) */
-	ins = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE);
-	add_dst_reg(ctx, ins, &inst->Dst[0].Register);
-	add_src_reg(ctx, ins, &scratch_src);
-	add_scalar_clamp(inst, ins);
-}
-
-/* Translate TGSI TEX / TXP into a TEX_FETCH.
- *
- * For TXP the coordinate is first projected (xyz divided by w) into an
- * internal temporary.  When the TGSI destination is an output register,
- * or the instruction saturates, the fetch result goes to an internal
- * temporary and is then moved to the real destination with a MAXv.
- * The fetch instruction is recorded in so->tfetch_instrs[] together with
- * its sampler index so it can be patched later.
- */
-static void
-translate_tex(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst, unsigned opc)
-{
-	struct ir2_instruction *instr;
-	struct ir2_src_register *reg;
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register tmp_src;
-	const struct tgsi_src_register *coord;
-	bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
-			inst->Instruction.Saturate;
-	int idx;
-
-	if (using_temp || (opc == TGSI_OPCODE_TXP))
-		get_internal_temp(ctx, &tmp_dst, &tmp_src);
-
-	if (opc == TGSI_OPCODE_TXP) {
-		static const char *swiz[] = {
-				[TGSI_SWIZZLE_X] = "xxxx",
-				[TGSI_SWIZZLE_Y] = "yyyy",
-				[TGSI_SWIZZLE_Z] = "zzzz",
-				[TGSI_SWIZZLE_W] = "wwww",
-		};
-
-		/* TXP - Projective Texture Lookup:
-		 *
-		 *  coord.x = src0.x / src0.w
-		 *  coord.y = src0.y / src0.w
-		 *  coord.z = src0.z / src0.w
-		 *  coord.w = src0.w
-		 *  bias = 0.0
-		 *
-		 *  dst = texture_sample(unit, coord, bias)
-		 */
-
-		/* tmp.w = src0.w (MAXv of a value with itself is a move) */
-		instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
-		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
-		add_src_reg(ctx, instr, &inst->Src[0].Register);
-		add_src_reg(ctx, instr, &inst->Src[0].Register);
-
-		/* tmp.x = 1 / src0.w */
-		instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE);
-		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
-		memcpy(add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle,
-				swiz[inst->Src[0].Register.SwizzleW], 4);
-
-		/* tmp.xyz = tmp.x * src0.xyz */
-		instr = ir2_instr_create_alu_v(ctx->so->ir, MULv);
-		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
-		add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
-		add_src_reg(ctx, instr, &inst->Src[0].Register);
-
-		coord = &tmp_src;
-	} else {
-		coord = &inst->Src[0].Register;
-	}
-
-	instr = ir2_instr_create(ctx->so->ir, IR2_FETCH);
-	instr->fetch.opc = TEX_FETCH;
-	/* NOTE(review): is_cube is keyed on TGSI_TEXTURE_3D, not a cube
-	 * target -- confirm this is what the hw field expects.
-	 */
-	instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D);
-	instr->fetch.is_rect = (inst->Texture.Texture == TGSI_TEXTURE_RECT);
-	assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
-
-	/* save off the tex fetch to be patched later with correct const_idx: */
-	idx = ctx->so->num_tfetch_instrs++;
-	ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
-	ctx->so->tfetch_instrs[idx].instr = instr;
-
-	add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register);
-	reg = add_src_reg(ctx, instr, coord);
-
-	/* blob compiler always sets 3rd component to same as 1st for 2d: */
-	if (inst->Texture.Texture == TGSI_TEXTURE_2D || inst->Texture.Texture == TGSI_TEXTURE_RECT)
-		reg->swizzle[2] = reg->swizzle[0];
-
-	/* dst register needs to be marked for sync.  need_sync is a 64-bit
-	 * mask, so shift a 64-bit one (matching the test in add_src_reg());
-	 * a plain int shift is wrong for register numbers of 31 and up:
-	 */
-	ctx->need_sync |= (uint64_t)1 << instr->dst_reg.num;
-
-	/* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
-	instr->sync = true;
-
-	if (using_temp) {
-		/* texture fetch can't write directly to export, so if tgsi
-		 * is telling us the dst register is in output file, we load
-		 * the texture to a temp and then use an ALU instruction to
-		 * move it to the output
-		 */
-		instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
-
-		add_dst_reg(ctx, instr, &inst->Dst[0].Register);
-		add_src_reg(ctx, instr, &tmp_src);
-		add_src_reg(ctx, instr, &tmp_src);
-		add_vector_clamp(inst, instr);
-	}
-}
-
-/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
-/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
-/* SEQ(a,b) = EQU((b - a), 1.0, 0.0) */
-/* SNE(a,b) = EQU((b - a), 0.0, 1.0) */
-/* Translate the TGSI set-on-compare ops into an ADDv computing
- * (-src0 + src1) into an internal temporary, followed by a conditional
- * select (CNDGTEv or CNDEv) on that temporary between two immediates.
- *
- * NOTE(review): the select is emitted with sources (tmp, c1, c0), which
- * does not obviously line up with the operand order written in the
- * formulas above -- verify against the CNDGTEv/CNDEv operand semantics.
- */
-static void
-translate_sge_slt_seq_sne(struct fd2_compile_context *ctx,
- struct tgsi_full_instruction *inst, unsigned opc)
-{
- struct ir2_instruction *instr;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
- struct tgsi_src_register tmp_const;
- float c0, c1;
- instr_vector_opc_t vopc;
-
- switch (opc) {
- default:
- assert(0);
- /* with asserts compiled out, unknown opcodes fall through to SGE */
- case TGSI_OPCODE_SGE:
- c0 = 1.0;
- c1 = 0.0;
- vopc = CNDGTEv;
- break;
- case TGSI_OPCODE_SLT:
- c0 = 0.0;
- c1 = 1.0;
- vopc = CNDGTEv;
- break;
- case TGSI_OPCODE_SEQ:
- c0 = 0.0;
- c1 = 1.0;
- vopc = CNDEv;
- break;
- case TGSI_OPCODE_SNE:
- c0 = 1.0;
- c1 = 0.0;
- vopc = CNDEv;
- break;
- }
-
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
-
- /* tmp = (-src0) + src1 */
- instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
- add_dst_reg(ctx, instr, &tmp_dst);
- add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
- add_src_reg(ctx, instr, &inst->Src[1].Register);
-
- /* dst = select(tmp, c1, c0); each immediate is looked up right before
- * it is added, since tmp_const is reused:
- */
- instr = ir2_instr_create_alu_v(ctx->so->ir, vopc);
- add_dst_reg(ctx, instr, &inst->Dst[0].Register);
- add_src_reg(ctx, instr, &tmp_src);
- get_immediate(ctx, &tmp_const, fui(c1));
- add_src_reg(ctx, instr, &tmp_const);
- get_immediate(ctx, &tmp_const, fui(c0));
- add_src_reg(ctx, instr, &tmp_const);
-}
-
-/* LRP(a,b,c) = (a * b) + ((1 - a) * c)
- *
- * Expanded into four vector ALU instructions using two internal
- * temporaries and the immediate 1.0.
- */
-static void
-translate_lrp(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst,
-		unsigned opc)
-{
-	const struct tgsi_src_register *a = &inst->Src[0].Register;
-	const struct tgsi_src_register *b = &inst->Src[1].Register;
-	const struct tgsi_src_register *c = &inst->Src[2].Register;
-	struct tgsi_dst_register prod_dst, rest_dst;
-	struct tgsi_src_register prod_src, rest_src;
-	struct tgsi_src_register one;
-	struct ir2_instruction *ins;
-
-	get_internal_temp(ctx, &prod_dst, &prod_src);
-	get_internal_temp(ctx, &rest_dst, &rest_src);
-
-	get_immediate(ctx, &one, fui(1.0));
-
-	/* prod = a * b */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, MULv);
-	add_dst_reg(ctx, ins, &prod_dst);
-	add_src_reg(ctx, ins, a);
-	add_src_reg(ctx, ins, b);
-
-	/* rest = 1 + (-a) */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
-	add_dst_reg(ctx, ins, &rest_dst);
-	add_src_reg(ctx, ins, &one);
-	add_src_reg(ctx, ins, a)->flags |= IR2_REG_NEGATE;
-
-	/* rest = rest * c */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, MULv);
-	add_dst_reg(ctx, ins, &rest_dst);
-	add_src_reg(ctx, ins, &rest_src);
-	add_src_reg(ctx, ins, c);
-
-	/* dst = prod + rest */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
-	add_dst_reg(ctx, ins, &inst->Dst[0].Register);
-	add_src_reg(ctx, ins, &prod_src);
-	add_src_reg(ctx, ins, &rest_src);
-}
-
-/* Translate TGSI SIN / COS.
- *
- * The argument is first reduced in the .x component of an internal
- * temporary:
- *    t = src * 0.159155 + 0.5
- *    t = frac(t)
- *    t = t * 6.283185 + (-3.141593)
- * and the scalar SIN or COS is then applied to t.
- */
-static void
-translate_trig(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst,
-		unsigned opc)
-{
-	struct tgsi_dst_register red_dst;
-	struct tgsi_src_register red_src;
-	struct tgsi_src_register imm;
-	struct ir2_instruction *ins;
-	instr_scalar_opc_t op;
-
-	switch (opc) {
-	case TGSI_OPCODE_COS:
-		op = COS;
-		break;
-	default:
-		assert(0);
-		/* fallthrough */
-	case TGSI_OPCODE_SIN:
-		op = SIN;
-		break;
-	}
-
-	get_internal_temp(ctx, &red_dst, &red_src);
-
-	/* work in .x only, and read it back broadcast: */
-	red_dst.WriteMask = TGSI_WRITEMASK_X;
-	red_src.SwizzleW = TGSI_SWIZZLE_X;
-	red_src.SwizzleZ = TGSI_SWIZZLE_X;
-	red_src.SwizzleY = TGSI_SWIZZLE_X;
-	red_src.SwizzleX = TGSI_SWIZZLE_X;
-
-	/* t = src * 0.159155 + 0.5 */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, MULADDv);
-	add_dst_reg(ctx, ins, &red_dst);
-	add_src_reg(ctx, ins, &inst->Src[0].Register);
-	get_immediate(ctx, &imm, fui(0.159155));
-	add_src_reg(ctx, ins, &imm);
-	get_immediate(ctx, &imm, fui(0.5));
-	add_src_reg(ctx, ins, &imm);
-
-	/* t = frac(t) */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, FRACv);
-	add_dst_reg(ctx, ins, &red_dst);
-	add_src_reg(ctx, ins, &red_src);
-	add_src_reg(ctx, ins, &red_src);
-
-	/* t = t * 6.283185 - 3.141593 */
-	ins = ir2_instr_create_alu_v(ctx->so->ir, MULADDv);
-	add_dst_reg(ctx, ins, &red_dst);
-	add_src_reg(ctx, ins, &red_src);
-	get_immediate(ctx, &imm, fui(6.283185));
-	add_src_reg(ctx, ins, &imm);
-	get_immediate(ctx, &imm, fui(-3.141593));
-	add_src_reg(ctx, ins, &imm);
-
-	/* dst = sin/cos(t) */
-	ins = ir2_instr_create_alu_s(ctx->so->ir, op);
-	add_dst_reg(ctx, ins, &inst->Dst[0].Register);
-	add_src_reg(ctx, ins, &red_src);
-}
-
-/* Translate TGSI DP2 using DOT2ADDv with an addend of 0.0:
- *
- *    DP2ADD c,a,b -> dot2(a,b) + c
- */
-static void
-translate_dp2(struct fd2_compile_context *ctx,
-		struct tgsi_full_instruction *inst,
-		unsigned opc)
-{
-	struct ir2_instruction *dot;
-	struct tgsi_src_register zero;
-
-	dot = ir2_instr_create_alu_v(ctx->so->ir, DOT2ADDv);
-
-	add_dst_reg(ctx, dot, &inst->Dst[0].Register);
-	add_src_reg(ctx, dot, &inst->Src[0].Register);
-	add_src_reg(ctx, dot, &inst->Src[1].Register);
-
-	/* the constant 0.0 stands in for c: */
-	get_immediate(ctx, &zero, fui(0.0f));
-	add_src_reg(ctx, dot, &zero);
-
-	add_vector_clamp(inst, dot);
-}
-
-/*
- * Main part of compiler/translator:
- */
-
-/* Translate one TGSI instruction into IR instructions.
- *
- * Opcodes with a direct equivalent create a single vector (_v) or scalar
- * (_s) ALU instruction and attach registers via the add_regs_*() helpers;
- * the rest are delegated to the translate_*() helpers.  IF/ELSE/ENDIF
- * only manipulate the predicate state.  TGSI_OPCODE_END emits nothing.
- */
-static void
-translate_instruction(struct fd2_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- unsigned opc = inst->Instruction.Opcode;
- struct ir2_instruction *instr;
-
- if (opc == TGSI_OPCODE_END)
- return;
-
- /* TODO turn this into a table: */
- switch (opc) {
- case TGSI_OPCODE_MOV:
- /* move is emitted as MAXv with the source in both operands: */
- instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
- add_regs_vector_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_RCP:
- instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE);
- add_regs_scalar_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_RSQ:
- instr = ir2_instr_create_alu_s(ctx->so->ir, RECIPSQ_IEEE);
- add_regs_scalar_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_SQRT:
- instr = ir2_instr_create_alu_s(ctx->so->ir, SQRT_IEEE);
- add_regs_scalar_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_MUL:
- instr = ir2_instr_create_alu_v(ctx->so->ir, MULv);
- add_regs_vector_2(ctx, inst, instr);
- break;
- case TGSI_OPCODE_ADD:
- instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
- add_regs_vector_2(ctx, inst, instr);
- break;
- case TGSI_OPCODE_DP2:
- translate_dp2(ctx, inst, opc);
- break;
- case TGSI_OPCODE_DP3:
- instr = ir2_instr_create_alu_v(ctx->so->ir, DOT3v);
- add_regs_vector_2(ctx, inst, instr);
- break;
- case TGSI_OPCODE_DP4:
- instr = ir2_instr_create_alu_v(ctx->so->ir, DOT4v);
- add_regs_vector_2(ctx, inst, instr);
- break;
- case TGSI_OPCODE_MIN:
- instr = ir2_instr_create_alu_v(ctx->so->ir, MINv);
- add_regs_vector_2(ctx, inst, instr);
- break;
- case TGSI_OPCODE_MAX:
- instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
- add_regs_vector_2(ctx, inst, instr);
- break;
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SNE:
- translate_sge_slt_seq_sne(ctx, inst, opc);
- break;
- case TGSI_OPCODE_MAD:
- instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv);
- add_regs_vector_3(ctx, inst, instr);
- break;
- case TGSI_OPCODE_LRP:
- translate_lrp(ctx, inst, opc);
- break;
- case TGSI_OPCODE_FRC:
- instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv);
- add_regs_vector_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_FLR:
- instr = ir2_instr_create_alu_v(ctx->so->ir, FLOORv);
- add_regs_vector_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_EX2:
- instr = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE);
- add_regs_scalar_1(ctx, inst, instr);
- break;
- case TGSI_OPCODE_POW:
- translate_pow(ctx, inst);
- break;
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_SIN:
- translate_trig(ctx, inst, opc);
- break;
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXP:
- translate_tex(ctx, inst, opc);
- break;
- case TGSI_OPCODE_CMP:
- instr = ir2_instr_create_alu_v(ctx->so->ir, CNDGTEv);
- add_regs_vector_3(ctx, inst, instr);
- /* toggle negate on the condition operand: */
- instr->src_reg[0].flags ^= IR2_REG_NEGATE; /* src1 */
- break;
- case TGSI_OPCODE_IF:
- /* push_predicate() saves the old pred state before it is replaced: */
- push_predicate(ctx, &inst->Src[0].Register);
- ctx->so->ir->pred = IR2_PRED_EQ;
- break;
- case TGSI_OPCODE_ELSE:
- ctx->so->ir->pred = IR2_PRED_NE;
- break;
- case TGSI_OPCODE_ENDIF:
- pop_predicate(ctx);
- break;
- case TGSI_OPCODE_F2I:
- instr = ir2_instr_create_alu_v(ctx->so->ir, TRUNCv);
- add_regs_vector_1(ctx, inst, instr);
- break;
- default:
- DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
- tgsi_dump(ctx->so->tokens, 0);
- assert(0);
- break;
- }
-
- /* internal temporaries are only valid for the duration of a single
- * TGSI instruction:
- */
- ctx->num_internal_temps = 0;
-}
-
-/* Second pass over the TGSI tokens: translate every instruction token,
- * ignoring all other token types.
- */
-static void
-compile_instructions(struct fd2_compile_context *ctx)
-{
-	struct tgsi_parse_context *parser = &ctx->parser;
-
-	for (;;) {
-		if (tgsi_parse_end_of_tokens(parser))
-			break;
-
-		tgsi_parse_token(parser);
-
-		if (parser->FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
-			continue;
-
-		translate_instruction(ctx, &parser->FullToken.FullInstruction);
-	}
-}
-
-/* Compile the TGSI tokens of a shader state object into a fresh IR.
- *
- * Any previous IR of the shader is destroyed and its fetch/immediate
- * counters are reset.  Vertex shaders get their vertex fetch
- * instructions emitted up front; for fragment shaders the program's
- * export linkage table is reset before translation.
- *
- * Returns 0 on success, -1 if the TGSI tokens could not be parsed.
- */
-int
-fd2_compile_shader(struct fd_program_stateobj *prog,
-		struct fd2_shader_stateobj *so)
-{
-	struct fd2_compile_context ctx;
-
-	/* start over with an empty shader: */
-	ir2_shader_destroy(so->ir);
-	so->ir = ir2_shader_create();
-	so->num_immediates = 0;
-	so->num_tfetch_instrs = 0;
-	so->num_vfetch_instrs = 0;
-
-	if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
-		return -1;
-
-	switch (ctx.type) {
-	case PIPE_SHADER_VERTEX:
-		compile_vtx_fetch(&ctx);
-		break;
-	case PIPE_SHADER_FRAGMENT:
-		/* 0xff marks an export slot as not yet assigned: */
-		prog->num_exports = 0;
-		memset(prog->export_linkage, 0xff,
-				sizeof(prog->export_linkage));
-		break;
-	default:
-		break;
-	}
-
-	compile_instructions(&ctx);
-
-	compile_free(&ctx);
-
-	return 0;
-}
-