summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosé Fonseca <jfonseca@vmware.com>2010-09-02 15:54:07 +0100
committerJosé Fonseca <jfonseca@vmware.com>2010-10-11 13:05:32 +0100
commit7c1b5772a81c4f701ae9a6208c9e34792c05d4ab (patch)
tree609257a4a888219ee56c512ba88bc70717536c7f
parent11dad217186a4c177cb41aa526531d6cd46ae5b0 (diff)
gallivm: More detailed analysis of tgsi shaders.
To allow more optimizations, in particular for direct textures.
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h77
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c480
4 files changed, 559 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 02af4d9280a..abd33f6eef1 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -176,6 +176,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
gallivm/lp_bld_tgsi_aos.c \
+ gallivm/lp_bld_tgsi_info.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 48547c4b2c6..94cd74424a0 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -227,6 +227,7 @@ if env['llvm']:
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
'gallivm/lp_bld_tgsi_aos.c',
+ 'gallivm/lp_bld_tgsi_info.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 97318b3456c..0173bc4a7fc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -36,6 +36,9 @@
#define LP_BLD_TGSI_H
#include "gallivm/lp_bld.h"
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
struct tgsi_token;
@@ -55,6 +58,75 @@ enum lp_build_tex_modifier {
/**
+ * Describe a channel of a register.
+ *
+ * The value can be a:
+ * - immediate value (i.e. derived from an IMM register)
+ * - CONST[n].x/y/z/w
+ * - IN[n].x/y/z/w
+ * - undetermined (when .file == TGSI_FILE_NULL)
+ *
+ * This is one of the analysis results, and is used to describe
+ * the output color and the texture coordinates in terms of inputs.
+ */
+struct lp_tgsi_channel_info
+{
+ unsigned file:4; /* TGSI_FILE_* */
+ unsigned swizzle:3; /* PIPE_SWIZZLE_x; channel selected in the source register */
+ union {
+ uint32_t index; /* register index, for non-immediate files */
+ float value; /* for TGSI_FILE_IMMEDIATE */
+ };
+};
+
+
+/**
+ * Describe a texture sampler interpolator.
+ *
+ * The interpolation is described in terms of regular inputs: one
+ * lp_tgsi_channel_info per coordinate channel. Channels that the texture
+ * target/modifier does not read are left zeroed by the analysis.
+ */
+struct lp_tgsi_texture_info
+{
+ struct lp_tgsi_channel_info coord[4]; /* x/y/z/w of the coordinate operand */
+ unsigned target:8; /* TGSI_TEXTURE_* */
+ unsigned unit:8; /* Sampler unit */
+ unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
+};
+
+
+/**
+ * Result of lp_build_tgsi_info(): the generic tgsi_shader_info plus a
+ * description of the texture opcodes and of the shader outputs.
+ */
+struct lp_tgsi_info
+{
+ /* Filled in by tgsi_scan_shader(). */
+ struct tgsi_shader_info base;
+
+ /*
+ * Whether any of the texture opcodes access a register file other than
+ * TGSI_FILE_INPUT.
+ *
+ * The analysis also sets this for texture opcodes with explicit
+ * derivatives, and when the shader has more texture opcodes than fit in
+ * tex[] below.
+ *
+ * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
+ * benefit.
+ */
+ unsigned indirect_textures:1;
+
+ /*
+ * Texture opcode description. Aimed at detecting and describing direct
+ * texture opcodes.
+ *
+ * Only the first num_texs entries of tex[] are valid.
+ */
+ unsigned num_texs;
+ struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];
+
+ /*
+ * Output description. Aimed at detecting and describing simple blit
+ * shaders.
+ *
+ * Indexed by output register index, then by channel (x/y/z/w).
+ */
+ struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];
+
+ /*
+ * Shortcut pointers into the above (for fragment shaders).
+ *
+ * Indexed by the semantic index of outputs declared with
+ * TGSI_SEMANTIC_COLOR; each entry points at four channels.
+ */
+ const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
+};
+
+/**
* Sampler code generation interface.
*
* Although texture sampling is a requirement for TGSI translation, it is
@@ -97,6 +169,11 @@ struct lp_build_sampler_aos
+/**
+ * Analyse a TGSI shader and describe its texture opcodes and outputs.
+ *
+ * @param tokens  the TGSI token stream to analyse
+ * @param info    overwritten with the analysis result; it is zeroed first,
+ *                so no prior initialisation is needed
+ */
void
+lp_build_tgsi_info(const struct tgsi_token *tokens,
+ struct lp_tgsi_info *info);
+
+
+void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
struct lp_type type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
new file mode 100644
index 00000000000..eab72b8eb7d
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -0,0 +1,480 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_tgsi.h"
+
+
+/**
+ * Analysis context.
+ *
+ * This is where we store the value of each channel of the IMM and TEMP
+ * registers as we walk the shader. The OUT register values are stored
+ * directly in info->output.
+ */
+struct analysis_context
+{
+ /* Analysis result being built. */
+ struct lp_tgsi_info *info;
+
+ /* Number of immediates recorded in imm[] so far; further ones are dropped. */
+ unsigned num_imms;
+ float imm[32][4];
+
+ /* Known value of each channel of the first 32 temporaries. */
+ struct lp_tgsi_channel_info temp[32][4];
+};
+
+
+/**
+ * Describe the specified channel of the src register.
+ *
+ * On return *chan_info is always fully initialised, so callers may pass
+ * uninitialised storage and copy the result around freely.
+ *
+ * The channel is reported as undetermined (file == TGSI_FILE_NULL) when:
+ * - the source uses indirect addressing, absolute value or negation;
+ * - it is a temporary beyond the range we track;
+ * - it is an immediate that was never recorded in ctx->imm.
+ *
+ * @param ctx        analysis state (recorded immediates and temporaries)
+ * @param chan_info  receives the description of the channel
+ * @param src        source register operand
+ * @param chan       destination channel (0..3) being computed; the source
+ *                   swizzle is applied to it
+ */
+static void
+analyse_src(struct analysis_context *ctx,
+            struct lp_tgsi_channel_info *chan_info,
+            const struct tgsi_src_register *src,
+            unsigned chan)
+{
+   unsigned swizzle;
+
+   /*
+    * Start from a completely defined "undetermined" value, so that no
+    * field (swizzle, index/value union) is ever left indeterminate.
+    */
+   memset(chan_info, 0, sizeof *chan_info);
+   chan_info->file = TGSI_FILE_NULL;
+
+   if (src->Indirect || src->Absolute || src->Negate) {
+      return;
+   }
+
+   swizzle = tgsi_util_get_src_register_swizzle(src, chan);
+
+   if (src->File == TGSI_FILE_TEMPORARY) {
+      /* Propagate whatever we know about the temporary, if we track it. */
+      if (src->Index < Elements(ctx->temp)) {
+         *chan_info = ctx->temp[src->Index][swizzle];
+      }
+   } else if (src->File == TGSI_FILE_IMMEDIATE) {
+      assert(src->Index < Elements(ctx->imm));
+      /*
+       * Only claim an immediate value when we actually recorded it;
+       * otherwise leave the channel undetermined rather than reporting
+       * an immediate with a bogus value.
+       */
+      if (src->Index < ctx->num_imms) {
+         chan_info->file = TGSI_FILE_IMMEDIATE;
+         chan_info->value = ctx->imm[src->Index][swizzle];
+      }
+   } else {
+      chan_info->file = src->File;
+      chan_info->index = src->Index;
+      chan_info->swizzle = swizzle;
+   }
+}
+
+
+/**
+ * Tell whether a channel is known to hold exactly the given immediate value.
+ *
+ * The value is only compared when the channel is an immediate.
+ */
+static boolean
+is_immediate(const struct lp_tgsi_channel_info *chan_info, float value)
+{
+   if (chan_info->file != TGSI_FILE_IMMEDIATE) {
+      return FALSE;
+   }
+
+   return chan_info->value == value;
+}
+
+
+/**
+ * Record a texture opcode in info->tex[] and decide whether it is direct.
+ *
+ * A texture opcode is direct when every coordinate channel it reads comes
+ * straight from a TGSI_FILE_INPUT register. Anything we cannot describe
+ * sets info->indirect_textures, which is the conservative answer:
+ * - explicit derivatives (not tracked);
+ * - a coordinate channel that is not a plain input;
+ * - an unknown texture target;
+ * - more texture opcodes than info->tex[] can hold.
+ *
+ * @param ctx       analysis state
+ * @param inst      the texture instruction
+ * @param modifier  which variant of the texture opcode this is
+ */
+static void
+analyse_tex(struct analysis_context *ctx,
+            const struct tgsi_full_instruction *inst,
+            enum lp_build_tex_modifier modifier)
+{
+   struct lp_tgsi_info *info = ctx->info;
+   struct lp_tgsi_texture_info *tex_info;
+   boolean indirect = FALSE;
+   unsigned readmask = 0;
+   unsigned chan;
+
+   if (info->num_texs >= Elements(info->tex)) {
+      /* No room left to describe this opcode. */
+      info->indirect_textures = TRUE;
+      return;
+   }
+
+   /* Which coordinate channels does this target consume? */
+   switch (inst->Texture.Texture) {
+   case TGSI_TEXTURE_1D:
+      readmask = TGSI_WRITEMASK_X;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+      readmask = TGSI_WRITEMASK_XY;
+      break;
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+      readmask = TGSI_WRITEMASK_XYZ;
+      break;
+   default:
+      assert(0);
+      /*
+       * We cannot describe this opcode, so do not let the caller believe
+       * that every texture access is direct.
+       */
+      info->indirect_textures = TRUE;
+      return;
+   }
+
+   /* Only touch the slot once we know we are going to use it. */
+   tex_info = &info->tex[info->num_texs];
+   tex_info->target = inst->Texture.Texture;
+
+   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+      /* We don't track explicit derivatives, although we could */
+      indirect = TRUE;
+      tex_info->unit = inst->Src[3].Register.Index;
+   } else {
+      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
+          modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+          modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+         /* These variants also read the W channel. */
+         readmask |= TGSI_WRITEMASK_W;
+      }
+      tex_info->unit = inst->Src[1].Register.Index;
+   }
+
+   for (chan = 0; chan < 4; ++chan) {
+      struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan];
+
+      if (readmask & (1 << chan)) {
+         analyse_src(ctx, chan_info, &inst->Src[0].Register, chan);
+         if (chan_info->file != TGSI_FILE_INPUT) {
+            indirect = TRUE;
+         }
+      } else {
+         /* Channel not read by this opcode. */
+         memset(chan_info, 0, sizeof *chan_info);
+      }
+   }
+
+   if (indirect) {
+      info->indirect_textures = TRUE;
+   }
+
+   ++info->num_texs;
+}
+
+
+/**
+ * Process an instruction, and update the register values accordingly.
+ *
+ * For every destination register this:
+ * - records texture opcodes via analyse_tex();
+ * - updates the tracked value of the written TEMP/OUT channels: a known
+ *   value is only propagated through unpredicated, unsaturated MOV, and
+ *   MUL by an immediate 0.0 or 1.0; any other write makes the channel
+ *   undetermined.
+ *
+ * Control flow opcodes discard everything known about temporaries and
+ * outputs.
+ */
+static void
+analyse_instruction(struct analysis_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct lp_tgsi_info *info = ctx->info;
+ /* Per-register channel table of the destination file, and its length. */
+ struct lp_tgsi_channel_info (*regs)[4];
+ unsigned max_regs;
+ unsigned i;
+ unsigned index;
+ unsigned chan;
+
+ for (i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ const struct tgsi_dst_register *dst = &inst->Dst[i].Register;
+
+ /*
+ * Get the lp_tgsi_channel_info array corresponding to the destination
+ * register file.
+ */
+
+ if (dst->File == TGSI_FILE_TEMPORARY) {
+ regs = ctx->temp;
+ max_regs = Elements(ctx->temp);
+ } else if (dst->File == TGSI_FILE_OUTPUT) {
+ regs = info->output;
+ max_regs = Elements(info->output);
+ } else if (dst->File == TGSI_FILE_ADDRESS ||
+ dst->File == TGSI_FILE_PREDICATE) {
+ /* Values of these register files are not tracked. */
+ continue;
+ } else {
+ assert(0);
+ continue;
+ }
+
+ /*
+ * Detect direct TEX instructions
+ */
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_TEX:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
+ break;
+ case TGSI_OPCODE_TXD:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
+ break;
+ case TGSI_OPCODE_TXB:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
+ break;
+ case TGSI_OPCODE_TXL:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
+ break;
+ case TGSI_OPCODE_TXP:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Keep track of assignments and writes
+ */
+
+ if (dst->Indirect) {
+ /*
+ * It could be any register index so clear all register indices.
+ */
+
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ for (index = 0; index < max_regs; ++index) {
+ regs[index][chan].file = TGSI_FILE_NULL;
+ }
+ }
+ }
+ } else if (dst->Index < max_regs) {
+ /*
+ * Update this destination register value.
+ */
+
+ /*
+ * The new channel values are staged in res[] and only stored once
+ * all of them are computed, so sources are still read with their
+ * old values even when the instruction reads the register it
+ * writes.
+ */
+ struct lp_tgsi_channel_info res[4];
+
+ /* Zeroed entries are what gets stored for any write we cannot follow. */
+ memset(res, 0, sizeof res);
+
+ if (!inst->Instruction.Predicate &&
+ !inst->Instruction.Saturate) {
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
+ analyse_src(ctx, &res[chan],
+ &inst->Src[0].Register, chan);
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
+ /*
+ * Propagate values across 1.0 and 0.0 multiplications.
+ */
+
+ struct lp_tgsi_channel_info src0;
+ struct lp_tgsi_channel_info src1;
+
+ analyse_src(ctx, &src0, &inst->Src[0].Register, chan);
+ analyse_src(ctx, &src1, &inst->Src[1].Register, chan);
+
+ /* x * 0 gives the 0 immediate, x * 1 gives the other operand. */
+ if (is_immediate(&src0, 0.0f)) {
+ res[chan] = src0;
+ } else if (is_immediate(&src1, 0.0f)) {
+ res[chan] = src1;
+ } else if (is_immediate(&src0, 1.0f)) {
+ res[chan] = src1;
+ } else if (is_immediate(&src1, 1.0f)) {
+ res[chan] = src0;
+ }
+ }
+ }
+ }
+ }
+
+ /* Commit only the channels enabled in the write mask. */
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ regs[dst->Index][chan] = res[chan];
+ }
+ }
+ }
+ }
+
+ /*
+ * Clear all temporaries information in presence of a control flow opcode.
+ *
+ * The outputs description is cleared as well.
+ */
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_IFC:
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_BRK:
+ case TGSI_OPCODE_BREAKC:
+ case TGSI_OPCODE_CONT:
+ case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_CALLNZ:
+ case TGSI_OPCODE_CAL:
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ case TGSI_OPCODE_SWITCH:
+ case TGSI_OPCODE_CASE:
+ case TGSI_OPCODE_DEFAULT:
+ case TGSI_OPCODE_ENDSWITCH:
+ case TGSI_OPCODE_RET:
+ case TGSI_OPCODE_END:
+ /* XXX: Are there more cases? */
+ memset(&ctx->temp, 0, sizeof ctx->temp);
+ memset(&info->output, 0, sizeof info->output);
+ /* fall through: the default case only breaks */
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Debug helper: dump the shader followed by the analysis results.
+ *
+ * Prints one line per recorded texture opcode (coordinate channels, sampler
+ * unit and target), then one line per output channel whose value is known.
+ * Immediate channels are printed as their float value; printing their
+ * register index would be meaningless because index and value share
+ * storage.
+ *
+ * @param tokens  the analysed TGSI token stream
+ * @param info    the analysis result to print
+ */
+static INLINE void
+dump_info(const struct tgsi_token *tokens,
+          struct lp_tgsi_info *info)
+{
+   unsigned index;
+   unsigned chan;
+
+   tgsi_dump(tokens, 0);
+
+   for (index = 0; index < info->num_texs; ++index) {
+      const struct lp_tgsi_texture_info *tex_info = &info->tex[index];
+
+      debug_printf("TEX[%u] =", index);
+      for (chan = 0; chan < 4; ++chan) {
+         const struct lp_tgsi_channel_info *chan_info =
+            &tex_info->coord[chan];
+
+         if (chan_info->file == TGSI_FILE_NULL) {
+            debug_printf(" _");
+         } else if (chan_info->file == TGSI_FILE_IMMEDIATE) {
+            /* index/value is a union: only the value is meaningful here. */
+            debug_printf(" %f", chan_info->value);
+         } else {
+            debug_printf(" %s[%u].%c",
+                         tgsi_file_names[chan_info->file],
+                         chan_info->index,
+                         "xyzw01"[chan_info->swizzle]);
+         }
+      }
+      debug_printf(", SAMP[%u], %s\n",
+                   tex_info->unit,
+                   tgsi_texture_names[tex_info->target]);
+   }
+
+   for (index = 0; index < PIPE_MAX_SHADER_OUTPUTS; ++index) {
+      for (chan = 0; chan < 4; ++chan) {
+         const struct lp_tgsi_channel_info *chan_info =
+            &info->output[index][chan];
+
+         if (chan_info->file == TGSI_FILE_NULL) {
+            /* Nothing known about this output channel. */
+            continue;
+         }
+
+         debug_printf("OUT[%u].%c = ", index, "xyzw"[chan]);
+         if (chan_info->file == TGSI_FILE_IMMEDIATE) {
+            debug_printf("%f", chan_info->value);
+         } else {
+            const char *file_name;
+
+            switch (chan_info->file) {
+            case TGSI_FILE_CONSTANT:
+               file_name = "CONST";
+               break;
+            case TGSI_FILE_INPUT:
+               file_name = "IN";
+               break;
+            default:
+               file_name = "???";
+               break;
+            }
+            debug_printf("%s[%u].%c",
+                         file_name,
+                         chan_info->index,
+                         "xyzw01"[chan_info->swizzle]);
+         }
+         debug_printf("\n");
+      }
+   }
+}
+
+
+/**
+ * Analyse a TGSI shader: describe its texture opcodes and detect any direct
+ * relationship between the outputs (in particular the output colors) and
+ * the shader inputs, constants and immediates.
+ *
+ * Only the main function body is analysed; the walk stops at the first
+ * END or BGNSUB opcode.
+ *
+ * @param tokens  the TGSI token stream to analyse
+ * @param info    overwritten with the result; zeroed first, so it needs no
+ *                prior initialisation
+ */
+void
+lp_build_tgsi_info(const struct tgsi_token *tokens,
+                   struct lp_tgsi_info *info)
+{
+   /*
+    * Placeholder for color buffers the shader does not write.
+    *
+    * info->cbuf[] keeps pointing at it after we return, so it must have
+    * static storage duration. Being static it is also zero-initialised,
+    * which is how undetermined channels are represented elsewhere in this
+    * file.
+    */
+   static const struct lp_tgsi_channel_info null_output[4];
+
+   struct tgsi_parse_context parse;
+   struct analysis_context ctx;
+   unsigned index;
+   unsigned chan;
+
+   memset(info, 0, sizeof *info);
+
+   tgsi_scan_shader(tokens, &info->base);
+
+   memset(&ctx, 0, sizeof ctx);
+   ctx.info = info;
+
+   tgsi_parse_init(&parse, tokens);
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *inst =
+               &parse.FullToken.FullInstruction;
+
+            if (inst->Instruction.Opcode == TGSI_OPCODE_END ||
+                inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+               /* We reached the end of main function body. */
+               goto finished;
+            }
+
+            analyse_instruction(&ctx, inst);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         {
+            unsigned size =
+               parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+
+            assert(size <= 4);
+            /* Never write past the four channels, even without asserts. */
+            if (size > Elements(ctx.imm[0])) {
+               size = Elements(ctx.imm[0]);
+            }
+
+            /* Immediates beyond the capacity of ctx.imm are not tracked. */
+            if (ctx.num_imms < Elements(ctx.imm)) {
+               for (chan = 0; chan < size; ++chan) {
+                  ctx.imm[ctx.num_imms][chan] =
+                     parse.FullToken.FullImmediate.u[chan].Float;
+               }
+               ++ctx.num_imms;
+            }
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+finished:
+
+   tgsi_parse_free(&parse);
+
+
+   /*
+    * Link the output color values.
+    */
+
+   for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) {
+      info->cbuf[index] = null_output;
+   }
+
+   for (index = 0; index < info->base.num_outputs; ++index) {
+      unsigned semantic_name = info->base.output_semantic_name[index];
+      unsigned semantic_index = info->base.output_semantic_index[index];
+
+      if (semantic_name == TGSI_SEMANTIC_COLOR &&
+          semantic_index < PIPE_MAX_COLOR_BUFS) {
+         info->cbuf[semantic_index] = info->output[index];
+      }
+   }
+
+   if (gallivm_debug & GALLIVM_DEBUG_TGSI) {
+      dump_info(tokens, info);
+   }
+}