summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/i965/brw_wm_fp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/i965/brw_wm_fp.c')
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c1223
1 files changed, 0 insertions, 1223 deletions
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
deleted file mode 100644
index a65e16edec0..00000000000
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ /dev/null
@@ -1,1223 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "pipe/p_shader_tokens.h"
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_util.h"
-
-#include "brw_wm.h"
-#include "brw_debug.h"
-
-
-/***********************************************************************
- * Source regs
- */
-
-static struct brw_fp_src src_reg(GLuint file, GLuint idx)
-{
- struct brw_fp_src reg;
- reg.file = file;
- reg.index = idx;
- reg.swizzle = BRW_SWIZZLE_XYZW;
- reg.indirect = 0;
- reg.negate = 0;
- reg.abs = 0;
- return reg;
-}
-
-static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
-{
- return src_reg(dst.file, dst.index);
-}
-
-static struct brw_fp_src src_undef( void )
-{
- return src_reg(TGSI_FILE_NULL, 0);
-}
-
-static GLboolean src_is_undef(struct brw_fp_src src)
-{
- return src.file == TGSI_FILE_NULL;
-}
-
-static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
-{
- unsigned swz = reg.swizzle;
-
- reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
- BRW_GET_SWZ(swz, y) << 2 |
- BRW_GET_SWZ(swz, z) << 4 |
- BRW_GET_SWZ(swz, w) << 6 );
-
- return reg;
-}
-
-static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
-{
- return src_swizzle(reg, x, x, x, x);
-}
-
-static struct brw_fp_src src_abs( struct brw_fp_src src )
-{
- src.negate = 0;
- src.abs = 1;
- return src;
-}
-
-static struct brw_fp_src src_negate( struct brw_fp_src src )
-{
- src.negate = 1;
- src.abs = 0;
- return src;
-}
-
-
-static int match_or_expand_immediate( const float *v,
- unsigned nr,
- float *v2,
- unsigned *nr2,
- unsigned *swizzle )
-{
- unsigned i, j;
-
- *swizzle = 0;
-
- for (i = 0; i < nr; i++) {
- boolean found = FALSE;
-
- for (j = 0; j < *nr2 && !found; j++) {
- if (v[i] == v2[j]) {
- *swizzle |= j << (i * 2);
- found = TRUE;
- }
- }
-
- if (!found) {
- if (*nr2 >= 4)
- return FALSE;
-
- v2[*nr2] = v[i];
- *swizzle |= *nr2 << (i * 2);
- (*nr2)++;
- }
- }
-
- return TRUE;
-}
-
-
-
-/* Internally generated immediates: overkill...
- */
-static struct brw_fp_src src_imm( struct brw_wm_compile *c,
- const GLfloat *v,
- unsigned nr)
-{
- unsigned i, j;
- unsigned swizzle;
-
- /* Could do a first pass where we examine all existing immediates
- * without expanding.
- */
-
- for (i = 0; i < c->nr_immediates; i++) {
- if (match_or_expand_immediate( v,
- nr,
- c->immediate[i].v,
- &c->immediate[i].nr,
- &swizzle ))
- goto out;
- }
-
- if (c->nr_immediates < Elements(c->immediate)) {
- i = c->nr_immediates++;
- if (match_or_expand_immediate( v,
- nr,
- c->immediate[i].v,
- &c->immediate[i].nr,
- &swizzle ))
- goto out;
- }
-
- c->error = 1;
- return src_undef();
-
-out:
- /* Make sure that all referenced elements are from this immediate.
- * Has the effect of making size-one immediates into scalars.
- */
- for (j = nr; j < 4; j++)
- swizzle |= (swizzle & 0x3) << (j * 2);
-
- return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
- BRW_GET_SWZ(swizzle, X),
- BRW_GET_SWZ(swizzle, Y),
- BRW_GET_SWZ(swizzle, Z),
- BRW_GET_SWZ(swizzle, W) );
-}
-
-
-
-static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
- GLfloat f )
-{
- return src_imm(c, &f, 1);
-}
-
-static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
- GLfloat x,
- GLfloat y,
- GLfloat z,
- GLfloat w)
-{
- GLfloat f[4] = {x,y,z,w};
- return src_imm(c, f, 4);
-}
-
-
-
-/***********************************************************************
- * Dest regs
- */
-
-static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
-{
- struct brw_fp_dst reg;
- reg.file = file;
- reg.index = idx;
- reg.writemask = BRW_WRITEMASK_XYZW;
- reg.indirect = 0;
- reg.saturate = 0;
- return reg;
-}
-
-static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
-{
- reg.writemask &= mask;
- return reg;
-}
-
-static struct brw_fp_dst dst_undef( void )
-{
- return dst_reg(TGSI_FILE_NULL, 0);
-}
-
-static boolean dst_is_undef( struct brw_fp_dst dst )
-{
- return dst.file == TGSI_FILE_NULL;
-}
-
-static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
-{
- reg.saturate = flag;
- return reg;
-}
-
-static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
-{
- int bit = ffs( ~c->fp_temp );
-
- if (!bit) {
- debug_printf("%s: out of temporaries\n", __FILE__);
- }
-
- c->fp_temp |= 1<<(bit-1);
- return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
-}
-
-
-static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
-{
- c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
-}
-
-
-/***********************************************************************
- * Instructions
- */
-
-static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
-{
- return &c->fp_instructions[c->nr_fp_insns++];
-}
-
-static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
- GLuint op,
- struct brw_fp_dst dest,
- GLuint tex_unit,
- GLuint target,
- GLuint sampler,
- struct brw_fp_src src0,
- struct brw_fp_src src1,
- struct brw_fp_src src2 )
-{
- struct brw_fp_instruction *inst = get_fp_inst(c);
-
- if (tex_unit || target)
- assert(op == TGSI_OPCODE_TXP ||
- op == TGSI_OPCODE_TXB ||
- op == TGSI_OPCODE_TEX ||
- op == WM_FB_WRITE);
-
- inst->opcode = op;
- inst->dst = dest;
- inst->tex_unit = tex_unit;
- inst->target = target;
- inst->sampler = sampler;
- inst->src[0] = src0;
- inst->src[1] = src1;
- inst->src[2] = src2;
-
- return inst;
-}
-
-
-static INLINE void emit_op3(struct brw_wm_compile *c,
- GLuint op,
- struct brw_fp_dst dest,
- struct brw_fp_src src0,
- struct brw_fp_src src1,
- struct brw_fp_src src2 )
-{
- emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2);
-}
-
-
-static INLINE void emit_op2(struct brw_wm_compile *c,
- GLuint op,
- struct brw_fp_dst dest,
- struct brw_fp_src src0,
- struct brw_fp_src src1)
-{
- emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef());
-}
-
-static INLINE void emit_op1(struct brw_wm_compile *c,
- GLuint op,
- struct brw_fp_dst dest,
- struct brw_fp_src src0)
-{
- emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
-}
-
-static INLINE void emit_op0(struct brw_wm_compile *c,
- GLuint op,
- struct brw_fp_dst dest)
-{
- emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef());
-}
-
-
-
-/* Many opcodes produce the same value across all the result channels.
- * We'd rather not have to support that splatting in the opcode implementations,
- * and brw_wm_pass*.c wants to optimize them out by shuffling references around
- * anyway. We can easily get both by emitting the opcode to one channel, and
- * then MOVing it to the others, which brw_wm_pass*.c already understands.
- */
-static void emit_scalar_insn(struct brw_wm_compile *c,
- unsigned opcode,
- struct brw_fp_dst dst,
- struct brw_fp_src src0,
- struct brw_fp_src src1,
- struct brw_fp_src src2 )
-{
- unsigned first_chan = ffs(dst.writemask) - 1;
- unsigned first_mask = 1 << first_chan;
-
- if (dst.writemask == 0)
- return;
-
- emit_op3( c, opcode,
- dst_mask(dst, first_mask),
- src0, src1, src2 );
-
- if (dst.writemask != first_mask) {
- emit_op1(c, TGSI_OPCODE_MOV,
- dst_mask(dst, ~first_mask),
- src_scalar(src_reg_from_dst(dst), first_chan));
- }
-}
-
-
-/***********************************************************************
- * Special instructions for interpolation and other tasks
- */
-
-static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
-{
- if (src_is_undef(c->fp_pixel_xy)) {
- struct brw_fp_dst pixel_xy = get_temp(c);
- struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
-
-
- /* Emit the out calculations, and hold onto the results. Use
- * two instructions as a temporary is required.
- */
- /* pixel_xy.xy = PIXELXY payload[0];
- */
- emit_op1(c,
- WM_PIXELXY,
- dst_mask(pixel_xy, BRW_WRITEMASK_XY),
- payload_r0_depth);
-
- c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
- }
-
- return c->fp_pixel_xy;
-}
-
-static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
-{
- if (src_is_undef(c->fp_delta_xy)) {
- struct brw_fp_dst delta_xy = get_temp(c);
- struct brw_fp_src pixel_xy = get_pixel_xy(c);
- struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
-
- /* deltas.xy = DELTAXY pixel_xy, payload[0]
- */
- emit_op3(c,
- WM_DELTAXY,
- dst_mask(delta_xy, BRW_WRITEMASK_XY),
- pixel_xy,
- payload_r0_depth,
- src_undef());
-
- c->fp_delta_xy = src_reg_from_dst(delta_xy);
- }
-
- return c->fp_delta_xy;
-}
-
-static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
-{
- if (src_is_undef(c->fp_pixel_w)) {
- struct brw_fp_dst pixel_w = get_temp(c);
- struct brw_fp_src deltas = get_delta_xy(c);
-
- /* XXX: assuming position is always first -- valid?
- */
- struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
-
- /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
- */
- emit_op3(c,
- WM_PIXELW,
- dst_mask(pixel_w, BRW_WRITEMASK_W),
- interp_wpos,
- deltas,
- src_undef());
-
-
- c->fp_pixel_w = src_reg_from_dst(pixel_w);
- }
-
- return c->fp_pixel_w;
-}
-
-
-/***********************************************************************
- * Emit INTERP instructions ahead of first use of each attrib.
- */
-
-static void emit_interp( struct brw_wm_compile *c,
- GLuint idx,
- GLuint semantic,
- GLuint interp_mode )
-{
- struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
- struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
- struct brw_fp_src deltas = get_delta_xy(c);
-
- /* Need to use PINTERP on attributes which have been
- * multiplied by 1/W in the SF program, and LINTERP on those
- * which have not:
- */
- switch (semantic) {
- case TGSI_SEMANTIC_POSITION:
- /* Have to treat wpos.xy specially:
- */
- emit_op1(c,
- WM_WPOSXY,
- dst_mask(dst, BRW_WRITEMASK_XY),
- get_pixel_xy(c));
-
- /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
- */
- emit_op2(c,
- WM_LINTERP,
- dst_mask(dst, BRW_WRITEMASK_ZW),
- interp,
- deltas);
- break;
-
- case TGSI_SEMANTIC_COLOR:
- if (c->key.flat_shade) {
- emit_op1(c,
- WM_CINTERP,
- dst,
- interp);
- }
- else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
- emit_op2(c,
- WM_LINTERP,
- dst,
- interp,
- deltas);
- }
- else {
- emit_op3(c,
- WM_PINTERP,
- dst,
- interp,
- deltas,
- get_pixel_w(c));
- }
-
- break;
-
- case TGSI_SEMANTIC_FOG:
- /* Interpolate the fog coordinate */
- emit_op3(c,
- WM_PINTERP,
- dst_mask(dst, BRW_WRITEMASK_X),
- interp,
- deltas,
- get_pixel_w(c));
-
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_YZ),
- src_imm1f(c, 0.0));
-
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_W),
- src_imm1f(c, 1.0));
- break;
-
- case TGSI_SEMANTIC_FACE:
- /* XXX review/test this case */
- emit_op0(c,
- WM_FRONTFACING,
- dst_mask(dst, BRW_WRITEMASK_X));
-
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_YZ),
- src_imm1f(c, 0.0));
-
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_W),
- src_imm1f(c, 1.0));
- break;
-
- case TGSI_SEMANTIC_PSIZE:
- /* XXX review/test this case */
- emit_op3(c,
- WM_PINTERP,
- dst_mask(dst, BRW_WRITEMASK_XY),
- interp,
- deltas,
- get_pixel_w(c));
-
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_Z),
- src_imm1f(c, 0.0f));
-
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_W),
- src_imm1f(c, 1.0f));
- break;
-
- default:
- switch (interp_mode) {
- case TGSI_INTERPOLATE_CONSTANT:
- emit_op1(c,
- WM_CINTERP,
- dst,
- interp);
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- emit_op2(c,
- WM_LINTERP,
- dst,
- interp,
- deltas);
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- emit_op3(c,
- WM_PINTERP,
- dst,
- interp,
- deltas,
- get_pixel_w(c));
- break;
- }
- break;
- }
-}
-
-
-/***********************************************************************
- * Expand various instructions here to simpler forms.
- */
-static void precalc_dst( struct brw_wm_compile *c,
- struct brw_fp_dst dst,
- struct brw_fp_src src0,
- struct brw_fp_src src1 )
-{
- if (dst.writemask & BRW_WRITEMASK_Y) {
- /* dst.y = mul src0.y, src1.y
- */
- emit_op2(c,
- TGSI_OPCODE_MUL,
- dst_mask(dst, BRW_WRITEMASK_Y),
- src0,
- src1);
- }
-
- if (dst.writemask & BRW_WRITEMASK_XZ) {
- /* dst.z = mov src0.zzzz
- */
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_Z),
- src_scalar(src0, Z));
-
- /* dst.x = imm1f(1.0)
- */
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
- src_imm1f(c, 1.0));
- }
- if (dst.writemask & BRW_WRITEMASK_W) {
- /* dst.w = mov src1.w
- */
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_W),
- src1);
- }
-}
-
-
-static void precalc_lit( struct brw_wm_compile *c,
- struct brw_fp_dst dst,
- struct brw_fp_src src0 )
-{
- if (dst.writemask & BRW_WRITEMASK_XW) {
- /* dst.xw = imm(1.0f)
- */
- emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
- src_imm1f(c, 1.0f));
- }
-
- if (dst.writemask & BRW_WRITEMASK_YZ) {
- emit_op1(c,
- TGSI_OPCODE_LIT,
- dst_mask(dst, BRW_WRITEMASK_YZ),
- src0);
- }
-}
-
-
-/**
- * Some TEX instructions require extra code, cube map coordinate
- * normalization, or coordinate scaling for RECT textures, etc.
- * This function emits those extra instructions and the TEX
- * instruction itself.
- */
-static void precalc_tex( struct brw_wm_compile *c,
- struct brw_fp_dst dst,
- unsigned target,
- unsigned unit,
- struct brw_fp_src src0,
- struct brw_fp_src sampler )
-{
- struct brw_fp_src coord;
- struct brw_fp_dst tmp = dst_undef();
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- /* Cubemap: find longest component of coord vector and normalize
- * it.
- */
- if (target == TGSI_TEXTURE_CUBE) {
- struct brw_fp_src tmpsrc;
-
- tmp = get_temp(c);
- tmpsrc = src_reg_from_dst(tmp);
-
- /* tmp = abs(src0) */
- emit_op1(c,
- TGSI_OPCODE_MOV,
- tmp,
- src_abs(src0));
-
- /* tmp.X = MAX(tmp.X, tmp.Y) */
- emit_op2(c, TGSI_OPCODE_MAX,
- dst_mask(tmp, BRW_WRITEMASK_X),
- src_scalar(tmpsrc, X),
- src_scalar(tmpsrc, Y));
-
- /* tmp.X = MAX(tmp.X, tmp.Z) */
- emit_op2(c, TGSI_OPCODE_MAX,
- dst_mask(tmp, BRW_WRITEMASK_X),
- tmpsrc,
- src_scalar(tmpsrc, Z));
-
- /* tmp.X = 1 / tmp.X */
- emit_op1(c, TGSI_OPCODE_RCP,
- dst_mask(tmp, BRW_WRITEMASK_X),
- tmpsrc);
-
- /* tmp = src0 * tmp.xxxx */
- emit_op2(c, TGSI_OPCODE_MUL,
- tmp,
- src0,
- src_scalar(tmpsrc, X));
-
- coord = tmpsrc;
- }
- else if (target == TGSI_TEXTURE_RECT ||
- target == TGSI_TEXTURE_SHADOWRECT) {
- /* XXX: need a mechanism for internally generated constants.
- */
- coord = src0;
- }
- else {
- coord = src0;
- }
-
- /* Need to emit YUV texture conversions by hand. Probably need to
- * do this here - the alternative is in brw_wm_emit.c, but the
- * conversion requires allocating a temporary variable which we
- * don't have the facility to do that late in the compilation.
- */
- if (c->key.yuvtex_mask & (1 << unit)) {
- /* convert ycbcr to RGBA */
- GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
- struct brw_fp_dst tmp = get_temp(c);
- struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
- struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
- struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
-
- /* tmp = TEX ...
- */
- emit_tex_op(c,
- TGSI_OPCODE_TEX,
- dst_saturate(tmp, dst.saturate),
- unit,
- target,
- sampler.index,
- coord,
- src_undef(),
- src_undef());
-
- /* tmp.xyz = ADD TMP, C0
- */
- emit_op2(c, TGSI_OPCODE_ADD,
- dst_mask(tmp, BRW_WRITEMASK_XYZ),
- tmpsrc,
- C0);
-
- /* YUV.y = MUL YUV.y, C0.w
- */
- emit_op2(c, TGSI_OPCODE_MUL,
- dst_mask(tmp, BRW_WRITEMASK_Y),
- tmpsrc,
- src_scalar(C0, W));
-
- /*
- * if (UV swaped)
- * RGB.xyz = MAD YUV.zzx, C1, YUV.y
- * else
- * RGB.xyz = MAD YUV.xxz, C1, YUV.y
- */
-
- emit_op3(c, TGSI_OPCODE_MAD,
- dst_mask(dst, BRW_WRITEMASK_XYZ),
- ( swap_uv ?
- src_swizzle(tmpsrc, Z,Z,X,X) :
- src_swizzle(tmpsrc, X,X,Z,Z)),
- C1,
- src_scalar(tmpsrc, Y));
-
- /* RGB.y = MAD YUV.z, C1.w, RGB.y
- */
- emit_op3(c,
- TGSI_OPCODE_MAD,
- dst_mask(dst, BRW_WRITEMASK_Y),
- src_scalar(tmpsrc, Z),
- src_scalar(C1, W),
- src_scalar(src_reg_from_dst(dst), Y));
-
- release_temp(c, tmp);
- }
- else {
- /* ordinary RGBA tex instruction */
- emit_tex_op(c,
- TGSI_OPCODE_TEX,
- dst,
- unit,
- target,
- sampler.index,
- coord,
- src_undef(),
- src_undef());
- }
-
- /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
- * generating shader variants in mesa state tracker.
- */
-
- /* Release this temp if we ended up allocating it:
- */
- if (!dst_is_undef(tmp))
- release_temp(c, tmp);
-}
-
-
-/**
- * Check if the given TXP instruction really needs the divide-by-W step.
- */
-static GLboolean projtex( struct brw_wm_compile *c,
- unsigned target,
- struct brw_fp_src src )
-{
- /* Only try to detect the simplest cases. Could detect (later)
- * cases where we are trying to emit code like RCP {1.0}, MUL x,
- * {1.0}, and so on.
- *
- * More complex cases than this typically only arise from
- * user-provided fragment programs anyway:
- */
- if (target == TGSI_TEXTURE_CUBE)
- return GL_FALSE; /* ut2004 gun rendering !?! */
-
- if (src.file == TGSI_FILE_INPUT &&
- BRW_GET_SWZ(src.swizzle, W) == W &&
- c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
- return GL_FALSE;
-
- return GL_TRUE;
-}
-
-
-/**
- * Emit code for TXP.
- */
-static void precalc_txp( struct brw_wm_compile *c,
- struct brw_fp_dst dst,
- unsigned target,
- unsigned unit,
- struct brw_fp_src src0,
- struct brw_fp_src sampler )
-{
- if (projtex(c, target, src0)) {
- struct brw_fp_dst tmp = get_temp(c);
-
- /* tmp0.w = RCP inst.arg[0][3]
- */
- emit_op1(c,
- TGSI_OPCODE_RCP,
- dst_mask(tmp, BRW_WRITEMASK_W),
- src_scalar(src0, W));
-
- /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
- */
- emit_op2(c,
- TGSI_OPCODE_MUL,
- dst_mask(tmp, BRW_WRITEMASK_XYZ),
- src0,
- src_scalar(src_reg_from_dst(tmp), W));
-
- /* dst = TEX tmp0
- */
- precalc_tex(c,
- dst,
- target,
- unit,
- src_reg_from_dst(tmp),
- sampler );
-
- release_temp(c, tmp);
- }
- else
- {
- /* dst = TEX src0
- */
- precalc_tex(c, dst, target, unit, src0, sampler);
- }
-}
-
-
-/* XXX: note this returns a src_reg.
- */
-static struct brw_fp_src
-find_output_by_semantic( struct brw_wm_compile *c,
- unsigned semantic,
- unsigned index )
-{
- const struct tgsi_shader_info *info = &c->fp->info;
- unsigned i;
-
- for (i = 0; i < info->num_outputs; i++)
- if (info->output_semantic_name[i] == semantic &&
- info->output_semantic_index[i] == index)
- return src_reg( TGSI_FILE_OUTPUT, i );
-
- /* If not found, return some arbitrary immediate value:
- *
- * XXX: this is a good idea but immediates are up generating extra
- * curbe entries atm, as they would have in the original driver.
- */
- return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
-}
-
-
-static void emit_fb_write( struct brw_wm_compile *c )
-{
- struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
- struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
- GLuint i;
-
-
- outdepth = src_scalar(outdepth, Z);
-
- for (i = 0 ; i < c->key.nr_cbufs; i++) {
- struct brw_fp_src outcolor;
-
- outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
-
- /* Use emit_tex_op so that we can specify the inst->target
- * field, which is abused to contain the FB write target and the
- * EOT marker
- */
- emit_tex_op(c, WM_FB_WRITE,
- dst_undef(),
- (i == c->key.nr_cbufs - 1), /* EOT */
- i,
- 0, /* no sampler */
- outcolor,
- payload_r0_depth,
- outdepth);
- }
-}
-
-
-static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
- const struct tgsi_full_dst_register *dst,
- unsigned saturate )
-{
- struct brw_fp_dst out;
-
- out.file = dst->Register.File;
- out.index = dst->Register.Index;
- out.writemask = dst->Register.WriteMask;
- out.indirect = dst->Register.Indirect;
- out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
-
- if (out.indirect) {
- assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
- assert(dst->Indirect.Index == 0);
- }
-
- return out;
-}
-
-
-static struct brw_fp_src translate_src( struct brw_wm_compile *c,
- const struct tgsi_full_src_register *src )
-{
- struct brw_fp_src out;
-
- out.file = src->Register.File;
- out.index = src->Register.Index;
- out.indirect = src->Register.Indirect;
-
- out.swizzle = ((src->Register.SwizzleX << 0) |
- (src->Register.SwizzleY << 2) |
- (src->Register.SwizzleZ << 4) |
- (src->Register.SwizzleW << 6));
-
- switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
- case TGSI_UTIL_SIGN_CLEAR:
- out.abs = 1;
- out.negate = 0;
- break;
-
- case TGSI_UTIL_SIGN_SET:
- out.abs = 1;
- out.negate = 1;
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- out.abs = 0;
- out.negate = 1;
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- default:
- out.abs = 0;
- out.negate = 0;
- break;
- }
-
- if (out.indirect) {
- assert(src->Indirect.File == TGSI_FILE_ADDRESS);
- assert(src->Indirect.Index == 0);
- }
-
- return out;
-}
-
-
-
-static void emit_insn( struct brw_wm_compile *c,
- const struct tgsi_full_instruction *inst )
-{
- unsigned opcode = inst->Instruction.Opcode;
- struct brw_fp_dst dst;
- struct brw_fp_src src[3];
- int i;
-
- dst = translate_dst( c, &inst->Dst[0],
- inst->Instruction.Saturate );
-
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
- src[i] = translate_src( c, &inst->Src[i] );
-
- switch (opcode) {
- case TGSI_OPCODE_ABS:
- emit_op1(c, TGSI_OPCODE_MOV,
- dst,
- src_abs(src[0]));
- break;
-
- case TGSI_OPCODE_SUB:
- emit_op2(c, TGSI_OPCODE_ADD,
- dst,
- src[0],
- src_negate(src[1]));
- break;
-
- case TGSI_OPCODE_SCS:
- emit_op1(c, TGSI_OPCODE_SCS,
- dst_mask(dst, BRW_WRITEMASK_XY),
- src[0]);
- break;
-
- case TGSI_OPCODE_DST:
- precalc_dst(c, dst, src[0], src[1]);
- break;
-
- case TGSI_OPCODE_LIT:
- precalc_lit(c, dst, src[0]);
- break;
-
- case TGSI_OPCODE_TEX:
- precalc_tex(c, dst,
- inst->Texture.Texture,
- src[1].index, /* use sampler unit for tex idx */
- src[0], /* coord */
- src[1]); /* sampler */
- break;
-
- case TGSI_OPCODE_TXP:
- precalc_txp(c, dst,
- inst->Texture.Texture,
- src[1].index, /* use sampler unit for tex idx */
- src[0], /* coord */
- src[1]); /* sampler */
- break;
-
- case TGSI_OPCODE_TXB:
- /* XXX: TXB not done
- */
- precalc_tex(c, dst,
- inst->Texture.Texture,
- src[1].index, /* use sampler unit for tex idx*/
- src[0],
- src[1]);
- break;
-
- case TGSI_OPCODE_XPD:
- emit_op2(c, TGSI_OPCODE_XPD,
- dst_mask(dst, BRW_WRITEMASK_XYZ),
- src[0],
- src[1]);
- break;
-
- case TGSI_OPCODE_KIL:
- emit_op1(c, TGSI_OPCODE_KIL,
- dst_mask(dst_undef(), 0),
- src[0]);
- break;
-
- case TGSI_OPCODE_END:
- emit_fb_write(c);
- break;
- default:
- if (!c->key.has_flow_control &&
- brw_wm_is_scalar_result(opcode))
- emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
- else
- emit_op3(c, opcode, dst, src[0], src[1], src[2]);
- break;
- }
-}
-
-/**
- * Initial pass for fragment program code generation.
- * This function is used by both the GLSL and non-GLSL paths.
- */
-int brw_wm_pass_fp( struct brw_wm_compile *c )
-{
- struct brw_fragment_shader *fs = c->fp;
- struct tgsi_parse_context parse;
- struct tgsi_full_instruction *inst;
- struct tgsi_full_declaration *decl;
- const float *imm;
- GLuint size;
- GLuint i;
-
- if (BRW_DEBUG & DEBUG_WM) {
- debug_printf("pre-fp:\n");
- tgsi_dump(fs->tokens, 0);
- }
-
- c->fp_pixel_xy = src_undef();
- c->fp_delta_xy = src_undef();
- c->fp_pixel_w = src_undef();
- c->nr_fp_insns = 0;
- c->nr_immediates = 0;
-
-
- /* Loop over all instructions doing assorted simplifications and
- * transformations.
- */
- tgsi_parse_init( &parse, fs->tokens );
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /* Turn intput declarations into special WM_* instructions.
- *
- * XXX: For non-branching shaders, consider deferring variable
- * initialization as late as possible to minimize register
- * usage. This is how the original BRW driver worked.
- *
- * In a branching shader, must preamble instructions at decl
- * time, as instruction order in the shader does not
- * correspond to the order instructions are executed in the
- * wild.
- *
- * This is where special instructions such as WM_CINTERP,
- * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
- * compute shader inputs from the payload registers and pixel
- * position.
- */
- decl = &parse.FullToken.FullDeclaration;
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- unsigned attrib;
-
- first = decl->Range.First;
- last = decl->Range.Last;
- mask = decl->Declaration.UsageMask;
-
- for (attrib = first; attrib <= last; attrib++) {
- emit_interp(c,
- attrib,
- decl->Semantic.Name,
- decl->Declaration.Interpolate );
- }
- }
-
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* Unlike VS programs we can probably manage fine encoding
- * immediate values directly into the emitted EU
- * instructions, as we probably only need to reference one
- * float value per instruction. Just save the data for now
- * and use directly later.
- */
- i = c->nr_immediates++;
- imm = &parse.FullToken.FullImmediate.u[i].Float;
- size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
-
- if (c->nr_immediates >= BRW_WM_MAX_CONST)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- for (i = 0; i < size; i++)
- c->immediate[c->nr_immediates].v[i] = imm[i];
-
- for (; i < 4; i++)
- c->immediate[c->nr_immediates].v[i] = 0.0;
-
- c->immediate[c->nr_immediates].nr = size;
- c->nr_immediates++;
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- inst = &parse.FullToken.FullInstruction;
- emit_insn(c, inst);
- break;
- }
- }
-
- if (BRW_DEBUG & DEBUG_WM) {
- brw_wm_print_fp_program( c, "pass_fp" );
- debug_printf("\n");
- }
-
- return c->error;
-}
-