summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_vs_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c120
1 files changed, 93 insertions, 27 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 25b4ee85cb0..2084480e898 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -73,8 +73,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.curb_read_length = reg - 1;
-
-
/* Allocate input regs:
*/
c->nr_inputs = 0;
@@ -84,8 +82,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
reg++;
}
- }
-
+ }
/* Allocate outputs: TODO: could organize the non-position outputs
* to go straight into message regs.
@@ -196,6 +193,7 @@ static void unalias1( struct brw_vs_compile *c,
struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
func(c, tmp, arg0);
brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
}
else {
func(c, dst, arg0);
@@ -217,12 +215,38 @@ static void unalias2( struct brw_vs_compile *c,
struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
func(c, tmp, arg0, arg1);
brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
}
else {
func(c, dst, arg0, arg1);
}
}
+static void unalias3( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr) ||
+ (dst.file == arg2.file && dst.nr == arg2.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1, arg2);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1, arg2);
+ }
+}
+
static void emit_sop( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg arg0,
@@ -339,6 +363,7 @@ static void emit_math1( struct brw_vs_compile *c,
}
}
+
static void emit_math2( struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
@@ -370,7 +395,6 @@ static void emit_math2( struct brw_vs_compile *c,
release_tmp(c, tmp);
}
}
-
static void emit_exp_noalias( struct brw_vs_compile *c,
@@ -420,7 +444,7 @@ static void emit_exp_noalias( struct brw_vs_compile *c,
BRW_MATH_FUNCTION_EXP,
brw_writemask(dst, WRITEMASK_Z),
brw_swizzle1(arg0, 0),
- BRW_MATH_PRECISION_PARTIAL);
+ BRW_MATH_PRECISION_FULL);
}
if (dst.dw1.bits.writemask & WRITEMASK_W) {
@@ -521,8 +545,6 @@ static void emit_log_noalias( struct brw_vs_compile *c,
}
-
-
/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
*/
static void emit_dst_noalias( struct brw_vs_compile *c,
@@ -544,6 +566,7 @@ static void emit_dst_noalias( struct brw_vs_compile *c,
brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
}
+
static void emit_xpd( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg t,
@@ -554,7 +577,6 @@ static void emit_xpd( struct brw_compile *p,
}
-
static void emit_lit_noalias( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -595,8 +617,42 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
brw_ENDIF(p, if_insn);
}
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2)
+{
+ struct brw_compile *p = &c->func;
+ brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, arg2);
+ brw_MAC(p, dst, arg0, arg1);
+}
+/** 3 or 4-component vector normalization */
+static void emit_nrm( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ int num_comps)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* tmp = dot(arg0, arg0) */
+ if (num_comps == 3)
+ brw_DP3(p, tmp, arg0, arg0);
+ else
+ brw_DP4(p, tmp, arg0, arg0);
+
+ /* tmp = 1 / sqrt(tmp) */
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
+
+ /* dst = arg0 * tmp */
+ brw_MUL(p, dst, arg0, tmp);
+
+ release_tmp(c, tmp);
+}
/* TODO: relative addressing!
@@ -634,7 +690,6 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
-
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
@@ -728,8 +783,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
}
-
-
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
struct prog_src_register src )
@@ -801,8 +854,8 @@ static void emit_swz( struct brw_vs_compile *c,
}
-
-/* Post-vertex-program processing. Send the results to the URB.
+/**
+ * Post-vertex-program processing. Send the results to the URB.
*/
static void emit_vertex_write( struct brw_vs_compile *c)
{
@@ -817,7 +870,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
}
-
/* Build ndc coords */
if (!c->key.know_w_is_one) {
ndc = get_tmp(c);
@@ -848,7 +900,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
}
-
for (i = 0; i < c->key.nr_userclip; i++) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
@@ -856,7 +907,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
-
/* i965 clipping workaround:
* 1) Test for -ve rhw
* 2) If set,
@@ -888,14 +938,12 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
}
-
/* Emit the (interleaved) headers for the two vertices - an 8-reg
* of zeros followed by two sets of NDC coordinates:
*/
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
brw_MOV(p, offset(m0, 3), pos);
-
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -909,9 +957,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
1, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
-
}
+
static void
post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
{
@@ -1016,6 +1064,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
else
dst = get_dst(c, inst->DstReg);
+ if (inst->SaturateMode != SATURATE_OFF) {
+ _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
+ inst->SaturateMode);
+ }
+
switch (inst->Opcode) {
case OPCODE_ABS:
brw_MOV(p, dst, brw_abs(args[0]));
@@ -1035,6 +1088,12 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_DPH:
brw_DPH(p, dst, args[0], args[1]);
break;
+ case OPCODE_NRM3:
+ emit_nrm(c, dst, args[0], 3);
+ break;
+ case OPCODE_NRM4:
+ emit_nrm(c, dst, args[0], 4);
+ break;
case OPCODE_DST:
unalias2(c, dst, args[0], args[1], emit_dst_noalias);
break;
@@ -1062,6 +1121,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_LIT:
unalias1(c, dst, args[0], emit_lit_noalias);
break;
+ case OPCODE_LRP:
+ unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+ break;
case OPCODE_MAD:
brw_MOV(p, brw_acc_reg(), args[2]);
brw_MAC(p, dst, args[0], args[1]);
@@ -1102,7 +1164,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_SGT:
emit_sgt(p, dst, args[0], args[1]);
- break;
+ break;
case OPCODE_SLT:
emit_slt(p, dst, args[0], args[1]);
break;
@@ -1118,6 +1180,10 @@ void brw_vs_emit(struct brw_vs_compile *c )
*/
emit_swz(c, dst, inst->SrcReg[0] );
break;
+ case OPCODE_TRUNC:
+ /* round toward zero */
+ brw_RNDZ(p, dst, args[0]);
+ break;
case OPCODE_XPD:
emit_xpd(p, dst, args[0], args[1]);
break;
@@ -1136,7 +1202,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
brw_set_predicate_control_flag_value(p, 0xff);
- break;
+ break;
case OPCODE_CAL:
brw_set_access_mode(p, BRW_ALIGN_1);
brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
@@ -1145,7 +1211,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
get_addr_reg(stack_index), brw_imm_d(4));
inst->Data = &p->store[p->nr_insn];
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
+ break;
case OPCODE_RET:
brw_ADD(p, get_addr_reg(stack_index),
get_addr_reg(stack_index), brw_imm_d(-4));
@@ -1154,17 +1220,17 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_access_mode(p, BRW_ALIGN_16);
case OPCODE_END:
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
+ break;
case OPCODE_PRINT:
case OPCODE_BGNSUB:
case OPCODE_ENDSUB:
+ /* no-op instructions */
break;
default:
- _mesa_printf("Unsupported opcode %i (%s) in vertex shader\n",
- inst->Opcode, inst->Opcode < MAX_OPCODE ?
+ _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
+ inst->Opcode, inst->Opcode < MAX_OPCODE ?
_mesa_opcode_string(inst->Opcode) :
"unknown");
- break;
}
if ((inst->DstReg.File == PROGRAM_OUTPUT)