summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_wm_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c770
1 files changed, 484 insertions, 286 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 9b1f54414bc..5390fd25849 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,8 +34,6 @@
#include "brw_context.h"
#include "brw_wm.h"
-#define SATURATE (1<<5)
-
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -46,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
return reg;
}
+
/* Payload R0:
*
* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
@@ -62,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
* R1.8 -- ?
*/
-
-static void emit_pixel_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask)
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask)
{
+ struct brw_compile *p = &c->func;
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+ struct brw_reg dst0_uw, dst1_uw;
+ brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ if (c->dispatch_width == 16) {
+ dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ } else {
+ dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ }
+
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
if (mask & WRITEMASK_X) {
brw_ADD(p,
- vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ dst0_uw,
stride(suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
}
if (mask & WRITEMASK_Y) {
brw_ADD(p,
- vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ dst1_uw,
stride(suboffset(r1_uw,5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ brw_pop_insn_state(p);
}
-
-static void emit_delta_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
@@ -120,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p,
}
}
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
@@ -148,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
}
-static void emit_pixel_w( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas)
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
+ struct brw_compile *p = &c->func;
+
/* Don't need this if all you are doing is interpolating color, for
* instance.
*/
@@ -167,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p,
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
/* Calc w */
- brw_math_16( p, dst[3],
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
+ if (c->dispatch_width == 16) {
+ brw_math_16(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ } else {
+ brw_math(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
}
-
-static void emit_linterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas )
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -201,12 +218,12 @@ static void emit_linterp( struct brw_compile *p,
}
-static void emit_pinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas,
- const struct brw_reg *w)
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -231,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p,
}
-static void emit_cinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -253,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p,
}
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask )
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
{
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -281,13 +298,86 @@ static void emit_frontfacing( struct brw_compile *p,
brw_set_predicate_control_flag_value(p, 0xff);
}
-static void emit_alu1( struct brw_compile *p,
- struct brw_instruction *(*func)(struct brw_compile *,
- struct brw_reg,
- struct brw_reg),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+void emit_alu1(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
GLuint i;
@@ -305,15 +395,15 @@ static void emit_alu1( struct brw_compile *p,
}
-static void emit_alu2( struct brw_compile *p,
- struct brw_instruction *(*func)(struct brw_compile *,
- struct brw_reg,
- struct brw_reg,
- struct brw_reg),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_alu2(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -331,12 +421,12 @@ static void emit_alu2( struct brw_compile *p,
}
-static void emit_mad( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_mad(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -351,26 +441,12 @@ static void emit_mad( struct brw_compile *p,
}
}
-static void emit_trunc( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
-{
- GLuint i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- brw_RNDZ(p, dst[i], arg0[i]);
- }
- }
-}
-
-static void emit_lrp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_lrp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -390,21 +466,24 @@ static void emit_lrp( struct brw_compile *p,
}
}
-static void emit_sop( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- GLuint cond,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_sop(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_MOV(p, dst[i], brw_imm_f(1.0));
- brw_set_predicate_control_flag_value(p, 0xff);
+ brw_pop_insn_state(p);
}
}
}
@@ -488,11 +567,11 @@ static void emit_cmp( struct brw_compile *p,
}
}
-static void emit_max( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_max(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -512,11 +591,11 @@ static void emit_max( struct brw_compile *p,
}
}
-static void emit_min( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_min(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -537,11 +616,11 @@ static void emit_min( struct brw_compile *p,
}
-static void emit_dp3( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dp3(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -559,11 +638,11 @@ static void emit_dp3( struct brw_compile *p,
}
-static void emit_dp4( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dp4(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -582,11 +661,11 @@ static void emit_dp4( struct brw_compile *p,
}
-static void emit_dph( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dph(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -605,11 +684,11 @@ static void emit_dph( struct brw_compile *p,
}
-static void emit_xpd( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_xpd(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -630,41 +709,68 @@ static void emit_xpd( struct brw_compile *p,
}
-static void emit_math1( struct brw_compile *p,
- GLuint function,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_math1(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
+ struct brw_compile *p = &c->func;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
assert(is_power_of_two(mask & WRITEMASK_XYZW));
+ /* If compressed, this will write message reg 2,3 from arg0.x's 16
+ * channels.
+ */
brw_MOV(p, brw_message_reg(2), arg0[0]);
/* Send two messages to perform all 16 operations:
*/
- brw_math_16(p,
- dst[dst_chan],
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[dst_chan],
+ function,
+ saturate,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
+ saturate,
+ 3,
brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+ }
+ brw_pop_insn_state(p);
}
-static void emit_math2( struct brw_compile *p,
- GLuint function,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
+void emit_math2(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
+ struct brw_compile *p = &c->func;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
@@ -675,183 +781,231 @@ static void emit_math2( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(2), arg0[0]);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+ }
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(3), arg1[0]);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ }
-
- /* Send two messages to perform all 16 operations:
- */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
dst[dst_chan],
function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ saturate,
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p,
- offset(dst[dst_chan],1),
- function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 4,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
+ /* Send two messages to perform all 16 operations:
+ */
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ saturate,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
brw_pop_insn_state(p);
}
-
-static void emit_tex( struct brw_wm_compile *c,
- const struct brw_wm_instruction *inst,
- struct brw_reg *dst,
- GLuint dst_flags,
- struct brw_reg *arg )
+void emit_tex(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler,
+ GLboolean shadow)
{
struct brw_compile *p = &c->func;
- GLuint msgLength, responseLength;
- GLuint i, nr;
+ struct brw_reg dst_retyped;
+ GLuint cur_mrf = 2, response_length;
+ GLuint i, nr_texcoords;
GLuint emit;
GLuint msg_type;
+ GLuint mrf_per_channel;
+ GLuint simd_mode;
+
+ if (c->dispatch_width == 16) {
+ mrf_per_channel = 2;
+ response_length = 8;
+ dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ } else {
+ mrf_per_channel = 1;
+ response_length = 4;
+ dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ }
/* How many input regs are there?
*/
- switch (inst->tex_idx) {
+ switch (tex_idx) {
case TEXTURE_1D_INDEX:
emit = WRITEMASK_X;
- nr = 1;
+ nr_texcoords = 1;
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
emit = WRITEMASK_XY;
- nr = 2;
+ nr_texcoords = 2;
break;
case TEXTURE_3D_INDEX:
case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
- nr = 3;
+ nr_texcoords = 3;
break;
default:
/* unexpected target */
abort();
}
- /* For shadow comparisons, we have to supply u,v,r. */
- if (inst->tex_shadow)
- nr = 3;
+ /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
+ if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8)
+ nr_texcoords = 3;
- msgLength = 1;
+ /* For shadow comparisons, we have to supply u,v,r. */
+ if (shadow)
+ nr_texcoords = 3;
- for (i = 0; i < nr; i++) {
+ /* Emit the texcoords. */
+ for (i = 0; i < nr_texcoords; i++) {
if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msgLength+1), arg[i]);
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
- brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
- msgLength += 2;
- }
-
- /* Fill in the cube map array index value. */
- if (BRW_IS_IGDNG(p->brw) && inst->tex_shadow) {
- brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
- msgLength += 2;
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
}
/* Fill in the shadow comparison reference value. */
- if (inst->tex_shadow) {
- brw_MOV(p, brw_message_reg(msgLength+1), arg[2]);
- msgLength += 2;
+ if (shadow) {
+ if (BRW_IS_IGDNG(p->brw)) {
+ /* Fill in the cube map array index value. */
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
+ } else if (c->dispatch_width == 8) {
+ /* Fill in the LOD bias value. */
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
+ }
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
}
- responseLength = 8; /* always */
-
if (BRW_IS_IGDNG(p->brw)) {
- if (inst->tex_shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
} else {
- if (inst->tex_shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ /* Note that G45 and older determines shadow compare and dispatch width
+ * from message length for most messages.
+ */
+ if (c->dispatch_width == 16 && shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
}
- brw_SAMPLE(p,
- retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ brw_SAMPLE(p,
+ dst_retyped,
1,
- retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- SURF_INDEX_TEXTURE(inst->tex_unit),
- inst->tex_unit, /* sampler */
- inst->writemask,
- msg_type,
- responseLength,
- msgLength,
- 0,
+ retype(depth_payload, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(sampler),
+ sampler,
+ dst_flags & WRITEMASK_XYZW,
+ msg_type,
+ response_length,
+ cur_mrf - 1,
+ 0,
1,
- BRW_SAMPLER_SIMD_MODE_SIMD16);
+ simd_mode);
}
-static void emit_txb( struct brw_wm_compile *c,
- const struct brw_wm_instruction *inst,
- struct brw_reg *dst,
- GLuint dst_flags,
- struct brw_reg *arg )
+void emit_txb(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler)
{
struct brw_compile *p = &c->func;
GLuint msgLength;
GLuint msg_type;
- /* Shadow ignored for txb.
+ GLuint mrf_per_channel;
+ GLuint response_length;
+ struct brw_reg dst_retyped;
+
+ /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
+ * samples, so we'll use the 16-wide instruction, leave the second halves
+ * undefined, and trust the execution mask to keep the undefined pixels
+ * from mattering.
*/
- switch (inst->tex_idx) {
+ if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) {
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ mrf_per_channel = 2;
+ dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+ response_length = 8;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
+ mrf_per_channel = 1;
+ dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+ response_length = 4;
+ }
+
+ /* Shadow ignored for txb. */
+ switch (tex_idx) {
case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), arg[1]);
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
break;
case TEXTURE_3D_INDEX:
case TEXTURE_CUBE_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), arg[1]);
- brw_MOV(p, brw_message_reg(6), arg[2]);
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
break;
default:
/* unexpected target */
abort();
}
- brw_MOV(p, brw_message_reg(8), arg[3]);
- msgLength = 9;
-
- if (BRW_IS_IGDNG(p->brw))
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
+ msgLength = 2 + 4 * mrf_per_channel - 1;
brw_SAMPLE(p,
- retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ dst_retyped,
1,
- retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- SURF_INDEX_TEXTURE(inst->tex_unit),
- inst->tex_unit, /* sampler */
- inst->writemask,
+ retype(depth_payload, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(sampler),
+ sampler,
+ dst_flags & WRITEMASK_XYZW,
msg_type,
- 8, /* responseLength */
+ response_length,
msgLength,
0,
1,
@@ -859,11 +1013,13 @@ static void emit_txb( struct brw_wm_compile *c,
}
-static void emit_lit( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+static void emit_lit(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
+ struct brw_compile *p = &c->func;
+
assert((mask & WRITEMASK_XW) == 0);
if (mask & WRITEMASK_Y) {
@@ -873,7 +1029,7 @@ static void emit_lit( struct brw_compile *p,
}
if (mask & WRITEMASK_Z) {
- emit_math2(p, BRW_MATH_FUNCTION_POW,
+ emit_math2(c, BRW_MATH_FUNCTION_POW,
&dst[2],
WRITEMASK_X | (mask & SATURATE),
&arg0[1],
@@ -918,6 +1074,20 @@ static void emit_kil( struct brw_wm_compile *c,
}
}
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
@@ -926,7 +1096,13 @@ static void fire_fb_write( struct brw_wm_compile *c,
GLuint eot )
{
struct brw_compile *p = &c->func;
-
+ struct brw_reg dst;
+
+ if (c->dispatch_width == 16)
+ dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ else
+ dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+
/* Pass through control information:
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
@@ -943,7 +1119,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* Send framebuffer write message: */
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
- retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ dst,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
target,
@@ -975,14 +1151,15 @@ static void emit_aa( struct brw_wm_compile *c,
* \param arg1 the pass-through depth value
* \param arg2 the shader-computed depth value
*/
-static void emit_fb_write( struct brw_wm_compile *c,
- struct brw_reg *arg0,
- struct brw_reg *arg1,
- struct brw_reg *arg2,
- GLuint target,
- GLuint eot)
+void emit_fb_write(struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
GLuint nr = 2;
GLuint channel;
@@ -994,30 +1171,37 @@ static void emit_fb_write( struct brw_wm_compile *c,
/* I don't really understand how this achieves the color interleave
* (ie RGBARGBA) in the result: [Do the saturation here]
*/
- {
- brw_push_insn_state(p);
-
- for (channel = 0; channel < 4; channel++) {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+ /* By setting the high bit of the MRF register number, we indicate
+ * that we want COMPR4 mode - instead of doing the usual destination
+ * + 1 for the second half we get destination + 4.
+ */
+ brw_MOV(p,
+ brw_message_reg(nr + channel + (1 << 7)),
+ arg0[channel]);
+ } else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
/* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
-
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p,
brw_message_reg(nr + channel),
arg0[channel]);
-
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p,
- brw_message_reg(nr + channel + 4),
- sechalf(arg0[channel]));
- }
- /* skip over the regs populated above:
- */
- nr += 8;
-
- brw_pop_insn_state(p);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+ }
}
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+ brw_pop_insn_state(p);
if (c->key.source_depth_to_render_target)
{
@@ -1067,7 +1251,7 @@ static void emit_fb_write( struct brw_wm_compile *c,
get_element_ud(brw_vec8_grf(1,0), 6),
brw_imm_ud(1<<26));
- jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
{
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
@@ -1081,7 +1265,6 @@ static void emit_fb_write( struct brw_wm_compile *c,
}
}
-
/**
* Move a GPR to scratch memory.
*/
@@ -1219,7 +1402,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Generated instructions for calculating triangle interpolants:
*/
case WM_PIXELXY:
- emit_pixel_xy(p, dst, dst_flags);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
@@ -1231,7 +1414,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case WM_PIXELW:
- emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
@@ -1268,6 +1451,14 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
+ case OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1281,7 +1472,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_TRUNC:
- emit_trunc(p, dst, dst_flags, args[0]);
+ emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
@@ -1308,27 +1499,27 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Higher math functions:
*/
case OPCODE_RCP:
- emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
break;
case OPCODE_RSQ:
- emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
break;
case OPCODE_SIN:
- emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
break;
case OPCODE_COS:
- emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
break;
case OPCODE_EX2:
- emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
break;
case OPCODE_LG2:
- emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
case OPCODE_SCS:
@@ -1336,13 +1527,13 @@ void brw_wm_emit( struct brw_wm_compile *c )
* fixup for 16-element execution.
*/
if (dst_flags & WRITEMASK_X)
- emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
if (dst_flags & WRITEMASK_Y)
- emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
break;
case OPCODE_POW:
- emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
break;
/* Comparisons:
@@ -1380,23 +1571,30 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_LIT:
- emit_lit(p, dst, dst_flags, args[0]);
+ emit_lit(c, dst, dst_flags, args[0]);
break;
/* Texturing operations:
*/
case OPCODE_TEX:
- emit_tex(c, inst, dst, dst_flags, args[0]);
+ emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+ inst->tex_idx, inst->tex_unit,
+ inst->tex_shadow);
break;
case OPCODE_TXB:
- emit_txb(c, inst, dst, dst_flags, args[0]);
+ emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+ inst->tex_idx, inst->tex_unit);
break;
case OPCODE_KIL:
emit_kil(c, args[0]);
break;
+ case OPCODE_KIL_NV:
+ emit_kil_nv(c);
+ break;
+
default:
_mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?